In [15]:

import json
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score


In [16]:
def parse_json(json_file):
    """Read the pipeline configuration from a JSON file.

    Parameters
    ----------
    json_file : str or path-like
        Location of the JSON configuration file.

    Returns
    -------
    tuple
        ``(dataset_name, target_variable, prediction_type, feature_handling,
        feature_reduction_method, selected_algorithms, hyperparameters)``;
        each element is the value stored under the top-level key of the
        same name, returned unchanged.

    Raises
    ------
    KeyError
        If one of the seven expected top-level keys is absent.
    json.JSONDecodeError
        If the file content is not valid JSON.
    """
    # JSON is UTF-8 by specification. Being explicit avoids depending on the
    # platform's default text encoding, which can mis-decode or reject
    # non-ASCII characters on non-UTF-8 locales.
    with open(json_file, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # The file handle is no longer needed once the document is parsed.
    # Extract relevant information from JSON
    dataset_name = json_data['dataset_name']
    target_variable = json_data['target_variable']
    prediction_type = json_data['prediction_type']
    feature_handling = json_data['feature_handling']
    feature_reduction_method = json_data['feature_reduction_method']
    selected_algorithms = json_data['selected_algorithms']
    hyperparameters = json_data['hyperparameters']
    return (
        dataset_name,
        target_variable,
        prediction_type,
        feature_handling,
        feature_reduction_method,
        selected_algorithms,
        hyperparameters,
    )

    
def load_and_preprocess_data(csv_file, target_variable=None, test_size=0.2,
                             random_state=None):
    """Load a CSV, drop incomplete rows, split, and scale the features.

    Parameters
    ----------
    csv_file : str or path-like
        CSV file to read with ``pandas.read_csv``.
    target_variable : str, optional
        Name of the column to predict. When omitted, the function falls back
        to a notebook-level global called ``target_variable`` so that
        existing single-argument calls keep working.
    test_size : float, default 0.2
        Forwarded to ``train_test_split``.
    random_state : int or None, default None
        Forwarded to ``train_test_split``; pass an integer to make the split
        reproducible across runs.

    Returns
    -------
    tuple
        ``(X_train_scaled, X_test_scaled, y_train, y_test)``. The two feature
        sets are the outputs of ``StandardScaler``; the targets are the
        unscaled splits of the target column.

    Raises
    ------
    ValueError
        If no target column name is given and no global fallback exists.
    KeyError
        If the target column is not present in the CSV.
    """
    if target_variable is None:
        # Backward compatibility: the original implementation read the
        # column name from hidden notebook state instead of an argument.
        target_variable = globals().get('target_variable')
        if target_variable is None:
            raise ValueError(
                "target_variable must be passed explicitly or defined as a "
                "module-level variable before calling load_and_preprocess_data"
            )

    # Load dataset and drop every row that contains a missing value.
    data = pd.read_csv(csv_file).dropna()

    # NOTE: no categorical encoding is performed; every non-target column is
    # handed to the scaler as-is.
    # Split data into features and target variable
    X = data.drop(columns=[target_variable])
    y = data[target_variable]

    # Split data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Fit the scaler on the training split only, then apply the same
    # transformation to the test split.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_test_scaled, y_train, y_test


In [17]:
def feature_reduction(X_train, X_test, y_train, feature_reduction_method):
    """Optionally reduce the feature set using a fitted model selector.

    Parameters
    ----------
    X_train, X_test
        Training and test feature sets.
    y_train
        Training targets, used to fit the selector.
    feature_reduction_method
        When equal to ``'tree_based'``, features are selected with
        ``SelectFromModel`` wrapping a ``RandomForestRegressor``. Any other
        value leaves the inputs untouched.

    Returns
    -------
    tuple
        ``(X_train, X_test)`` after selection, or the original objects when
        no reduction is applied.
    """
    wants_tree_selection = feature_reduction_method == 'tree_based'
    if not wants_tree_selection:
        # No feature reduction: hand the inputs back unchanged.
        return X_train, X_test

    # The selector is fitted on the training data only and then reused to
    # transform the test data.
    forest_selector = SelectFromModel(RandomForestRegressor())
    reduced_train = forest_selector.fit_transform(X_train, y_train)
    reduced_test = forest_selector.transform(X_test)
    return reduced_train, reduced_test