Fitting ML Models and storing the predictions

In [1]:
import pickle
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor

# Step 1: Load selected_feature_data
with open("Data/selected_feature_data.pkl", "rb") as f:
    selected_feature_data = pickle.load(f)

# Step 2: Define models
models = {
    "LinearRegression": LinearRegression(),
    "KNN": KNeighborsRegressor(),
    "DecisionTree": DecisionTreeRegressor(random_state=42),
    "MLP": MLPRegressor(random_state=42)
}

# Step 3: Initialize output dictionary
results = {}

# Step 4: Loop through each dataset
for dataset_name, k_dict in selected_feature_data.items():
    results[dataset_name] = {}
    
    for k, split in k_dict.items():
        if "error" in split:
            results[dataset_name][k] = {"error": split["error"]}
            continue

        # Get data
        X_train = split["X_train"]
        X_val = split["X_val"]
        X_test = split["X_test"]
        y_train = split["y_train"]
        y_val = split["y_val"]
        y_test = split["y_test"]

        # Store results for this k
        results[dataset_name][k] = {}

        for model_name, model in models.items():
            try:
                model.fit(X_train, y_train)

                y_train_pred = model.predict(X_train)
                y_val_pred = model.predict(X_val)
                y_test_pred = model.predict(X_test)

                results[dataset_name][k][model_name] = {
                    "y_train": y_train,
                    "y_train_pred": y_train_pred,
                    "y_val": y_val,
                    "y_val_pred": y_val_pred,
                    "y_test": y_test,
                    "y_test_pred": y_test_pred
                }
            except Exception as e:
                results[dataset_name][k][model_name] = {"error": str(e)}

# Step 5: Save the results dictionary
with open("Data/model_predictions.pkl", "wb") as f:
    pickle.dump(results, f)

print("✅ Model predictions saved to 'Data/model_predictions.pkl'")

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

✅ Model predictions saved to 'Data/model_predictions.pkl'


Fitting models for simulated datasets

In [1]:
import pickle
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor

# Step 1: Load selected_feature_data
with open("Data/selected_feature_data_simulated.pkl", "rb") as f:
    selected_feature_data = pickle.load(f)

# Step 2: Define models
models = {
    "LinearRegression": LinearRegression(),
    "KNN": KNeighborsRegressor(),
    "DecisionTree": DecisionTreeRegressor(random_state=42),
    "MLP": MLPRegressor(random_state=42)
}

# Step 3: Initialize output dictionary
results = {}

# Step 4: Loop through each dataset
for dataset_name, k_dict in selected_feature_data.items():
    results[dataset_name] = {}
    
    for k, split in k_dict.items():
        if "error" in split:
            results[dataset_name][k] = {"error": split["error"]}
            continue

        # Get data
        X_train = split["X_train"]
        X_val = split["X_val"]
        X_test = split["X_test"]
        y_train = split["y_train"]
        y_val = split["y_val"]
        y_test = split["y_test"]

        # Store results for this k
        results[dataset_name][k] = {}

        for model_name, model in models.items():
            try:
                model.fit(X_train, y_train)

                y_train_pred = model.predict(X_train)
                y_val_pred = model.predict(X_val)
                y_test_pred = model.predict(X_test)

                results[dataset_name][k][model_name] = {
                    "y_train": y_train,
                    "y_train_pred": y_train_pred,
                    "y_val": y_val,
                    "y_val_pred": y_val_pred,
                    "y_test": y_test,
                    "y_test_pred": y_test_pred
                }
            except Exception as e:
                results[dataset_name][k][model_name] = {"error": str(e)}

# Step 5: Save the results dictionary
with open("Data/model_predictions_simulated.pkl", "wb") as f:
    pickle.dump(results, f)

print("✅ Model predictions saved to 'Data/model_predictions_simulated.pkl'")



✅ Model predictions saved to 'Data/model_predictions_simulated.pkl'


