In [10]:
from PolishSystem.OperatorsBasedOnSimilarities.similarities_utils import gian_get_similarities
from PolishSystem.read_data import get_pRef_from_vectors
import os

# Root folder holding the 250-variable retail-forecasting data files.
# Defaults to the original author's local checkout; set the PSSEARCH_DATA_250
# environment variable to run the notebook against a different location
# without editing this cell.
dir_250 = os.environ.get(
    "PSSEARCH_DATA_250",
    r"C:\Users\gac8\PycharmProjects\PSSearch\data\retail_forecasting\250",
)


def in_250(path):
    """Return `path` joined onto the `dir_250` data directory.

    :param path: file name or relative path inside `dir_250`.
    :return: the combined path, built with `os.path.join`.
    """
    return os.path.join(dir_250, path)

genome_size = 250
clustering_method = "kmeans"

# The vector and fitness CSVs sit in the "hierarchical" subfolder of dir_250.
# The relative part is built with os.path.join instead of a literal "\\" so
# that the cell is not tied to Windows path separators.
vectors_file = in_250(os.path.join("hierarchical",
                                   f"many_hot_vectors_{genome_size}_{clustering_method}.csv"))
fitness_file = in_250(os.path.join("hierarchical",
                                   f"fitness_{genome_size}_{clustering_method}.csv"))

# NOTE(review): column_in_fitness_file=2 selects one column of the fitness CSV;
# which quantity that column holds is not visible here — TODO confirm.
pRef = get_pRef_from_vectors(name_of_vectors_file=vectors_file,
                             name_of_fitness_file=fitness_file,
                             column_in_fitness_file=2)

# NOTE(review): no seed is passed to this split, so it presumably changes from
# run to run — confirm whether train_test_split accepts one if reproducibility
# of train_pRef / test_pRef matters.
train_pRef, test_pRef = pRef.train_test_split(test_size=0.2)

# NOTE(review): this file name hard-codes "250" and "qmc" rather than using
# genome_size / clustering_method — verify that this is the intended file.
cluster_info_file_name = in_250("cluster_info_250_qmc.pkl")
similarities = gian_get_similarities(cluster_info_file_name)

In [13]:
import utils
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression

# XGBoost requires separate install: pip install xgboost
from xgboost import XGBRegressor

# Benchmark several off-the-shelf regressors on predicting fitness from the
# solution matrix of the full reference population.
X = pRef.full_solution_matrix
y = pRef.fitness_array

# Note: this rebinds the notebook-level `genome_size` to the number of columns
# actually present in X.
datapoint_quantity, genome_size = X.shape

# Per the original author's notes, X is expected to have shape (n_samples, 250)
# with 0/1 entries and y shape (n_samples,) — not checked here.

# Single seed shared by the split and every stochastic model below.
RANDOM_STATE = 42

# Hold out 30% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=RANDOM_STATE)

# Candidate models, keyed by the label used in the printed report.
models = {
    "Linear Regression": LinearRegression(),
    "Decision Tree": DecisionTreeRegressor(random_state=RANDOM_STATE),
    # Random Forest is currently disabled (reason not recorded — TODO confirm).
    #"Random Forest": RandomForestRegressor(n_estimators=100, random_state=RANDOM_STATE),
    "XGBoost": XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=RANDOM_STATE, verbosity=0),
    "Neural Network": MLPRegressor(hidden_layer_sizes=(100,), activation='relu', solver='adam', max_iter=100, random_state=RANDOM_STATE),
}

# Train and evaluate each model in turn.
for name, model in models.items():
    with utils.announce(f"Training {name}"):
        model.fit(X_train, y_train)

    # Prediction and scoring are the evaluation work, so they run inside the
    # announced "Testing" block; previously only the MSE call was inside it
    # and the reported testing time was effectively zero.
    with utils.announce(f"Testing {name}"):
        y_pred = model.predict(X_test)
        r2 = r2_score(y_test, y_pred)
        mse = mean_squared_error(y_test, y_pred)

    print(f"{name} Performance:")
    print(f"  R² Score: {r2:.4f}")
    print(f"  MSE: {mse:.4f}\n")



Training Linear Regression......Finished (took 0.573989 seconds)
Testing Linear Regression......Finished (took 0.000000 seconds)
Linear Regression Performance:
  R² Score: 0.0685
  MSE: 6.3714

Training Decision Tree......Finished (took 5.715582 seconds)
Testing Decision Tree......Finished (took 0.000000 seconds)
Decision Tree Performance:
  R² Score: -0.7578
  MSE: 12.0232

Training XGBoost......Finished (took 3.954611 seconds)
Testing XGBoost......Finished (took 0.008010 seconds)
XGBoost Performance:
  R² Score: 0.0692
  MSE: 6.3666

Training Neural Network......Finished (took 29.901798 seconds)
Testing Neural Network......Finished (took 0.000000 seconds)
Neural Network Performance:
  R² Score: -0.3612
  MSE: 9.3106


