In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import lightgbm as lgb
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.neural_network import MLPRegressor
import random

# Read the raw CSV export into a DataFrame
file_path = r"C:\Users\mhtaba\Desktop\Ai_control2\data_backup_47 (2).csv"  # Update with actual file path
df = pd.read_csv(file_path)

# Header labels may carry leading/trailing whitespace; normalise them
# before any column is looked up by name
df.columns = [label.strip() for label in df.columns]

# Keep only the rows whose 'Ev' value is non-zero
df = df.loc[df['Ev'] != 0]

In [3]:
# Fix the global NumPy RNG so the draw below is repeatable
np.random.seed(42)

# Candidate values 1..1632 (the upper bound of arange is exclusive)
numbers = np.arange(1, 1633)

# Draw 10% of the candidates (rounded down), each value at most once
sample_size = int(numbers.size * 0.1)
random_sample = np.random.choice(numbers, sample_size, replace=False)

In [4]:
# Flag the rows whose 'seed' value was drawn into random_sample
in_sample = df['seed'].isin(random_sample)

# Sampled seeds go to the *_test frame; every other row stays in df_filtered
df_filtered_test = df[in_sample]
df_filtered = df[~in_sample]

In [5]:
from sklearn.model_selection import train_test_split, RandomizedSearchCV, KFold
from sklearn.ensemble import ExtraTreesRegressor, RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import lightgbm as lgb
import numpy as np

# Shared 5-fold shuffled splitter used by every hyper-parameter search
cv = KFold(n_splits=5, shuffle=True, random_state=42)

# --- Candidate hyper-parameter values, one dict per model family ---

# Extra Trees
et_params = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Random Forest searches exactly the same values as Extra Trees;
# the lists are copied so the two dicts stay independent objects
rf_params = {option: list(choices) for option, choices in et_params.items()}

# LightGBM
lgb_params = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'num_leaves': [20, 31, 50],
    'max_depth': [-1, 10, 20]
}

# Multi-layer perceptron
ann_params = {
    'hidden_layer_sizes': [(128, 64, 32), (256, 128, 64), (64, 32)],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate_init': [0.001, 0.01],
    'max_iter': [200, 500, 1000]
}

# --- Model inputs ---

# Predictors: every column of df_filtered except 'Et', 'Ev', 'seed' and 'city'
features = df_filtered.drop(['Et', 'Ev', 'seed', 'city'], axis=1)

# Regression target: the 'Ev' column
target = df_filtered.loc[:, 'Ev']

# Row-wise 80/20 split with a fixed random_state
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Randomized Grid Search and Model Training
def train_best_model(model, params, name):
    """Tune ``model`` with a randomized search and score the winner.

    The search tries 10 parameter settings drawn from ``params``, scores
    them by R2 using the notebook-level ``cv`` splitter, and is fitted on
    the notebook-level ``X_train`` / ``y_train``. The best estimator found
    is then evaluated on the notebook-level ``X_test`` / ``y_test``.

    Parameters
    ----------
    model : estimator
        Unfitted regressor handed to ``RandomizedSearchCV``.
    params : dict
        Search space: parameter name -> list of candidate values.
    name : str
        Label for the model. Accepted for the caller's convenience but
        not used inside this function.

    Returns
    -------
    dict
        Keys ``'Best Params'``, ``'R2 Score'``, ``'MAE'`` and ``'MSE'``.
    """
    tuner = RandomizedSearchCV(
        estimator=model,
        param_distributions=params,
        n_iter=10,
        scoring='r2',
        cv=cv,
        n_jobs=-1,
        random_state=42,
    )
    tuner.fit(X_train, y_train)

    # Predict with the best estimator the search produced
    test_predictions = tuner.best_estimator_.predict(X_test)

    summary = {'Best Params': tuner.best_params_}
    summary['R2 Score'] = r2_score(y_test, test_predictions)
    summary['MAE'] = mean_absolute_error(y_test, test_predictions)
    summary['MSE'] = mean_squared_error(y_test, test_predictions)
    return summary

# Tune and score each model family in turn; the dict keeps insertion order,
# so the table rows appear in the order listed here.
# NOTE(review): the MLP is fitted on X_train as-is; no scaling step is visible
# in this section -- confirm whether the features are already standardised.
results = {
    label: train_best_model(estimator, search_space, label)
    for label, estimator, search_space in (
        ('Extra Trees', ExtraTreesRegressor(random_state=42, n_jobs=-1), et_params),
        ('Random Forest', RandomForestRegressor(random_state=42, n_jobs=-1), rf_params),
        ('LightGBM', lgb.LGBMRegressor(random_state=42), lgb_params),
        ('ANN', MLPRegressor(activation='relu', solver='adam', random_state=42), ann_params),
    )
}

# One row per model, one column per reported item
results_df = pd.DataFrame(results).transpose()

results_df

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.054317 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2398
[LightGBM] [Info] Number of data points in the train set: 565200, number of used features: 20
[LightGBM] [Info] Start training from score 2233.957120


Unnamed: 0,Best Params,R2 Score,MAE,MSE
Extra Trees,"{'n_estimators': 200, 'min_samples_split': 2, ...",0.990452,90.287915,97652.515345
Random Forest,"{'n_estimators': 200, 'min_samples_split': 2, ...",0.989512,108.737436,107269.365028
LightGBM,"{'num_leaves': 31, 'n_estimators': 200, 'max_d...",0.989684,170.92999,105511.280997
ANN,"{'max_iter': 500, 'learning_rate_init': 0.001,...",0.991562,139.210874,86298.237711
