In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.ensemble import ExtraTreesRegressor

# Load and preprocess data
train_data = pd.read_csv('../main_dataset.csv')
# Floor Id at 1e-18 so the log10 below never receives zero or negative values.
train_data['Id'] = np.where(train_data['Id'] < 1e-18, 1e-18, train_data['Id'])
train_data['Log_Id'] = np.log10(train_data['Id'])
X = train_data[['Tox', 'Nd', 'Ns', 'Vds', 'Vgs']]
y = train_data['Log_Id']  # regression target is log10(Id)

# The polynomial expansion is a fixed, row-wise transform (it learns no
# statistics from the data), so applying it before the split leaks nothing.
poly = PolynomialFeatures(degree=3, include_bias=False)
X_poly = poly.fit_transform(X)

# Split BEFORE scaling: the scaler's mean/std must come from the training rows
# only, otherwise information about the test rows leaks into the features the
# model is trained on.
X_train_poly, X_test_poly, y_train, y_test = train_test_split(
    X_poly, y, test_size=0.3, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_poly)
X_test = scaler.transform(X_test_poly)

# Full scaled matrix, kept for any later code that expects `X_scaled`; it is
# produced with the train-only statistics fitted above.
X_scaled = scaler.transform(X_poly)

# Reference ExtraTreesRegressor configuration. Each sweep starts from a copy
# of this mapping and overrides exactly one entry.
baseline_params = dict(
    n_estimators=100,
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features=1.0,
    random_state=42,
    n_jobs=-1,
)

# Candidate values per hyperparameter; each hyperparameter is varied on its
# own while the others stay at their baseline value.
param_ranges = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    max_features=[1.0, 'sqrt', 'log2', 0.5, 0.75],
)

# Function to evaluate performance of varying parameters
def evaluate_parameter(param_name, param_values):
    """Sweep one hyperparameter and score each setting on the held-out split.

    For every candidate value, an ``ExtraTreesRegressor`` is built from a copy
    of the module-level ``baseline_params`` with ``param_name`` overridden,
    fitted on ``X_train``/``y_train`` and evaluated on ``X_test``/``y_test``.
    A one-line summary is printed per candidate.

    Args:
        param_name: Name of the ``ExtraTreesRegressor`` keyword to vary.
        param_values: Iterable of candidate values for that keyword.

    Returns:
        A list of ``(value, r2, mae, mse)`` tuples, one per candidate, in the
        order the candidates were given.
    """
    results = []
    for value in param_values:
        # Override only the parameter under study; the rest stay at baseline.
        params = {**baseline_params, param_name: value}

        # Train the model
        model = ExtraTreesRegressor(**params)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Calculate metrics (sklearn's convention is (y_true, y_pred)).
        r2 = r2_score(y_test, y_pred)
        mae = mean_absolute_error(y_test, y_pred)
        # This is the mean squared error of the predictions, not a variance,
        # so it is named and labelled accordingly.
        mse = mean_squared_error(y_test, y_pred)

        # Store the results
        results.append((value, r2, mae, mse))
        print(f"{param_name}={value} | R2: {r2:.10f}, MAE: {mae:.10f}, MSE: {mse:.10f}")

    return results

# Run the one-at-a-time sweep for every hyperparameter in param_ranges.
# Results are stored as each sweep finishes, so an interrupted run still
# keeps the sweeps that completed.
param_results = {}
for param_name in param_ranges:
    values = param_ranges[param_name]
    print(f"\nEvaluating {param_name}...")
    param_results[param_name] = evaluate_parameter(param_name, values)

# Usage note: param_results['n_estimators'] holds the list returned by
# evaluate_parameter for the 'n_estimators' sweep, one entry per setting.




Evaluating n_estimators...


KeyboardInterrupt: 