In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np

# Load the data
file_path = 'C:\\Users\\V R N S Nikhil\\OneDrive\\Desktop\\4th_sem\\ML\\FINAL\ML\\MathBert.xlsx'
data = pd.read_excel(file_path)

# Separate features and target
X = data.drop('output', axis=1)
y = data['output']

# Custom round function
def custom_round(value):
    integer_part = int(value)
    decimal_part = value - integer_part
    if decimal_part == 0.5:
        return value
    else:
        return round(value)

data['output'] = data['output'].apply(custom_round)
data['output'].value_counts()

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the feature values
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the extended parameter grid
param_grid = {
    'criterion': ['squared_error'],
    'splitter': ['best'],
    'max_depth': [None, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    'min_samples_split': [2, 5, 10, 15, 20],
    'min_samples_leaf': [1, 2, 4, 6, 8, 10],
    'max_features': [None, 'auto', 'sqrt', 'log2'],
    'max_leaf_nodes': [None, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    'min_impurity_decrease': [0.0, 0.01, 0.02, 0.05, 0.1]
}

# Initialize the Decision Tree Regressor
dt_regressor = DecisionTreeRegressor(random_state=42)

# Set up the randomized search with cross-validation
random_search = RandomizedSearchCV(estimator=dt_regressor, param_distributions=param_grid, n_iter=50, cv=3, random_state=42, n_jobs=-1)

# Fit the randomized search model
random_search.fit(X_train_scaled, y_train)

# Get the best parameters and the best score
best_params = random_search.best_params_
best_score = random_search.best_score_

print("Best Parameters:", best_params)
print("Best Cross-Validation Score:", best_score)

# Evaluate on the test set
y_pred = random_search.predict(X_test_scaled)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Perform cross-validation to get the mean and std of R² and RMSE
r2_scores = cross_val_score(random_search.best_estimator_, X_train_scaled, y_train, scoring='r2', cv=3)
rmse_scores = np.sqrt(-cross_val_score(random_search.best_estimator_, X_train_scaled, y_train, scoring='neg_mean_squared_error', cv=3))

r2_mean = r2_scores.mean()
r2_std = r2_scores.std()
rmse_mean = rmse_scores.mean()
rmse_std = rmse_scores.std()

print("R2 Score on Test Set:", r2)
print("RMSE on Test Set:", rmse)
print("Cross-Validation R2 Mean:", r2_mean)
print("Cross-Validation R2 Std Dev:", r2_std)
print("Cross-Validation RMSE Mean:", rmse_mean)
print("Cross-Validation RMSE Std Dev:", rmse_std)


42 fits failed out of a total of 150.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
22 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\V R N S Nikhil\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\V R N S Nikhil\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\base.py", line 1145, in wrapper
    estimator._validate_params()
  File "c:\Users\V R N S Nikhil\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\base.py", line 638, in _validate_params
    validate_parameter_constraints(
  File "c:\Users\V R N S Nikhil\AppData

Best Parameters: {'splitter': 'best', 'min_samples_split': 2, 'min_samples_leaf': 8, 'min_impurity_decrease': 0.1, 'max_leaf_nodes': 90, 'max_features': None, 'max_depth': 60, 'criterion': 'squared_error'}
Best Cross-Validation Score: 0.027003701490093013
R2 Score on Test Set: -0.006858463346514965
RMSE on Test Set: 1.17722569964723
Cross-Validation R2 Mean: 0.027003701490093013
Cross-Validation R2 Std Dev: 0.023239445135052366
Cross-Validation RMSE Mean: 1.1528238268192519
Cross-Validation RMSE Std Dev: 0.03323802472701659


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np

# Load the data
file_path = 'C:\\Users\\V R N S Nikhil\\OneDrive\\Desktop\\4th_sem\\ML\\FINAL\ML\\MathBert.xlsx'
data = pd.read_excel(file_path)

# Separate features and target
X = data.drop('output', axis=1)
y = data['output']

# Custom round function
def custom_round(value):
    integer_part = int(value)
    decimal_part = value - integer_part
    if decimal_part == 0.5:
        return value
    else:
        return round(value)

data['output'] = data['output'].apply(custom_round)
data['output'].value_counts()

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the feature values
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the parameter grid
param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (50, 50), (100, 50), (100, 100)],
    'activation': ['relu', 'tanh'],
    'solver': ['adam', 'sgd'],
    'alpha': [0.0001, 0.001, 0.01, 0.1],
    'learning_rate': ['constant', 'adaptive'],
}

# Initialize the MLP regressor
mlp_regressor = MLPRegressor(max_iter=300, random_state=42)

# Set up the randomized search with cross-validation
random_search = RandomizedSearchCV(estimator=mlp_regressor, param_distributions=param_grid, n_iter=20, cv=3, random_state=42, n_jobs=-1)

# Fit the randomized search model
random_search.fit(X_train_scaled, y_train)

# Get the best parameters and the best score
best_params = random_search.best_params_
best_score = random_search.best_score_

print("Best Parameters:", best_params)
print("Best Cross-Validation Score:", best_score)

# Evaluate on the test set
y_pred = random_search.predict(X_test_scaled)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Perform cross-validation to get the mean and std of R² and RMSE
r2_scores = cross_val_score(random_search.best_estimator_, X_train_scaled, y_train, scoring='r2', cv=3)
neg_mse_scores = cross_val_score(random_search.best_estimator_, X_train_scaled, y_train, scoring='neg_mean_squared_error', cv=3)
rmse_scores = np.sqrt(-neg_mse_scores)

r2_mean = r2_scores.mean()
r2_std = r2_scores.std()
rmse_mean = rmse_scores.mean()
rmse_std = rmse_scores.std()

print("R2 Score on Test Set:", r2)
print("RMSE on Test Set:", rmse)
print("Cross-Validation R2 Mean:", r2_mean)
print("Cross-Validation R2 Std Dev:", r2_std)
print("Cross-Validation RMSE Mean:", rmse_mean)
print("Cross-Validation RMSE Std Dev:", rmse_std)


Best Parameters: {'solver': 'adam', 'learning_rate': 'constant', 'hidden_layer_sizes': (100, 100), 'alpha': 0.0001, 'activation': 'relu'}
Best Cross-Validation Score: 0.19958920805136682
R2 Score on Test Set: 0.4044178348407108
RMSE on Test Set: 0.9054122458655068
Cross-Validation R2 Mean: 0.19958920805136682
Cross-Validation R2 Std Dev: 0.05810260674986299
Cross-Validation RMSE Mean: 1.0441390233139205
Cross-Validation RMSE Std Dev: 0.022947655244567


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np

# Load the data
file_path = 'C:\\Users\\V R N S Nikhil\\OneDrive\\Desktop\\4th_sem\\ML\\FINAL\ML\\MathBert.xlsx'
data = pd.read_excel(file_path)

# Separate features and target
X = data.drop('output', axis=1)
y = data['output']

# Custom round function
def custom_round(value):
    integer_part = int(value)
    decimal_part = value - integer_part
    if decimal_part == 0.5:
        return value
    else:
        return round(value)

data['output'] = data['output'].apply(custom_round)
data['output'].value_counts()

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the feature values
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the extended parameter grid for XGBoost Regressor
param_grid_xgb = {
    'n_estimators': [100, 200, 300, 400, 500],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'max_depth': [3, 5, 7, 10],
    'min_child_weight': [1, 3, 5],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0],
    'gamma': [0, 0.1, 0.2, 0.3],
    'reg_alpha': [0, 0.01, 0.1, 1],
    'reg_lambda': [0, 0.01, 0.1, 1]
}

# Initialize the XGBoost Regressor
xgb_regressor = XGBRegressor(objective='reg:squarederror', random_state=42)

# Set up the randomized search with cross-validation for XGBoost Regressor
random_search_xgb = RandomizedSearchCV(estimator=xgb_regressor, param_distributions=param_grid_xgb, n_iter=50, cv=3, random_state=42, n_jobs=-1)

# Fit the randomized search model for XGBoost Regressor
random_search_xgb.fit(X_train_scaled, y_train)

# Get the best parameters and the best score for XGBoost Regressor
best_params_xgb = random_search_xgb.best_params_
best_score_xgb = random_search_xgb.best_score_

print("Best Parameters for XGBoost Regressor:", best_params_xgb)
print("Best Cross-Validation Score for XGBoost Regressor:", best_score_xgb)

# Evaluate the XGBoost Regressor on the test set
y_pred_xgb = random_search_xgb.predict(X_test_scaled)
r2_xgb = r2_score(y_test, y_pred_xgb)
rmse_xgb = np.sqrt(mean_squared_error(y_test, y_pred_xgb))

# Perform cross-validation to get the mean and std of R² and RMSE
r2_scores_xgb = cross_val_score(random_search_xgb.best_estimator_, X_train_scaled, y_train, scoring='r2', cv=3)
neg_mse_scores_xgb = cross_val_score(random_search_xgb.best_estimator_, X_train_scaled, y_train, scoring='neg_mean_squared_error', cv=3)
rmse_scores_xgb = np.sqrt(-neg_mse_scores_xgb)

r2_mean_xgb = r2_scores_xgb.mean()
r2_std_xgb = r2_scores_xgb.std()
rmse_mean_xgb = rmse_scores_xgb.mean()
rmse_std_xgb = rmse_scores_xgb.std()

print("R2 Score on Test Set for XGBoost Regressor:", r2_xgb)
print("RMSE on Test Set for XGBoost Regressor:", rmse_xgb)
print("Cross-Validation R2 Mean for XGBoost Regressor:", r2_mean_xgb)
print("Cross-Validation R2 Std Dev for XGBoost Regressor:", r2_std_xgb)
print("Cross-Validation RMSE Mean for XGBoost Regressor:", rmse_mean_xgb)
print("Cross-Validation RMSE Std Dev for XGBoost Regressor:", rmse_std_xgb)


Best Parameters for XGBoost Regressor: {'subsample': 0.6, 'reg_lambda': 0.1, 'reg_alpha': 0.01, 'n_estimators': 500, 'min_child_weight': 5, 'max_depth': 10, 'learning_rate': 0.01, 'gamma': 0.1, 'colsample_bytree': 1.0}
Best Cross-Validation Score for XGBoost Regressor: 0.33174968963502177
R2 Score on Test Set for XGBoost Regressor: 0.42723475298274205
RMSE on Test Set for XGBoost Regressor: 0.8878995818605064
Cross-Validation R2 Mean for XGBoost Regressor: 0.33174968963502177
Cross-Validation R2 Std Dev for XGBoost Regressor: 0.008041875297721395
Cross-Validation RMSE Mean for XGBoost Regressor: 0.9552361165573103
Cross-Validation RMSE Std Dev for XGBoost Regressor: 0.016763656701792768


In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from catboost import CatBoostRegressor
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np

# Load the data
file_path = 'C:\\Users\\V R N S Nikhil\\OneDrive\\Desktop\\4th_sem\\ML\\FINAL\ML\\MathBert.xlsx'
data = pd.read_excel(file_path)

# Separate features and target
X = data.drop('output', axis=1)
y = data['output']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the feature values
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the parameter grid
param_grid = {
    'iterations': [100, 200, 500],
    'depth': [4, 6, 8],
    'learning_rate': [0.01, 0.05, 0.1],
    'l2_leaf_reg': [1, 3, 5, 7],
    'border_count': [32, 50, 100]
}

# Initialize the CatBoost regressor
catboost_regressor = CatBoostRegressor(silent=True, random_state=42)

# Set up the randomized search with cross-validation
random_search = RandomizedSearchCV(estimator=catboost_regressor, param_distributions=param_grid, n_iter=20, cv=3, random_state=42, n_jobs=-1)

# Fit the randomized search model
random_search.fit(X_train_scaled, y_train)

# Get the best parameters and the best score
best_params = random_search.best_params_
best_score = random_search.best_score_

print("Best Parameters:", best_params)
print("Best Cross-Validation Score:", best_score)

# Evaluate on the test set
y_pred = random_search.predict(X_test_scaled)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Perform cross-validation to get the mean and std of R² and RMSE
r2_scores = cross_val_score(random_search.best_estimator_, X_train_scaled, y_train, scoring='r2', cv=3)
neg_mse_scores = cross_val_score(random_search.best_estimator_, X_train_scaled, y_train, scoring='neg_mean_squared_error', cv=3)
rmse_scores = np.sqrt(-neg_mse_scores)

r2_mean = r2_scores.mean()
r2_std = r2_scores.std()
rmse_mean = rmse_scores.mean()
rmse_std = rmse_scores.std()

print("R2 Score on Test Set:", r2)
print("RMSE on Test Set:", rmse)
print("Cross-Validation R2 Mean:", r2_mean)
print("Cross-Validation R2 Std Dev:", r2_std)
print("Cross-Validation RMSE Mean:", rmse_mean)
print("Cross-Validation RMSE Std Dev:", rmse_std)


3 fits failed out of a total of 60.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\V R N S Nikhil\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\V R N S Nikhil\AppData\Local\Programs\Python\Python311\Lib\site-packages\catboost\core.py", line 5827, in fit
    return self._fit(X, y, cat_features, text_features, embedding_features, None, sample_weight, None, None, None, None, baseline,
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Best Parameters: {'learning_rate': 0.1, 'l2_leaf_reg': 7, 'iterations': 500, 'depth': 6, 'border_count': 50}
Best Cross-Validation Score: 0.3217109878287166
R2 Score on Test Set: 0.40526149464641514
RMSE on Test Set: 0.9047707469561626
Cross-Validation R2 Mean: 0.3217109878287166
Cross-Validation R2 Std Dev: 0.012665310558518588
Cross-Validation RMSE Mean: 0.9623941757719517
Cross-Validation RMSE Std Dev: 0.020050303805165287


In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score, mean_squared_error

# Load the data
file_path = 'C:\\Users\\V R N S Nikhil\\OneDrive\\Desktop\\4th_sem\\ML\\FINAL\ML\\MathBert.xlsx'
data = pd.read_excel(file_path)

# Separate features and target
X = data.drop('output', axis=1)
y = data['output']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the feature values
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the parameter grid for KNN
param_grid = {
    'n_neighbors': list(np.arange(1, 101)),
    'weights': ['uniform', 'distance'],
    'metric': ['minkowski', 'euclidean', 'chebyshev']
}

# Initialize the KNN regressor
knn_regressor = KNeighborsRegressor()

# Set up the grid search with cross-validation
grid_search = GridSearchCV(estimator=knn_regressor, param_grid=param_grid, cv=3, n_jobs=-1, scoring='neg_mean_squared_error')

# Fit the grid search model
grid_search.fit(X_train_scaled, y_train)

# Get the best parameters and the best score
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best Parameters for KNN Regressor:", best_params)
print("Best Cross-Validation Score for KNN Regressor (neg MSE):", best_score)

# Evaluate the KNN Regressor on the test set
y_pred = grid_search.predict(X_test_scaled)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Perform cross-validation to get the mean and std of R² and RMSE
r2_scores = cross_val_score(grid_search.best_estimator_, X_train_scaled, y_train, scoring='r2', cv=3)
neg_mse_scores = cross_val_score(grid_search.best_estimator_, X_train_scaled, y_train, scoring='neg_mean_squared_error', cv=3)
rmse_scores = np.sqrt(-neg_mse_scores)

r2_mean = r2_scores.mean()
r2_std = r2_scores.std()
rmse_mean = rmse_scores.mean()
rmse_std = rmse_scores.std()

print("R2 Score on Test Set:", r2)
print("RMSE on Test Set:", rmse)
print("Cross-Validation R2 Mean:", r2_mean)
print("Cross-Validation R2 Std Dev:", r2_std)
print("Cross-Validation RMSE Mean:", rmse_mean)
print("Cross-Validation RMSE Std Dev:", rmse_std)


Best Parameters for KNN Regressor: {'metric': 'minkowski', 'n_neighbors': 6, 'weights': 'distance'}
Best Cross-Validation Score for KNN Regressor (neg MSE): -1.0139981809202536
R2 Score on Test Set: 0.3002146472755114
RMSE on Test Set: 0.9814268741712557
Cross-Validation R2 Mean: 0.25799637661894986
Cross-Validation R2 Std Dev: 0.0190442923407612
Cross-Validation RMSE Mean: 1.0067917972408449
Cross-Validation RMSE Std Dev: 0.031793021818026145


In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import r2_score, mean_squared_error

# Load the data
file_path = 'C:\\Users\\V R N S Nikhil\\OneDrive\\Desktop\\4th_sem\\ML\\FINAL\ML\\MathBert.xlsx'
data = pd.read_excel(file_path)

# Separate features and target
X = data.drop('output', axis=1)
y = data['output']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the feature values
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the parameter grid for SVR
param_grid = {
             'C':[0.1,0.01,10],
            'kernel':['linear','rgf','polynomial'],
            'degree':[1,2,3],
            'epsilon':[0.11,0.2,1,10]
}

# Initialize the SVR
svr = SVR()

# Set up the grid search with cross-validation
grid_search = GridSearchCV(estimator=svr, param_grid=param_grid, cv=3, n_jobs=-1, scoring='neg_mean_squared_error')

# Fit the grid search model
grid_search.fit(X_train_scaled, y_train)

# Get the best parameters and the best score
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best Parameters for SVR:", best_params)
print("Best Cross-Validation Score for SVR (neg MSE):", best_score)

# Evaluate the SVR on the test set
y_pred = grid_search.predict(X_test_scaled)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Perform cross-validation to get the mean and std of R² and RMSE
r2_scores = cross_val_score(grid_search.best_estimator_, X_train_scaled, y_train, scoring='r2', cv=3)
neg_mse_scores = cross_val_score(grid_search.best_estimator_, X_train_scaled, y_train, scoring='neg_mean_squared_error', cv=3)
rmse_scores = np.sqrt(-neg_mse_scores)

r2_mean = r2_scores.mean()
r2_std = r2_scores.std()
rmse_mean = rmse_scores.mean()
rmse_std = rmse_scores.std()

print("R2 Score on Test Set:", r2)
print("RMSE on Test Set:", rmse)
print("Cross-Validation R2 Mean:", r2_mean)
print("Cross-Validation R2 Std Dev:", r2_std)
print("Cross-Validation RMSE Mean:", rmse_mean)
print("Cross-Validation RMSE Std Dev:", rmse_std)


216 fits failed out of a total of 324.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
22 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\V R N S Nikhil\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\V R N S Nikhil\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\base.py", line 1145, in wrapper
    estimator._validate_params()
  File "c:\Users\V R N S Nikhil\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\base.py", line 638, in _validate_params
    validate_parameter_constraints(
  File "c:\Users\V R N S Nikhil\AppDat

Best Parameters for SVR: {'C': 0.01, 'degree': 1, 'epsilon': 0.2, 'kernel': 'linear'}
Best Cross-Validation Score for SVR (neg MSE): -1.0116578773464109
R2 Score on Test Set: 0.2662733871941567
RMSE on Test Set: 1.0049458423334618
Cross-Validation R2 Mean: 0.2578608785629293
Cross-Validation R2 Std Dev: 0.044584365065218366
Cross-Validation RMSE Mean: 1.0057345651312564
Cross-Validation RMSE Std Dev: 0.012484464211708443


In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import ElasticNet
from sklearn.metrics import r2_score, mean_squared_error

# Load the data
file_path = 'C:\\Users\\V R N S Nikhil\\OneDrive\\Desktop\\4th_sem\\ML\\FINAL\ML\\MathBert.xlsx'
data = pd.read_excel(file_path)

# Separate features and target
X = data.drop('output', axis=1)
y = data['output']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the feature values
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the parameter grid for ElasticNet
param_grid = {
    'alpha': [0.001, 0.01, 0.1, 1, 10, 100],
    'l1_ratio': np.arange(0.0, 1.1, 0.1)
}

# Initialize the ElasticNet regressor
elastic_net = ElasticNet(random_state=42)

# Set up the grid search with cross-validation
grid_search = GridSearchCV(estimator=elastic_net, param_grid=param_grid, cv=3, n_jobs=-1, scoring='neg_mean_squared_error')

# Fit the grid search model
grid_search.fit(X_train_scaled, y_train)

# Get the best parameters and the best score
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best Parameters for ElasticNet:", best_params)
print("Best Cross-Validation Score for ElasticNet (neg MSE):", best_score)

# Evaluate the ElasticNet on the test set
y_pred = grid_search.predict(X_test_scaled)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Perform cross-validation to get the mean and std of R² and RMSE
r2_scores = cross_val_score(grid_search.best_estimator_, X_train_scaled, y_train, scoring='r2', cv=3)
neg_mse_scores = cross_val_score(grid_search.best_estimator_, X_train_scaled, y_train, scoring='neg_mean_squared_error', cv=3)
rmse_scores = np.sqrt(-neg_mse_scores)

r2_mean = r2_scores.mean()
r2_std = r2_scores.std()
rmse_mean = rmse_scores.mean()
rmse_std = rmse_scores.std()

print("R2 Score on Test Set:", r2)
print("RMSE on Test Set:", rmse)
print("Cross-Validation R2 Mean:", r2_mean)
print("Cross-Validation R2 Std Dev:", r2_std)
print("Cross-Validation RMSE Mean:", rmse_mean)
print("Cross-Validation RMSE Std Dev:", rmse_std)


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Best Parameters for ElasticNet: {'alpha': 0.1, 'l1_ratio': 0.0}
Best Cross-Validation Score for ElasticNet (neg MSE): -0.9549007944163699


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


R2 Score on Test Set: 0.31928358179245053
RMSE on Test Set: 0.967962727741754
Cross-Validation R2 Mean: 0.2995457197755134
Cross-Validation R2 Std Dev: 0.04020521658566474
Cross-Validation RMSE Mean: 0.9771345162454229
Cross-Validation RMSE Std Dev: 0.01043702918426269


  model = cd_fast.enet_coordinate_descent(


In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error

# Load the dataset
file_path = 'C:\\Users\\V R N S Nikhil\\OneDrive\\Desktop\\4th_sem\\ML\\FINAL\ML\\MathBert.xlsx'
data = pd.read_excel(file_path)

# Separate features and target variable
X = data.drop('output', axis=1)
y = data['output']

# Custom round function
def custom_round(value):
    integer_part = int(value)
    decimal_part = value - integer_part
    if decimal_part == 0.5:
        return value
    else:
        return round(value)

data['output'] = data['output'].apply(custom_round)
data['output'].value_counts()

# Handle missing values in the features and target
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)
y_imputed = SimpleImputer(strategy='mean').fit_transform(y.values.reshape(-1, 1)).ravel()

# Apply PCA to reduce dimensionality
pca = PCA(n_components=0.95)
scaler = StandardScaler()
X_reduced = scaler.fit_transform(pca.fit_transform(X_imputed))

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_reduced, y_imputed, test_size=0.2, random_state=42)

# Set up the Random Forest Regressor
rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)

# Train the model
rf_regressor.fit(X_train, y_train)

# Make predictions
y_pred_train = rf_regressor.predict(X_train)
y_pred_test = rf_regressor.predict(X_test)

# Calculate the R^2 score
r2_train = r2_score(y_train, y_pred_train)
r2_test = r2_score(y_test, y_pred_test)

# Calculate the RMSE
rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))

# Perform cross-validation to get the mean and std of R² and RMSE
r2_scores = cross_val_score(rf_regressor, X_train, y_train, scoring='r2', cv=3)
neg_mse_scores = cross_val_score(rf_regressor, X_train, y_train, scoring='neg_mean_squared_error', cv=3)
rmse_scores = np.sqrt(-neg_mse_scores)

r2_mean = r2_scores.mean()
r2_std = r2_scores.std()
rmse_mean = rmse_scores.mean()
rmse_std = rmse_scores.std()

# Print results
print("Training R^2 score:", r2_train)
print("Testing R^2 score:", r2_test)
print("R2 Score on Test Set:", r2_test)
print("RMSE on Test Set:", rmse_test)
print("Cross-Validation R2 Mean:", r2_mean)
print("Cross-Validation R2 Std Dev:", r2_std)
print("Cross-Validation RMSE Mean:", rmse_mean)
print("Cross-Validation RMSE Std Dev:", rmse_std)


Training R^2 score: 0.8933635271987839
Testing R^2 score: 0.30942719657546114
R2 Score on Test Set: 0.30942719657546114
RMSE on Test Set: 0.97494531652231
Cross-Validation R2 Mean: 0.20767244880127234
Cross-Validation R2 Std Dev: 0.03137782017063554
Cross-Validation RMSE Mean: 1.0396195117180953
Cross-Validation RMSE Std Dev: 0.00202905653859458


In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score, mean_squared_error

# Load the data
file_path = 'C:\\Users\\V R N S Nikhil\\OneDrive\\Desktop\\4th_sem\\ML\\FINAL\\ML\\MathBert.xlsx'
data = pd.read_excel(file_path)

# Separate features and target
X = data.drop('output', axis=1)
y = data['output']

# Custom round function
def custom_round(value):
    integer_part = int(value)
    decimal_part = value - integer_part
    if (decimal_part == 0.5):
        return value
    else:
        return round(value)

data['output'] = data['output'].apply(custom_round)
data['output'].value_counts()

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the feature values
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the extended parameter grid for AdaBoost Regressor
param_grid_adaboost = {
    'n_estimators': [50, 100, 200, 300, 400, 500],
    'learning_rate': [0.01, 0.05, 0.1, 0.2, 0.5, 1.0],
    'loss': ['linear', 'square', 'exponential'],
    'base_estimator__max_depth': [None, 10, 20, 30, 40, 50],
    'base_estimator__min_samples_split': [2, 5, 10],
    'base_estimator__min_samples_leaf': [1, 2, 4],
    'base_estimator__max_features': [None, 'auto', 'sqrt', 'log2']
}

# Initialize the base estimator
base_estimator = DecisionTreeRegressor(random_state=42)

# Initialize the AdaBoost Regressor
adaboost_regressor = AdaBoostRegressor(base_estimator=base_estimator, random_state=42)

# Set up the randomized search with cross-validation for AdaBoost Regressor
random_search_adaboost = RandomizedSearchCV(estimator=adaboost_regressor, param_distributions=param_grid_adaboost, n_iter=50, cv=3, random_state=42, n_jobs=-1)

# Fit the randomized search model for AdaBoost Regressor
random_search_adaboost.fit(X_train_scaled, y_train)

# Get the best parameters and the best score for AdaBoost Regressor
best_params_adaboost = random_search_adaboost.best_params_
best_score_adaboost = random_search_adaboost.best_score_

print("Best Parameters for AdaBoost Regressor:", best_params_adaboost)
print("Best Cross-Validation Score for AdaBoost Regressor:", best_score_adaboost)

# Evaluate the AdaBoost Regressor on the test set
y_pred_adaboost = random_search_adaboost.predict(X_test_scaled)
r2_adaboost = r2_score(y_test, y_pred_adaboost)
rmse_adaboost = np.sqrt(mean_squared_error(y_test, y_pred_adaboost))

print("R2 Score on Test Set for AdaBoost Regressor:", r2_adaboost)
print("RMSE on Test Set for AdaBoost Regressor:", rmse_adaboost)

# Perform cross-validation to get the mean and std of R² and RMSE
r2_scores_adaboost = cross_val_score(random_search_adaboost.best_estimator_, X_train_scaled, y_train, scoring='r2', cv=3)
neg_mse_scores_adaboost = cross_val_score(random_search_adaboost.best_estimator_, X_train_scaled, y_train, scoring='neg_mean_squared_error', cv=3)
rmse_scores_adaboost = np.sqrt(-neg_mse_scores_adaboost)

r2_mean_adaboost = r2_scores_adaboost.mean()
r2_std_adaboost = r2_scores_adaboost.std()
rmse_mean_adaboost = rmse_scores_adaboost.mean()
rmse_std_adaboost = rmse_scores_adaboost.std()

print("Cross-Validation R2 Mean for AdaBoost Regressor:", r2_mean_adaboost)
print("Cross-Validation R2 Std Dev for AdaBoost Regressor:", r2_std_adaboost)
print("Cross-Validation RMSE Mean for AdaBoost Regressor:", rmse_mean_adaboost)
print("Cross-Validation RMSE Std Dev for AdaBoost Regressor:", rmse_std_adaboost)


33 fits failed out of a total of 150.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
28 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\V R N S Nikhil\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\V R N S Nikhil\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\V R N S Nikhil\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\ensemble\_weight_boosting.py", line 171, in fit


Best Parameters for AdaBoost Regressor: {'n_estimators': 300, 'loss': 'square', 'learning_rate': 0.5, 'base_estimator__min_samples_split': 5, 'base_estimator__min_samples_leaf': 2, 'base_estimator__max_features': 'sqrt', 'base_estimator__max_depth': 20}
Best Cross-Validation Score for AdaBoost Regressor: 0.3264014031850772
R2 Score on Test Set for AdaBoost Regressor: 0.41320002568904035
RMSE on Test Set for AdaBoost Regressor: 0.8987120506042103




Cross-Validation R2 Mean for AdaBoost Regressor: 0.3264014031850772
Cross-Validation R2 Std Dev for AdaBoost Regressor: 0.004644549338903321
Cross-Validation RMSE Mean for AdaBoost Regressor: 0.9591849056306813
Cross-Validation RMSE Std Dev for AdaBoost Regressor: 0.022191797236837778
