In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV,GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score

# Load the data
file_path = 'C:\\Users\\V R N S Nikhil\\OneDrive\\Desktop\\4th_sem\\ML\\FINAL\\ML\\Auto.xlsx'
data = pd.read_excel(file_path)

# Separate features and target
X = data.drop('output', axis=1)
y = data['output']

data['output'].value_counts()

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the feature values
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the extended parameter grid for AdaBoost Regressor
param_grid_adaboost = {
    'n_estimators': [50, 100, 200, 300, 400, 500],
    'learning_rate': [0.01, 0.05, 0.1, 0.2, 0.5, 1.0],
    'loss': ['linear', 'square', 'exponential'],
    'base_estimator__max_depth': [None, 10, 20, 30, 40, 50],
    'base_estimator__min_samples_split': [2, 5, 10],
    'base_estimator__min_samples_leaf': [1, 2, 4],
    'base_estimator__max_features': [None, 'auto', 'sqrt', 'log2']
}

# Initialize the base estimator
base_estimator = DecisionTreeRegressor(random_state=42)

# Initialize the AdaBoost Regressor
adaboost_regressor = AdaBoostRegressor(base_estimator=base_estimator, random_state=42)

# Set up the randomized search with cross-validation for AdaBoost Regressor
random_search_adaboost = RandomizedSearchCV(estimator=adaboost_regressor, param_distributions=param_grid_adaboost, n_iter=50, cv=3, random_state=42, n_jobs=-1)

# Fit the randomized search model for AdaBoost Regressor
random_search_adaboost.fit(X_train_scaled, y_train)

# Get the best parameters and the best score for AdaBoost Regressor
best_params_adaboost = random_search_adaboost.best_params_
best_score_adaboost = random_search_adaboost.best_score_

print("Best Parameters for AdaBoost Regressor:", best_params_adaboost)
print("Best Cross-Validation Score for AdaBoost Regressor:", best_score_adaboost)

# Evaluate the AdaBoost Regressor on the test set
y_pred_adaboost = random_search_adaboost.predict(X_test_scaled)
r2_adaboost = r2_score(y_test, y_pred_adaboost)
print("R2 Score on Test Set for AdaBoost Regressor:", r2_adaboost)

33 fits failed out of a total of 150.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
13 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\V R N S Nikhil\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\V R N S Nikhil\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\V R N S Nikhil\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\ensemble\_weight_boosting.py", line 171, in fit


Best Parameters for AdaBoost Regressor: {'n_estimators': 400, 'loss': 'exponential', 'learning_rate': 1.0, 'base_estimator__min_samples_split': 5, 'base_estimator__min_samples_leaf': 2, 'base_estimator__max_features': None, 'base_estimator__max_depth': 10}
Best Cross-Validation Score for AdaBoost Regressor: 0.1278552380961158
R2 Score on Test Set for AdaBoost Regressor: 0.2877110089748449


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np

# Load the data
file_path = 'C:\\Users\\V R N S Nikhil\\OneDrive\\Desktop\\4th_sem\\ML\\FINAL\\ML\\MathBert.xlsx'
data = pd.read_excel(file_path)

# Separate features and target
X = data.drop('output', axis=1)
y = data['output']

# Custom round function
def custom_round(value):
    integer_part = int(value)
    decimal_part = value - integer_part
    if decimal_part == 0.5:
        return value
    else:
        return round(value)

data['output'] = data['output'].apply(custom_round)
data['output'].value_counts()

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the feature values
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the parameter grid
param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (50, 50), (100, 50), (100, 100)],
    'activation': ['relu', 'tanh'],
    'solver': ['adam', 'sgd'],
    'alpha': [0.0001, 0.001, 0.01, 0.1],
    'learning_rate': ['constant', 'adaptive'],
}

# Initialize the MLP regressor
mlp_regressor = MLPRegressor(max_iter=300, random_state=42)

# Set up the randomized search with cross-validation
random_search = RandomizedSearchCV(estimator=mlp_regressor, param_distributions=param_grid, n_iter=20, cv=3, random_state=42, n_jobs=-1)

# Fit the randomized search model
random_search.fit(X_train_scaled, y_train)

# Get the best parameters and the best score
best_params = random_search.best_params_
best_score = random_search.best_score_

print("Best Parameters:", best_params)
print("Best Cross-Validation Score:", best_score)

# Evaluate on the test set
y_pred = random_search.predict(X_test_scaled)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Perform cross-validation to get the mean and std of RMSE
neg_mse_scores = cross_val_score(random_search.best_estimator_, X_train_scaled, y_train, scoring='neg_mean_squared_error', cv=3)
rmse_scores = np.sqrt(-neg_mse_scores)

rmse_mean = rmse_scores.mean()
rmse_std = rmse_scores.std()

print("R2 Score on Test Set:", r2)
print("RMSE on Test Set:", rmse)
print("Cross-Validation RMSE Mean:", rmse_mean)
print("Cross-Validation RMSE Std Dev:", rmse_std)

Best Parameters: {'solver': 'adam', 'learning_rate': 'constant', 'hidden_layer_sizes': (100, 100), 'alpha': 0.0001, 'activation': 'relu'}
Best Cross-Validation Score: 0.19958920805136682
R2 Score on Test Set: 0.4044178348407108
RMSE on Test Set: 0.9054122458655068
Cross-Validation RMSE Mean: 1.0441390233139205
Cross-Validation RMSE Std Dev: 0.022947655244567


In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score

# Load the data
file_path = 'C:\\Users\\V R N S Nikhil\\OneDrive\\Desktop\\4th_sem\\ML\\FINAL\\ML\\Auto.xlsx'
data = pd.read_excel(file_path)

# Separate features and target
X = data.drop('output', axis=1)
y = data['output']

def custom_round(value):
    integer_part = int(value)
    decimal_part = value - integer_part
    if decimal_part == 0.5:
        return value
    else:
        return round(value)

data['output'] = data['output'].apply(custom_round)
data['output'].value_counts()


# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the feature values
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the extended parameter grid
param_grid = {
    'criterion': ['squared_error'],
    'splitter': ['best'],
    'max_depth': [None, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    'min_samples_split': [2, 5, 10, 15, 20],
    'min_samples_leaf': [1, 2, 4, 6, 8, 10],
    'max_features': [None, 'auto', 'sqrt', 'log2'],
    'max_leaf_nodes': [None, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    'min_impurity_decrease': [0.0, 0.01, 0.02, 0.05, 0.1]
}

# Initialize the Decision Tree Regressor
dt_regressor = DecisionTreeRegressor(random_state=42)

# Set up the randomized search with cross-validation
random_search = RandomizedSearchCV(estimator=dt_regressor, param_distributions=param_grid, n_iter=50, cv=3, random_state=42, n_jobs=-1)

# Fit the randomized search model
random_search.fit(X_train_scaled, y_train)

# Get the best parameters and the best score
best_params = random_search.best_params_
best_score = random_search.best_score_

print("Best Parameters:", best_params)
print("Best Cross-Validation Score:", best_score)

# Evaluate on the test set
y_pred = random_search.predict(X_test_scaled)
r2 = r2_score(y_test, y_pred)
print("R2 Score on Test Set:", r2)

Best Parameters: {'splitter': 'best', 'min_samples_split': 2, 'min_samples_leaf': 10, 'min_impurity_decrease': 0.05, 'max_leaf_nodes': 80, 'max_features': 'log2', 'max_depth': None, 'criterion': 'squared_error'}
Best Cross-Validation Score: -0.005874197339122163
R2 Score on Test Set: -0.0071507654622853245


42 fits failed out of a total of 150.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
21 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\V R N S Nikhil\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\V R N S Nikhil\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\base.py", line 1145, in wrapper
    estimator._validate_params()
  File "c:\Users\V R N S Nikhil\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\base.py", line 638, in _validate_params
    validate_parameter_constraints(
  File "c:\Users\V R N S Nikhil\AppData

In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

# Load the dataset
file_path = 'C:\\Users\\V R N S Nikhil\\OneDrive\\Desktop\\4th_sem\\ML\\FINAL\\ML\\Auto.xlsx'
data = pd.read_excel(file_path)

# Separate features and target variable
X = data.drop('output', axis=1)
y = data['output']
def custom_round(value):
    integer_part = int(value)
    decimal_part = value - integer_part
    if decimal_part == 0.5:
        return value
    else:
        return round(value)

data['output'] = data['output'].apply(custom_round)
data['output'].value_counts()

# Handle missing values in the features and target
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)
y_imputed = SimpleImputer(strategy='mean').fit_transform(y.values.reshape(-1, 1)).ravel()

# Apply PCA to reduce dimensionality
pca = PCA(n_components=0.95)
scaler = StandardScaler()
X_reduced = scaler.fit_transform(pca.fit_transform(X_imputed))

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_reduced, y_imputed, test_size=0.2, random_state=42)

# Set up the Random Forest Regressor
rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)

# Train the model
rf_regressor.fit(X_train, y_train)

# Make predictions
y_pred_train = rf_regressor.predict(X_train)
y_pred_test = rf_regressor.predict(X_test)

# Calculate the R^2 score
r2_train = r2_score(y_train, y_pred_train)
r2_test = r2_score(y_test, y_pred_test)

print("Training R^2 score:", r2_train)
print("Testing R^2 score:", r2_test)


Training R^2 score: 0.8057284435398251
Testing R^2 score: -0.28406784523837536


In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor
from sklearn.metrics import r2_score

# Load the data
file_path = 'C:\\Users\\V R N S Nikhil\\OneDrive\\Desktop\\4th_sem\\ML\\FINAL\\ML\\Auto.xlsx'
data = pd.read_excel(file_path)

# Separate features and target
X = data.drop('output', axis=1)
y = data['output']
def custom_round(value):
    integer_part = int(value)
    decimal_part = value - integer_part
    if decimal_part == 0.5:
        return value
    else:
        return round(value)

data['output'] = data['output'].apply(custom_round)
data['output'].value_counts()


# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the feature values
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the extended parameter grid for XGBoost Regressor
param_grid_xgb = {
    'n_estimators': [100, 200, 300, 400, 500],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'max_depth': [3, 5, 7, 10],
    'min_child_weight': [1, 3, 5],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0],
    'gamma': [0, 0.1, 0.2, 0.3],
    'reg_alpha': [0, 0.01, 0.1, 1],
    'reg_lambda': [0, 0.01, 0.1, 1]
}

# Initialize the XGBoost Regressor
xgb_regressor = XGBRegressor(objective='reg:squarederror', random_state=42)

# Set up the randomized search with cross-validation for XGBoost Regressor
random_search_xgb = RandomizedSearchCV(estimator=xgb_regressor, param_distributions=param_grid_xgb, n_iter=50, cv=3, random_state=42, n_jobs=-1)

# Fit the randomized search model for XGBoost Regressor
random_search_xgb.fit(X_train_scaled, y_train)

# Get the best parameters and the best score for XGBoost Regressor
best_params_xgb = random_search_xgb.best_params_
best_score_xgb = random_search_xgb.best_score_

print("Best Parameters for XGBoost Regressor:", best_params_xgb)
print("Best Cross-Validation Score for XGBoost Regressor:", best_score_xgb)

# Evaluate the XGBoost Regressor on the test set
y_pred_xgb = random_search_xgb.predict(X_test_scaled)
r2_xgb = r2_score(y_test, y_pred_xgb)
print("R2 Score on Test Set for XGBoost Regressor:", r2_xgb)

Best Parameters for XGBoost Regressor: {'subsample': 0.6, 'reg_lambda': 0, 'reg_alpha': 0, 'n_estimators': 200, 'min_child_weight': 3, 'max_depth': 5, 'learning_rate': 0.05, 'gamma': 0.3, 'colsample_bytree': 1.0}
Best Cross-Validation Score for XGBoost Regressor: 0.12655874256629293
R2 Score on Test Set for XGBoost Regressor: 0.2915193329406416
