In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score,mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import GridSearchCV

In [15]:

cubic_df = pd.read_csv('/Users/muthuraj/Downloads/muthuraj-models(gb,knn)/Cubic.csv')
features = cubic_df.drop(columns=['DATE','GDP'])
target = cubic_df['GDP']

scaling = StandardScaler()
features_scaled = scaling.fit_transform(features)

X_train, X_test, y_train, y_test = train_test_split(
    features_scaled, target, test_size=0.3, random_state=42
)


In [18]:

gradient_boosting = GradientBoostingRegressor(random_state=42)
knn = KNeighborsRegressor()

gradient_boosting.fit(X_train, y_train)
knn.fit(X_train, y_train)

gb_prediction = gradient_boosting.predict(X_test)
knn_prediction = knn.predict(X_test)

gb_r2 = r2_score(y_test, gb_prediction)
knn_r2 = r2_score(y_test, knn_prediction)

gb_mse = mean_squared_error(y_test, gb_prediction)
gb_rmse = np.sqrt(gb_mse)
gb_mae = mean_absolute_error(y_test, gb_prediction)

knn_mse = mean_squared_error(y_test, knn_prediction)
knn_rmse = np.sqrt(knn_mse)
knn_mae = mean_absolute_error(y_test, knn_prediction)

print(f"Gradient Boosting Evaluation on Test Set:")
print(f"R²: {gb_r2:.4f}")
print(f"MSE: {gb_mse:.4f}")
print(f"RMSE: {gb_rmse:.4f}")
print(f"MAE: {gb_mae:.4f}")

print(f"KNN Evaluation on Test Set:")
print(f"R²: {knn_r2:.4f}")
print(f"MSE: {knn_mse:.4f}")
print(f"RMSE: {knn_rmse:.4f}")
print(f"MAE: {knn_mae:.4f}")

Gradient Boosting Evaluation on Test Set:
R²: 0.9999
MSE: 3311.3441
RMSE: 57.5443
MAE: 38.5108
KNN Evaluation on Test Set:
R²: 1.0000
MSE: 1280.4465
RMSE: 35.7833
MAE: 15.1341


In [17]:
#cross validation 
cross_validation_score_gb = cross_val_score(gradient_boosting, X_train, y_train, cv=5, scoring='r2')
cross_validation_score_knn = cross_val_score(knn, X_train, y_train, cv=5, scoring='r2')

print(f"Gradient boosting cross validation scores for R^2: {cross_validation_score_gb}")
print(f"gb mean cross validation score R^2: {cross_validation_score_gb.mean():.4f}")

print(f"knn cross validation scores for R^2: {cross_validation_score_knn}")
print(f"knn mean cross validation score R^2: {cross_validation_score_knn.mean():.4f}")

Gradient boosting cross validation scores for R^2: [0.99993516 0.99994102 0.99993487 0.99990083 0.99994866]
gb mean cross validation score R^2: 0.9999
knn cross validation scores for R^2: [0.99997338 0.99997262 0.99995834 0.99995395 0.99997437]
knn mean cross validation score R^2: 1.0000


In [19]:
from sklearn.model_selection import GridSearchCV


param_grid_gb = {
    'n_estimators': [100, 200, 300, 400, 500], 
    'learning_rate': [0.001, 0.005, 0.01, 0.05, 0.1], 
    'max_depth': [3, 5, 7, 10]  
}

param_grid_knn = {
    'n_neighbors': [3, 5, 7, 9, 11, 13],  
    'weights': ['uniform', 'distance'],  
    'p': [1, 2, 3]  
}


grid_search_gb = GridSearchCV(estimator=GradientBoostingRegressor(random_state=42),
                              param_grid=param_grid_gb,
                              cv=5,  
                              scoring='neg_mean_squared_error',
                              n_jobs=-1)  


grid_search_knn = GridSearchCV(estimator=KNeighborsRegressor(),
                               param_grid=param_grid_knn,
                               cv=5,  
                               scoring='neg_mean_squared_error',
                               n_jobs=-1)


grid_search_gb.fit(X_train, y_train)
print(f"Best parameters for Gradient Boosting: {grid_search_gb.best_params_}")
print(f"Best Negative MSE for Gradient Boosting: {grid_search_gb.best_score_:.4f}")


grid_search_knn.fit(X_train, y_train)
print(f"Best parameters for KNN: {grid_search_knn.best_params_}")
print(f"Best Negative MSE for KNN: {grid_search_knn.best_score_:.4f}")


best_gb = grid_search_gb.best_estimator_
best_knn = grid_search_knn.best_estimator_


gb_best_pred = best_gb.predict(X_test)
knn_best_pred = best_knn.predict(X_test)



gb_mse_best = mean_squared_error(y_test, gb_best_pred)
gb_rmse_best = np.sqrt(gb_mse_best)
gb_mae_best = mean_absolute_error(y_test, gb_best_pred)

print(f"\nBest Gradient Boosting Evaluation on Test Set:")
print(f"MSE: {gb_mse_best:.4f}")
print(f"RMSE: {gb_rmse_best:.4f}")
print(f"MAE: {gb_mae_best:.4f}")


knn_mse_best = mean_squared_error(y_test, knn_best_pred)
knn_rmse_best = np.sqrt(knn_mse_best)
knn_mae_best = mean_absolute_error(y_test, knn_best_pred)

print(f"\nBest KNN Evaluation on Test Set:")
print(f"MSE: {knn_mse_best:.4f}")
print(f"RMSE: {knn_rmse_best:.4f}")
print(f"MAE: {knn_mae_best:.4f}")


Best parameters for Gradient Boosting: {'learning_rate': 0.1, 'max_depth': 10, 'n_estimators': 500}
Best Negative MSE for Gradient Boosting: -56.3433
Best parameters for KNN: {'n_neighbors': 3, 'p': 1, 'weights': 'distance'}
Best Negative MSE for KNN: -145.5332

Best Gradient Boosting Evaluation on Test Set:
MSE: 37.7145
RMSE: 6.1412
MAE: 1.5277

Best KNN Evaluation on Test Set:
MSE: 99.4683
RMSE: 9.9734
MAE: 3.3829


In [21]:
from sklearn.metrics import r2_score

gb_r2_best = r2_score(y_test, gb_best_pred)

knn_r2_best = r2_score(y_test, knn_best_pred)

print(f"\nBest Gradient Boosting R² on Test Set: {gb_r2_best:.4f}")
print(f"Best KNN R² on Test Set: {knn_r2_best:.4f}")



Best Gradient Boosting R² on Test Set: 1.0000
Best KNN R² on Test Set: 1.0000
