In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.preprocessing import StandardScaler

# Train a random-forest regressor that predicts the next row's closing price
# from the current row's features, then report hold-out metrics.

# Raw string: the Windows path contains backslashes that would otherwise be
# parsed as (invalid) escape sequences such as "\M" and "\D".
data = pd.read_csv(
    r'D:\Master_Folder\Data Science Course\Projects\StockMarket\stock_data\SUZLON.NS_2023-01-01_to_2024-11-21_ML.csv'
)

data['Date'] = pd.to_datetime(data['Date'], dayfirst=True)
data.set_index('Date', inplace=True)

# Target: the 'Close' value of the following row.
# NOTE(review): this, and the positional split below, assume the rows are
# already in ascending date order -- confirm against the CSV.
data['Next_1_day_close'] = data['Close'].shift(-1)

# Drops every row containing a missing value in any column; this includes the
# final row, whose shifted target is NaN.
data.dropna(inplace=True)

features = ['Close', 'Upward_Downward_Probability', 'Temporal_Features', 'Cluster', 'Anomaly']
target = 'Next_1_day_close'

# Positional hold-out split: first 70% of rows for training, the rest for testing.
train_size = int(len(data) * 0.7)
train_data = data.iloc[:train_size]
test_data = data.iloc[train_size:]

X_train = train_data[features]
y_train = train_data[target]

X_test = test_data[features]
y_test = test_data[target]

# Fit the scaler on the training rows only, then apply the same transform to
# the test rows so no test statistics influence the scaling.
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

param_grid = {
    'n_estimators': [50, 75, 100],
    'max_features': [0.2, 0.6, 1.0],
    'max_samples': [0.5, 0.75, 1.0],
}

# Fixed seed so the selected hyperparameters and the reported metrics are
# reproducible between runs.
rf = RandomForestRegressor(random_state=42)

# An integer `cv` means un-shuffled K-fold for regressors, which would validate
# on rows that come before part of the training rows. TimeSeriesSplit keeps
# every validation fold strictly after its training fold, consistent with the
# hold-out split above.
grid_search = GridSearchCV(
    rf,
    param_grid,
    cv=TimeSeriesSplit(n_splits=3),
    scoring='r2',
    verbose=1,
)
grid_search.fit(X_train_scaled, y_train)

# With the default refit=True this is the best configuration refitted on the
# whole training set.
best_rf = grid_search.best_estimator_

y_pred = best_rf.predict(X_test_scaled)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
# Mean absolute percentage error, expressed in percent.
mape = (np.abs((y_test - y_pred) / y_test).mean()) * 100

print("Best Parameters:", grid_search.best_params_)
print(f"R²: {r2:.4f}")
print(f"MAPE: {mape:.2f}%")
print(f"MSE: {mse:.4f}")

# Bare expression: shown as the cell output when run in a notebook. Values are
# in the same order as `features`.
best_rf.feature_importances_

