In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.impute import SimpleImputer
from sklearn.ensemble import VotingRegressor

# Read the BankNifty history exported as CSV
data = pd.read_csv('/content/BankNifty Updated Data.csv')

# 'Range' is the regression target; every other column is a candidate feature
y = data['Range']

# Replace the raw 'Date' column with a numeric 'Day' feature
# (Series.dt.weekday encoding: Monday=0 ... Sunday=6)
X = (
    data
    .drop(columns=['Range'])
    .assign(Day=lambda frame: pd.to_datetime(frame['Date']).dt.weekday)
    .drop(columns=['Date'])
)

# Hold out 30% of the rows for evaluation; the fixed seed makes the split reproducible
# NOTE(review): rows look like dated market sessions — confirm that a shuffled
# split (train_test_split's default) rather than a chronological one is intended
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Hyperparameter tuning for Random Forest
rf = RandomForestRegressor(random_state=42)
param_grid_rf = {
    'n_estimators': [50, 100, 150],
    'max_depth': [10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2']
}
# Exhaustive 5-fold search over every combination in the grid
# (3*3*3*3*2 = 162 candidates, i.e. 810 cross-validation fits).
# The fits are independent of one another, so n_jobs=-1 spreads them over all
# available CPU cores; the fixed random_state on the estimator keeps the
# selected parameters the same as a serial run.
# No `scoring` is passed, so candidates are ranked by the regressor's default
# score (R^2), not by the MSE reported later in this notebook.
rf_cv = GridSearchCV(rf, param_grid_rf, cv=5, n_jobs=-1)
rf_cv.fit(X_train, y_train)
rf_best_params = rf_cv.best_params_

# Hyperparameter tuning for Gradient Boosting
gb = GradientBoostingRegressor(random_state=42)
param_grid_gb = {
    'n_estimators': [100, 200, 500],
    'max_depth': [3, 5, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2']
}
# Exhaustive 5-fold search over every combination in the grid
# (3*3*3*3*2 = 162 candidates, i.e. 810 cross-validation fits, some with 500
# boosting stages). The fits are independent of one another, so n_jobs=-1
# spreads them over all available CPU cores; the fixed random_state on the
# estimator keeps the selected parameters the same as a serial run.
# No `scoring` is passed, so candidates are ranked by the regressor's default
# score (R^2), not by the MSE reported later in this notebook.
gb_cv = GridSearchCV(gb, param_grid_gb, cv=5, n_jobs=-1)
gb_cv.fit(X_train, y_train)
gb_best_params = gb_cv.best_params_

# Instantiate each regressor with its tuned hyperparameters and fit it on the
# training split (fit() returns the estimator itself, so the calls chain)
rf_model = RandomForestRegressor(random_state=42, **rf_best_params).fit(X_train, y_train)
gb_model = GradientBoostingRegressor(random_state=42, **gb_best_params).fit(X_train, y_train)

# Combine both regressors in a VotingRegressor and fit it on the same split
# NOTE(review): VotingRegressor fits its own copies of the members, so each
# model is effectively trained twice here — confirm the standalone fits above
# are still wanted
ensemble_model = VotingRegressor(estimators=[('rf', rf_model), ('gb', gb_model)])
ensemble_model.fit(X_train, y_train)

# Predict the held-out rows with each individual model and with the ensemble
rf_y_pred = rf_model.predict(X_test)
gb_y_pred = gb_model.predict(X_test)
ensemble_model_y_pred = ensemble_model.predict(X_test)

# Mean squared error against the true test targets (lower is better)
rf_mse = mean_squared_error(y_true=y_test, y_pred=rf_y_pred)
gb_mse = mean_squared_error(y_true=y_test, y_pred=gb_y_pred)
ensemble_mse = mean_squared_error(y_true=y_test, y_pred=ensemble_model_y_pred)

# Report the three scores
print("Random Forest Mean Squared Error:", rf_mse)
print("Gradient Boosting Mean Squared Error:", gb_mse)
print("Ensemble Mean Squared Error:", ensemble_mse)

Random Forest Mean Squared Error: 76235.36971047196
Gradient Boosting Mean Squared Error: 65554.25929530173
Ensemble Mean Squared Error: 69089.49989305991


In [None]:
# Feature values for the session to be scored. The keys must be the same
# feature columns the models were trained on (the CSV columns without 'Range'
# and 'Date', plus the derived 'Day').
next_day_data = pd.DataFrame({
    'Open': [42819.0],
    'High': [43037.40],
    'Low': [42750.20],
    'Close': [43000.0],
    'Shares Traded': [238716537],
    # NOTE(review): identical to 'Shares Traded' above — looks like a
    # copy-paste; confirm the real turnover figure before trusting the output
    'Turnover (Rs. Cr)': [238716537],
    # Same encoding as Series.dt.weekday used for training (Monday=0, so 4 is Friday)
    'Day': [4],
})

# Put the columns in the exact order the models were fitted on, so the result
# does not depend on the order the dict above happens to be written in.
# A feature missing from the dict raises KeyError here rather than failing
# inside predict().
next_day_data = next_day_data[X_train.columns]

# Use the trained ensemble to predict the range for that session
next_day_range = ensemble_model.predict(next_day_data)

# Print the predicted range for the next day
print("Predicted range for next day (Friday):", next_day_range[0])

Predicted range for next day (Friday): 785.4836784064761
