In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import StackingRegressor
from xgboost import XGBRegressor

# Load your historical data
# Raw string literal: the Windows path contains backslash sequences (\m, \C, \B)
# that are not valid escapes; in a normal string Python flags them as invalid
# escape sequences (SyntaxWarning on recent versions). The resulting path text
# is unchanged.
csv_file_path = r"E:\mini\Commodity Data\Banking giants\BAC.csv"
df = pd.read_csv(csv_file_path)
df['Date'] = pd.to_datetime(df['Date'])

# Data Cleaning and Preprocessing
print("Initial Data Shape:", df.shape)

# Remove duplicate rows (reassignment instead of inplace=True; same result)
df = df.drop_duplicates()
print("Data Shape after Removing Duplicates:", df.shape)

# Remove rows with missing values
df = df.dropna()
print("Data Shape after Removing Null Values:", df.shape)

# Feature Engineering
# You can perform feature engineering here by creating new features or transformations.

# Feature Scaling
# 'Close' is scaled together with the input columns and deliberately kept as the
# LAST column: it is sliced off as the target below, and a (features + target)
# matrix can later be passed to scaler.inverse_transform to recover prices.
# Previously 'Close' was missing from this list, so the "target" was Volume.
# NOTE: the scaler is fitted on every row, including the rows held out below.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df[['Open', 'High', 'Low', 'Volume', 'Close']])

# Separate the inputs from the target
X = scaled_data[:, :-1]  # Open, High, Low, Volume
y = scaled_data[:, -1]   # Close column as the target variable

# Train-test split
# shuffle=False preserves row order, so the test set is exactly the last 20% of
# df's rows. The plotting code labels test points with df['Date'].iloc[-len(y):],
# which is only correct when the test rows are that trailing slice.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Hyperparameter Tuning for Random Forest
# Exhaustive search over 3 x 3 x 3 x 3 = 81 parameter combinations, each
# evaluated with 5-fold cross-validation on the training split, using all cores.
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

rf = RandomForestRegressor(random_state=42)
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, n_jobs=-1)
# fit() returns the fitted search object, so the winning estimator can be read
# straight off the call.
best_rf = grid_search.fit(X_train, y_train).best_estimator_

# Ensemble Methods (AdaBoost, Gradient Boosting, XGBoost)
ada_boost = AdaBoostRegressor(random_state=42)
grad_boost = GradientBoostingRegressor(random_state=42)
xgboost = XGBRegressor(random_state=42)

# Stacking Ensemble
# Base learners whose predictions feed the final estimator.
estimators = [
    ('Random Forest', best_rf),
    ('AdaBoost', ada_boost),
    ('Gradient Boosting', grad_boost),
    ('XGBoost', xgboost)
]

# The meta-learner is seeded like every other estimator in this notebook;
# without random_state its forest differs on each run and the reported
# stacking metrics are not reproducible.
stacking_regressor = StackingRegressor(
    estimators=estimators,
    final_estimator=RandomForestRegressor(random_state=42),
)

# Fit the stacking regressor
stacking_regressor.fit(X_train, y_train)

# Evaluate the models (Random Forest and Stacking)
def evaluate_model(model, X, y):
    """Score a fitted model on the given inputs and targets.

    Args:
        model: Fitted estimator exposing ``predict``.
        X: Inputs passed to ``model.predict``.
        y: True target values compared against the predictions.

    Returns:
        tuple: ``(rmse, r2)`` - the root of ``mean_squared_error`` and the
        ``r2_score`` of the predictions against ``y``.
    """
    predictions = model.predict(X)
    root_mse = np.sqrt(mean_squared_error(y, predictions))
    return root_mse, r2_score(y, predictions)

# Score both models on the held-out test split
rf_rmse, rf_r2 = evaluate_model(best_rf, X_test, y_test)
stacking_rmse, stacking_r2 = evaluate_model(stacking_regressor, X_test, y_test)

# Report the metrics, four decimal places each
print(f"Random Forest - RMSE: {rf_rmse:.4f}, R-squared: {rf_r2:.4f}")
print(f"Stacking Ensemble - RMSE: {stacking_rmse:.4f}, R-squared: {stacking_r2:.4f}")


In [None]:
# Section 2: Testing and Generating Graphical Outputs

import matplotlib.pyplot as plt
import plotly.subplots as sp
import plotly.graph_objects as go

# Function to plot actual vs. predicted prices interactively
def plot_actual_vs_predicted_interactive(model, X, y, title, description):
    """Show an interactive Plotly chart of actual vs. predicted target values.

    Both series are converted back to original units with the module-level
    ``scaler`` by appending the target as the last column of ``X`` and taking
    the last column of ``scaler.inverse_transform``. The scaler must therefore
    have been fitted on the columns of ``X`` followed by the target column.

    The x-axis uses the last ``len(y)`` entries of the module-level
    ``df['Date']``; the dates only line up with the plotted points when
    ``X``/``y`` are the trailing rows of ``df`` in the same order.

    Args:
        model: Fitted estimator exposing ``predict``.
        X: 2-D array of scaled inputs.
        y: 1-D array of scaled true target values, one per row of ``X``.
        title: Figure title.
        description: Text of the annotation placed on the chart.
    """
    y_pred = model.predict(X)

    # Inverse transform the scaled data. column_stack appends the 1-D target as
    # one extra column whatever the width of X, instead of assuming exactly four
    # input columns (the old reshape(-1, 4) broke for any other width).
    y_actual = scaler.inverse_transform(np.column_stack([X, y]))[:, -1]
    y_predicted = scaler.inverse_transform(np.column_stack([X, y_pred]))[:, -1]

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df['Date'].iloc[-len(y_actual):], y=y_actual, mode='lines', name='Actual Prices', line=dict(color='blue')))
    fig.add_trace(go.Scatter(x=df['Date'].iloc[-len(y_predicted):], y=y_predicted, mode='lines', name='Predicted Prices', line=dict(color='red')))
    fig.update_layout(
        title=title,
        xaxis_title='Date',
        yaxis_title='Close Price',
        legend_title='Legend',
        annotations=[
            dict(
                # Anchor the label at the middle date of the plotted range,
                # at 80% of the highest actual value.
                x=df['Date'].iloc[-len(y_actual) // 2],
                y=y_actual.max() * 0.8,
                xref="x",
                yref="y",
                text=description,
                showarrow=True,
                arrowhead=7,
                ax=0,
                ay=-40
            )
        ]
    )
    fig.show()

# Re-score both models on the test split so this cell prints its own metrics
rf_rmse, rf_r2 = evaluate_model(best_rf, X_test, y_test)
stacking_rmse, stacking_r2 = evaluate_model(stacking_regressor, X_test, y_test)

# Report the metrics, four decimal places each
print(f"Random Forest - RMSE: {rf_rmse:.4f}, R-squared: {rf_r2:.4f}")
print(f"Stacking Ensemble - RMSE: {stacking_rmse:.4f}, R-squared: {stacking_r2:.4f}")

# Actual vs. predicted chart for the tuned Random Forest
plot_actual_vs_predicted_interactive(
    best_rf,
    X_test,
    y_test,
    'Random Forest - Actual vs. Predicted Prices',
    description='Random Forest Model',
)

# Actual vs. predicted chart for the stacking ensemble
plot_actual_vs_predicted_interactive(
    stacking_regressor,
    X_test,
    y_test,
    'Stacking Ensemble - Actual vs. Predicted Prices',
    description='Stacking Ensemble Model',
)
