In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR
# DecisionTreeRegressor lives in sklearn.tree; importing it from sklearn.ensemble raises ImportError.
from sklearn.tree import DecisionTreeRegressor
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.models import Sequential

In [22]:
from google.colab import drive

# Mount Google Drive and read the CSV through the absolute mount path, so the
# load does not depend on the kernel's current working directory.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)
df = pd.read_csv(f'{DRIVE_MOUNT_POINT}/My Drive/Colab Notebooks/Stock Price Prediction RNN/SBIN.csv')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [23]:
# Preview the first rows of the frame loaded in the previous cell.
# (Alternative local source kept for reference: pd.read_csv('/content/SBIN.NS.csv'))
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,01-01-1996,18.691147,18.978922,18.540184,18.82324,12.409931,43733533.0
1,02-01-1996,18.894005,18.964767,17.738192,18.224106,12.014931,56167280.0
2,03-01-1996,18.327892,18.568489,17.643839,17.738192,11.694577,68296318.0
3,04-01-1996,17.502312,17.832542,17.223972,17.676863,11.654142,86073880.0
4,05-01-1996,17.738192,17.785366,17.459852,17.577793,11.588827,76613039.0


In [24]:
# Drop the 'Date' and 'Adj Close' columns.
# Reassigning (instead of inplace=True) with errors='ignore' keeps this cell
# re-runnable: a second execution no longer raises KeyError for columns that
# are already gone.
df = df.drop(columns=['Date', 'Adj Close'], errors='ignore')

In [25]:
# Check the remaining columns after the drop in the previous cell.
df.head()

Unnamed: 0,Open,High,Low,Close,Volume
0,18.691147,18.978922,18.540184,18.82324,43733533.0
1,18.894005,18.964767,17.738192,18.224106,56167280.0
2,18.327892,18.568489,17.643839,17.738192,68296318.0
3,17.502312,17.832542,17.223972,17.676863,86073880.0
4,17.738192,17.785366,17.459852,17.577793,76613039.0


In [26]:
# Handle missing values: replace each missing entry with its column's mean.
# fit_transform returns a plain array, so the result is wrapped back into a
# DataFrame with the original column names.
# NOTE(review): the imputer is fitted on the full frame before the train/test
# split below, so test rows contribute to the fill values — confirm this is
# acceptable for the reported metrics.
imputer = SimpleImputer(strategy='mean')
df = pd.DataFrame(imputer.fit_transform(df), columns=df.columns)

In [27]:
# Predictors are the Open, High, Low and Volume columns; the target is Close.
feature_columns = ['Open', 'High', 'Low', 'Volume']
X = df[feature_columns]
y = df['Close']

In [28]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Min-Max scaling: learn the per-feature range from the training split only,
# then apply that same mapping to both splits.
scaler = MinMaxScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [29]:
# Dimensions of the training feature matrix, as (rows, columns).
X_train.shape

(5659, 4)

In [30]:
# Dimensions of the held-out feature matrix, as (rows, columns).
X_test.shape

(1415, 4)

In [31]:
# Number of training targets; should equal the row count of X_train.
y_train.shape

(5659,)

In [32]:
# Number of held-out targets; should equal the row count of X_test.
y_test.shape

(1415,)

In [None]:
# Function to evaluate and print RMSE, MAE, and MAPE
def evaluate_model(model, X_test, y_test):
    """Score a fitted regressor on held-out data and report the errors.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(X)``.
    X_test : array-like
        Features handed to ``model.predict``.
    y_test : array-like
        True target values compared against the predictions.

    Returns
    -------
    tuple
        ``(rmse, mae, mape)``. RMSE is the square root of
        ``mean_squared_error``; MAE and MAPE come straight from
        ``mean_absolute_error`` and ``mean_absolute_percentage_error``.

    Side effects
    ------------
    Prints the three values, one per line, followed by a blank line.
    """
    y_hat = model.predict(X_test)
    scores = (
        np.sqrt(mean_squared_error(y_test, y_hat)),
        mean_absolute_error(y_test, y_hat),
        mean_absolute_percentage_error(y_test, y_hat),
    )
    rmse, mae, mape = scores

    print(f"RMSE: {rmse}")
    print(f"MAE: {mae}")
    print(f"MAPE: {mape}\n")

    return scores


In [None]:
# Results table in column form: one independent list per column, appended to
# by each model's evaluation cell.
metrics = {column: [] for column in ("Model", "RMSE", "MAE", "MAPE")}

## 1. LINEAR REGRESSION

In [33]:
# Create a linear regression model (ordinary least squares, default settings).
model1 = LinearRegression()

In [34]:
# Peek at the first few training targets (Close values).
y_train.head()

5286    257.350006
3408    129.464996
5477    279.350006
6906    588.500000
530      21.644367
Name: Close, dtype: float64

In [35]:
# Train the model on the Min-Max-scaled features. The evaluation cell scores
# this model on X_test_scaled, so fitting on the unscaled X_train would make
# it predict on inputs from a different scale than it was trained on.
model1.fit(X_train_scaled, y_train)

In [36]:
# Score the linear model on the scaled test features, then log one value per
# metrics column.
rmse, mae, mape = evaluate_model(model1, X_test_scaled, y_test)
for _column, _value in (
    ("Model", "Linear Regressor"),
    ("RMSE", rmse),
    ("MAE", mae),
    ("MAPE", mape),
):
    metrics[_column].append(_value)

## 2. SVR

In [40]:
# Create an SVR model with the library's default kernel and hyperparameters.
model2 = SVR()

In [42]:
# Train the model on the Min-Max-scaled features. The evaluation cell scores
# this model on X_test_scaled, so fitting on the unscaled X_train would make
# it predict on inputs from a different scale than it was trained on.
model2.fit(X_train_scaled, y_train)

In [43]:
# Score the SVR on the scaled test features, then log one value per metrics
# column.
rmse, mae, mape = evaluate_model(model2, X_test_scaled, y_test)
for _column, _value in (
    ("Model", "SVR"),
    ("RMSE", rmse),
    ("MAE", mae),
    ("MAPE", mape),
):
    metrics[_column].append(_value)

## 3. Random Forest

In [46]:
# random_state pins the forest's internal randomness so the reported metrics
# are reproducible; 42 matches the seed used for the train/test split.
model3 = RandomForestRegressor(random_state=42)

In [48]:
# Train the model on the Min-Max-scaled features. The evaluation cell scores
# this model on X_test_scaled, so fitting on the unscaled X_train would make
# it predict on inputs from a different scale than it was trained on.
model3.fit(X_train_scaled, y_train)

In [49]:
# Score the random forest on the scaled test features, then log one value per
# metrics column.
rmse, mae, mape = evaluate_model(model3, X_test_scaled, y_test)
for _column, _value in (
    ("Model", "Random Forest"),
    ("RMSE", rmse),
    ("MAE", mae),
    ("MAPE", mape),
):
    metrics[_column].append(_value)

## 4. Gradient Boosting Models (GBM)

In [52]:
# random_state pins the estimator's internal randomness so the reported
# metrics are reproducible; 42 matches the seed used for the train/test split.
model4 = GradientBoostingRegressor(random_state=42)

In [54]:
# Train the model on the Min-Max-scaled features. The evaluation cell scores
# this model on X_test_scaled, so fitting on the unscaled X_train would make
# it predict on inputs from a different scale than it was trained on.
model4.fit(X_train_scaled, y_train)

In [55]:
# Score the gradient-boosting model on the scaled test features, then log one
# value per metrics column.
rmse, mae, mape = evaluate_model(model4, X_test_scaled, y_test)
for _column, _value in (
    ("Model", "GBM"),
    ("RMSE", rmse),
    ("MAE", mae),
    ("MAPE", mape),
):
    metrics[_column].append(_value)

## 5. Extreme Gradient Boosting (XGBoost)

In [58]:
# xgboost is imported in this cell rather than in the import cell at the top
# of the notebook.
import xgboost as xgb
# Create an XGBoost regressor with the library's default hyperparameters.
model5 = xgb.XGBRegressor()

In [60]:
# Train the model on the Min-Max-scaled features. The evaluation cell scores
# this model on X_test_scaled, so fitting on the unscaled X_train would make
# it predict on inputs from a different scale than it was trained on.
model5.fit(X_train_scaled, y_train)

In [None]:
# Score the XGBoost model on the scaled test features, then log one value per
# metrics column.
rmse, mae, mape = evaluate_model(model5, X_test_scaled, y_test)
for _column, _value in (
    ("Model", "XGBoost"),
    ("RMSE", rmse),
    ("MAE", mae),
    ("MAPE", mape),
):
    metrics[_column].append(_value)

## 6. AdaBoostRegressor

In [64]:
# random_state pins the estimator's internal randomness so the reported
# metrics are reproducible; 42 matches the seed used for the train/test split.
model6 = AdaBoostRegressor(random_state=42)

In [66]:
# Train the model on the Min-Max-scaled features. The evaluation cell scores
# this model on X_test_scaled, so fitting on the unscaled X_train would make
# it predict on inputs from a different scale than it was trained on.
model6.fit(X_train_scaled, y_train)

In [67]:
# Score the AdaBoost model on the scaled test features, then log one value per
# metrics column.
rmse, mae, mape = evaluate_model(model6, X_test_scaled, y_test)
for _column, _value in (
    ("Model", "AdaBoost Regressor"),
    ("RMSE", rmse),
    ("MAE", mae),
    ("MAPE", mape),
):
    metrics[_column].append(_value)

## 7. Decision Tree

In [70]:
# random_state pins the tree's internal randomness so the reported metrics
# are reproducible; 42 matches the seed used for the train/test split.
model7 = DecisionTreeRegressor(random_state=42)

In [72]:
# Train the model on the Min-Max-scaled features. The evaluation cell scores
# this model on X_test_scaled, so fitting on the unscaled X_train would make
# it predict on inputs from a different scale than it was trained on.
model7.fit(X_train_scaled, y_train)

In [73]:
# Score the decision tree on the scaled test features, then log one value per
# metrics column.
rmse, mae, mape = evaluate_model(model7, X_test_scaled, y_test)
for _column, _value in (
    ("Model", "Decision Tree"),
    ("RMSE", rmse),
    ("MAE", mae),
    ("MAPE", mape),
):
    metrics[_column].append(_value)

## 8. KNeighborsRegressor (KNN)

In [76]:
# Create a KNN regressor with the library's default neighbour count and metric.
model8 = KNeighborsRegressor()

In [78]:
# Train the model on the Min-Max-scaled features. The evaluation cell scores
# this model on X_test_scaled, so fitting on the unscaled X_train would make
# it predict on inputs from a different scale than it was trained on.
model8.fit(X_train_scaled, y_train)

In [79]:
# Score the KNN model on the scaled test features, then log one value per
# metrics column.
rmse, mae, mape = evaluate_model(model8, X_test_scaled, y_test)
for _column, _value in (
    ("Model", "KNN"),
    ("RMSE", rmse),
    ("MAE", mae),
    ("MAPE", mape),
):
    metrics[_column].append(_value)

## 9. Artificial Neural Networks (ANN)

In [84]:
# Create an ANN model: a feed-forward stack with two ReLU hidden layers
# (32 then 16 units) and a single linear output unit for the regression target.
model9 = Sequential()
# The input width is taken from the number of feature columns in X_train.
model9.add(Dense(32, activation='relu', input_shape=(X_train.shape[1],)))
model9.add(Dense(16, activation='relu'))
model9.add(Dense(1, activation='linear'))

In [85]:
# Compile the model: mean squared error as the training loss, Adam as optimizer.
model9.compile(loss='mean_squared_error', optimizer='adam')

In [86]:
# Train the model on the Min-Max-scaled features for 100 epochs in batches of
# 32; verbose=0 silences the per-epoch log.
model9.fit(X_train_scaled, y_train, epochs=100, batch_size=32, verbose=0)

<keras.src.callbacks.History at 0x7ca68076d540>

In [87]:
# Score the ANN on the scaled test features, then log one value per metrics
# column.
rmse, mae, mape = evaluate_model(model9, X_test_scaled, y_test)
for _column, _value in (
    ("Model", "ANN"),
    ("RMSE", rmse),
    ("MAE", mae),
    ("MAPE", mape),
):
    metrics[_column].append(_value)



## 10. LSTM (Long Short-Term Memory)

In [92]:
# Reshape the input data for LSTM: build overlapping windows of n_steps rows.
n_features = X_train_scaled.shape[1]
# Window length: each sample fed to the LSTM spans this many consecutive rows.
n_steps = 10
# Number of complete windows that fit in each split (rows - n_steps + 1).
n_samples_train = X_train_scaled.shape[0] - n_steps + 1
n_samples_test = X_test_scaled.shape[0] - n_steps + 1

# Window i covers rows i .. i + n_steps - 1, giving arrays of shape
# (n_samples, n_steps, n_features).
# NOTE(review): the windows follow the row order of the scaled arrays. The
# train/test split above is called without shuffle=False, so these rows are
# presumably not consecutive trading days — confirm before trusting the
# sequence model's results.
X_train_reshaped = np.array([X_train_scaled[i:i+n_steps, :] for i in range(n_samples_train)])
X_test_reshaped = np.array([X_test_scaled[i:i+n_steps, :] for i in range(n_samples_test)])


In [93]:
# Create an LSTM model: one 64-unit LSTM layer over (n_steps, n_features)
# windows, followed by a single-unit dense output.
# Note the network is bound to the name `model`, unlike model1..model9 above.
model = Sequential()
model.add(LSTM(64, activation='relu', input_shape=(n_steps, n_features)))
model.add(Dense(1))


In [94]:
# Compile the model: mean squared error as the training loss, Adam as optimizer.
model.compile(loss='mean_squared_error', optimizer='adam')


In [95]:
# Fit the LSTM on the windowed training array. The first n_steps - 1 targets
# are skipped so each window is paired with the target of its last row.
model.fit(
    X_train_reshaped,
    y_train.iloc[n_steps - 1:],
    epochs=100,
    batch_size=32,
    verbose=0,
)

<keras.src.callbacks.History at 0x7ca680459cc0>

In [96]:
# The LSTM built above is bound to `model` (no `model10` is ever defined), and
# it consumes (n_steps, n_features) windows, so it must be scored on the
# windowed test array rather than the 2-D X_test_scaled. The first
# n_steps - 1 targets have no complete window and are dropped, mirroring how
# the training targets are sliced, so predictions and targets line up 1:1.
rmse, mae, mape = evaluate_model(model, X_test_reshaped, y_test.iloc[n_steps - 1:])
metrics["Model"].append("LSTM")
metrics["RMSE"].append(rmse)
metrics["MAE"].append(mae)
metrics["MAPE"].append(mape)



In [None]:
# Create a DataFrame for metrics
metrics_df = pd.DataFrame(metrics)

# RMSE Plot
# Each metric is drawn in its own cell, so each cell gets its own figure: a
# shared 1x3 grid does not survive plt.show(), which left the later panels in
# fresh default-size figures. The metric belongs on the y-axis; the x-axis
# holds the model names, rotated so the longer names stay legible.
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(metrics_df['Model'], metrics_df['RMSE'], color='lightblue')
ax.set_xlabel('Model')
ax.set_ylabel('RMSE')
ax.set_title('RMSE for Different Models')
ax.tick_params(axis='x', rotation=45)
fig.tight_layout()
plt.show()

In [None]:
# MAE Plot
# Use a dedicated figure: plt.subplot(1, 3, 2) in a fresh cell would open a
# new default-size figure and squeeze the chart into one third of it. The
# metric belongs on the y-axis; the x-axis holds the model names.
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(metrics_df['Model'], metrics_df['MAE'], color='lightgreen')
ax.set_xlabel('Model')
ax.set_ylabel('MAE')
ax.set_title('MAE for Different Models')
ax.tick_params(axis='x', rotation=45)
fig.tight_layout()
plt.show()

In [None]:
# MAPE Plot
# Use a dedicated figure: plt.subplot(1, 3, 3) in a fresh cell would open a
# new default-size figure and squeeze the chart into one third of it. The
# metric belongs on the y-axis; the x-axis holds the model names.
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(metrics_df['Model'], metrics_df['MAPE'], color='salmon')
ax.set_xlabel('Model')
ax.set_ylabel('MAPE')
ax.set_title('MAPE for Different Models')
ax.tick_params(axis='x', rotation=45)
fig.tight_layout()
plt.show()

# Using LightGBM and CatBoost to Optimize Model Accuracy and Time Complexity

In [None]:
# Import necessary libraries
import lightgbm as lgb
from catboost import CatBoostRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, accuracy_score, precision_score, confusion_matrix, recall_score, f1_score

# Function to train and evaluate a model
def train_and_evaluate_model(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Parameters
    ----------
    model : object
        Estimator exposing ``fit(X, y)`` and ``predict(X)``; it is fitted
        in place.
    X_train, y_train : array-like
        Training features and targets.
    X_test, y_test : array-like
        Held-out features and targets.

    Returns
    -------
    tuple
        ``(rmse, mae, mape, accuracy, precision, confusion, recall, f1)``.
        The first three are regression errors of the predictions. The last
        five are classification scores computed between two boolean vectors:
        ``y_test > pred`` (used as the reference) and ``y_test > pred.round()``
        (used as the prediction).

    NOTE(review): both boolean vectors are derived from the same predictions
    and differ only by rounding, so the classification scores describe how
    rounding changes the comparison rather than the quality of the model —
    confirm this is the intended definition.
    """
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, pred))
    mae = mean_absolute_error(y_test, pred)
    mape = mean_absolute_percentage_error(y_test, pred)

    # Build the two boolean vectors once and reuse them for every score.
    above_raw = y_test > pred
    above_rounded = y_test > pred.round()
    accuracy, precision, confusion, recall, f1 = (
        score(above_raw, above_rounded)
        for score in (accuracy_score, precision_score, confusion_matrix, recall_score, f1_score)
    )
    return rmse, mae, mape, accuracy, precision, confusion, recall, f1

# Train and evaluate a default LightGBM regressor on the unscaled splits and
# print the full metrics tuple returned by train_and_evaluate_model.
model_lightgbm = lgb.LGBMRegressor()
metrics_lightgbm = train_and_evaluate_model(model_lightgbm, X_train, X_test, y_train, y_test)
print("LightGBM Metrics:", metrics_lightgbm)


# Train and evaluate a CatBoost regressor (verbose=0 silences its training
# log) on the unscaled splits and print the returned metrics tuple.
model_catboost = CatBoostRegressor(verbose=0)
metrics_catboost = train_and_evaluate_model(model_catboost, X_train, X_test, y_train, y_test)
print("CatBoost Metrics:", metrics_catboost)