<a href="https://colab.research.google.com/github/Abhishek0-7/AI_Hackathon/blob/main/Stock_Price.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Read the MSFT price history from disk, retaining just the trading date and
# the closing price -- the only two columns the rest of the script uses.
stock_data = pd.read_csv("MSFT.csv")[['Date', 'Close']]

def str_to_datetime(date_str):
    """Convert a ``'YEAR-MONTH-DAY'`` string into a ``datetime.datetime``.

    The string is split on ``'-'`` and must yield exactly three integer
    fields; the returned value carries no time-of-day component (midnight).
    """
    fields = [int(piece) for piece in date_str.split('-')]
    year, month, day = fields
    return datetime.datetime(year, month, day)

# Parse the date strings and promote them to the frame's index so that rows
# can be selected by date label from here on.
stock_data['Date'] = stock_data['Date'].map(str_to_datetime)
stock_data = stock_data.set_index('Date')

# Rescale the closing prices with a MinMaxScaler; the fitted scaler is kept so
# predictions can be mapped back to price units later.
# NOTE(review): the scaler is fitted on the full series, including dates that
# later end up in the validation and test splits -- consider fitting on the
# training span only to avoid leaking information.
scaler = MinMaxScaler().fit(stock_data[['Close']])
stock_data['Close'] = scaler.transform(stock_data[['Close']])

# Visual sanity check: draw the scaled closing series against its dates.
plt.figure(figsize=(12, 6))
plt.title("Scaled Microsoft Stock Prices", fontsize=16, fontweight='bold')
plt.xlabel("Date", fontsize=14)
plt.ylabel("Scaled Price", fontsize=14)
plt.plot(stock_data.index, stock_data['Close'], color='royalblue')
plt.grid()
plt.show()

# Create windowed data
def create_windowed_data(dataframe, start_date_str, end_date_str, window_size=5):
    """Turn a dated ``Close`` series into a table of sliding windows.

    One output row is produced for every row of ``dataframe`` whose index
    date lies in ``[start_date_str, end_date_str]`` (both ends inclusive).
    Each output row holds the ``window_size`` closing values that immediately
    precede the target row, followed by the target row's own closing value.

    Args:
        dataframe: Frame with a ``'Close'`` column and a date index sorted in
            ascending order (label slicing with ``.loc`` relies on that).
        start_date_str: First target date as ``'YYYY-MM-DD'``.  If no row has
            that date, the first later row is the first target.
        end_date_str: Last target date as ``'YYYY-MM-DD'``.  If no row has
            that date, the last earlier row is the last target.
        window_size: Number of preceding closes used as features.

    Returns:
        A DataFrame with the columns ``'Target Date'``,
        ``'Target-<window_size>'`` ... ``'Target-1'`` (oldest to newest) and
        ``'Target'``.  It has no rows when the date range selects nothing.
        ``None`` is returned when fewer than ``window_size`` rows precede the
        first target row, i.e. the first window cannot be filled.
    """
    start_date = str_to_datetime(start_date_str)
    end_date = str_to_datetime(end_date_str)

    # Select the target rows with one inclusive label slice instead of walking
    # from date to date: a walk that waits to land exactly on end_date never
    # terminates when that date is missing from the index (weekend, holiday,
    # past the end of the data) or when two rows are more than a week apart.
    target_dates = dataframe.loc[start_date:end_date].index

    # The number of rows strictly before start_date is the integer position
    # of the first target row.
    first_pos = len(dataframe) - len(dataframe.loc[start_date:])
    if len(target_dates) > 0 and first_pos < window_size:
        # Not enough history in front of the first target to fill a window.
        return None

    closes = dataframe['Close'].to_numpy()
    # Row r of `windows` is closes[pos - window_size : pos + 1] for the r-th
    # target position, gathered with a single broadcasted index array.
    positions = np.arange(first_pos, first_pos + len(target_dates))
    windows = closes[positions[:, None] + np.arange(-window_size, 1)]

    windowed_df = pd.DataFrame({'Target Date': target_dates.to_numpy()})
    for i in range(window_size):
        windowed_df[f'Target-{window_size - i}'] = windows[:, i]
    windowed_df['Target'] = windows[:, -1]
    return windowed_df

# Build the supervised windows: every target day between the two dates below
# is paired with the five closing values that precede it.
windowed_data = create_windowed_data(
    dataframe=stock_data,
    start_date_str='2021-03-25',
    end_date_str='2024-05-29',
    window_size=5,
)

def prepare_X_y(windowed_dataframe):
    """Split a windowed frame into dates, model inputs and targets.

    The first column is returned untouched as the date array, the last column
    becomes the float32 target vector, and every column in between becomes a
    float32 input tensor of shape ``(rows, columns_in_between, 1)``.
    """
    table = windowed_dataframe.to_numpy()
    first_column = table[:, 0]
    middle_columns = table[:, 1:-1]
    last_column = table[:, -1]

    # Append a trailing axis of length one: a single feature per time step.
    inputs = middle_columns[:, :, np.newaxis].astype(np.float32)
    targets = last_column.astype(np.float32)
    return first_column, inputs, targets

dates, X, y = prepare_X_y(windowed_data)

# Chronological 80% / 10% / 10% partition.  The two cut points are row offsets
# into the windows in their existing order, so nothing is shuffled.
train_split = int(len(dates) * 0.8)
val_split = int(len(dates) * 0.9)
dates_train, dates_val, dates_test = np.split(dates, [train_split, val_split])
X_train, X_val, X_test = np.split(X, [train_split, val_split])
y_train, y_val, y_test = np.split(y, [train_split, val_split])

# Plot the three partitions of the target values over time.
plt.figure(figsize=(12, 6))
plt.plot(dates_train, y_train, color='green', label='Training Observations')
plt.plot(dates_val, y_val, color='orange', label='Validation Observations')
plt.plot(dates_test, y_test, color='red', label='Test Observations')
plt.title("Train, Validation, and Test Sets", fontsize=16, fontweight='bold')
plt.xlabel("Date", fontsize=14)
# The targets have not been inverse-transformed at this point, so the axis
# shows MinMax-scaled values rather than prices.
plt.ylabel("Scaled Price", fontsize=14)
plt.legend()
plt.grid()
plt.show()

# Build the LSTM regressor: two stacked LSTM layers (batch normalization and
# dropout after the first), a ReLU dense layer with dropout, and a single
# linear output unit.
model = Sequential([
    # Take (timesteps, features) from the training tensor so the network stays
    # in sync with the window size used to build X instead of hard-coding 5.
    layers.Input(shape=X_train.shape[1:]),
    layers.LSTM(256, return_sequences=True),
    layers.BatchNormalization(),
    layers.Dropout(0.1),
    layers.LSTM(128, return_sequences=False),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.1),
    layers.Dense(1)
])

# Mean-squared-error objective optimised with Adam; mean absolute error is
# tracked as an additional metric.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='mse',
    metrics=['mean_absolute_error'],
)

# Both callbacks watch the validation loss: one halves the learning rate after
# 10 epochs without improvement (never below 1e-6), the other stops training
# after 25 such epochs and restores the best weights seen.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-6)
early_stopping = EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True)

model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=250,
    callbacks=[early_stopping, reduce_lr],
)

# Run the trained network over every partition and map the outputs back to
# price units.  The model ends in a single output unit, so `predict` already
# returns the one-column 2-D layout the scaler expects; the result is then
# flattened to 1-D.
train_predictions = scaler.inverse_transform(model.predict(X_train)).flatten()
val_predictions = scaler.inverse_transform(model.predict(X_val)).flatten()
test_predictions = scaler.inverse_transform(model.predict(X_test)).flatten()

# Undo the scaling on the observed targets as well, so predictions and
# observations are compared on the same scale.
y_train = scaler.inverse_transform(y_train[:, np.newaxis]).ravel()
y_val = scaler.inverse_transform(y_val[:, np.newaxis]).ravel()
y_test = scaler.inverse_transform(y_test[:, np.newaxis]).ravel()

# Overlay predicted and observed prices for each partition; observations are
# drawn semi-transparent so the prediction line stays visible on top of them.
plt.figure(figsize=(12, 6))
for x_values, y_values, line_style in (
    (dates_train, train_predictions, dict(color='cyan', label='Training Predictions')),
    (dates_train, y_train, dict(color='green', alpha=0.5, label='Training Observations')),
    (dates_val, val_predictions, dict(color='orange', label='Validation Predictions')),
    (dates_val, y_val, dict(color='orange', alpha=0.5, label='Validation Observations')),
    (dates_test, test_predictions, dict(color='red', label='Test Predictions')),
    (dates_test, y_test, dict(color='red', alpha=0.5, label='Test Observations')),
):
    plt.plot(x_values, y_values, **line_style)
plt.title("Predictions vs Actual Stock Prices", fontsize=16, fontweight='bold')
plt.xlabel("Date", fontsize=14)
plt.ylabel("Stock Price", fontsize=14)
plt.legend()
plt.grid()
plt.show()

# Root-mean-squared error between the unscaled test targets and predictions.
rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=test_predictions))
print("RMSE:", rmse)

# The MAPE comes back as a fraction; it is turned into a percentage and its
# complement to 100 is reported as the "accuracy" figure.
mape = mean_absolute_percentage_error(y_true=y_test, y_pred=test_predictions)
accuracy = 100 - 100 * mape
print(f"Accuracy: {accuracy:.2f}%")
