In [None]:
# Run the following code to install all the required libraries
# NOTE(review): no versions are pinned, so a fresh install may resolve to releases that
# differ from the ones this notebook was written against — consider pinning.
# NOTE(review): boto3 and pycaret are not imported in the cells visible here — confirm
# they are still needed before keeping them in the install list.
%pip install pandas numpy matplotlib seaborn scikit-learn tensorflow boto3 pycaret


^C
Note: you may need to restart the kernel to use updated packages.




In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import os

In [None]:
# Locations of the raw daily price CSVs (relative to the notebook's working directory)
disney_file_path = 'datasets/disney_daily.csv'
warner_file_path = 'datasets/warner_bros_daily.csv'

# Fail fast before loading anything, and name exactly which file(s) could not be found
missing_files = [path for path in (disney_file_path, warner_file_path) if not os.path.exists(path)]
if missing_files:
    raise FileNotFoundError(
        "One or both of the dataset files do not exist. Please check the file paths. "
        f"Missing: {', '.join(missing_files)}"
    )


def _load_daily_prices(file_path):
    """Read one price CSV and normalise its 'Date' column and column names.

    Steps, applied identically to every company file:
      1. Strip a trailing "+HH:MM" / "-HH:MM" offset from each 'Date' string.
      2. Parse the result with pandas (unparseable values become NaT) and drop
         the timezone so the column is timezone-naive.
      3. Drop the rows whose date could not be parsed.
      4. Lower-case the column names and replace spaces with underscores
         (so 'Date' becomes 'date').

    Parameters:
        file_path: path of a CSV file that contains a 'Date' column.

    Returns:
        A DataFrame with the cleaned 'date' column and normalised column names.
        The original row index is kept (it is not reset after dropping rows).

    Raises:
        KeyError: if the CSV has no 'Date' column.
    """
    prices = pd.read_csv(file_path)

    # Remove the UTC offset suffix first so the wall-clock value is what gets parsed
    date_text = prices['Date'].astype(str).str.replace(r"(\+|-)\d{2}:\d{2}$", "", regex=True)
    prices['Date'] = pd.to_datetime(date_text, errors='coerce', utc=True).dt.tz_localize(None)

    # Rows whose date failed to parse were coerced to NaT above; discard them
    prices = prices.dropna(subset=['Date'])

    # Renaming columns to remove spaces and make them lowercase
    prices.columns = prices.columns.str.lower().str.replace(' ', '_')
    return prices


# Loading the datasets through the shared helper so both frames get the same treatment
disney_data = _load_daily_prices(disney_file_path)
warner_data = _load_daily_prices(warner_file_path)

# Initial Exploration
def initial_exploration(df, company_name):
    """Print a quick overview of a DataFrame: schema, summary statistics, first rows.

    Parameters:
        df: the pandas DataFrame to inspect.
        company_name: label used in the heading line of the report.

    Returns:
        None. Everything is written to standard output.
    """
    print(f"\nInitial Exploration for {company_name}")
    # DataFrame.info() writes its report to stdout itself and returns None, so it is
    # called directly; wrapping it in print() would add a stray "None" line.
    df.info()
    print(df.describe())
    print(df.head())

# Print the same overview report for each company in turn
for _frame, _company in ((disney_data, "Disney"), (warner_data, "Warner Bros")):
    initial_exploration(_frame, _company)

# Cleaning the Data
def clean_data(df):
    """Fill missing values in place: forward-fill first, then backward-fill.

    The forward fill carries the last seen value down each column; the backward
    fill then covers any gaps left at the very top of a column, where there was
    no earlier value to carry forward.

    Parameters:
        df: the pandas DataFrame to clean. It is modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    # DataFrame.ffill / DataFrame.bfill replace fillna(method=...), whose `method`
    # argument is deprecated in recent pandas releases.
    df.ffill(inplace=True)
    df.bfill(inplace=True)
    return df

# Fill the gaps in both company frames (clean_data returns the frame it was given)
disney_data, warner_data = (clean_data(frame) for frame in (disney_data, warner_data))

# Exploratory Data Analysis (EDA)
def plot_stock_prices(df, company_name):
    """Show a line chart of one company's closing price over time.

    Parameters:
        df: DataFrame providing a 'date' column (x axis) and a 'close' column (y axis).
        company_name: name used in the legend entry and the chart title.

    Returns:
        None. The figure is displayed with plt.show().
    """
    series_label = f'{company_name} Closing Prices'
    chart_title = f'{company_name} Stock Prices Over Time'

    plt.figure(figsize=(12, 6))
    # lineplot draws on the figure just created and hands back its Axes
    axes = sns.lineplot(x=df['date'], y=df['close'], label=series_label)
    axes.set_xlabel('Year')
    axes.set_ylabel('Stock Price')
    axes.set_title(chart_title)
    axes.legend()
    plt.show()

# One chart per company
company_frames = ((disney_data, "Disney"), (warner_data, "Warner Bros"))
for _frame, _company in company_frames:
    plot_stock_prices(_frame, _company)

# Comparing both companies: overlay the two closing-price series on a single figure
plt.figure(figsize=(12, 6))
for _frame, _company in company_frames:
    sns.lineplot(x=_frame['date'], y=_frame['close'], label=f'{_company} Closing Prices')
plt.xlabel('Year')
plt.ylabel('Stock Price')
plt.title('Comparison of Disney and Warner Bros Stock Prices')
plt.legend()
plt.show()


In [None]:
# Feature Engineering and Splitting Data for Predictions
# Rows dated 2019 or earlier are used to fit the model
disney_train_data = disney_data[disney_data['date'].dt.year <= 2019]

# Rows dated after 2019 are held back for the prediction cell; stop here if there are none
disney_future_data = disney_data[(disney_data['date'].dt.year > 2019)]
if disney_future_data.empty:
    raise ValueError("No data available for future predictions for Disney. Please check the dataset or the filtering conditions.")

# Inputs are each row's open/high/low/volume; the target is the same row's close
disney_features = disney_train_data[['open', 'high', 'low', 'volume']]
disney_target = disney_train_data['close']

# Scaling the features (the scaler is fitted on the training rows only)
scaler_disney = StandardScaler()
X_train_scaled_disney = scaler_disney.fit_transform(disney_features)

# Model Building and Training
# Neural Network Model Building for Disney: three ReLU hidden layers and one linear output unit
model_disney = Sequential()
model_disney.add(Dense(64, input_dim=X_train_scaled_disney.shape[1], activation='relu'))
model_disney.add(Dense(32, activation='relu'))
model_disney.add(Dense(16, activation='relu'))
model_disney.add(Dense(1))

optimizer_disney = Adam(learning_rate=0.001)
model_disney.compile(optimizer=optimizer_disney, loss='mse', metrics=['mae'])

# Early stopping and model checkpoint callbacks, both driven by the validation loss
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, min_delta=0.001)
model_checkpoint = ModelCheckpoint(filepath='best_disney_model.keras', monitor='val_loss', save_best_only=True)

# Training the model; validation_split=0.2 reserves part of the training rows for validation
history_disney = model_disney.fit(X_train_scaled_disney, disney_target, epochs=50, batch_size=8, verbose=1, validation_split=0.2, callbacks=[early_stopping, model_checkpoint])

# Plotting training history for Disney
# The history dict also carries the MAE series (the model is compiled with metrics=['mae']).
# Plotting the whole dict under a two-entry legend mislabelled the MAE curve as
# "Validation Loss", so only the two loss curves are selected and named explicitly.
loss_curves_disney = pd.DataFrame({
    'Training Loss': history_disney.history['loss'],
    'Validation Loss': history_disney.history['val_loss'],
})
plt.figure(figsize=(10, 6))
sns.lineplot(data=loss_curves_disney, palette='tab10', linewidth=2.5)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss for Disney Model')
plt.legend()
plt.show()

In [None]:
# Feature Engineering and Splitting Data for Predictions
# Rows dated 2019 or earlier are used to fit the model
warner_train_data = warner_data[warner_data['date'].dt.year <= 2019]

# Rows dated after 2019 are held back for the prediction cell; stop here if there are none
warner_future_data = warner_data[(warner_data['date'].dt.year > 2019)]
if warner_future_data.empty:
    raise ValueError("No data available for future predictions for Warner Bros. Please check the dataset or the filtering conditions.")

# Inputs are each row's open/high/low/volume; the target is the same row's close
warner_features = warner_train_data[['open', 'high', 'low', 'volume']]
warner_target = warner_train_data['close']

# Scaling the features (the scaler is fitted on the training rows only)
scaler_warner = StandardScaler()
X_train_scaled_warner = scaler_warner.fit_transform(warner_features)

# Neural Network Model Building for Warner Bros: three ReLU hidden layers and one linear output unit
model_warner = Sequential()
model_warner.add(Dense(64, input_dim=X_train_scaled_warner.shape[1], activation='relu'))
model_warner.add(Dense(32, activation='relu'))
model_warner.add(Dense(16, activation='relu'))
model_warner.add(Dense(1))

optimizer_warner = Adam(learning_rate=0.001)
model_warner.compile(optimizer=optimizer_warner, loss='mse', metrics=['mae'])

# A dedicated early-stopping callback with the same settings as the Disney one, so this
# cell does not rely on an object created in another cell
early_stopping_warner = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, min_delta=0.001)

# Training the model; validation_split=0.2 reserves part of the training rows for validation
history_warner = model_warner.fit(X_train_scaled_warner, warner_target, epochs=50, batch_size=8, verbose=1, validation_split=0.2, callbacks=[early_stopping_warner])

# Plotting training history for Warner Bros
# The history dict also carries the MAE series (the model is compiled with metrics=['mae']).
# Plotting the whole dict under a two-entry legend mislabelled the MAE curve as
# "Validation Loss", so only the two loss curves are selected and named explicitly.
loss_curves_warner = pd.DataFrame({
    'Training Loss': history_warner.history['loss'],
    'Validation Loss': history_warner.history['val_loss'],
})
plt.figure(figsize=(10, 6))
sns.lineplot(data=loss_curves_warner, palette='tab10', linewidth=2.5)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss for Warner Bros Model')
plt.legend()
plt.show()

In [None]:
# Preparing data for future predictions (2020-2024) and saving the best models

# Hold-out inputs: the same four columns the scaler was fitted on, scaled with that fitted scaler
disney_future_features = disney_future_data.loc[:, ['open', 'high', 'low', 'volume']]
disney_future_features_scaled = scaler_disney.transform(disney_future_features)

# Predicting Future Stock Prices for Disney
y_pred_disney = model_disney.predict(disney_future_features_scaled)

# Saving Disney model
# NOTE(review): this is the same path the ModelCheckpoint callback writes to during
# training, so the checkpoint file is overwritten here — confirm that is intended.
model_disney.save('best_disney_model.keras')

# Plotting Future Predictions for Disney: actual and predicted closes against the same dates
plt.figure(figsize=(10, 6))
for _prices, _series_label in (
    (disney_future_data['close'], 'Actual Stock Prices'),
    (y_pred_disney.flatten(), 'Predicted Stock Prices'),
):
    sns.scatterplot(x=disney_future_data['date'], y=_prices, label=_series_label, alpha=0.6, s=10)
plt.xlabel('Date')
plt.ylabel('Stock Prices')
plt.title('Actual vs Predicted Stock Prices for Disney (2020-2024)')
plt.legend()
plt.show()

# Calculating metrics for Disney on the hold-out rows
actual_close_disney = disney_future_data['close']
mse_disney = mean_squared_error(actual_close_disney, y_pred_disney)
mae_disney = mean_absolute_error(actual_close_disney, y_pred_disney)
rmse_disney = np.sqrt(mse_disney)
r2_disney = r2_score(actual_close_disney, y_pred_disney)

# Losses recorded for the last epoch that was run
training_loss_disney = history_disney.history['loss'][-1]
validation_loss_disney = history_disney.history['val_loss'][-1]

for _metric_name, _metric_value in (
    ('Mean Squared Error', mse_disney),
    ('Mean Absolute Error', mae_disney),
    ('Root Mean Squared Error', rmse_disney),
    ('R2 Score', r2_disney),
    ('Training Loss', training_loss_disney),
    ('Validation Loss', validation_loss_disney),
):
    print(f"Disney Model - {_metric_name}: {_metric_value}")

In [None]:
# Preparing data for future predictions for Warner Bros
# Hold-out inputs: the same four columns the scaler was fitted on, scaled with that fitted scaler
warner_future_features = warner_future_data.loc[:, ['open', 'high', 'low', 'volume']]
warner_future_features_scaled = scaler_warner.transform(warner_future_features)

# Predicting Future Stock Prices for Warner Bros
y_pred_warner = model_warner.predict(warner_future_features_scaled)

# Saving Warner Bros model
model_warner.save('best_warner_bros_model.keras')

# Plotting Future Predictions for Warner Bros: actual and predicted closes against the same dates
plt.figure(figsize=(10, 6))
for _prices, _series_label in (
    (warner_future_data['close'], 'Actual Stock Prices'),
    (y_pred_warner.flatten(), 'Predicted Stock Prices'),
):
    sns.scatterplot(x=warner_future_data['date'], y=_prices, label=_series_label, alpha=0.6, s=10)
plt.xlabel('Date')
plt.ylabel('Stock Prices')
plt.title('Actual vs Predicted Stock Prices for Warner Bros (2020-2024)')
plt.legend()
plt.show()

# Calculating metrics for Warner Bros on the hold-out rows
actual_close_warner = warner_future_data['close']
mse_warner = mean_squared_error(actual_close_warner, y_pred_warner)
mae_warner = mean_absolute_error(actual_close_warner, y_pred_warner)
rmse_warner = np.sqrt(mse_warner)
r2_warner = r2_score(actual_close_warner, y_pred_warner)

# Losses recorded for the last epoch that was run
training_loss_warner = history_warner.history['loss'][-1]
validation_loss_warner = history_warner.history['val_loss'][-1]

for _metric_name, _metric_value in (
    ('Mean Squared Error', mse_warner),
    ('Mean Absolute Error', mae_warner),
    ('Root Mean Squared Error', rmse_warner),
    ('R2 Score', r2_warner),
    ('Training Loss', training_loss_warner),
    ('Validation Loss', validation_loss_warner),
):
    print(f"Warner Bros Model - {_metric_name}: {_metric_value}")

