In [9]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Loading the datasets
disney_data = pd.read_csv('datasets/disney_daily.csv')
warner_data = pd.read_csv('datasets/warner_bros_daily.csv')

# Initial Exploration
def initial_exploration(df, company_name):
    print(f"\nInitial Exploration for {company_name}")
    print(df.info())
    print(df.describe())
    print(df.head())

disney_data['date'] = pd.to_datetime(disney_data['date'])
warner_data['date'] = pd.to_datetime(warner_data['date'])

initial_exploration(disney_data, "Disney")
initial_exploration(warner_data, "Warner Bros")

# Cleaning the Data
def clean_data(df):
    # Renaming columns to remove spaces and make them lowercase
    df.columns = df.columns.str.lower().str.replace(' ', '_')
    
    # Handling missing values
    df.fillna(method='ffill', inplace=True)
    df.fillna(method='bfill', inplace=True)
    
    return df

disney_data = clean_data(disney_data)
warner_data = clean_data(warner_data)

# Exploratory Data Analysis (EDA)
def plot_stock_prices(df, company_name):
    plt.figure(figsize=(12, 6))
    sns.lineplot(x=df['datetime'], y=df['close'], label=f'{company_name} Closing Prices')
    plt.xlabel('Year')
    plt.ylabel('Stock Price')
    plt.title(f'{company_name} Stock Prices Over Time')
    plt.legend()
    plt.show()

plot_stock_prices(disney_data, "Disney")
plot_stock_prices(warner_data, "Warner Bros")

# Comparing both companies
plt.figure(figsize=(12, 6))
sns.lineplot(x=disney_data['date'], y=disney_data['close'], label='Disney Closing Prices')
sns.lineplot(x=warner_data['date'], y=warner_data['close'], label='Warner Bros Closing Prices')
plt.xlabel('Year')
plt.ylabel('Stock Price')
plt.title('Comparison of Disney and Warner Bros Stock Prices')
plt.legend()
plt.show()

# Feature Engineering and Splitting Data for Predictions
# Filtering data up to 2019 for training
disney_train_data = disney_data[disney_data['datetime'].dt.year <= 2019]
warner_train_data = warner_data[warner_data['datetime'].dt.year <= 2019]

disney_features = disney_train_data[['open', 'high', 'low', 'volume']]
disney_target = disney_train_data['close']

warner_features = warner_train_data[['open', 'high', 'low', 'volume']]
warner_target = warner_train_data['close']

# Scaling the features
scaler_disney = StandardScaler()
X_train_scaled_disney = scaler_disney.fit_transform(disney_features)

scaler_warner = StandardScaler()
X_train_scaled_warner = scaler_warner.fit_transform(warner_features)

# Model Building and Future Predictions
# Neural Network Model Building for Disney
model_disney = Sequential()
model_disney.add(Dense(4, input_dim=X_train_scaled_disney.shape[1], activation='relu'))
model_disney.add(Dense(2, activation='relu'))
model_disney.add(Dense(1))

optimizer_disney = Adam(learning_rate=0.001)
model_disney.compile(optimizer=optimizer_disney, loss='mse', metrics=['mae'])

# Training the model
history_disney = model_disney.fit(X_train_scaled_disney, disney_target, epochs=50, batch_size=8, verbose=1, validation_split=0.2)

# Plotting training history for Disney
plt.figure(figsize=(10, 6))
sns.lineplot(data=history_disney.history, palette='tab10', linewidth=2.5)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss for Disney Model')
plt.legend(['Training Loss', 'Validation Loss'])
plt.show()

# Preparing data for future predictions (2020-2024)
disney_future_data = disney_data[(disney_data['datetime'].dt.year > 2019) & (disney_data['datetime'].dt.year <= 2024)]
disney_future_features = disney_future_data[['open', 'high', 'low', 'volume']]
disney_future_features_scaled = scaler_disney.transform(disney_future_features)

y_pred_disney = model_disney.predict(disney_future_features_scaled)

# Plotting Future Predictions for Disney
plt.figure(figsize=(10, 6))
sns.scatterplot(x=disney_future_data['datetime'], y=disney_future_data['close'], label='Actual Stock Prices', alpha=0.6, s=10)
sns.scatterplot(x=disney_future_data['datetime'], y=y_pred_disney.flatten(), label='Predicted Stock Prices', alpha=0.6, s=10)
plt.xlabel('Date')
plt.ylabel('Stock Prices')
plt.title('Actual vs Predicted Stock Prices for Disney (2020-2024)')
plt.legend()
plt.show()

# Calculating metrics for Disney
mse_disney = mean_squared_error(disney_future_data['close'], y_pred_disney)
mae_disney = mean_absolute_error(disney_future_data['close'], y_pred_disney)
rmse_disney = np.sqrt(mse_disney)
r2_disney = r2_score(disney_future_data['close'], y_pred_disney)
training_loss_disney = history_disney.history['loss'][-1]
validation_loss_disney = history_disney.history['val_loss'][-1]
print(f"Disney Model - Mean Squared Error: {mse_disney}")
print(f"Disney Model - Mean Absolute Error: {mae_disney}")
print(f"Disney Model - Root Mean Squared Error: {rmse_disney}")
print(f"Disney Model - R2 Score: {r2_disney}")
print(f"Disney Model - Training Loss: {training_loss_disney}")
print(f"Disney Model - Validation Loss: {validation_loss_disney}")

# Neural Network Model Building for Warner Bros
model_warner = Sequential()
model_warner.add(Dense(4, input_dim=X_train_scaled_warner.shape[1], activation='relu'))
model_warner.add(Dense(2, activation='relu'))
model_warner.add(Dense(1))

optimizer_warner = Adam(learning_rate=0.001)
model_warner.compile(optimizer=optimizer_warner, loss='mse', metrics=['mae'])

# Training the model
history_warner = model_warner.fit(X_train_scaled_warner, warner_target, epochs=50, batch_size=8, verbose=1, validation_split=0.2)

# Plotting training history for Warner Bros
plt.figure(figsize=(10, 6))
sns.lineplot(data=history_warner.history, palette='tab10', linewidth=2.5)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss for Warner Bros Model')
plt.legend(['Training Loss', 'Validation Loss'])
plt.show()

# Preparing data for future predictions (2020-2024)
warner_future_data = warner_data[(warner_data['datetime'].dt.year > 2019) & (warner_data['datetime'].dt.year <= 2024)]
warner_future_features = warner_future_data[['open', 'high', 'low', 'volume']]
warner_future_features_scaled = scaler_warner.transform(warner_future_features)

y_pred_warner = model_warner.predict(warner_future_features_scaled)

# Plotting Future Predictions for Warner Bros
plt.figure(figsize=(10, 6))
sns.scatterplot(x=warner_future_data['datetime'], y=warner_future_data['close'], label='Actual Stock Prices', alpha=0.6, s=10)
sns.scatterplot(x=warner_future_data['datetime'], y=y_pred_warner.flatten(), label='Predicted Stock Prices', alpha=0.6, s=10)
plt.xlabel('Date')
plt.ylabel('Stock Prices')
plt.title('Actual vs Predicted Stock Prices for Warner Bros (2020-2024)')
plt.legend()
plt.show()

# Calculating metrics for Warner Bros
mse_warner = mean_squared_error(warner_future_data['close'], y_pred_warner)
mae_warner = mean_absolute_error(warner_future_data['close'], y_pred_warner)
rmse_warner = np.sqrt(mse_warner)
r2_warner = r2_score(warner_future_data['close'], y_pred_warner)
training_loss_warner = history_warner.history['loss'][-1]
validation_loss_warner = history_warner.history['val_loss'][-1]
print(f"Warner Bros Model - Mean Squared Error: {mse_warner}")
print(f"Warner Bros Model - Mean Absolute Error: {mae_warner}")
print(f"Warner Bros Model - Root Mean Squared Error: {rmse_warner}")
print(f"Warner Bros Model - R2 Score: {r2_warner}")
print(f"Warner Bros Model - Training Loss: {training_loss_warner}")
print(f"Warner Bros Model - Validation Loss: {validation_loss_warner}")


KeyError: 'date'