# Datasets Used:
1. [Finance Sector](https://www.kaggle.com/datasets/camnugent/sandp500)
2. [Environmental Sector](https://gml.noaa.gov/ccgg/trends/data.html)
3. [Energy Sector](https://www.kaggle.com/datasets/nicholasjhana/energy-consumption-generation-prices-and-weather?select=energy_dataset.csv)

# Importing Libraries

Importing all the necessary libraries for the implementation of the code.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
import statsmodels.api as sm
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Importing / Pre-Processing Dataset(s)

Importing and Preprocessing the dataset(s) for the implementation of the code.

## Stock Dataset

In [None]:
# Reading The Raw Stock Price File
stock_data = pd.read_csv('all_stocks_5yr.csv')

# Parsing The 'date' Column And Promoting It To The Index
stock_data = stock_data.assign(date=pd.to_datetime(stock_data['date'])).set_index('date')

# Previewing The First Rows
print(stock_data.head())

In [None]:
# Removing Every Row That Contains At Least One Missing Value
stock_data = stock_data.dropna(axis=0, how='any')

# Counting The Remaining Missing Values Per Column
stock_data.isna().sum()

## CO2 Dataset

In [None]:
# Loading The Dataset
co2_data = pd.read_csv("co2_daily_mlo.csv")

# Assembling the 'date' column directly from the 'yr', 'mon' and 'day' columns.
# pd.to_datetime builds timestamps from a frame whose columns are named
# year / month / day, so the components are renamed on a temporary copy
# instead of being converted to strings and concatenated.
date_parts = co2_data[['yr', 'mon', 'day']].rename(columns={'yr': 'year', 'mon': 'month'})
co2_data['date'] = pd.to_datetime(date_parts)

# Dropping the 'yr', 'mon', and 'day' columns (now redundant with 'date')
co2_data.drop(columns=['yr', 'mon', 'day'], inplace=True)

# Using date as index and sorting chronologically
co2_data.set_index('date', inplace=True)
co2_data.sort_index(inplace=True)

# Printing To Test
print(co2_data.head())

In [None]:
# Removing Every Row That Contains At Least One Missing Value
co2_data = co2_data.dropna(axis=0, how='any')

# Counting The Remaining Null Values Per Column
co2_data.isna().sum()

## Energy Dataset

In [None]:
# # Loading The Dataset
# energy_data=pd.read_csv("energy_dataset.csv")

# # Printing To Test
# energy_data.head()

In [None]:
# # Selecting Useful Columns
# energy_data = energy_data[['time','total load actual','price actual']]

# # Converting 'time' to datetime
# energy_data['time'] = pd.to_datetime(energy_data['time'], format = "%Y-%m-%d %H:%M:%S", dayfirst = True)

# # Setting 'time' as the index
# energy_data.set_index('time', inplace=True)

# # Printing To Test
# print(energy_data.head())

In [None]:
# # Checking For Missing Values
# energy_data.isnull().sum()

In [None]:
# # Filling Missing Values With Forward Fill
# energy_data.fillna(method='ffill', inplace=True)

# # Checking For Missing Values
# energy_data.isnull().sum()

## Checking If Data Is Stationary

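Creating a function that plots a series and checks whether it is stationary using the Augmented Dickey-Fuller test. Series that turn out to be non-stationary are differenced in the cells that follow.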

In [None]:
def check_stationarity(data, significance=0.05):
    """Plot a series and report whether it is stationary (ADF test).

    Args:
        data: Series-like object supporting ``replace`` and ``dropna``
            (the callers in this file pass a single DataFrame column).
        significance: p-value threshold; the series is reported as
            stationary when the ADF p-value is strictly below it.

    Returns:
        bool: True when the ADF p-value is below ``significance``.
    """
    # Plotting The Data (as received, before any cleaning)
    plt.figure(figsize=(12, 6))
    plt.plot(data)
    plt.title('Time Series Data')
    plt.show()

    # Dropping Missing Or Infinite Values (e.g. the leading NaN left by differencing)
    data = data.replace([np.inf, -np.inf], np.nan).dropna()

    # Performing The Augmented Dickey-Fuller Test
    adf_test = sm.tsa.adfuller(data)
    adf_statistic, p_value = adf_test[0], adf_test[1]
    print('ADF Statistic:', adf_statistic)
    print('p-value:', p_value)

    # Checking The Results
    is_stationary = bool(p_value < significance)
    if is_stationary:
        print('The Data Is Stationary')
    else:
        print('The Data Is Not Stationary')
    return is_stationary

Checking if Stock Data is Stationary.

In [None]:
# Running The Stationarity Check On The Closing Prices
# NOTE(review): a 'Name' column is dropped from stock_data later in this file, so the
# frame presumably holds several tickers stacked under one date index - confirm that
# testing the combined 'close' column is intended.
check_stationarity(stock_data.loc[:, 'close'])

Checking if Environmental Data is Stationary.

In [None]:
# Running The Stationarity Check On The Raw CO2 Concentration
check_stationarity(co2_data.loc[:, 'CO2 molfrac (ppm)'])

Making Environmental Data Stationary and Running 2nd Iteration.

In [None]:
# First-Order Differencing: Each Value Minus The Previous One (First Row Becomes NaN)
co2_series = co2_data['CO2 molfrac (ppm)']
co2_data['CO2_diff'] = co2_series - co2_series.shift(1)

# Testing The Differenced Series For Stationarity
check_stationarity(co2_data['CO2_diff'])

Checking if Energy Data is Stationary.

In [None]:
# check_stationarity(energy_data['total load actual'])

As of right now all data is stationary and ready to be used for models (with changes being made according to the model requirements).

# Model Implementation

This is the section where we will begin the implementation of all the models while also finding all the results of the models.

## ARIMA Model

### Stock Data

In [None]:
# Fitting An ARIMA Model With Order (5, 1, 0) To The Closing Prices
close_series = stock_data['close']
model_stock = ARIMA(close_series, order=(5, 1, 0))
results_stock = model_stock.fit()

# Drawing The Observed Series Against The In-Sample Fitted Values
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(close_series, label='Original Data')
ax.plot(results_stock.fittedvalues, label='Predicted Data')
ax.set_title('ARIMA Model for Stock Data')
ax.set_xlabel('Date')
ax.set_ylabel('Close Price')
ax.legend()
plt.show()

# Showing The Fitted Model Summary
print(results_stock.summary())

### CO2 Data

In [None]:
# Fitting An ARIMA Model With Order (5, 1, 0) To The CO2 Concentration
co2_series = co2_data['CO2 molfrac (ppm)']
model_co2 = ARIMA(co2_series, order=(5, 1, 0))
results_co2 = model_co2.fit()

# Drawing The Observed Series Against The In-Sample Fitted Values
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(co2_series, label='Original Data')
ax.plot(results_co2.fittedvalues, label='Predicted Data')
ax.set_title('ARIMA Model for CO2 Data')
ax.set_xlabel('Date')
ax.set_ylabel('CO2 Concentration')
ax.legend()
plt.show()

# Showing The Fitted Model Summary
print(results_co2.summary())

### Energy Data

In [None]:
# Not implemented

## ANN (Artificial Neural Networks) Model

### Stock Data

In [None]:
# Number of past observations the network sees for each prediction
window = 60

# Splitting the data chronologically into train and test sets (80 / 20)
close_values = stock_data['close'].values.reshape(-1, 1)
train_size = int(len(close_values) * 0.8)

# Scaling the data: the scaler is fit on the training portion only so that
# test-set minima/maxima do not leak into training
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(close_values[:train_size])
stock_data_scaled = scaler.transform(close_values)
train_data, test_data = stock_data_scaled[:train_size], stock_data_scaled[train_size:]

# Creating the input and output sequences for the LSTM model:
# each sample is `window` consecutive values, the target is the value that follows
X_train = np.array([train_data[i - window:i] for i in range(window, len(train_data))])
y_train = train_data[window:]

# The test set is windowed the same way; the network cannot consume the raw
# (n, 1) test array because it was built for (window, 1) sequences
X_test = np.array([test_data[i - window:i] for i in range(window, len(test_data))])

# Reshaping the input data for the LSTM model: (samples, timesteps, features)
X_train = X_train.reshape(-1, window, 1)
X_test = X_test.reshape(-1, window, 1)

# Building the LSTM model
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(window, 1)))
model.add(LSTM(units=50))
model.add(Dense(units=1))

# Compiling the model
model.compile(loss='mean_squared_error', optimizer='adam')

# Training the model
model.fit(X_train, y_train, epochs=3, batch_size=32)

# Making predictions (one per test point from position `window` onwards)
predictions = model.predict(X_test)

# Inverse scaling the predictions and flattening to 1-D so they subtract
# element-wise from stock_data['close'].values[train_size+60:]
predictions = scaler.inverse_transform(predictions).ravel()

In [None]:
# Checking the Accuracy
# Actual prices for the evaluated span (starts 60 points into the test split)
actual = stock_data['close'].values[train_size+60:]
# Flattening the predictions: subtracting an (n, 1) array from an (n,) array
# would broadcast into an (n, n) matrix and corrupt every metric below
predicted = np.asarray(predictions).ravel()
errors = actual - predicted

print('Mean Absolute Error:', np.mean(np.abs(errors)))
print('Mean Squared Error:', np.mean(np.square(errors)))
print('Root Mean Squared Error:', np.sqrt(np.mean(np.square(errors))))
print('Mean Absolute Percentage Error:', np.mean(np.abs(errors / actual) * 100))

### CO2 Data

In [None]:
# Number of past observations the network sees for each prediction
window = 60

# Splitting the data chronologically into train and test sets (80 / 20)
co2_values = co2_data['CO2 molfrac (ppm)'].values.reshape(-1, 1)
train_size = int(len(co2_values) * 0.8)

# Scaling the data: the scaler is fit on the training portion only so that
# test-set minima/maxima do not leak into training
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(co2_values[:train_size])
co2_data_scaled = scaler.transform(co2_values)
train_data, test_data = co2_data_scaled[:train_size], co2_data_scaled[train_size:]

# Creating the input and output sequences for the LSTM model:
# each sample is `window` consecutive values, the target is the value that follows
X_train = np.array([train_data[i - window:i] for i in range(window, len(train_data))])
y_train = train_data[window:]

# The test set is windowed the same way; the network cannot consume the raw
# (n, 1) test array because it was built for (window, 1) sequences
X_test = np.array([test_data[i - window:i] for i in range(window, len(test_data))])

# Reshaping the input data for the LSTM model: (samples, timesteps, features)
X_train = X_train.reshape(-1, window, 1)
X_test = X_test.reshape(-1, window, 1)

# Building the LSTM model
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(window, 1)))
model.add(LSTM(units=50))
model.add(Dense(units=1))

# Compiling the model
model.compile(loss='mean_squared_error', optimizer='adam')

# Training the model
model.fit(X_train, y_train, epochs=3, batch_size=32)

# Making predictions (one per test point from position `window` onwards)
predictions = model.predict(X_test)

# Inverse scaling the predictions and flattening to 1-D so they subtract
# element-wise from co2_data['CO2 molfrac (ppm)'].values[train_size+60:]
predictions = scaler.inverse_transform(predictions).ravel()

In [None]:
# Checking the Accuracy
# Actual concentrations for the evaluated span (starts 60 points into the test split)
actual = co2_data['CO2 molfrac (ppm)'].values[train_size+60:]
# Flattening the predictions: subtracting an (n, 1) array from an (n,) array
# would broadcast into an (n, n) matrix and corrupt every metric below
predicted = np.asarray(predictions).ravel()
errors = actual - predicted

print('Mean Absolute Error:', np.mean(np.abs(errors)))
print('Mean Squared Error:', np.mean(np.square(errors)))
print('Root Mean Squared Error:', np.sqrt(np.mean(np.square(errors))))
print('Mean Absolute Percentage Error:', np.mean(np.abs(errors / actual) * 100))

### Energy Data

In [None]:
# Not Implemented

## SARIMA (Seasonal ARIMA) Model

### Stock Data

In [None]:
# Fitting A SARIMA Model To The Closing Prices
# (non-seasonal order (1, 0, 1), seasonal order (1, 1, 1) with period 12)
model = sm.tsa.statespace.SARIMAX(
    stock_data['close'],
    order=(1, 0, 1),
    seasonal_order=(1, 1, 1, 12),
)
results = model.fit()

# Date Range To Predict Over
start_date, end_date = '2013-02-08', '2018-02-07'

# Producing And Displaying The Predictions For That Range
predictions = results.predict(start=start_date, end=end_date)
print(predictions)

# Drawing Actual Prices Against The Predictions
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(stock_data['close'], label='Actual')
ax.plot(predictions, label='Predictions')
ax.set_title('Stock Price Predictions')
ax.legend()
plt.show()

In [None]:
# Checking the Accuracy
print('Mean Absolute Error:', np.mean(np.abs(stock_data['close'][start_date:end_date] - predictions)))
print('Mean Squared Error:', np.mean(np.square(stock_data['close'][start_date:end_date] - predictions)))
print('Root Mean Squared Error:', np.sqrt(np.mean(np.square(stock_data['close'][start_date:end_date] - predictions))))
print('Mean Absolute Percentage Error:', np.mean(np.abs((stock_data['close'][start_date:end_date] - predictions) / stock_data['close'][start_date:end_date]) * 100))

### CO2 Data

In [None]:
# Creating a SARIMA model on the CO2 data
model = sm.tsa.statespace.SARIMAX(co2_data['CO2 molfrac (ppm)'], order=(1, 0, 1), seasonal_order=(1, 1, 1, 12))
results = model.fit()

start_date = '1974-05-19'
end_date = '2024-05-10'

# Getting the predictions
predictions = results.predict(start=start_date, end=end_date)

# Printing the predictions
print(predictions)

# Plotting the predictions
plt.figure(figsize=(12, 6))
plt.plot(co2_data['CO2 molfrac (ppm)'], label='Actual')
plt.plot(predictions, label='Predictions')
plt.title('CO2 Predictions')
plt.legend()
plt.show()

In [None]:
# Checking the Accuracy
print('Mean Absolute Error:', np.mean(np.abs(co2_data['CO2 molfrac (ppm)'][start_date:end_date] - predictions)))
print('Mean Squared Error:', np.mean(np.square(co2_data['CO2 molfrac (ppm)'][start_date:end_date] - predictions)))
print('Root Mean Squared Error:', np.sqrt(np.mean(np.square(co2_data['CO2 molfrac (ppm)'][start_date:end_date] - predictions))))
print('Mean Absolute Percentage Error:', np.mean(np.abs((co2_data['CO2 molfrac (ppm)'][start_date:end_date] - predictions) / co2_data['CO2 molfrac (ppm)'][start_date:end_date]) * 100))


### Energy Data

In [None]:
# Not Implemented

## ETS (Exponential Smoothing) Model

### Stock Data

In [None]:
# Applying Exponential Smoothing To The Stock Data
stock_data['close_ewm'] = stock_data['close'].ewm(span=20, adjust=False).mean()

# Plotting The Data
plt.figure(figsize=(12, 6))
plt.plot(stock_data['close'], label='Close Price')
plt.plot(stock_data['close_ewm'], label='Close Price EWM')
plt.title('Stock Data')
plt.legend()
plt.show()

In [None]:
# Checking Accuracy
print('The Mean Absolute Error:', np.mean(np.abs(stock_data['close'] - stock_data['close_ewm'])))
print('The Mean Squared Error:', np.mean((stock_data['close'] - stock_data['close_ewm'])**2))
stock_data['error_percent'] = 100 * np.abs((stock_data['close'] - stock_data['close_ewm']) / stock_data['close'])
mape = stock_data['error_percent'].mean()

print('The Mean Absolute Percentage Error:', int(100 - mape))

### CO2 Data

In [None]:
# Applying Exponential Smoothing To The CO2 Data
co2_data['co2_ewm'] = co2_data['CO2 molfrac (ppm)'].ewm(span=20, adjust=False).mean()

# Plotting The Data
plt.figure(figsize=(12, 6))
plt.plot(co2_data['CO2 molfrac (ppm)'], label='CO2 Concentration')
plt.plot(co2_data['co2_ewm'], label='CO2 Concentration EWM')
plt.title('CO2 Data')
plt.legend()
plt.show()

In [None]:
# Checking Accuracy
print('The Mean Absolute Error:', np.mean(np.abs(co2_data['CO2 molfrac (ppm)'] - co2_data['co2_ewm'])))
print('The Mean Squared Error:', np.mean((co2_data['CO2 molfrac (ppm)'] - co2_data['co2_ewm'])**2))
co2_data['error_percent'] = 100 * np.abs((co2_data['CO2 molfrac (ppm)'] - co2_data['co2_ewm']) / co2_data['CO2 molfrac (ppm)'])
mape = co2_data['error_percent'].mean()

print('The Mean Absolute Percentage Error:', int(100 - mape))

### Energy Data

In [None]:
# Not Implemented

## Prophet Model

### Stock Data

### CO2 Data

### Energy Data

In [None]:
# Not Implemented

## SVR (Support Vector Regression) Model

### Stock Data

In [None]:
# Drop The Name Column
stock_data.drop('Name', axis=1, inplace=True)

# Splitting The Data
X = stock_data.drop('close', axis=1)
y = stock_data['close']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Feature Scaling
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Fitting The SVR Model
svr = SVR(kernel='rbf')
svr.fit(X_train, y_train)

# Predicting The Test Results
y_pred = svr.predict(X_test)

# Plotting The Predictions
plt.figure(figsize=(12, 6))
plt.plot(y_test.values, label='Actual')
plt.plot(y_pred, label='Predicted')
plt.title('Stock Price Predictions')
plt.legend()
plt.show()

In [None]:
# Checking Accuracy
print('The Mean Absolute Error:', np.mean(np.abs(y_test - y_pred)))
print('The Mean Squared Error:', np.mean((y_test - y_pred)**2))
error_percent = 100 * np.abs((y_test - y_pred) / y_test)
mape = error_percent.mean()

print('The Mean Absolute Percentage Error:', int(100 - mape))

### CO2 Data

In [None]:
# Drop all Nan values
co2_data.dropna(inplace=True)

# Splitting The Data
X = co2_data.drop('CO2 molfrac (ppm)', axis=1)
y = co2_data['CO2 molfrac (ppm)']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

# Feature Scaling
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Fitting The SVR Model
svr = SVR(kernel='rbf')
svr.fit(X_train, y_train)

# Predicting The Test Results
y_pred = svr.predict(X_test)

print(y_pred)

# Plotting The Predictions
plt.figure(figsize=(12, 6))
plt.plot(y_test, label='Actual')
plt.plot(y_pred, label='Predictions')
plt.title('CO2 Predictions')
plt.legend()
plt.show()

In [None]:
# Checking Accuracy
print('The Mean Absolute Error:', np.mean(np.abs(y_test - y_pred)))
print('The Mean Squared Error:', np.mean((y_test - y_pred)**2))
error_percent = 100 * np.abs((y_test - y_pred) / y_test)
mape = error_percent.mean()

print('The Mean Absolute Percentage Error:', int(100 - mape))

### Energy Data

In [None]:
# Not Implemented

## LSTM (Long Short-Term Memory) Model

### Stock Data

In [None]:
# Splitting The Data
X = stock_data.drop('close', axis=1)
y = stock_data['close']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Feature Scaling
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Reshaping The Data
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Building The LSTM Model
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(units=50))
model.add(Dense(units=1))

# Compiling The Model
model.compile(optimizer='adam', loss='mean_squared_error')

# Fitting The Model
model.fit(X_train, y_train, epochs=50, batch_size=32)

# Predicting The Test Results
y_pred = model.predict(X_test)

# Plotting The Predictions
plt.figure(figsize=(12, 6))
plt.plot(y_test, label='Actual')
plt.plot(y_pred, label='Predictions')
plt.title('Stock Predictions')
plt.legend()
plt.show()

In [None]:
# Checking Accuracy
y_pred = y_pred.ravel()
print('The Mean Absolute Error:', np.mean(np.abs(y_test - y_pred)))
print('The Mean Squared Error:', np.mean((y_test - y_pred)**2))
error_percent = 100 * np.abs((y_test - y_pred) / y_test)
mape = error_percent.mean()

print('The Mean Absolute Percentage Error:', int(100 - mape))

### CO2 Data

In [None]:
# Splitting The Data
X = co2_data.drop('CO2 molfrac (ppm)', axis=1)
y = co2_data['CO2 molfrac (ppm)']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Feature Scaling
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Reshaping The Data
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Building The LSTM Model
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(units=50))
model.add(Dense(units=1))

# Compiling The Model
model.compile(optimizer='adam', loss='mean_squared_error')

# Fitting The Model
model.fit(X_train, y_train, epochs=100, batch_size=32)

# Predicting The Test Results
y_pred = model.predict(X_test)

# Plotting The Predictions
plt.figure(figsize=(12, 6))
plt.plot(y_test, label='Actual')
plt.plot(y_pred, label='Predictions')
plt.title('CO2 Predictions')
plt.legend()
plt.show()

In [None]:
# Checking Accuracy
y_pred = y_pred.ravel()
print('The Mean Absolute Error:', np.mean(np.abs(y_test - y_pred)))
print('The Mean Squared Error:', np.mean((y_test - y_pred)**2))
error_percent = 100 * np.abs((y_test - y_pred) / y_test)
mape = error_percent.mean()

print('The Mean Absolute Percentage Error:', int(100 - mape))

### Energy Data

In [None]:
# Not Implemented

## Hybrid Model

### Stock Data

### CO2 Data

### Energy Data

In [None]:
# Not Implemented

# Frontend Implementation

In this section we will implement the frontend of the project using Streamlit since it is quite easy to use and implement.