# The Packages

In [1]:
import numpy as np
import pandas as pd
import os
import gc
import datetime
import warnings

In [2]:
# PACF - ACF
# ------------------------------------------------------
import statsmodels.api as sm


In [3]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [4]:
# DATA VISUALIZATION
# ------------------------------------------------------
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns

# Apply seaborn's "darkgrid" style to the matplotlib figures drawn later on.
sns.set_style("darkgrid")


In [5]:
# CONFIGURATIONS
# ------------------------------------------------------
# Never truncate the column list when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
# Render floats with two decimal places in DataFrame output.
pd.set_option('display.float_format', '{:.2f}'.format)
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings from the libraries used here.
warnings.filterwarnings('ignore')

# Utility Functions

In [6]:
def plotSeries(time, series, lab, form='-'):
    """Draw one labelled series on the current matplotlib axes.

    Parameters
    ----------
    time : x values, passed straight through to matplotlib.
    series : y values, passed straight through to matplotlib.
    lab : legend label attached to the plotted line.
    form : matplotlib format string for the line (default ``'-'``).

    The x axis is labelled "Date", the y axis "Close Price", and the grid is
    switched on. The function does not create a figure or draw a legend;
    the caller is expected to do both.
    """
    axes = plt.gca()  # the axes the pyplot state machine would draw on
    axes.plot(time, series, form, label=lab)
    axes.set_xlabel("Date")
    axes.set_ylabel("Close Price")
    axes.grid(True)

# The Data

In [7]:
# Load the WFC daily price file; the path is relative to the notebook's
# working directory.
wfc = pd.read_csv(filepath_or_buffer='Dataset/WFC Daily1.csv')

In [8]:
# Preview the first rows of the freshly loaded frame.
wfc.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2000-01-03,20.16,20.19,19.0,19.56,10.44,9037000
1,2000-01-04,18.62,18.84,18.0,18.59,9.92,15237200
2,2000-01-05,18.5,18.88,18.03,18.41,9.82,8228200
3,2000-01-06,18.53,19.5,18.47,19.22,10.25,8326800
4,2000-01-07,19.62,19.84,19.28,19.56,10.44,7711200


In [9]:
# Check how pandas typed the Date column on load (it is parsed further down).
wfc['Date'].dtype

dtype('O')

In [10]:
# Summarise column dtypes and non-null counts to spot missing values.
wfc.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5754 entries, 0 to 5753
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       5754 non-null   object 
 1   Open       5754 non-null   float64
 2   High       5754 non-null   float64
 3   Low        5754 non-null   float64
 4   Close      5754 non-null   float64
 5   Adj Close  5754 non-null   float64
 6   Volume     5754 non-null   int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 314.8+ KB


In [11]:
# Reduce the raw frame to a Date-indexed Close series:
#   1. parse the Date strings into datetimes,
#   2. drop every price/volume column except Close,
#   3. promote Date from a column to the index.
# Built as one chained expression instead of three in-place mutations.
# NOTE: after this cell `Date` is the index, so it must be reached through
# `wfc.index`, not `wfc.Date` / `wfc['Date']`.
wfc = (
    wfc
    .assign(Date=pd.to_datetime(wfc['Date']))
    .drop(columns=['Open', 'High', 'Low', 'Adj Close', 'Volume'])
    .set_index('Date')
)

In [12]:
# Preview the frame after the transform: Close values indexed by Date.
wfc.head()

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2000-01-03,19.56
2000-01-04,18.59
2000-01-05,18.41
2000-01-06,19.22
2000-01-07,19.56


In [13]:
# Initialise a PyCaret time-series experiment on the prepared Close-price frame.
# Requires the optional `pycaret` package to be installed in the kernel's
# environment; without it this cell raises ModuleNotFoundError.
# The star import is kept because it exposes PyCaret's functional API, which
# other cells may rely on.
from pycaret.time_series import *

# The original call passed `data`, a name that is never defined in this
# notebook; the frame prepared above is `wfc`.
# fh = forecast horizon, fold = number of cross-validation folds,
# session_id = random seed (per the PyCaret documentation).
# NOTE(review): `wfc` is indexed by trading days only — confirm PyCaret accepts
# this index frequency.
setup(wfc, fh = 7, fold = 3, session_id = 123)

ModuleNotFoundError: No module named 'pycaret'

# Saving the Data Frames

# The Split

In [None]:
# First and last dates covered by the data.
# `Date` was moved into the index by `set_index` earlier in the notebook, so
# it is read from `wfc.index`; `wfc.Date` would raise AttributeError here.
min_date = wfc.index.min()
max_date = wfc.index.max()
print("Min:", min_date, "Max:", max_date)

In [None]:
# Share of the covered calendar span given to training and validation;
# whatever remains after both is the test period.
train_percent = .75
valid_percent = .12

# Total calendar time between the first and last observation.
time_between = max_date - min_date

# Cutoffs are dates, not row counts: each one sits a fixed fraction of the
# way along the calendar span.
train_cutoff = min_date + time_between * train_percent
valid_cutoff = min_date + time_between * (train_percent + valid_percent)

print(f"Training Cutoff Date: {train_cutoff!s}")
print(f"Validation Cutoff Date: {valid_cutoff!s}")

In [None]:
# Chronological split on the Date index (no shuffling):
#   train: dates up to and including train_cutoff
#   valid: dates after train_cutoff, up to and including valid_cutoff
#   test : dates after valid_cutoff
# `Date` is the index of `wfc` (not a column), so the masks are built from
# `wfc.index`; `wfc.Date` would raise AttributeError.
train_df = wfc[wfc.index <= train_cutoff]
valid_df = wfc[(wfc.index > train_cutoff) & (wfc.index <= valid_cutoff)]
test_df = wfc[wfc.index > valid_cutoff]

# The three masks are mutually exclusive and should cover every row.
assert len(train_df) + len(valid_df) + len(test_df) == len(wfc), \
    "train/valid/test split does not cover every row exactly once"


In [None]:
# Report (rows, columns) for the full frame and each split.
# Fixes the misspelled "Vaid" label in the validation line.
print("Dataset shape: {:s}".format(str(wfc.shape)))
print("Train dataset shape: {:s}".format(str(train_df.shape)))
print("Valid dataset shape: {:s}".format(str(valid_df.shape)))
print("Test dataset shape: {:s}".format(str(test_df.shape)))

# Stock Price Graph

In [None]:
# Plot the Close price of each split on one figure, one labelled line per split.
plt.figure(figsize=(18, 10))

# The dates live in each frame's index (Date is not a column), so the x values
# come from `.index`; `.Date` would raise AttributeError.
plotSeries(train_df.index, train_df.Close, "Training Data")
plotSeries(valid_df.index, valid_df.Close, "Validation Data")
plotSeries(test_df.index, test_df.Close, "Test Data")
# Trailing semicolon keeps the Legend repr out of the cell output.
plt.legend(loc='upper left');