# Simple Recurrent Neural Network

## Import 

In [32]:
#Importing libraries for data analysis
import pandas as pd
import matplotlib as plt

In [7]:
df = pd.read_csv("raw_data/data.csv")

In [38]:
df.tail()

Unnamed: 0,Time,DFF,CPIAUCSL,CPILFESL,UNRATE,WTISPLC,INDPRO,MABMM301USM189S,A576RC1
752,2022-09-01,2.56,8.21485,6.64296,3.5,84.26,4.73131,2.5637,9.07203
753,2022-10-01,3.08,7.76249,6.30176,3.7,87.55,3.18912,1.28501,8.07029
754,2022-11-01,3.78,7.13535,5.97198,3.6,84.37,1.98468,0.02623,7.43424
755,2022-12-01,4.1,6.44494,5.70386,3.5,76.44,1.14673,-1.31457,6.94998
756,2023-01-01,4.33,6.34716,5.54757,3.4,78.12,0.78956,,7.8618


In [9]:
df = df.drop("Unnamed: 0", axis = 1)

## Exploratory Data Analysis

In [34]:
df.shape

(757, 9)

In [10]:
df.describe()

Unnamed: 0,DFF,CPIAUCSL,CPILFESL,UNRATE,WTISPLC,INDPRO,MABMM301USM189S,A576RC1
count,757.0,757.0,757.0,757.0,757.0,757.0,744.0,757.0
mean,4.789736,3.76988,3.713922,5.949802,32.280452,2.552905,7.171975,6.21076
std,3.69764,2.844571,2.539275,1.683161,29.122856,4.828554,3.583807,3.214712
min,0.05,-1.95876,0.60272,3.4,2.92,-17.01813,-1.31457,-5.86048
25%,1.82,1.7595,1.98772,4.8,11.16,0.43414,5.165637,4.37276
50%,4.68,3.00457,2.75229,5.7,21.688,2.89442,6.89693,6.00813
75%,6.58,4.69592,4.73251,7.0,47.02,5.5785,8.47832,8.49285
max,19.1,14.59227,13.60449,14.7,133.93,16.55575,26.88713,14.79116


In [11]:
df.dtypes

Time                object
DFF                float64
CPIAUCSL           float64
CPILFESL           float64
UNRATE             float64
WTISPLC            float64
INDPRO             float64
MABMM301USM189S    float64
A576RC1            float64
dtype: object

## Data preprocessing

In [12]:
#Check length of df before changing anything
df.duplicated()
len(df)

757

In [13]:
#Check for duplicates
df.duplicated().sum()

0

In [14]:
#Detecting missing data (Counting the number of NaN for each column)
df.isnull().sum().sort_values(ascending=False)

MABMM301USM189S    13
Time                0
DFF                 0
CPIAUCSL            0
CPILFESL            0
UNRATE              0
WTISPLC             0
INDPRO              0
A576RC1             0
dtype: int64

In [25]:
#Agreed strategy in the team: delete rows
df.dropna(subset="MABMM301USM189S", )


Unnamed: 0,Time,DFF,CPIAUCSL,CPILFESL,UNRATE,WTISPLC,INDPRO,MABMM301USM189S,A576RC1
12,1961-01-01,1.45,1.60027,0.98361,6.6,2.97,-8.45377,5.33199,1.22177
13,1961-02-01,2.54,1.46209,0.65359,6.9,2.97,-7.74411,6.06568,0.96012
14,1961-03-01,2.02,1.46209,0.98039,6.9,2.97,-6.34204,6.34815,1.17951
15,1961-04-01,1.49,0.91401,0.98039,7.0,2.97,-3.65304,6.59780,0.91575
16,1961-05-01,1.98,0.91309,0.98039,7.1,2.97,-2.05700,7.07876,1.35185
...,...,...,...,...,...,...,...,...,...
751,2022-08-01,2.33,8.22736,6.30050,3.7,93.67,3.50338,3.82332,9.13152
752,2022-09-01,2.56,8.21485,6.64296,3.5,84.26,4.73131,2.56370,9.07203
753,2022-10-01,3.08,7.76249,6.30176,3.7,87.55,3.18912,1.28501,8.07029
754,2022-11-01,3.78,7.13535,5.97198,3.6,84.37,1.98468,0.02623,7.43424


In [26]:
#Converting "time" into date-time data type
df["Time"] =  pd.to_datetime(df["Time"])
#format='%Y-%m-%d%'
#%d/%m/%Y


In [27]:
df.dtypes

Time               datetime64[ns]
DFF                       float64
CPIAUCSL                  float64
CPILFESL                  float64
UNRATE                    float64
WTISPLC                   float64
INDPRO                    float64
MABMM301USM189S           float64
A576RC1                   float64
dtype: object

In [37]:
df.tail()

Unnamed: 0,Time,DFF,CPIAUCSL,CPILFESL,UNRATE,WTISPLC,INDPRO,MABMM301USM189S,A576RC1
752,2022-09-01,2.56,8.21485,6.64296,3.5,84.26,4.73131,2.5637,9.07203
753,2022-10-01,3.08,7.76249,6.30176,3.7,87.55,3.18912,1.28501,8.07029
754,2022-11-01,3.78,7.13535,5.97198,3.6,84.37,1.98468,0.02623,7.43424
755,2022-12-01,4.1,6.44494,5.70386,3.5,76.44,1.14673,-1.31457,6.94998
756,2023-01-01,4.33,6.34716,5.54757,3.4,78.12,0.78956,,7.8618


In [30]:
#Checking ideas for feature importance
df.corr()

  df.corr()


Unnamed: 0,DFF,CPIAUCSL,CPILFESL,UNRATE,WTISPLC,INDPRO,MABMM301USM189S,A576RC1
DFF,1.0,0.70492,0.753935,0.051238,-0.423383,0.071652,0.01134,0.567413
CPIAUCSL,0.70492,1.0,0.927689,0.062215,-0.094958,-0.068778,0.06132,0.565328
CPILFESL,0.753935,0.927689,1.0,0.173739,-0.19934,-0.168926,0.101609,0.507512
UNRATE,0.051238,0.062215,0.173739,1.0,0.175681,-0.337193,0.257934,-0.350085
WTISPLC,-0.423383,-0.094958,-0.19934,0.175681,1.0,-0.194647,-0.08761,-0.338625
INDPRO,0.071652,-0.068778,-0.168926,-0.337193,-0.194647,1.0,-0.071655,0.620889
MABMM301USM189S,0.01134,0.06132,0.101609,0.257934,-0.08761,-0.071655,1.0,0.105861
A576RC1,0.567413,0.565328,0.507512,-0.350085,-0.338625,0.620889,0.105861,1.0


In [None]:
#Check for periodicity --> Apply for Month on a yearly basis

In [None]:
#Check data for seasonality --> discuss with TA! --> we don't need


#from statsmodels.tsa.seasonal import seasonal_decompose
#from statsmodels.tsa.stattools import adfuller

#adfuller(df.value)[1]  # p-value
#print('additive resid: ', adfuller(result_add.resid.dropna())[1])
#print('multiplicative resid: ', adfuller(result_mul.resid.dropna())[1])




# Additive Decomposition (y = Trend + Seasonal + Residuals)
#result_add = seasonal_decompose(df['value'], model='additive')
#result_add.plot();



# Multiplicative Decomposition (y = Trend * Seasonal * Residuals)
#result_mul = seasonal_decompose(df['value'], model='multiplicative')
#result_mul.plot();





In [None]:
#Achieve stationarity --> Discuss with TA!

#from statsmodels.graphics.tsaplots import plot_acf
#plot_acf(df.value, lags=50)
#plt.show()



In [28]:
#Importing libraries for building of models

from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers

ModuleNotFoundError: No module named 'tensorflow'

In [None]:
#Declaring variables
X = df.drop(columns="CPILFESL")
y = df["CPILFESL"]


In [None]:
#Train test split the data --> discuss with TA
df_train = df.iloc[:index]
df_test = df.iloc[index:]

In [None]:
# Creating the model
## 1- RNN Architecture
model = Sequential()
model.add(layers.SimpleRNN(units=2, activation='tanh', input_shape=(4,3)))
model.add(layers.Dense(1, activation="linear"))

## 2- Compilation
model.compile(loss='mse', 
              optimizer=Adam(lr=0.5)) # vhigh lr so we can converge a little with such a small dataset

## 3- Fit
model.fit(X, y, epochs=2000, verbose=0)

## 4- Predict
#model.predict(X) 

In [None]:
#Predicting with the model
