In [286]:
import pandas as pd
import plotly.graph_objs as go
import plotly.express as px
import plotly.io as pio
pio.templates.default = "plotly_white"

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [287]:
#Import GDP data
data = pd.read_csv('GDP_US.csv')
print(data)

          DATE        GDP
0     1/1/1947    243.164
1     4/1/1947    245.968
2     7/1/1947    249.585
3    10/1/1947    259.745
4     1/1/1948    265.742
..         ...        ...
300   1/1/2022  24740.480
301   4/1/2022  25248.476
302   7/1/2022  25723.941
303  10/1/2022  26137.992
304   1/1/2023  26465.865

[305 rows x 2 columns]


In [288]:
#Plotting GDP growth over time
fig = go.Figure(data=go.Heatmap(
                   z=[data['GDP']],
                   x=data.index,
                   y=['GDP'],
                   colorscale='Viridis'))

fig.update_layout(title='GDP Growth over Time',
                  xaxis_title='Time Period',
                  yaxis_title='')

fig.show()

In [289]:
#Converting monthly data to quarterly data using resample method
data['DATE'] = pd.to_datetime(data['DATE'], format='%m/%d/%Y')
data.set_index('DATE', inplace=True)
quarterly_data = data.resample('Q').mean()
#print(quarterly_data['Time Period'])
#print(quarterly_data['GDP Growth'])
quarterly_data['GDP'].fillna(0.00, inplace=True)
print(quarterly_data)

                  GDP
DATE                 
1947-03-31    243.164
1947-06-30    245.968
1947-09-30    249.585
1947-12-31    259.745
1948-03-31    265.742
...               ...
2022-03-31  24740.480
2022-06-30  25248.476
2022-09-30  25723.941
2022-12-31  26137.992
2023-03-31  26465.865

[305 rows x 1 columns]


In [290]:
# Analysing the recession period with respect to GDP
quarterly_data['GDP_Indicator'] = ((quarterly_data['GDP'].pct_change() < 0) & (quarterly_data['GDP'].pct_change().shift(1) < 0))
GDP_Indicator= quarterly_data['GDP_Indicator']
GDP_Indicator.to_csv('GDP_Indicator.csv', index=False)

# Fill missing values with False (since the first quarter cannot be in a recession)
quarterly_data['GDP_Indicator'].fillna(False, inplace=True)

print(quarterly_data['GDP_Indicator'])

quarterly_data['GDP Growth']= quarterly_data['GDP'].pct_change()
quarterly_data['GDP Growth'].fillna(0.00, inplace=True)

print(quarterly_data)

DATE
1947-03-31    False
1947-06-30    False
1947-09-30    False
1947-12-31    False
1948-03-31    False
              ...  
2022-03-31    False
2022-06-30    False
2022-09-30    False
2022-12-31    False
2023-03-31    False
Freq: Q-DEC, Name: GDP_Indicator, Length: 305, dtype: bool
                  GDP  GDP_Indicator  GDP Growth
DATE                                            
1947-03-31    243.164          False    0.000000
1947-06-30    245.968          False    0.011531
1947-09-30    249.585          False    0.014705
1947-12-31    259.745          False    0.040708
1948-03-31    265.742          False    0.023088
...               ...            ...         ...
2022-03-31  24740.480          False    0.016073
2022-06-30  25248.476          False    0.020533
2022-09-30  25723.941          False    0.018831
2022-12-31  26137.992          False    0.016096
2023-03-31  26465.865          False    0.012544

[305 rows x 3 columns]


In [291]:
# Importing Personal Consumption Expenditures Dataset
pcec_data= pd.read_csv('Personal_Consumption_Expenditures_US.csv')
print(pcec_data.head())

#Converting monthly data to quarterly data using resample method
pcec_data['DATE'] = pd.to_datetime(pcec_data['DATE'], format='%Y/%m/%d')
pcec_data.set_index('DATE', inplace=True)
pcec_quarterly_data = pcec_data.resample('Q').mean()
pcec_quarterly_data['PCEC'].fillna(0.00, inplace=True)
print(pcec_quarterly_data)

         DATE     PCEC
0  1947-01-01  156.161
1  1947-04-01  160.031
2  1947-07-01  163.543
3  1947-10-01  167.672
4  1948-01-01  170.372
                 PCEC
DATE                 
1947-03-31    156.161
1947-06-30    160.031
1947-09-30    163.543
1947-12-31    167.672
1948-03-31    170.372
...               ...
2022-03-31  16874.769
2022-06-30  17261.338
2022-09-30  17542.652
2022-12-31  17749.893
2023-03-31  18095.310

[305 rows x 1 columns]


In [292]:
# Analysing the recession period with respect to Personal Consumption Expenditures Dataset
quarterly_data['PCEC'] = pcec_quarterly_data['PCEC']
print (quarterly_data['PCEC'])
PCEC_Indicator= ((quarterly_data['PCEC'].pct_change() < 0 ) & (quarterly_data['PCEC'].shift(1).pct_change()<0))
PCEC_Indicator.to_csv('PCEC_Indicator.csv', index=False)
#quarterly_data['PCEC'].fillna(0.00, inplace=True)
quarterly_data['PCEC_Indicator']=((quarterly_data['PCEC'].pct_change() < 0 ) & (quarterly_data['PCEC'].shift(1).pct_change()<0))
print(quarterly_data['PCEC_Indicator'])

DATE
1947-03-31      156.161
1947-06-30      160.031
1947-09-30      163.543
1947-12-31      167.672
1948-03-31      170.372
                ...    
2022-03-31    16874.769
2022-06-30    17261.338
2022-09-30    17542.652
2022-12-31    17749.893
2023-03-31    18095.310
Freq: Q-DEC, Name: PCEC, Length: 305, dtype: float64
DATE
1947-03-31    False
1947-06-30    False
1947-09-30    False
1947-12-31    False
1948-03-31    False
              ...  
2022-03-31    False
2022-06-30    False
2022-09-30    False
2022-12-31    False
2023-03-31    False
Freq: Q-DEC, Name: PCEC_Indicator, Length: 305, dtype: bool


In [293]:
# Importing Layoffs dataset

layoff_data= pd.read_csv('Layoffs_US.csv')
#print(layoff_data.head())

#Convert monthly data to quarterly data using resample method
layoff_data['DATE'] = pd.to_datetime(layoff_data['DATE'], format='%m/%d/%Y')
layoff_data.set_index('DATE', inplace=True)
layoff_quarterly_data = layoff_data.resample('Q').mean()
layoff_quarterly_data['layoff'].fillna(0.00, inplace=True)
print(layoff_quarterly_data)

                 layoff
DATE                   
2000-12-31  2018.000000
2001-03-31  2069.333333
2001-06-30  1975.666667
2001-09-30  2052.000000
2001-12-31  2165.333333
...                 ...
2022-03-31  1414.333333
2022-06-30  1433.666667
2022-09-30  1520.000000
2022-12-31  1495.000000
2023-03-31  1611.500000

[90 rows x 1 columns]


In [294]:
# Analysing the recession period with respect to Layoffs
quarterly_data['layoff'] = layoff_quarterly_data['layoff']
quarterly_data['layoff'].fillna(0.00, inplace=True)
#lay_off_data_q=layoff_quarterly_data['layoff']
print(quarterly_data['layoff'])
layoff_indicator= quarterly_data['layoff']>5000
quarterly_data['Layoff_Indicator']=layoff_indicator
layoff_indicator.to_csv('Layoffs_ind.csv', index=False)
print(quarterly_data['Layoff_Indicator'])

DATE
1947-03-31       0.000000
1947-06-30       0.000000
1947-09-30       0.000000
1947-12-31       0.000000
1948-03-31       0.000000
                 ...     
2022-03-31    1414.333333
2022-06-30    1433.666667
2022-09-30    1520.000000
2022-12-31    1495.000000
2023-03-31    1611.500000
Freq: Q-DEC, Name: layoff, Length: 305, dtype: float64
DATE
1947-03-31    False
1947-06-30    False
1947-09-30    False
1947-12-31    False
1948-03-31    False
              ...  
2022-03-31    False
2022-06-30    False
2022-09-30    False
2022-12-31    False
2023-03-31    False
Freq: Q-DEC, Name: Layoff_Indicator, Length: 305, dtype: bool


In [295]:
# Importing Unemployment Rate dataset 
unem_data= pd.read_csv('Unemployment_US.csv')
#print(unem_data.head())
#Convert monthly data to quarterly data using resample method
unem_data['DATE'] = pd.to_datetime(unem_data['DATE'], format='%m/%d/%Y')
unem_data.set_index('DATE', inplace=True)
unem_quarterly_data = unem_data.resample('Q').mean()
unem_quarterly_data['UNEM'].fillna(0.00, inplace=True)
print(unem_quarterly_data)

                UNEM
DATE                
1970-03-31  4.157878
1970-06-30  4.769321
1970-09-30  5.329443
1970-12-31  5.897090
1971-03-31  5.979123
...              ...
2021-12-31  4.297647
2022-03-31  3.913936
2022-06-30  3.628854
2022-09-30  3.602464
2022-12-31  3.636928

[212 rows x 1 columns]


In [296]:
# Analysing the recession period with respect to Unemployment Rate
quarterly_data['UNEM'] =unem_quarterly_data['UNEM']
quarterly_data['UNEM'].fillna(0.00, inplace=True)
print(quarterly_data['UNEM'])
unem_indicator= quarterly_data['UNEM']>7.5
quarterly_data['UNEM_Indicator']=unem_indicator
unem_indicator.to_csv('UNEM_ind.csv', index=False)
print(quarterly_data['UNEM_Indicator'])

DATE
1947-03-31    0.000000
1947-06-30    0.000000
1947-09-30    0.000000
1947-12-31    0.000000
1948-03-31    0.000000
                ...   
2022-03-31    3.913936
2022-06-30    3.628854
2022-09-30    3.602464
2022-12-31    3.636928
2023-03-31    0.000000
Freq: Q-DEC, Name: UNEM, Length: 305, dtype: float64
DATE
1947-03-31    False
1947-06-30    False
1947-09-30    False
1947-12-31    False
1948-03-31    False
              ...  
2022-03-31    False
2022-06-30    False
2022-09-30    False
2022-12-31    False
2023-03-31    False
Freq: Q-DEC, Name: UNEM_Indicator, Length: 305, dtype: bool


In [297]:
# Combining all features for final analysis except layoff
quarterly_data['Indicator'] = (quarterly_data['GDP_Indicator'] & quarterly_data['PCEC_Indicator'] & quarterly_data['UNEM_Indicator'] )
print(quarterly_data['Indicator'])
quarterly_data_ind = quarterly_data['Indicator']
quarterly_data_ind.to_csv('quarterly_data_ind.csv', index=True)

DATE
1947-03-31    False
1947-06-30    False
1947-09-30    False
1947-12-31    False
1948-03-31    False
              ...  
2022-03-31    False
2022-06-30    False
2022-09-30    False
2022-12-31    False
2023-03-31    False
Freq: Q-DEC, Name: Indicator, Length: 305, dtype: bool


In [298]:
# Splitting the dataset to test and train
X_train, X_test, y_train, y_test = train_test_split(
    quarterly_data[['GDP','PCEC','UNEM']],
    quarterly_data['Indicator'], test_size=0.3,random_state=0)

In [299]:
#Using Logistic Regression Model for Prediction
model = LogisticRegression()
model.fit(X_train, y_train)

In [300]:
# Evaluating the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

Accuracy: 0.9891304347826086


In [301]:
# Score for training data
model.score(X_train, y_train)

0.9953051643192489

In [302]:
#Score for Test data
model.score(X_test,y_test)

0.9891304347826086

### With Layoffs dataset

In [303]:
# Combining all features for final analysis including layoff
quarterly_data['Indicator_wL'] = (quarterly_data['GDP_Indicator'] & quarterly_data['PCEC_Indicator'] & quarterly_data['UNEM_Indicator'] & quarterly_data['Layoff_Indicator'])
print(quarterly_data['Indicator_wL'])
quarterly_data_ind_wL = quarterly_data['Indicator_wL']
quarterly_data_ind_wL.to_csv('quarterly_data_ind_WL.csv', index=True)

DATE
1947-03-31    False
1947-06-30    False
1947-09-30    False
1947-12-31    False
1948-03-31    False
              ...  
2022-03-31    False
2022-06-30    False
2022-09-30    False
2022-12-31    False
2023-03-31    False
Freq: Q-DEC, Name: Indicator_wL, Length: 305, dtype: bool


In [304]:
X_train, X_test, y_train, y_test = train_test_split(
    quarterly_data[['GDP','PCEC','UNEM','layoff']],
    quarterly_data['Indicator'], test_size=0.3,random_state=0)

In [305]:
model = LogisticRegression()
model.fit(X_train, y_train)


lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression



In [306]:
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

Accuracy: 0.9782608695652174


In [307]:
model.score(X_train, y_train)

1.0

In [308]:
model.score(X_test,y_test)

0.9782608695652174

In [309]:
# Plot of GDP growth and recession data
fig = go.Figure()
fig.add_trace(go.Scatter(x=quarterly_data.index, 
                         y=quarterly_data['GDP Growth'], 
                         name='GDP Growth', 
                         line=dict(color='green', width=2)))
fig.add_trace(go.Scatter(x=quarterly_data[quarterly_data['GDP_Indicator']].index, 
                         y=quarterly_data[quarterly_data['GDP_Indicator']]['GDP Growth'], 
                         name='GDP_Indicator', line=dict(color='red', width=2)))

fig.update_layout(title='GDP Growth and Recession over Time (Quarterly Data)',
                  xaxis_title='Time Period',
                  yaxis_title='GDP Growth')

fig.show()


In [310]:
# Plot of Personal Consumption Expenditure and recession data
fig = go.Figure()
quarterly_data['PCEC'] = pcec_quarterly_data['PCEC'].pct_change()
fig.add_trace(go.Scatter(x=quarterly_data.index, 
                         y=quarterly_data['PCEC'], 
                         name='PCEC', 
                         line=dict(color='green', width=2)))
fig.add_trace(go.Scatter(x=quarterly_data[quarterly_data['PCEC_Indicator']].index, 
                         y=quarterly_data[quarterly_data['PCEC_Indicator']]['PCEC'], 
                         name='PCEC_Indicator', line=dict(color='red', width=2)))

fig.update_layout(title='Personal Consumption Expenditure and Recession over Time (Quarterly Data)',
                  xaxis_title='Time Period',
                  yaxis_title='Personal Consumption Expenditure')

fig.show()


In [311]:
# Plot of Layoffs and recession data
fig = go.Figure()

fig.add_trace(go.Scatter(x=quarterly_data.index, 
                         y=quarterly_data['layoff'], 
                         name='layoffs', 
                         line=dict(color='green', width=2)))
fig.add_trace(go.Scatter(x=quarterly_data[quarterly_data['Layoff_Indicator']].index, 
                         y=quarterly_data[quarterly_data['Layoff_Indicator']]['layoff'], 
                         name='Layoff_Indicator', line=dict(color='red', width=2)))

fig.update_layout(title='Layoffs and Recession over Time (Quarterly Data)',
                  xaxis_title='Time Period',
                  yaxis_title='Layoffs')

fig.show()


In [312]:
# Plot of Unemployment Rate and recession data
fig = go.Figure()

fig.add_trace(go.Scatter(x=quarterly_data.index, 
                         y=quarterly_data['UNEM'], 
                         name='Unemployment rate', 
                         line=dict(color='green', width=2)))
fig.add_trace(go.Scatter(x=quarterly_data[quarterly_data['UNEM_Indicator']].index, 
                         y=quarterly_data[quarterly_data['UNEM_Indicator']]['UNEM'], 
                         name='UNEM_Indicator', line=dict(color='red', width=2)))

fig.update_layout(title='Unemployment Rate and Recession over Time (Quarterly Data)',
                  xaxis_title='Time Period',
                  yaxis_title='Unemployment Rate')

fig.show()