In [201]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.seasonal import seasonal_decompose
import statsmodels.api as sm
from scipy.stats import pearsonr
import datetime as dt
import seaborn as sns

In [202]:
# Load or generate your time series data
data = pd.read_csv('Monthly Volume and Company Count.csv', parse_dates=['Month'])
data['Month'] = pd.to_datetime(data['Month'])
data['EOMONTH'] = pd.to_datetime(data['EOMONTH'])
data.head()

Unnamed: 0,Month,EOMONTH,Active Customer Count,Small Business Lending,Other,Consumer,Mortgage,Volume
0,2017-11-01,2017-11-30,37,5097000,133000,0,0,5230000
1,2019-06-01,2019-06-30,105,50452000,3614000,16691000,326000,71083000
2,2020-01-01,2020-01-31,137,112661000,7604000,54283000,1540000,176088000
3,2018-11-01,2018-11-30,70,32562000,6604000,791000,39000,39996000
4,2021-05-01,2021-05-31,168,68404000,1963000,371465000,2362000,444194000


In [203]:
# Load or generate your time series data
data1 = pd.read_csv('Clean Customer Data.csv', parse_dates=['Month'])
data1['Month'] = pd.to_datetime(data1['Month'])
data1['EOMONTH'] = pd.to_datetime(data1['EOMONTH'])
data1.head()

Unnamed: 0,Month,EOMONTH,CUSTOMER_ID-1,CUSTOMER_NAME,Customer Cohort Month,Small Business Lending,Other,Consumer,Mortgage,Grand Total,Total In-Month Revenue,Fed_Funds_Target_Rate,LIBOR___3_Month,United_States_Treasury_Constant_Maturity___1_Year,United_States_Treasury_Constant_Maturity___5_Year,United_States_Treasury_Constant_Maturity___10_Year,United_States_Treasury_Constant_Maturity___30_Year,US_Prime_Rate
0,2021-08-01,2021-08-31,A00001405,SMG Automotive Holdings LLC,2021-08-01,0,0,2000,0,2000,6.0,0.0025,0.001196,0.0007,0.0077,0.013,0.0192,0.0325
1,2020-05-01,2020-05-31,A00000263,Neat Capital Inc,2019-07-01,0,0,0,173000,173000,500.2,0.0025,0.00344,0.0017,0.003,0.0065,0.0141,0.0325
2,2020-01-01,2020-01-31,A00000263,Neat Capital Inc,2019-07-01,0,0,0,40000,40000,156.96,0.0175,0.017511,0.0145,0.0132,0.0151,0.0199,0.0475
3,2020-06-01,2020-06-30,A00000263,Neat Capital Inc,2019-07-01,0,0,0,316000,316000,899.28,0.0025,0.00302,0.0016,0.0029,0.0066,0.0141,0.0325
4,2020-12-01,2020-12-31,A00000263,Neat Capital Inc,2019-07-01,0,0,0,190000,190000,1029.2,0.0025,0.002384,0.001,0.0036,0.0093,0.0165,0.0325


In [204]:
# Load or generate your time series data
data2 = pd.read_csv('Monthly Correlation Metrics.csv', parse_dates=['Month'])
data2['Month'] = pd.to_datetime(data2['Month'])
data2['EOMONTH'] = pd.to_datetime(data2['EOMONTH'])
data2 = data2.sort_values(by=['Month'])
data2['Month'] = data2['Month'].shift(-1)
data2

Unnamed: 0,Month,EOMONTH,Fed_Funds_Target_Rate,LIBOR___3_Month,United_States_Treasury_Constant_Maturity___1_Year,United_States_Treasury_Constant_Maturity___5_Year,United_States_Treasury_Constant_Maturity___10_Year,United_States_Treasury_Constant_Maturity___30_Year,US_Prime_Rate
18,2016-07-01,2016-06-30,0.0050,0.006541,0.0045,0.0101,0.0149,0.0230,0.0350
19,2016-08-01,2016-07-31,0.0050,0.007591,0.0050,0.0103,0.0146,0.0218,0.0350
64,2016-09-01,2016-08-31,0.0050,0.008393,0.0061,0.0119,0.0158,0.0223,0.0350
22,2016-10-01,2016-09-30,0.0050,0.008537,0.0059,0.0114,0.0160,0.0232,0.0350
21,2016-11-01,2016-10-31,0.0050,0.008843,0.0066,0.0131,0.0184,0.0258,0.0350
...,...,...,...,...,...,...,...,...,...
23,2023-01-01,2022-12-31,0.0450,0.047673,0.0473,0.0399,0.0388,0.0397,0.0750
1,2023-02-01,2023-01-31,0.0450,0.048136,0.0468,0.0363,0.0352,0.0365,0.0750
73,2023-03-01,2023-02-28,0.0475,0.049710,0.0502,0.0418,0.0392,0.0393,0.0775
28,2023-04-01,2023-03-31,0.0500,0.051927,0.0464,0.0360,0.0348,0.0367,0.0800


In [205]:
df = pd.merge(data, data2, left_on='Month', right_on="Month")
df['Month'] = pd.to_datetime(df['Month'])
df['EOMONTH_x'] = pd.to_datetime(df['EOMONTH_x'])
df['EOMONTH_y'] = pd.to_datetime(df['EOMONTH_y'])
df.set_index('Month', inplace=True)
df.dropna(inplace=True)
df = df.sort_index()
df

Unnamed: 0_level_0,EOMONTH_x,Active Customer Count,Small Business Lending,Other,Consumer,Mortgage,Volume,EOMONTH_y,Fed_Funds_Target_Rate,LIBOR___3_Month,United_States_Treasury_Constant_Maturity___1_Year,United_States_Treasury_Constant_Maturity___5_Year,United_States_Treasury_Constant_Maturity___10_Year,United_States_Treasury_Constant_Maturity___30_Year,US_Prime_Rate
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2016-07-01,2016-07-31,20,76000,59000,0,0,135000,2016-06-30,0.0050,0.006541,0.0045,0.0101,0.0149,0.0230,0.0350
2016-08-01,2016-08-31,25,23000,94000,0,0,117000,2016-07-31,0.0050,0.007591,0.0050,0.0103,0.0146,0.0218,0.0350
2016-09-01,2016-09-30,29,16000,43000,0,0,59000,2016-08-31,0.0050,0.008393,0.0061,0.0119,0.0158,0.0223,0.0350
2016-10-01,2016-10-31,23,41000,47000,0,0,88000,2016-09-30,0.0050,0.008537,0.0059,0.0114,0.0160,0.0232,0.0350
2016-11-01,2016-11-30,18,48000,37000,0,0,85000,2016-10-31,0.0050,0.008843,0.0066,0.0131,0.0184,0.0258,0.0350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-01,2022-12-31,235,251391000,1108000,157878000,11650000,422027000,2022-11-30,0.0400,0.047786,0.0474,0.0382,0.0368,0.0380,0.0700
2023-01-01,2023-01-31,230,302672000,1143000,178163000,16156000,498134000,2022-12-31,0.0450,0.047673,0.0473,0.0399,0.0388,0.0397,0.0750
2023-02-01,2023-02-28,240,198101000,1574000,146815000,17633000,364123000,2023-01-31,0.0450,0.048136,0.0468,0.0363,0.0352,0.0365,0.0750
2023-03-01,2023-03-31,247,169968000,1969000,178921000,22469000,373327000,2023-02-28,0.0475,0.049710,0.0502,0.0418,0.0392,0.0393,0.0775


In [206]:
df['Avg Vol Per Customer'] = df['Volume']/df['Active Customer Count']

In [207]:
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 82 entries, 2016-07-01 to 2023-04-01
Data columns (total 16 columns):
 #   Column                                              Non-Null Count  Dtype         
---  ------                                              --------------  -----         
 0   EOMONTH_x                                           82 non-null     datetime64[ns]
 1   Active Customer Count                               82 non-null     int64         
 2   Small Business Lending                              82 non-null     int64         
 3   Other                                               82 non-null     int64         
 4   Consumer                                            82 non-null     int64         
 5   Mortgage                                            82 non-null     int64         
 6   Volume                                              82 non-null     int64         
 7   EOMONTH_y                                           82 non-null     datetime64[n

In [208]:
dfg = df
dfg['Volume_Change'] = dfg['Volume'].pct_change()*100
dfg.dropna(inplace=True)
dfg

Unnamed: 0_level_0,EOMONTH_x,Active Customer Count,Small Business Lending,Other,Consumer,Mortgage,Volume,EOMONTH_y,Fed_Funds_Target_Rate,LIBOR___3_Month,United_States_Treasury_Constant_Maturity___1_Year,United_States_Treasury_Constant_Maturity___5_Year,United_States_Treasury_Constant_Maturity___10_Year,United_States_Treasury_Constant_Maturity___30_Year,US_Prime_Rate,Avg Vol Per Customer,Volume_Change
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2016-08-01,2016-08-31,25,23000,94000,0,0,117000,2016-07-31,0.0050,0.007591,0.0050,0.0103,0.0146,0.0218,0.0350,4.680000e+03,-13.333333
2016-09-01,2016-09-30,29,16000,43000,0,0,59000,2016-08-31,0.0050,0.008393,0.0061,0.0119,0.0158,0.0223,0.0350,2.034483e+03,-49.572650
2016-10-01,2016-10-31,23,41000,47000,0,0,88000,2016-09-30,0.0050,0.008537,0.0059,0.0114,0.0160,0.0232,0.0350,3.826087e+03,49.152542
2016-11-01,2016-11-30,18,48000,37000,0,0,85000,2016-10-31,0.0050,0.008843,0.0066,0.0131,0.0184,0.0258,0.0350,4.722222e+03,-3.409091
2016-12-01,2016-12-31,20,53000,60000,0,0,113000,2016-11-30,0.0050,0.009342,0.0080,0.0183,0.0237,0.0302,0.0350,5.650000e+03,32.941176
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-01,2022-12-31,235,251391000,1108000,157878000,11650000,422027000,2022-11-30,0.0400,0.047786,0.0474,0.0382,0.0368,0.0380,0.0700,1.795860e+06,-8.380081
2023-01-01,2023-01-31,230,302672000,1143000,178163000,16156000,498134000,2022-12-31,0.0450,0.047673,0.0473,0.0399,0.0388,0.0397,0.0750,2.165800e+06,18.033680
2023-02-01,2023-02-28,240,198101000,1574000,146815000,17633000,364123000,2023-01-31,0.0450,0.048136,0.0468,0.0363,0.0352,0.0365,0.0750,1.517179e+06,-26.902601
2023-03-01,2023-03-31,247,169968000,1969000,178921000,22469000,373327000,2023-02-28,0.0475,0.049710,0.0502,0.0418,0.0392,0.0393,0.0775,1.511445e+06,2.527717


In [209]:

X = pd.to_numeric(dfg.index).values.reshape(-1,1)
y = dfg['Volume_Change']
X = sm.add_constant(X)
model=sm.OLS(y,X)
results = model.fit()

In [210]:
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:          Volume_Change   R-squared:                      -0.009
Model:                            OLS   Adj. R-squared:                 -0.009
Method:                 Least Squares   F-statistic:                       nan
Date:                Tue, 16 May 2023   Prob (F-statistic):                nan
Time:                        15:46:27   Log-Likelihood:                -433.57
No. Observations:                  81   AIC:                             869.1
Df Residuals:                      80   BIC:                             871.5
Df Model:                           0                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       6.307e-36    2.3e-36      2.745      0.0

In [211]:
# Perform Holt-Winters decomposition
result = seasonal_decompose(df['Volume'], model='multiplicative', extrapolate_trend='freq')

In [212]:
# Extract trend, seasonal, and residual components
trend = result.trend
seasonal = result.seasonal
residual = result.resid

In [213]:
residual.dropna()

Month
2016-08-01   -0.028207
2016-09-01   -0.016547
2016-10-01   -0.026544
2016-11-01   -0.035488
2016-12-01   -0.070203
                ...   
2022-12-01    0.931571
2023-01-01    0.945729
2023-02-01    0.695546
2023-03-01    0.677236
2023-04-01    0.261510
Name: resid, Length: 81, dtype: float64

In [214]:
corrdf = pd.merge(residual.dropna(), df, left_index=True, right_index=True, how='inner')
corrdf['Fed_Funds_Target_Rate'] = corrdf['Fed_Funds_Target_Rate']*100
corrdf['LIBOR___3_Month'] = corrdf['LIBOR___3_Month']*100
corrdf['United_States_Treasury_Constant_Maturity___1_Year'] = corrdf['United_States_Treasury_Constant_Maturity___1_Year']*100
corrdf['United_States_Treasury_Constant_Maturity___5_Year'] = corrdf['United_States_Treasury_Constant_Maturity___5_Year']*100
corrdf['United_States_Treasury_Constant_Maturity___10_Year'] = corrdf['United_States_Treasury_Constant_Maturity___10_Year']*100
corrdf['United_States_Treasury_Constant_Maturity___30_Year'] = corrdf['United_States_Treasury_Constant_Maturity___30_Year']*100
corrdf['US_Prime_Rate'] = corrdf['US_Prime_Rate']*100
corrdf.head()

Unnamed: 0_level_0,resid,EOMONTH_x,Active Customer Count,Small Business Lending,Other,Consumer,Mortgage,Volume,EOMONTH_y,Fed_Funds_Target_Rate,LIBOR___3_Month,United_States_Treasury_Constant_Maturity___1_Year,United_States_Treasury_Constant_Maturity___5_Year,United_States_Treasury_Constant_Maturity___10_Year,United_States_Treasury_Constant_Maturity___30_Year,US_Prime_Rate,Avg Vol Per Customer,Volume_Change
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2016-08-01,-0.028207,2016-08-31,25,23000,94000,0,0,117000,2016-07-31,0.5,0.7591,0.5,1.03,1.46,2.18,3.5,4680.0,-13.333333
2016-09-01,-0.016547,2016-09-30,29,16000,43000,0,0,59000,2016-08-31,0.5,0.83933,0.61,1.19,1.58,2.23,3.5,2034.482759,-49.57265
2016-10-01,-0.026544,2016-10-31,23,41000,47000,0,0,88000,2016-09-30,0.5,0.85367,0.59,1.14,1.6,2.32,3.5,3826.086957,49.152542
2016-11-01,-0.035488,2016-11-30,18,48000,37000,0,0,85000,2016-10-31,0.5,0.88428,0.66,1.31,1.84,2.58,3.5,4722.222222,-3.409091
2016-12-01,-0.070203,2016-12-31,20,53000,60000,0,0,113000,2016-11-30,0.5,0.93417,0.8,1.83,2.37,3.02,3.5,5650.0,32.941176


In [215]:
# Calculate correlation between residual component and Fed Funds Target Rate
correlation, p_value = pearsonr(residual.dropna(), corrdf['Fed_Funds_Target_Rate'])
print(f"Correlation between residual component and the fed funds target rate: {correlation:.3f}")
print(f"P-value: {p_value:.3f}")

Correlation between residual component and the fed funds target rate: 0.016
P-value: 0.888


In [216]:
# Calculate correlation between residual component and LIBOR 3M Rate
correlation, p_value = pearsonr(residual.dropna(), corrdf['LIBOR___3_Month'])
print(f"Correlation between residual component and the LIBOR 3 Month Rate: {correlation:.3f}")
print(f"P-value: {p_value:.3f}")

Correlation between residual component and the LIBOR 3 Month Rate: -0.016
P-value: 0.886


In [217]:
# Calculate correlation between residual component and US Prime Rate
correlation, p_value = pearsonr(residual.dropna(), corrdf['US_Prime_Rate'])
print(f"Correlation between residual component and the US prime rate: {correlation:.3f}")
print(f"P-value: {p_value:.3f}")

Correlation between residual component and the US prime rate: 0.016
P-value: 0.888


In [218]:
# Calculate correlation between residual component and US Treasury Constant Maturity 1 Year
correlation, p_value = pearsonr(residual.dropna(), corrdf['United_States_Treasury_Constant_Maturity___1_Year'])
print(f"Correlation between residual component and the US Treasury Constant Maturity 1 Year: {correlation:.3f}")
print(f"P-value: {p_value:.3f}")

Correlation between residual component and the US Treasury Constant Maturity 1 Year: 0.027
P-value: 0.814


In [219]:
# Calculate correlation between residual component and US Treasury Constant Maturity 5 Year
correlation, p_value = pearsonr(residual.dropna(), corrdf['United_States_Treasury_Constant_Maturity___5_Year'])
print(f"Correlation between residual component and the US Treasury Constant Maturity 5 Year: {correlation:.3f}")
print(f"P-value: {p_value:.3f}")

Correlation between residual component and the US Treasury Constant Maturity 5 Year: -0.025
P-value: 0.824


In [220]:
# Calculate correlation between residual component and US Treasury Constant Maturity 10 Year
correlation, p_value = pearsonr(residual.dropna(), corrdf['United_States_Treasury_Constant_Maturity___10_Year'])
print(f"Correlation between residual component and the US Treasury Constant Maturity 10 Year: {correlation:.3f}")
print(f"P-value: {p_value:.3f}")

Correlation between residual component and the US Treasury Constant Maturity 10 Year: -0.069
P-value: 0.543


In [221]:
# Calculate correlation between residual component and US Treasury Constant Maturity 30 Year
correlation, p_value = pearsonr(residual.dropna(), corrdf['United_States_Treasury_Constant_Maturity___30_Year'])
print(f"Correlation between residual component and the US Treasury Constant Maturity 30 Year: {correlation:.3f}")
print(f"P-value: {p_value:.3f}")

Correlation between residual component and the US Treasury Constant Maturity 30 Year: -0.145
P-value: 0.197


In [226]:
X = corrdf['United_States_Treasury_Constant_Maturity___30_Year']
y = residual.dropna()
X = sm.add_constant(X)
model=sm.OLS(y,X)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                  resid   R-squared:                       0.021
Model:                            OLS   Adj. R-squared:                  0.009
Method:                 Least Squares   F-statistic:                     1.690
Date:                Tue, 16 May 2023   Prob (F-statistic):              0.197
Time:                        15:48:41   Log-Likelihood:                -23.338
No. Observations:                  81   AIC:                             50.68
Df Residuals:                      79   BIC:                             55.47
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                                                         coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------

In [222]:
# Load or generate your time series data
data3 = pd.read_csv('P.Oakley Sales and Marketing Spend.csv', parse_dates=['Month'])
data3['Total S_M Spend'] = data3['Total S_M Spend'].apply(lambda x: float(x)*1000)
data3['Month'] = pd.to_datetime(data3['Month'])
data3 = data3.sort_values(by=['Month'])
data3['Month'] = data3['Month'].shift(-1)
data3.set_index('Month', inplace=True)
d3 = data3[['Total S_M Spend']]
d3.head()

Unnamed: 0_level_0,Total S_M Spend
Month,Unnamed: 1_level_1
2019-02-01,101900.0
2019-03-01,95500.0
2019-04-01,111200.0
2019-05-01,146500.0
2019-06-01,136000.0


In [223]:
ust = pd.merge(d3,df, left_index=True, right_index=True, how='inner')
ust['Volume_Change'] = ust['Volume'].pct_change()*100
ust.dropna(inplace=True)
ust = ust[['Volume_Change', 'Volume','Total S_M Spend', 'United_States_Treasury_Constant_Maturity___30_Year']]
ust['United_States_Treasury_Constant_Maturity___30_Year'] = ust['United_States_Treasury_Constant_Maturity___30_Year']*100
ust

Unnamed: 0_level_0,Volume_Change,Volume,Total S_M Spend,United_States_Treasury_Constant_Maturity___30_Year
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-03-01,9.12457,54523000,95500.0,3.09
2019-04-01,14.863452,62627000,111200.0,2.81
2019-05-01,18.198221,74024000,146500.0,2.93
2019-06-01,-3.973036,71083000,136000.0,2.58
2019-07-01,20.388841,85576000,170600.0,2.52
2019-08-01,12.083996,95917000,137200.0,2.53
2019-09-01,-8.855573,87423000,195900.0,1.96
2019-10-01,30.718461,114278000,210900.0,2.12
2019-11-01,3.254345,117997000,259100.0,2.17
2019-12-01,20.933583,142698000,285900.0,2.21


In [224]:
X = ust[['Total S_M Spend', 'United_States_Treasury_Constant_Maturity___30_Year']]
y = ust['Volume']
X = sm.add_constant(X)
model=sm.OLS(y,X)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                 Volume   R-squared:                       0.773
Model:                            OLS   Adj. R-squared:                  0.763
Method:                 Least Squares   F-statistic:                     78.36
Date:                Tue, 16 May 2023   Prob (F-statistic):           1.53e-15
Time:                        15:46:28   Log-Likelihood:                -957.11
No. Observations:                  49   AIC:                             1920.
Df Residuals:                      46   BIC:                             1926.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                                                         coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------