# Imports

In [1]:
# import necessary python libraries and modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.seasonal import seasonal_decompose
import statsmodels.api as sm
from scipy.stats import pearsonr
import datetime as dt
import seaborn as sns

# Load Data

##### Monthly Ocrolus Volume and Active Company Counts

In [2]:
# Load data
data = pd.read_csv('Monthly Volume and Company Count.csv', parse_dates=['Month'])
data['Month'] = pd.to_datetime(data['Month'])
data['EOMONTH'] = pd.to_datetime(data['EOMONTH'])
data.head()

Unnamed: 0,Month,EOMONTH,Active Customer Count,Small Business Lending,Other,Consumer,Mortgage,Volume
0,2017-11-01,2017-11-30,37,5097000,133000,0,0,5230000
1,2019-06-01,2019-06-30,105,50452000,3614000,16691000,326000,71083000
2,2020-01-01,2020-01-31,137,112661000,7604000,54283000,1540000,176088000
3,2018-11-01,2018-11-30,70,32562000,6604000,791000,39000,39996000
4,2021-05-01,2021-05-31,168,68404000,1963000,371465000,2362000,444194000


##### Customer Level Ocrolus Data

In [3]:
# Load data
data1 = pd.read_csv('Clean Customer Data.csv', parse_dates=['Month'])
data1['Month'] = pd.to_datetime(data1['Month'])
data1['EOMONTH'] = pd.to_datetime(data1['EOMONTH'])
data1.head()

Unnamed: 0,Month,EOMONTH,CUSTOMER_ID-1,CUSTOMER_NAME,Customer Cohort Month,Small Business Lending,Other,Consumer,Mortgage,Grand Total,Total In-Month Revenue,Fed_Funds_Target_Rate,LIBOR___3_Month,United_States_Treasury_Constant_Maturity___1_Year,United_States_Treasury_Constant_Maturity___5_Year,United_States_Treasury_Constant_Maturity___10_Year,United_States_Treasury_Constant_Maturity___30_Year,US_Prime_Rate
0,2021-08-01,2021-08-31,A00001405,SMG Automotive Holdings LLC,2021-08-01,0,0,2000,0,2000,6.0,0.0025,0.001196,0.0007,0.0077,0.013,0.0192,0.0325
1,2020-05-01,2020-05-31,A00000263,Neat Capital Inc,2019-07-01,0,0,0,173000,173000,500.2,0.0025,0.00344,0.0017,0.003,0.0065,0.0141,0.0325
2,2020-01-01,2020-01-31,A00000263,Neat Capital Inc,2019-07-01,0,0,0,40000,40000,156.96,0.0175,0.017511,0.0145,0.0132,0.0151,0.0199,0.0475
3,2020-06-01,2020-06-30,A00000263,Neat Capital Inc,2019-07-01,0,0,0,316000,316000,899.28,0.0025,0.00302,0.0016,0.0029,0.0066,0.0141,0.0325
4,2020-12-01,2020-12-31,A00000263,Neat Capital Inc,2019-07-01,0,0,0,190000,190000,1029.2,0.0025,0.002384,0.001,0.0036,0.0093,0.0165,0.0325


##### Monthly Correlation Metrics

In [4]:
# Load data
data2 = pd.read_csv('Monthly Correlation Metrics.csv', parse_dates=['Month'])
data2['Month'] = pd.to_datetime(data2['Month'])
data2['EOMONTH'] = pd.to_datetime(data2['EOMONTH'])
data2 = data2.sort_values(by=['Month'])
data2['Month'] = data2['Month'].shift(-1)
data2

Unnamed: 0,Month,EOMONTH,Fed_Funds_Target_Rate,LIBOR___3_Month,United_States_Treasury_Constant_Maturity___1_Year,United_States_Treasury_Constant_Maturity___5_Year,United_States_Treasury_Constant_Maturity___10_Year,United_States_Treasury_Constant_Maturity___30_Year,US_Prime_Rate
18,2016-07-01,2016-06-30,0.0050,0.006541,0.0045,0.0101,0.0149,0.0230,0.0350
19,2016-08-01,2016-07-31,0.0050,0.007591,0.0050,0.0103,0.0146,0.0218,0.0350
64,2016-09-01,2016-08-31,0.0050,0.008393,0.0061,0.0119,0.0158,0.0223,0.0350
22,2016-10-01,2016-09-30,0.0050,0.008537,0.0059,0.0114,0.0160,0.0232,0.0350
21,2016-11-01,2016-10-31,0.0050,0.008843,0.0066,0.0131,0.0184,0.0258,0.0350
...,...,...,...,...,...,...,...,...,...
23,2023-01-01,2022-12-31,0.0450,0.047673,0.0473,0.0399,0.0388,0.0397,0.0750
1,2023-02-01,2023-01-31,0.0450,0.048136,0.0468,0.0363,0.0352,0.0365,0.0750
73,2023-03-01,2023-02-28,0.0475,0.049710,0.0502,0.0418,0.0392,0.0393,0.0775
28,2023-04-01,2023-03-31,0.0500,0.051927,0.0464,0.0360,0.0348,0.0367,0.0800


##### Monthly Ocrolus S&M Spend

In [5]:
# Load data
data3 = pd.read_csv('P.Oakley Sales and Marketing Spend.csv', parse_dates=['Month'])
data3['Total S_M Spend'] = data3['Total S_M Spend'].apply(lambda x: float(x)*1000)
data3['Month'] = pd.to_datetime(data3['Month'])
data3 = data3.sort_values(by=['Month'])
data3['Month'] = data3['Month'].shift(-1)
data3.set_index('Month', inplace=True)
d3 = data3[['Total S_M Spend']]
d3.head()

Unnamed: 0_level_0,Total S_M Spend
Month,Unnamed: 1_level_1
2019-02-01,101900.0
2019-03-01,95500.0
2019-04-01,111200.0
2019-05-01,146500.0
2019-06-01,136000.0


##### Existing Account Monthly Ocrolus S&M Spend

In [6]:
# Load data
data4 = pd.read_csv('Existing S&M Spend.csv', parse_dates=['Date'])
data4['Sales Team Farmer Expense'] = data4['Sales Team Farmer Expense'].apply(lambda x: float(x)*1000)
data4['Account Management'] = data4['Account Management'].apply(lambda x: float(x)*1000)
data4['Date'] = pd.to_datetime(data4['Date'])
data4 = data4.sort_values(by=['Date'])
data4['Date'] = data4['Date'].shift(-1)
data4.set_index('Date', inplace=True)
data4.drop(columns=['Unnamed: 3'], inplace=True)
data4.head()

Unnamed: 0_level_0,Sales Team Farmer Expense,Account Management
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-02-01,36260.0,28330.0
2021-03-01,40650.0,28330.0
2021-04-01,73410.0,28330.0
2021-05-01,23810.0,28710.0
2021-06-01,15740.0,36250.0


##### Combined Dataset: Monthly Ocrolus Volume and Active Company Counts + Monthly Correlation Metrics (1M Lag)

In [7]:
# Join dataframes on month, drop null values (1st row due to 1m lag on correlation metrics), and create average volume per customer variable
df = pd.merge(data, data2, left_on='Month', right_on="Month")
df['Month'] = pd.to_datetime(df['Month'])
df['EOMONTH_x'] = pd.to_datetime(df['EOMONTH_x'])
df['EOMONTH_y'] = pd.to_datetime(df['EOMONTH_y'])
df.set_index('Month', inplace=True)
df.dropna(inplace=True)
df = df.sort_index()
df['Avg Vol Per Customer'] = df['Volume']/df['Active Customer Count']
df

Unnamed: 0_level_0,EOMONTH_x,Active Customer Count,Small Business Lending,Other,Consumer,Mortgage,Volume,EOMONTH_y,Fed_Funds_Target_Rate,LIBOR___3_Month,United_States_Treasury_Constant_Maturity___1_Year,United_States_Treasury_Constant_Maturity___5_Year,United_States_Treasury_Constant_Maturity___10_Year,United_States_Treasury_Constant_Maturity___30_Year,US_Prime_Rate,Avg Vol Per Customer
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2016-07-01,2016-07-31,20,76000,59000,0,0,135000,2016-06-30,0.0050,0.006541,0.0045,0.0101,0.0149,0.0230,0.0350,6.750000e+03
2016-08-01,2016-08-31,25,23000,94000,0,0,117000,2016-07-31,0.0050,0.007591,0.0050,0.0103,0.0146,0.0218,0.0350,4.680000e+03
2016-09-01,2016-09-30,29,16000,43000,0,0,59000,2016-08-31,0.0050,0.008393,0.0061,0.0119,0.0158,0.0223,0.0350,2.034483e+03
2016-10-01,2016-10-31,23,41000,47000,0,0,88000,2016-09-30,0.0050,0.008537,0.0059,0.0114,0.0160,0.0232,0.0350,3.826087e+03
2016-11-01,2016-11-30,18,48000,37000,0,0,85000,2016-10-31,0.0050,0.008843,0.0066,0.0131,0.0184,0.0258,0.0350,4.722222e+03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-01,2022-12-31,235,251391000,1108000,157878000,11650000,422027000,2022-11-30,0.0400,0.047786,0.0474,0.0382,0.0368,0.0380,0.0700,1.795860e+06
2023-01-01,2023-01-31,230,302672000,1143000,178163000,16156000,498134000,2022-12-31,0.0450,0.047673,0.0473,0.0399,0.0388,0.0397,0.0750,2.165800e+06
2023-02-01,2023-02-28,240,198101000,1574000,146815000,17633000,364123000,2023-01-31,0.0450,0.048136,0.0468,0.0363,0.0352,0.0365,0.0750,1.517179e+06
2023-03-01,2023-03-31,247,169968000,1969000,178921000,22469000,373327000,2023-02-28,0.0475,0.049710,0.0502,0.0418,0.0392,0.0393,0.0775,1.511445e+06


In [8]:
# Basic info on dataframe structure
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 82 entries, 2016-07-01 to 2023-04-01
Data columns (total 16 columns):
 #   Column                                              Non-Null Count  Dtype         
---  ------                                              --------------  -----         
 0   EOMONTH_x                                           82 non-null     datetime64[ns]
 1   Active Customer Count                               82 non-null     int64         
 2   Small Business Lending                              82 non-null     int64         
 3   Other                                               82 non-null     int64         
 4   Consumer                                            82 non-null     int64         
 5   Mortgage                                            82 non-null     int64         
 6   Volume                                              82 non-null     int64         
 7   EOMONTH_y                                           82 non-null     datetime64[n

##### Combined Dataset With Volume % Change Variable 

In [9]:
# Create new dataframe with volume percent change variable
dfg = df
dfg['Volume_Change'] = dfg['Volume'].pct_change()*100
dfg.dropna(inplace=True)
dfg

Unnamed: 0_level_0,EOMONTH_x,Active Customer Count,Small Business Lending,Other,Consumer,Mortgage,Volume,EOMONTH_y,Fed_Funds_Target_Rate,LIBOR___3_Month,United_States_Treasury_Constant_Maturity___1_Year,United_States_Treasury_Constant_Maturity___5_Year,United_States_Treasury_Constant_Maturity___10_Year,United_States_Treasury_Constant_Maturity___30_Year,US_Prime_Rate,Avg Vol Per Customer,Volume_Change
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2016-08-01,2016-08-31,25,23000,94000,0,0,117000,2016-07-31,0.0050,0.007591,0.0050,0.0103,0.0146,0.0218,0.0350,4.680000e+03,-13.333333
2016-09-01,2016-09-30,29,16000,43000,0,0,59000,2016-08-31,0.0050,0.008393,0.0061,0.0119,0.0158,0.0223,0.0350,2.034483e+03,-49.572650
2016-10-01,2016-10-31,23,41000,47000,0,0,88000,2016-09-30,0.0050,0.008537,0.0059,0.0114,0.0160,0.0232,0.0350,3.826087e+03,49.152542
2016-11-01,2016-11-30,18,48000,37000,0,0,85000,2016-10-31,0.0050,0.008843,0.0066,0.0131,0.0184,0.0258,0.0350,4.722222e+03,-3.409091
2016-12-01,2016-12-31,20,53000,60000,0,0,113000,2016-11-30,0.0050,0.009342,0.0080,0.0183,0.0237,0.0302,0.0350,5.650000e+03,32.941176
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-01,2022-12-31,235,251391000,1108000,157878000,11650000,422027000,2022-11-30,0.0400,0.047786,0.0474,0.0382,0.0368,0.0380,0.0700,1.795860e+06,-8.380081
2023-01-01,2023-01-31,230,302672000,1143000,178163000,16156000,498134000,2022-12-31,0.0450,0.047673,0.0473,0.0399,0.0388,0.0397,0.0750,2.165800e+06,18.033680
2023-02-01,2023-02-28,240,198101000,1574000,146815000,17633000,364123000,2023-01-31,0.0450,0.048136,0.0468,0.0363,0.0352,0.0365,0.0750,1.517179e+06,-26.902601
2023-03-01,2023-03-31,247,169968000,1969000,178921000,22469000,373327000,2023-02-28,0.0475,0.049710,0.0502,0.0418,0.0392,0.0393,0.0775,1.511445e+06,2.527717


##### Combined Dataset With S&M Spend and US Prime Rate

In [10]:
# Create dataframe with Total S&M Spend, US Prime Rate, Volume, and Volume % Change
ust = pd.merge(d3,df, left_index=True, right_index=True, how='inner')
ust['Volume_Change'] = ust['Volume'].pct_change()*100
ust.dropna(inplace=True)
ust = ust[['Volume_Change', 'Volume','Total S_M Spend', 'US_Prime_Rate']]
ust['US_Prime_Rate'] = ust['US_Prime_Rate']*100
ust

Unnamed: 0_level_0,Volume_Change,Volume,Total S_M Spend,US_Prime_Rate
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-03-01,9.12457,54523000,95500.0,5.5
2019-04-01,14.863452,62627000,111200.0,5.5
2019-05-01,18.198221,74024000,146500.0,5.5
2019-06-01,-3.973036,71083000,136000.0,5.5
2019-07-01,20.388841,85576000,170600.0,5.5
2019-08-01,12.083996,95917000,137200.0,5.5
2019-09-01,-8.855573,87423000,195900.0,5.25
2019-10-01,30.718461,114278000,210900.0,5.0
2019-11-01,3.254345,117997000,259100.0,4.75
2019-12-01,20.933583,142698000,285900.0,4.75


##### Combined Dataset with Existing Account Related S&M and US Prime Rate

In [11]:
# Create dataframe with Existing Account Related S&M Spend, US Prime Rate, Volume, and Volume % Change
esm = pd.merge(data4,df, left_index=True, right_index=True, how='inner')
esm['Volume_Change'] = esm['Volume'].pct_change()*100
esm.dropna(inplace=True)
esm['Total Existing Account S&M'] = esm['Sales Team Farmer Expense'] + esm['Account Management']
esm = esm[['Volume_Change', 'Volume','Sales Team Farmer Expense', 'Account Management', 'Total Existing Account S&M', 'US_Prime_Rate']]
esm['US_Prime_Rate'] = esm['US_Prime_Rate']*100
esm

Unnamed: 0,Volume_Change,Volume,Sales Team Farmer Expense,Account Management,Total Existing Account S&M,US_Prime_Rate
2021-03-01,-9.346544,394435000,40650.0,28330.0,68980.0,3.25
2021-04-01,11.020574,437904000,73410.0,28330.0,101740.0,3.25
2021-05-01,1.436388,444194000,23810.0,28710.0,52520.0,3.25
2021-06-01,-30.455837,308911000,15740.0,36250.0,51990.0,3.25
2021-07-01,1.480685,313485000,21220.0,28330.0,49550.0,3.25
2021-08-01,16.955197,366637000,15780.0,28330.0,44110.0,3.25
2021-09-01,4.473635,383039000,16550.0,40000.0,56550.0,3.25
2021-10-01,24.554419,477092000,16110.0,41600.0,57710.0,3.25
2021-11-01,10.756206,528409000,27820.0,68900.0,96720.0,3.25
2021-12-01,-10.930359,470652000,26470.0,58630.0,85100.0,3.25


# Initial Linear Regressions

##### Simple Least Squares Regression of Volume % Change to Inspect General Growth Trend

In [12]:
# Linear regression: Dependent variable = Ocrolus Volume Percent Change; Independent variables = Time (in months) - to inspect general growth trend
X = pd.to_numeric(dfg.index).values.reshape(-1,1)
y = dfg['Volume_Change']
X = sm.add_constant(X)
model=sm.OLS(y,X)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:          Volume_Change   R-squared:                      -0.009
Model:                            OLS   Adj. R-squared:                 -0.009
Method:                 Least Squares   F-statistic:                       nan
Date:                Tue, 23 May 2023   Prob (F-statistic):                nan
Time:                        13:37:35   Log-Likelihood:                -433.57
No. Observations:                  81   AIC:                             869.1
Df Residuals:                      80   BIC:                             871.5
Df Model:                           0                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       6.307e-36    2.3e-36      2.745      0.0

##### Simple Least Squares Regression of Volume to Inspect General Growth Trend

In [13]:
# Linear regression: Dependent variable = Ocrolus Volume; Independent variables = Time (in months) - to inspect general growth trend
X = pd.to_numeric(df.index).values.reshape(-1,1)
y = df['Volume']
X = sm.add_constant(X)
model=sm.OLS(y,X)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                 Volume   R-squared:                       0.073
Model:                            OLS   Adj. R-squared:                  0.073
Method:                 Least Squares   F-statistic:                       nan
Date:                Tue, 23 May 2023   Prob (F-statistic):                nan
Time:                        13:37:35   Log-Likelihood:                -1656.3
No. Observations:                  81   AIC:                             3315.
Df Residuals:                      80   BIC:                             3317.
Df Model:                           0                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       8.275e-29   8.27e-30     10.012      0.0

# Signal Decomposition Analysis

In [14]:
# Perform Holt-Winters decomposition
result = seasonal_decompose(df['Volume'], model='additive', extrapolate_trend='freq')

In [15]:
# Extract trend, seasonal, and residual components
trend = result.trend
seasonal = result.seasonal
residual = result.resid

In [16]:
trend

Month
2016-08-01   -4.552367e+06
2016-09-01   -3.850070e+06
2016-10-01   -3.147773e+06
2016-11-01   -2.445475e+06
2016-12-01   -1.743178e+06
                  ...     
2022-12-01    4.906172e+08
2023-01-01    4.946970e+08
2023-02-01    4.987769e+08
2023-03-01    5.028567e+08
2023-04-01    5.069366e+08
Name: trend, Length: 81, dtype: float64

In [17]:
seasonal

Month
2016-08-01    6.318086e+06
2016-09-01    4.414234e+06
2016-10-01    1.536610e+07
2016-11-01    1.388677e+07
2016-12-01    2.003244e+06
                  ...     
2022-12-01    2.003244e+06
2023-01-01    3.876408e+07
2023-02-01   -6.845914e+06
2023-03-01   -1.837486e+07
2023-04-01   -5.603429e+07
Name: seasonal, Length: 81, dtype: float64

In [18]:
residual

Month
2016-08-01   -1.648718e+06
2016-09-01   -5.051637e+05
2016-10-01   -1.213033e+07
2016-11-01   -1.135629e+07
2016-12-01   -1.470662e+05
                  ...     
2022-12-01   -7.059345e+07
2023-01-01   -3.532712e+07
2023-02-01   -1.278080e+08
2023-03-01   -1.111549e+08
2023-04-01   -3.352613e+08
Name: resid, Length: 81, dtype: float64

In [19]:
# Create dataframe with residual portion of volume data and monthly correlation metrics (multiplied by 100 for rate %s)
corrdf = pd.merge(residual.dropna(), df, left_index=True, right_index=True, how='inner')
corrdf['Fed_Funds_Target_Rate'] = corrdf['Fed_Funds_Target_Rate']*100
corrdf['LIBOR___3_Month'] = corrdf['LIBOR___3_Month']*100
corrdf['United_States_Treasury_Constant_Maturity___1_Year'] = corrdf['United_States_Treasury_Constant_Maturity___1_Year']*100
corrdf['United_States_Treasury_Constant_Maturity___5_Year'] = corrdf['United_States_Treasury_Constant_Maturity___5_Year']*100
corrdf['United_States_Treasury_Constant_Maturity___10_Year'] = corrdf['United_States_Treasury_Constant_Maturity___10_Year']*100
corrdf['United_States_Treasury_Constant_Maturity___30_Year'] = corrdf['United_States_Treasury_Constant_Maturity___30_Year']*100
corrdf['US_Prime_Rate'] = corrdf['US_Prime_Rate']*100
corrdf.head()

Unnamed: 0_level_0,resid,EOMONTH_x,Active Customer Count,Small Business Lending,Other,Consumer,Mortgage,Volume,EOMONTH_y,Fed_Funds_Target_Rate,LIBOR___3_Month,United_States_Treasury_Constant_Maturity___1_Year,United_States_Treasury_Constant_Maturity___5_Year,United_States_Treasury_Constant_Maturity___10_Year,United_States_Treasury_Constant_Maturity___30_Year,US_Prime_Rate,Avg Vol Per Customer,Volume_Change
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2016-08-01,-1648718.0,2016-08-31,25,23000,94000,0,0,117000,2016-07-31,0.5,0.7591,0.5,1.03,1.46,2.18,3.5,4680.0,-13.333333
2016-09-01,-505163.7,2016-09-30,29,16000,43000,0,0,59000,2016-08-31,0.5,0.83933,0.61,1.19,1.58,2.23,3.5,2034.482759,-49.57265
2016-10-01,-12130330.0,2016-10-31,23,41000,47000,0,0,88000,2016-09-30,0.5,0.85367,0.59,1.14,1.6,2.32,3.5,3826.086957,49.152542
2016-11-01,-11356290.0,2016-11-30,18,48000,37000,0,0,85000,2016-10-31,0.5,0.88428,0.66,1.31,1.84,2.58,3.5,4722.222222,-3.409091
2016-12-01,-147066.2,2016-12-31,20,53000,60000,0,0,113000,2016-11-30,0.5,0.93417,0.8,1.83,2.37,3.02,3.5,5650.0,32.941176


In [42]:
# Create dataframe with trend portion of volume data and monthly Existing Account S&M expenses
trd = pd.merge(trend.dropna(), esm, left_index=True, right_index=True, how='inner')
trd

Unnamed: 0,trend,Volume_Change,Volume,Sales Team Farmer Expense,Account Management,Total Existing Account S&M,US_Prime_Rate
2021-03-01,368492300.0,-9.346544,394435000,40650.0,28330.0,68980.0,3.25
2021-04-01,381963100.0,11.020574,437904000,73410.0,28330.0,101740.0,3.25
2021-05-01,400083700.0,1.436388,444194000,23810.0,28710.0,52520.0,3.25
2021-06-01,414757000.0,-30.455837,308911000,15740.0,36250.0,51990.0,3.25
2021-07-01,421181500.0,1.480685,313485000,21220.0,28330.0,49550.0,3.25
2021-08-01,422161400.0,16.955197,366637000,15780.0,28330.0,44110.0,3.25
2021-09-01,424606700.0,4.473635,383039000,16550.0,40000.0,56550.0,3.25
2021-10-01,425686400.0,24.554419,477092000,16110.0,41600.0,57710.0,3.25
2021-11-01,423997500.0,10.756206,528409000,27820.0,68900.0,96720.0,3.25
2021-12-01,431018300.0,-10.930359,470652000,26470.0,58630.0,85100.0,3.25


In [20]:
# Calculate correlation between residual component and Fed Funds Target Rate
correlation, p_value = pearsonr(residual.dropna(), corrdf['Fed_Funds_Target_Rate'])
print(f"Correlation between residual component and the fed funds target rate: {correlation:.6f}")
print(f"P-value: {p_value:.6f}")

Correlation between residual component and the fed funds target rate: -0.418956
P-value: 0.000099


In [21]:
# Calculate correlation between residual component and US Prime Rate
correlation, p_value = pearsonr(residual.dropna(), corrdf['US_Prime_Rate'])
print(f"Correlation between residual component and the US prime rate: {correlation:.6f}")
print(f"P-value: {p_value:.6f}")

Correlation between residual component and the US prime rate: -0.418956
P-value: 0.000099


In [22]:
# Calculate correlation between residual component and LIBOR 3M Rate
correlation, p_value = pearsonr(residual.dropna(), corrdf['LIBOR___3_Month'])
print(f"Correlation between residual component and the LIBOR 3 Month Rate: {correlation:.3f}")
print(f"P-value: {p_value:.5f}")

Correlation between residual component and the LIBOR 3 Month Rate: -0.392
P-value: 0.00030


In [23]:
# Calculate correlation between residual component and US Treasury Constant Maturity 1 Year
correlation, p_value = pearsonr(residual.dropna(), corrdf['United_States_Treasury_Constant_Maturity___1_Year'])
print(f"Correlation between residual component and the US Treasury Constant Maturity 1 Year: {correlation:.3f}")
print(f"P-value: {p_value:.5f}")

Correlation between residual component and the US Treasury Constant Maturity 1 Year: -0.358
P-value: 0.00104


In [24]:
# Calculate correlation between residual component and US Treasury Constant Maturity 5 Year
correlation, p_value = pearsonr(residual.dropna(), corrdf['United_States_Treasury_Constant_Maturity___5_Year'])
print(f"Correlation between residual component and the US Treasury Constant Maturity 5 Year: {correlation:.3f}")
print(f"P-value: {p_value:.5f}")

Correlation between residual component and the US Treasury Constant Maturity 5 Year: -0.260
P-value: 0.01918


In [25]:
# Calculate correlation between residual component and US Treasury Constant Maturity 10 Year
correlation, p_value = pearsonr(residual.dropna(), corrdf['United_States_Treasury_Constant_Maturity___10_Year'])
print(f"Correlation between residual component and the US Treasury Constant Maturity 10 Year: {correlation:.3f}")
print(f"P-value: {p_value:.5f}")

Correlation between residual component and the US Treasury Constant Maturity 10 Year: -0.234
P-value: 0.03546


In [26]:
# Calculate correlation between residual component and US Treasury Constant Maturity 30 Year
correlation, p_value = pearsonr(residual.dropna(), corrdf['United_States_Treasury_Constant_Maturity___30_Year'])
print(f"Correlation between residual component and the US Treasury Constant Maturity 30 Year: {correlation:.3f}")
print(f"P-value: {p_value:.5f}")

Correlation between residual component and the US Treasury Constant Maturity 30 Year: -0.230
P-value: 0.03878


In [52]:
# Calculate correlation between trend component and Account Management S&M spend (1M lag)
correlation, p_value = pearsonr(trd['trend'], trd['Account Management'])
print(f"Correlation between trend component and the Existing Account S&M Spend: {correlation:.3f}")
print(f"P-value: {p_value:.10f}")
# Note: Total Existing S&M spend was tried as well as just sales team farmer expense and lowered correlation

Correlation between trend component and the Existing Account S&M Spend: 0.910
P-value: 0.0000000001


# Regression Analyses

##### Linear Regression Showing Impact of US Prime Rate on Residual Component of Ocrolus Volume (Signal Decomposed Volume)

In [27]:
# Linear regression: Dependent variable = Residual Ocrolus Volume (seasonality and trend removed); Independent variable = US Prime Rate
X = corrdf['US_Prime_Rate']
y = residual.dropna()
X = sm.add_constant(X)
model=sm.OLS(y,X)
results = model.fit()
print(results.summary())
# Key takeaways:
    # The US Prime Rate has a significant negative correlation with the residual component of Ocrolus Volume
    # US Prime Rate does not have a significant correlation with overall volume (trend in overall platform growth clouds relationship between US Prime Rate and Volume)

                            OLS Regression Results                            
Dep. Variable:                  resid   R-squared:                       0.176
Model:                            OLS   Adj. R-squared:                  0.165
Method:                 Least Squares   F-statistic:                     16.82
Date:                Tue, 23 May 2023   Prob (F-statistic):           9.91e-05
Time:                        13:37:36   Log-Likelihood:                -1553.7
No. Observations:                  81   AIC:                             3111.
Df Residuals:                      79   BIC:                             3116.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
const          8.129e+07   2.25e+07      3.621

##### Linear regression showing relationship between Account Management Spend and the underlying trend in Ocrolus Volume

In [53]:
# Linear regression: Dependent variable = Trend Ocrolus Volume (seasonality and residual removed); Independent variable = Account Management Expenses
X = trd['Account Management']
y = trd['trend']
X = sm.add_constant(X)
model=sm.OLS(y,X)
results = model.fit()
print(results.summary())
# Key takeaways:
    # The spend on account amnagement has a significant positive correlation with the trend component of Ocrolus Volume

                            OLS Regression Results                            
Dep. Variable:                  trend   R-squared:                       0.828
Model:                            OLS   Adj. R-squared:                  0.821
Method:                 Least Squares   F-statistic:                     115.9
Date:                Tue, 23 May 2023   Prob (F-statistic):           1.14e-10
Time:                        14:07:45   Log-Likelihood:                -466.97
No. Observations:                  26   AIC:                             937.9
Df Residuals:                      24   BIC:                             940.4
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const               3.898e+08   6.49

##### Linear Regression Showing Impact of US Prime Rate on Ocrolus Volume

In [28]:
# Multiple Linear regression: Dependent variable = Ocrolus Volume; Independent variable = US Prime Rate
X = ust['US_Prime_Rate']
y = ust['Volume']
X = sm.add_constant(X)
model=sm.OLS(y,X)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                 Volume   R-squared:                       0.023
Model:                            OLS   Adj. R-squared:                  0.002
Method:                 Least Squares   F-statistic:                     1.116
Date:                Tue, 23 May 2023   Prob (F-statistic):              0.296
Time:                        13:37:36   Log-Likelihood:                -992.87
No. Observations:                  49   AIC:                             1990.
Df Residuals:                      47   BIC:                             1994.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
const          3.948e+08   7.55e+07      5.227

##### Linear Regression Showing Impact of US Prime Rate on Percent Change in Ocrolus Volume

In [29]:
# Multiple Linear regression: Dependent variable = Ocrolus Volume % Change; Independent variable = US Prime Rate
X = ust['US_Prime_Rate']
y = ust['Volume_Change']
X = sm.add_constant(X)
model=sm.OLS(y,X)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:          Volume_Change   R-squared:                       0.016
Model:                            OLS   Adj. R-squared:                 -0.005
Method:                 Least Squares   F-statistic:                    0.7845
Date:                Tue, 23 May 2023   Prob (F-statistic):              0.380
Time:                        13:37:36   Log-Likelihood:                -208.13
No. Observations:                  49   AIC:                             420.3
Df Residuals:                      47   BIC:                             424.0
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
const            12.6421      8.370      1.510

##### Linear Regression Showing Impact of S&M Spend and US Prime Rate on Ocrolus Volume

In [30]:
# Multiple Linear regression: Dependent variable = Ocrolus Volume; Independent variables = Total S&M Spend and US Prime Rate
X = ust[['Total S_M Spend', 'US_Prime_Rate']]
y = ust['Volume']
X = sm.add_constant(X)
model=sm.OLS(y,X)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                 Volume   R-squared:                       0.791
Model:                            OLS   Adj. R-squared:                  0.782
Method:                 Least Squares   F-statistic:                     86.97
Date:                Tue, 23 May 2023   Prob (F-statistic):           2.35e-16
Time:                        13:37:36   Log-Likelihood:                -955.11
No. Observations:                  49   AIC:                             1916.
Df Residuals:                      46   BIC:                             1922.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
const            1.618e+08   3.96e+07     

##### Linear Regression Showing Impact of Existing Account Related S&M Spend on Ocrolus Volume

In [31]:
# Multiple Linear regression: Dependent variable = Ocrolus Volume; Independent variables = Sales Team Farmer and Account Management Expenses
X = esm[['Sales Team Farmer Expense', 'Account Management']]
y = esm['Volume']
X = sm.add_constant(X)
model=sm.OLS(y,X)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                 Volume   R-squared:                       0.083
Model:                            OLS   Adj. R-squared:                  0.003
Method:                 Least Squares   F-statistic:                     1.034
Date:                Tue, 23 May 2023   Prob (F-statistic):              0.371
Time:                        13:37:36   Log-Likelihood:                -511.07
No. Observations:                  26   AIC:                             1028.
Df Residuals:                      23   BIC:                             1032.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                                coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------
const                 

##### Linear Regression Showing Impact of Total Existing Account Related S&M Spend on Ocrolus Volume

In [55]:
# Multiple Linear regression: Dependent variable = Ocrolus Volume; Independent variable = Total S&M Spend Related to Existing Accounts
X = esm['Total Existing Account S&M']
y = esm['Volume']
X = sm.add_constant(X)
model=sm.OLS(y,X)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                 Volume   R-squared:                       0.013
Model:                            OLS   Adj. R-squared:                 -0.029
Method:                 Least Squares   F-statistic:                    0.3062
Date:                Tue, 23 May 2023   Prob (F-statistic):              0.585
Time:                        14:10:28   Log-Likelihood:                -512.03
No. Observations:                  26   AIC:                             1028.
Df Residuals:                      24   BIC:                             1031.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------------
const               

##### Linear Regression Showing Impact of Existing Account Related S&M Spend on % Change in Ocrolus Volume

In [33]:
# Multiple Linear regression: Dependent variable = Ocrolus Volume % Change; Independent variables = Sales Team Farmer and Account Management Expenses
X = esm[['Sales Team Farmer Expense', 'Account Management']]
y = esm['Volume_Change']
X = sm.add_constant(X)
model=sm.OLS(y,X)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:          Volume_Change   R-squared:                       0.142
Model:                            OLS   Adj. R-squared:                  0.068
Method:                 Least Squares   F-statistic:                     1.908
Date:                Tue, 23 May 2023   Prob (F-statistic):              0.171
Time:                        13:37:36   Log-Likelihood:                -110.80
No. Observations:                  26   AIC:                             227.6
Df Residuals:                      23   BIC:                             231.4
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                                coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------
const                 

##### Linear Regression Showing Impact of Account Management Expense on Volume % Change

In [34]:
# Multiple Linear regression: Dependent variable = Ocrolus Volume % Change; Independent variables = Account Management Expenses
X = esm['Account Management']
y = esm['Volume_Change']
X = sm.add_constant(X)
model=sm.OLS(y,X)
results = model.fit()
print(results.summary())
# Key Takeaways:
    # Existing Account related S&M spend is slightly more related to % volume change than nominal volume amount
    # Although significant at 0.1 alpha level when included with sales team farmer expenses, account management expenses alone are not significant

                            OLS Regression Results                            
Dep. Variable:          Volume_Change   R-squared:                       0.063
Model:                            OLS   Adj. R-squared:                  0.024
Method:                 Least Squares   F-statistic:                     1.620
Date:                Tue, 23 May 2023   Prob (F-statistic):              0.215
Time:                        13:37:36   Log-Likelihood:                -111.95
No. Observations:                  26   AIC:                             227.9
Df Residuals:                      24   BIC:                             230.4
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const                  6.1478      7

##### Linear Regression Showing Impact of Total Existing Account Related S&M Spend on % Change in Ocrolus Volume

In [35]:
# Multiple Linear regression: Dependent variable = Ocrolus Volume Percent Change; Independent variable = Total S&M Spend Related to Existing Accounts
X = esm['Total Existing Account S&M']
y = esm['Volume_Change']
X = sm.add_constant(X)
model=sm.OLS(y,X)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:          Volume_Change   R-squared:                       0.019
Model:                            OLS   Adj. R-squared:                 -0.022
Method:                 Least Squares   F-statistic:                    0.4646
Date:                Tue, 23 May 2023   Prob (F-statistic):              0.502
Time:                        13:37:36   Log-Likelihood:                -112.55
No. Observations:                  26   AIC:                             229.1
Df Residuals:                      24   BIC:                             231.6
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------------
const               

##### Linear Regression Showing Impact of Existing Account Related S&M Spend and US Prime Rate on Ocrolus Volume

In [56]:
# Multiple Linear regression: Dependent variable = Ocrolus Volume; Independent variables = US Prime Rate and Account Management Expenses
X = esm[['Account Management', 'US_Prime_Rate']]
y = esm['Volume']
X = sm.add_constant(X)
model=sm.OLS(y,X)
results = model.fit()
print(results.summary())
# Note: Addint Sales Farmer expense lowers significance

                            OLS Regression Results                            
Dep. Variable:                 Volume   R-squared:                       0.161
Model:                            OLS   Adj. R-squared:                  0.088
Method:                 Least Squares   F-statistic:                     2.207
Date:                Tue, 23 May 2023   Prob (F-statistic):              0.133
Time:                        14:11:24   Log-Likelihood:                -509.91
No. Observations:                  26   AIC:                             1026.
Df Residuals:                      23   BIC:                             1030.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const               4.952e+08   4.82

##### Linear Regression Showing Impact of Total Existing Account S&M Spend and US Prime Rate on Ocrolus Volume

In [37]:
# Multiple Linear regression: Dependent variable = Ocrolus Volume; Independent variables = US Prime Rate and Total Existing Account S&M Spend
X = esm[['Total Existing Account S&M', 'US_Prime_Rate']]
y = esm['Volume']
X = sm.add_constant(X)
model=sm.OLS(y,X)
results = model.fit()
print(results.summary())
# Key Takeaways:
    # All variables in the model are significant at the 0.1 alpha level
    # This indicates that the US Prime Rate has a significant negative correlation with the portion of volume not impacted by existing account S&M spend

                            OLS Regression Results                            
Dep. Variable:                 Volume   R-squared:                       0.191
Model:                            OLS   Adj. R-squared:                  0.120
Method:                 Least Squares   F-statistic:                     2.710
Date:                Tue, 23 May 2023   Prob (F-statistic):             0.0878
Time:                        13:37:36   Log-Likelihood:                -509.44
No. Observations:                  26   AIC:                             1025.
Df Residuals:                      23   BIC:                             1029.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------------
const               

##### Linear Regression Showing Impact of Existing Account Related S&M Spend and US Prime Rate on % Change in Ocrolus Volume

In [38]:
# Multiple Linear regression: Dependent variable = Ocrolus Volume % Change; Independent variables = US Prime Rate, Sales Team Farmer Expenses, and Account Management Expenses
X = esm[['Sales Team Farmer Expense', 'Account Management', 'US_Prime_Rate']]
y = esm['Volume_Change']
X = sm.add_constant(X)
model=sm.OLS(y,X)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:          Volume_Change   R-squared:                       0.189
Model:                            OLS   Adj. R-squared:                  0.078
Method:                 Least Squares   F-statistic:                     1.705
Date:                Tue, 23 May 2023   Prob (F-statistic):              0.195
Time:                        13:37:36   Log-Likelihood:                -110.08
No. Observations:                  26   AIC:                             228.2
Df Residuals:                      22   BIC:                             233.2
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                                coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------
const                 

##### Linear Regression Showing Impact of Total Existing Account S&M Spend and US Prime Rate on % Change in Ocrolus Volume

In [39]:
# Multiple Linear regression: Dependent variable = Ocrolus Volume % Change; Independent variables = US Prime Rate and Total Existing Account S&M Spend
X = esm[['Total Existing Account S&M', 'US_Prime_Rate']]
y = esm['Volume_Change']
X = sm.add_constant(X)
model=sm.OLS(y,X)
results = model.fit()
print(results.summary())
# Key Takeaways:
    # Model is not significant when looking at % change in volume (yet is when looking at nominal volume)

                            OLS Regression Results                            
Dep. Variable:          Volume_Change   R-squared:                       0.171
Model:                            OLS   Adj. R-squared:                  0.099
Method:                 Least Squares   F-statistic:                     2.369
Date:                Tue, 23 May 2023   Prob (F-statistic):              0.116
Time:                        13:37:36   Log-Likelihood:                -110.36
No. Observations:                  26   AIC:                             226.7
Df Residuals:                      23   BIC:                             230.5
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------------
const               