In [None]:
pip install linearmodels



In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant

In [None]:
# Read the two workbooks from the working directory (relative paths):
# `df` holds the stage-level series, `df_annual` the annual totals.
df = pd.read_excel(io='Econ 101 Data.xlsx')
df_annual = pd.read_excel(io='Econ 101 Data Annual.xlsx')

# Last expression of the cell -> rendered as the cell output.
df_annual

Unnamed: 0,Year,Total VC Investments
0,1980,600.0
1,1981,1200.0
2,1982,1700.0
3,1983,3000.0
4,1984,3000.0
5,1985,2800.0
6,1986,3100.0
7,1987,3300.0
8,1988,3300.0
9,1989,3400.0


In [None]:
# Explanatory variables, read from two workbooks in the working directory.
df_independent = pd.read_excel(io='Independent variables.xlsx')
df_indep_annual = pd.read_excel(io='Independent Var Annual.xlsx')

# Display the annual table as the cell output.
df_indep_annual

Unnamed: 0,Year,Fed Funds Rate,Unemployment Rate,Real GDP,Current Activity,Future Activity,PMI
0,1980,13.355833,7.175,7257.3165,-22.441667,32.533333,44.533333
1,1981,16.378333,7.616667,7441.485,-1.875,63.091667,46.233333
2,1982,12.258333,9.708333,7307.314,-3.558333,70.05,38.483333
3,1983,9.086667,9.6,7642.2655,36.3,74.908333,59.3
4,1984,10.225,7.508333,8195.29525,22.641667,47.483333,55.766667
5,1985,8.100833,7.191667,8537.00375,10.258333,33.3,49.183333
6,1986,6.805,7.0,8832.61125,11.808333,29.158333,51.058333
7,1987,6.6575,6.175,9137.74475,17.35,21.9,57.491667
8,1988,7.568333,5.491667,9519.42675,19.85,13.266667,56.216667
9,1989,9.216667,5.258333,9869.00325,0.658333,-3.1,48.925


In [None]:
# Variance inflation factors for the candidate regressors.
#
# variance_inflation_factor regresses each column on the other columns exactly
# as supplied and does not add an intercept itself, so the design matrix has to
# carry a constant column. Drop the two activity series first and then add the
# constant in the same expression, so the intercept cannot be lost by a later
# reassignment of `data_with_const`.
data_with_const = add_constant(
    df_independent.drop(['Future Activity', 'Current Activity'], axis=1)
)

# Calculate VIF scores for each variable. The intercept stays in the design
# matrix but is left out of the report, because its own VIF is not meaningful.
vif_data = pd.DataFrame(
    [
        {
            "Variable": column,
            "VIF": variance_inflation_factor(data_with_const.values, position),
        }
        for position, column in enumerate(data_with_const.columns)
        if column != "const"
    ]
)

# Print the results
print(vif_data)

            Variable         VIF
0     Fed Funds Rate    5.825370
1  Unemployment Rate   10.673597
2           Real GDP  274.538251
3                PMI  201.717716


In [None]:
# First-stage regression: explain 'Current Activity' with the three
# regressors listed below plus an intercept.

y = df_independent['Current Activity']
X = sm.add_constant(df_independent[['Fed Funds Rate', 'Unemployment Rate', 'PMI']])

# The unfitted model and its fit are kept under separate names.
model_iv = sm.OLS(endog=y, exog=X)
results = model_iv.fit()

print(results.summary())

                            OLS Regression Results                            
Dep. Variable:       Current Activity   R-squared:                       0.819
Model:                            OLS   Adj. R-squared:                  0.802
Method:                 Least Squares   F-statistic:                     48.21
Date:                Tue, 04 Jun 2024   Prob (F-statistic):           5.67e-12
Time:                        09:55:36   Log-Likelihood:                -119.19
No. Observations:                  36   AIC:                             246.4
Df Residuals:                      32   BIC:                             252.7
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const              -118.6846     19.16

In [None]:
# Fitted values from the regression held in `results`, and the residual
# (actual minus fitted) of the current `y`.
business_hat = results.predict(X)
X_t = y - business_hat

# Regressor frame for the next stage: the residual series next to the
# fed funds rate, both taken on the same row index.
df_X = pd.DataFrame(
    {
        'Business_hat diff': X_t,
        'Fed Funds Rate': df_independent['Fed Funds Rate'],
    }
)
df_X

Unnamed: 0,Business_hat diff,Fed Funds Rate
0,1.355152,0.11
1,-1.335692,0.123333
2,-4.267892,0.136667
3,-6.031866,0.16
4,-2.663862,0.36
5,-8.2109,0.373333
6,0.574256,0.396667
7,3.523581,0.45
8,11.528728,0.7
9,10.810381,0.95


In [None]:
# seed: regress 'Pre-seed/Seed' on the columns of df_X plus an intercept.
# add_constant's default has_constant='skip' returns the frame unchanged if
# it already contains a constant column, so re-running this cell is harmless.
df_X = sm.add_constant(df_X)
y = df['Pre-seed/Seed']

model_seed = sm.OLS(endog=y, exog=df_X)
results = model_seed.fit()

print(results.summary())

                            OLS Regression Results                            
Dep. Variable:          Pre-seed/Seed   R-squared:                       0.099
Model:                            OLS   Adj. R-squared:                  0.044
Method:                 Least Squares   F-statistic:                     1.811
Date:                Tue, 04 Jun 2024   Prob (F-statistic):              0.179
Time:                        09:55:36   Log-Likelihood:                -61.003
No. Observations:                  36   AIC:                             128.0
Df Residuals:                      33   BIC:                             132.8
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                 2.1605      0.30

In [None]:
# early stage: dependent variable 'Early VC', regressors taken from df_X as-is

y = df.loc[:, 'Early VC']

model_startup = sm.OLS(endog=y, exog=df_X)
results = model_startup.fit()

print(results.summary())

                            OLS Regression Results                            
Dep. Variable:               Early VC   R-squared:                       0.067
Model:                            OLS   Adj. R-squared:                  0.011
Method:                 Least Squares   F-statistic:                     1.190
Date:                Tue, 04 Jun 2024   Prob (F-statistic):              0.317
Time:                        09:55:36   Log-Likelihood:                -108.87
No. Observations:                  36   AIC:                             223.7
Df Residuals:                      33   BIC:                             228.5
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                11.1940      1.16

In [None]:
# later stage: dependent variable 'Later VC', regressors taken from df_X as-is

y = df.loc[:, 'Later VC']

model_later = sm.OLS(y, exog=df_X)
results = model_later.fit()

print(results.summary())

                            OLS Regression Results                            
Dep. Variable:               Later VC   R-squared:                       0.023
Model:                            OLS   Adj. R-squared:                 -0.037
Method:                 Least Squares   F-statistic:                    0.3832
Date:                Tue, 04 Jun 2024   Prob (F-statistic):              0.685
Time:                        09:55:36   Log-Likelihood:                -132.37
No. Observations:                  36   AIC:                             270.7
Df Residuals:                      33   BIC:                             275.5
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                16.4878      2.24

In [None]:
# total: dependent variable 'Total', regressors taken from df_X as-is
y = df.loc[:, 'Total']

model = sm.OLS(endog=y, exog=df_X)
results = model.fit()

print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                  Total   R-squared:                       0.052
Model:                            OLS   Adj. R-squared:                 -0.005
Method:                 Least Squares   F-statistic:                    0.9081
Date:                Tue, 04 Jun 2024   Prob (F-statistic):              0.413
Time:                        09:55:36   Log-Likelihood:                -159.46
No. Observations:                  36   AIC:                             324.9
Df Residuals:                      33   BIC:                             329.7
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                40.5575      4.75

In [None]:
# Annual data, first stage: regress 'Current Activity' on the three
# regressors below plus an intercept, using df_indep_annual.

y = df_indep_annual['Current Activity']
X = sm.add_constant(
    df_indep_annual.loc[:, ['Fed Funds Rate', 'Unemployment Rate', 'PMI']]
)

model_iv = sm.OLS(endog=y, exog=X)
results = model_iv.fit()

print(results.summary())

                            OLS Regression Results                            
Dep. Variable:       Current Activity   R-squared:                       0.763
Model:                            OLS   Adj. R-squared:                  0.745
Method:                 Least Squares   F-statistic:                     42.92
Date:                Tue, 04 Jun 2024   Prob (F-statistic):           1.42e-12
Time:                        09:55:37   Log-Likelihood:                -144.73
No. Observations:                  44   AIC:                             297.5
Df Residuals:                      40   BIC:                             304.6
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const              -137.0565     13.80

In [None]:
# Residual of the current `y` against the fitted values from `results`.
business_hat = results.predict(X)
X_t = y - business_hat

# Two-column regressor frame: the residual series, then the annual
# fed funds rate aligned on the same row index.
df_X = X_t.to_frame(name='Business_hat diff').assign(
    **{'Fed Funds Rate': df_indep_annual['Fed Funds Rate']}
)
df_X

Unnamed: 0,Business_hat diff,Fed Funds Rate
0,-16.227974,13.355833
1,-2.205282,16.378333
2,17.543876,12.258333
3,4.047963,9.086667
4,0.595024,10.225
5,7.113693,8.100833
6,4.56167,6.805
7,-6.300512,6.6575
8,-0.453118,7.568333
9,-1.070137,9.216667


In [None]:
# total investments 1980-2023: regress 'Total VC Investments' on df_X
# after adding an intercept column to it.
df_X = sm.add_constant(df_X)
y = df_annual['Total VC Investments']

model_seed = sm.OLS(endog=y, exog=df_X)
results = model_seed.fit()

print(results.summary())

                             OLS Regression Results                             
Dep. Variable:     Total VC Investments   R-squared:                       0.202
Model:                              OLS   Adj. R-squared:                  0.164
Method:                   Least Squares   F-statistic:                     5.205
Date:                  Tue, 04 Jun 2024   Prob (F-statistic):            0.00967
Time:                          09:55:37   Log-Likelihood:                -544.96
No. Observations:                    44   AIC:                             1096.
Df Residuals:                        41   BIC:                             1101.
Df Model:                             2                                         
Covariance Type:              nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const             

In [None]:
# 0/1 indicator column: 1 where Year is strictly greater than 1998, else 0.
# Added to df_X in place.
df_X['After 1998'] = df_indep_annual['Year'].gt(1998).astype(int)
df_X

Unnamed: 0,const,Business_hat diff,Fed Funds Rate,After 1998
0,1.0,-16.227974,13.355833,0
1,1.0,-2.205282,16.378333,0
2,1.0,17.543876,12.258333,0
3,1.0,4.047963,9.086667,0
4,1.0,0.595024,10.225,0
5,1.0,7.113693,8.100833,0
6,1.0,4.56167,6.805,0
7,1.0,-6.300512,6.6575,0
8,1.0,-0.453118,7.568333,0
9,1.0,-1.070137,9.216667,0


In [None]:
# Refit with the current `y` and df_X, which now carries the 'After 1998' column.
model_seed = sm.OLS(endog=y, exog=df_X)
results = model_seed.fit()

print(results.summary())

                             OLS Regression Results                             
Dep. Variable:     Total VC Investments   R-squared:                       0.324
Model:                              OLS   Adj. R-squared:                  0.274
Method:                   Least Squares   F-statistic:                     6.397
Date:                  Tue, 04 Jun 2024   Prob (F-statistic):            0.00121
Time:                          09:55:37   Log-Likelihood:                -541.32
No. Observations:                    44   AIC:                             1091.
Df Residuals:                        40   BIC:                             1098.
Df Model:                             3                                         
Covariance Type:              nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const             