In [1]:
%matplotlib inline

In [2]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
from statsmodels.sandbox.regression.predstd import wls_prediction_std


In [3]:
df = pd.read_csv("latimes-county-totals.csv")

In [4]:
df_la = df[df['county'] == 'Los Angeles']

In [5]:
df_la = df_la.drop(labels=['date','county','fips','new_confirmed_cases'], axis=1)
# df_la = df_la.drop(labels=['date','county','fips','new_confirmed_cases','confirmed_cases'], axis=1)

In [6]:
df_la = df_la.fillna(0)

In [7]:
days = list(range(1,len(df_la)+1))
df_la['culm_day'] = days


In [8]:
df_la.head()

Unnamed: 0,confirmed_cases,deaths,new_deaths,culm_day
867,1,0,0.0,1
868,1,0,0.0,2
869,1,0,0.0,3
870,1,0,0.0,4
871,1,0,0.0,5


## Logistics


In [13]:
formula = 'deaths ~ new_deaths + culm_day + confirmed_cases'

In [14]:
model = smf.glm(formula = formula, data=df_la, family=sm.families.Binomial())
result = model.fit()
print(result.summary())
# sm.families.links.logit

                 Generalized Linear Model Regression Results                  
Dep. Variable:                 deaths   No. Observations:                  100
Model:                            GLM   Df Residuals:                       96
Model Family:                Binomial   Df Model:                            3
Link Function:                  logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                    nan
Date:                Tue, 05 May 2020   Deviance:                          nan
Time:                        13:22:36   Pearson chi2:                 7.45e+22
No. Iterations:                   100                                         
Covariance Type:            nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept        1.352e+19   1.66e+07   8.

In [15]:
result.params

Intercept          1.352444e+19
new_deaths        -4.039715e+17
culm_day          -6.588856e+17
confirmed_cases    2.369655e+16
dtype: float64

In [18]:
X = df_la.drop(labels=['deaths'], axis=1)
lny = df_la['deaths']
gauss_log = sm.GLM(lny, X, family=sm.families.Gaussian(sm.families.links.log))
gauss_log_results = gauss_log.fit()
print(gauss_log_results.summary())
# sm.families.links.log

                 Generalized Linear Model Regression Results                  
Dep. Variable:                 deaths   No. Observations:                  100
Model:                            GLM   Df Residuals:                       97
Model Family:                Gaussian   Df Model:                            2
Link Function:                    log   Scale:                          1906.1
Method:                          IRLS   Log-Likelihood:                -518.03
Date:                Tue, 05 May 2020   Deviance:                   1.8489e+05
Time:                        13:23:16   Pearson chi2:                 1.85e+05
No. Iterations:                     7                                         
Covariance Type:            nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
confirmed_cases -8.598e-06   2.88e-06     

Use an instance of a link class instead.
  This is separate from the ipykernel package so we can avoid doing imports until


In [26]:
# X = df_la.drop(labels=['deaths'], axis=1)
# y = df_la['deaths']

In [28]:
# model = sm.GLM(y,X, family=sm.families.Binomial())
# result = model.fit()
# print(result.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                 deaths   No. Observations:                   96
Model:                            GLM   Df Residuals:                       93
Model Family:                Binomial   Df Model:                            2
Link Function:                  logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                    nan
Date:                Tue, 05 May 2020   Deviance:                          nan
Time:                        11:56:15   Pearson chi2:                 4.78e+22
No. Iterations:                   100                                         
Covariance Type:            nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
confirmed_cases  2.247e+16   2645.830   8.

# Statsmodel: ordianrly linear regression

In [9]:
X = df_la.drop(labels=['deaths'], axis=1)
y = df_la['deaths']

In [10]:
model = sm.OLS(y, X)
results = model.fit()
print(results.summary())

                                 OLS Regression Results                                
Dep. Variable:                 deaths   R-squared (uncentered):                   0.984
Model:                            OLS   Adj. R-squared (uncentered):              0.983
Method:                 Least Squares   F-statistic:                              1883.
Date:                Sun, 03 May 2020   Prob (F-statistic):                    4.17e-83
Time:                        14:52:20   Log-Likelihood:                         -495.85
No. Observations:                  96   AIC:                                      997.7
Df Residuals:                      93   BIC:                                      1005.
Df Model:                           3                                                  
Covariance Type:            nonrobust                                                  
                      coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------