# 1. Load Modules

In [1]:
import numpy as np
import pandas as pd
from pandas import DataFrame
import matplotlib.pyplot as plt
import statsmodels.api as sm
from sklearn.preprocessing import scale
from statsmodels.multivariate.pca import PCA
from statsmodels.stats.outliers_influence import variance_inflation_factor

  from pandas.core import datetools


# 2. Load Data & Pre-processing

In [2]:
# Read the source table from the CSV file next to the notebook.
tick_df = pd.read_csv("Tick_DB.csv")

# Remove columns that are not used downstream.
# NOTE(review): the original drop list named "lat" twice; the duplicate is
# removed here (same result). If a different column (e.g. a longitude field)
# was meant, add it explicitly -- confirm against the CSV header.
tick_df = tick_df.drop(["pid", "lat", "fid"], axis=1)

# Keep only rows with a temperature reading. The explicit .copy() makes the
# result an independent frame, so later column assignments on tick_df do not
# trigger pandas' chained-assignment (SettingWithCopy) warning.
tick_df = tick_df[tick_df.temp.notnull()].copy()

# 3. Define Functions for Multiple Linear Regression

In [3]:
def Var_Builder(df, dep_y, ind_x):
    """Select the response and predictor parts of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to select from.
    dep_y : label or list of labels
        Column selector for the dependent variable, used as ``df[dep_y]``.
    ind_x : label or list of labels
        Column selector for the independent variables, used as ``df[ind_x]``.

    Returns
    -------
    tuple
        ``(df[dep_y], df[ind_x])`` in that order.
    """
    response, predictors = df[dep_y], df[ind_x]
    return response, predictors

def MLR(dep, ind):
    """Fit an OLS multiple linear regression and tabulate collinearity diagnostics.

    Parameters
    ----------
    dep : array-like
        Dependent variable, passed to ``sm.OLS`` as the response.
    ind : pandas.DataFrame
        Independent variables. A constant column is added for the fit, and the
        column labels become the index of the returned table.

    Returns
    -------
    pandas.DataFrame
        One row per column of ``ind`` with two float columns: ``"VIF"`` (the
        variance inflation factor computed on the standardised predictors)
        and ``"Tolerance"`` (``1 / VIF``). A VIF that could not be computed is
        reported as NaN and a warning is issued.

    Side effects
    ------------
    Prints ``model.summary()`` for the fitted regression.
    """
    import warnings

    # Fit a MLR Model
    model = sm.OLS(dep, sm.add_constant(ind)).fit()

    labels = list(ind.columns)
    vif_values = [np.nan] * len(labels)

    # Standardise the predictors once; the result does not depend on which
    # column is currently being assessed.
    try:
        scaled = scale(ind)
    except Exception as err:
        # Best effort, as before, but the failure is made visible and the
        # table shows NaN rather than a VIF of 1.0 (which would read as
        # "no collinearity").
        warnings.warn("VIF skipped: could not standardise predictors ({})".format(err))
        scaled = None

    if scaled is not None:
        for pos, label in enumerate(labels):
            try:
                # exog_idx is passed as a plain integer column position.
                vif_values[pos] = variance_inflation_factor(scaled, pos)
            except Exception as err:
                warnings.warn("VIF for {!r} could not be computed ({})".format(label, err))

    # Produce VIF / Tolerance table (float columns from the start).
    vif = DataFrame(index=ind.columns, columns=["Tolerance", "VIF"], dtype=float)
    vif["VIF"] = vif_values
    vif["Tolerance"] = 1 / vif["VIF"]

    print(model.summary())
    return vif

# 4. Define Subsets for Scenarios

In [4]:
# Parse the date column into datetimes. assign() builds a new frame instead of
# writing into tick_df through attribute access, which avoids pandas'
# chained-assignment warning on a filtered frame and keeps this cell safe to
# re-run.
tick_df = tick_df.assign(date=pd.to_datetime(tick_df.date))

# Scenario 1: every record in tick_df
S1 = tick_df

# Scenario 2: records inside the seasonal window of either year.
# Both bounds are exclusive: dates strictly after 1 March and strictly before
# 1 August. (The original comments labelled these windows "Spring".)
mask_17 = (tick_df.date > pd.Timestamp('2017-03-01')) & (tick_df.date < pd.Timestamp('2017-08-01'))  # 2017 window
mask_18 = (tick_df.date > pd.Timestamp('2018-03-01')) & (tick_df.date < pd.Timestamp('2018-08-01'))  # 2018 window
S2 = tick_df[mask_17 | mask_18]

# Scenario 3: Scenario 2 records that also have a non-null host_hbt value
S3 = S2[S2.host_hbt.notnull()]

# 5. Apply MLR to S1, S2, S3 and Print Calibration Results

In [5]:
# Scenario 1: regression on subset S1
res = ["t_adult"]                               # dependent variable column
ind = ["temp", "rh", "elv", "solar", "ndvi"]    # independent variable columns

# Split S1 into response / predictor selections, then fit. MLR prints the OLS
# summary; leaving the call as the last expression shows its returned
# Tolerance/VIF table as the cell output.
Y1, X1 = Var_Builder(df=S1, dep_y=res, ind_x=ind)
MLR(dep=Y1, ind=X1)

                            OLS Regression Results                            
Dep. Variable:                t_adult   R-squared:                       0.143
Model:                            OLS   Adj. R-squared:                  0.086
Method:                 Least Squares   F-statistic:                     2.479
Date:                Thu, 09 Aug 2018   Prob (F-statistic):             0.0393
Time:                        15:51:20   Log-Likelihood:                -288.05
No. Observations:                  80   AIC:                             588.1
Df Residuals:                      74   BIC:                             602.4
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -31.5494     33.856     -0.932      0.3

Unnamed: 0,Tolerance,VIF
temp,0.02057,48.613611
rh,0.17568,5.692158
elv,0.019437,51.447012
solar,0.091647,10.911415
ndvi,0.41592,2.404308


In [6]:
# Scenario 2: regression on subset S2
res = ["t_adult"]                               # dependent variable column
ind = ["temp", "rh", "elv", "solar", "ndvi"]    # independent variable columns

# Split S2 into response / predictor selections, then fit. MLR prints the OLS
# summary; leaving the call as the last expression shows its returned
# Tolerance/VIF table as the cell output.
Y2, X2 = Var_Builder(df=S2, dep_y=res, ind_x=ind)
MLR(dep=Y2, ind=X2)

                            OLS Regression Results                            
Dep. Variable:                t_adult   R-squared:                       0.107
Model:                            OLS   Adj. R-squared:                 -0.011
Method:                 Least Squares   F-statistic:                    0.9101
Date:                Thu, 09 Aug 2018   Prob (F-statistic):              0.485
Time:                        15:51:20   Log-Likelihood:                -169.62
No. Observations:                  44   AIC:                             351.2
Df Residuals:                      38   BIC:                             362.0
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.5909     67.861      0.023      0.9

Unnamed: 0,Tolerance,VIF
temp,0.178339,5.607289
rh,0.09363,10.680376
elv,0.12414,8.055416
solar,0.87299,1.145489
ndvi,0.572293,1.747358


In [7]:
# Scenario 3: regression on subset S3, with host_hbt as an extra predictor
res = ["t_adult"]                                           # dependent variable column
ind = ["temp", "rh", "elv", "solar", "ndvi", "host_hbt"]    # independent variable columns

# Split S3 into response / predictor selections, then fit. MLR prints the OLS
# summary; leaving the call as the last expression shows its returned
# Tolerance/VIF table as the cell output.
Y3, X3 = Var_Builder(df=S3, dep_y=res, ind_x=ind)
MLR(dep=Y3, ind=X3)

                            OLS Regression Results                            
Dep. Variable:                t_adult   R-squared:                       0.204
Model:                            OLS   Adj. R-squared:                  0.021
Method:                 Least Squares   F-statistic:                     1.112
Date:                Thu, 09 Aug 2018   Prob (F-statistic):              0.382
Time:                        15:51:20   Log-Likelihood:                -129.35
No. Observations:                  33   AIC:                             272.7
Df Residuals:                      26   BIC:                             283.2
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -23.7033     96.716     -0.245      0.8

Unnamed: 0,Tolerance,VIF
temp,0.229641,4.354618
rh,0.066261,15.091894
elv,0.14729,6.789344
solar,0.924763,1.081358
ndvi,0.421033,2.375113
host_hbt,0.760366,1.315156
