In [2]:
# Dependencies
import pandas as pd
import plotly.express as plt
import os
import statsmodels.api as sm

In [3]:
# Load the daily bridge bike counts and parse the 'Day' column into datetimes.
bikecounts_file = os.path.join('../', 'Resources', 'bikecounts.csv')
bikecounts_df = pd.read_csv(bikecounts_file).assign(
    Day=lambda frame: pd.to_datetime(frame['Day'])
)
bikecounts_df.head()

Unnamed: 0.1,Unnamed: 0,Date,Day,High Temp (°F),Low Temp (°F),Precipitation,Brooklyn Bridge,Manhattan Bridge,Williamsburg Bridge,Queensboro Bridge,Total
0,0,2016-04-01,2016-04-01,78.1,66.0,0.01,1704.0,3126,4115.0,2552.0,11497
1,1,2016-04-02,2016-04-02,55.0,48.9,0.15,827.0,1646,2565.0,1884.0,6922
2,2,2016-04-03,2016-04-03,39.9,34.0,0.09,526.0,1232,1695.0,1306.0,4759
3,3,2016-04-04,2016-04-04,44.1,33.1,0.47 (S),521.0,1067,1440.0,1307.0,4335
4,4,2016-04-05,2016-04-05,42.1,26.1,0,1416.0,2617,3081.0,2357.0,9471


In [4]:
# Strip annotation markers from a raw precipitation reading so that the
# remaining text can be converted to a float by the caller.
def clear_characters(number):
    """Remove the non-numeric markers from a raw precipitation value.

    Every occurrence of 'T', '(S)' and the space character is removed (in
    that order) and the result is stripped of surrounding whitespace.
    NOTE(review): presumably 'T' marks trace precipitation and '(S)' marks
    snow -- confirm against the data source.

    Parameters
    ----------
    number : str or any
        Raw cell value. Strings are cleaned; any other value (for example a
        float or NaN from an already-numeric column) is returned unchanged
        instead of raising AttributeError.

    Returns
    -------
    str or any
        The cleaned text (an empty string when the value held only markers),
        or the input itself when it is not a string.
    """
    # Non-string cells have no markers to remove; pass them through so the
    # function is safe on missing values and on already-converted columns.
    if not isinstance(number, str):
        return number
    for chara in ('T', '(S)', ' '):
        number = number.replace(chara, '')
    # Spaces are already gone; strip() still trims tabs/newlines at the edges.
    return number.strip()

In [5]:
# Clean the raw precipitation text and convert the column to float.
# Casting to str first keeps this cell idempotent: after the first run the
# column is already float64, and re-running would otherwise hand floats to
# clear_characters.
precipitation_text = bikecounts_df['Precipitation'].astype(str).apply(clear_characters)
# A value that held only markers is now an empty string; treat it as 0.
bikecounts_df['Precipitation'] = precipitation_text.replace('', 0).astype('float64')
bikecounts_df.head()

Unnamed: 0.1,Unnamed: 0,Date,Day,High Temp (°F),Low Temp (°F),Precipitation,Brooklyn Bridge,Manhattan Bridge,Williamsburg Bridge,Queensboro Bridge,Total
0,0,2016-04-01,2016-04-01,78.1,66.0,0.01,1704.0,3126,4115.0,2552.0,11497
1,1,2016-04-02,2016-04-02,55.0,48.9,0.15,827.0,1646,2565.0,1884.0,6922
2,2,2016-04-03,2016-04-03,39.9,34.0,0.09,526.0,1232,1695.0,1306.0,4759
3,3,2016-04-04,2016-04-04,44.1,33.1,0.47,521.0,1067,1440.0,1307.0,4335
4,4,2016-04-05,2016-04-05,42.1,26.1,0.0,1416.0,2617,3081.0,2357.0,9471


# HYPOTHESIS

## The number of bikes crossing the Brooklyn Bridge is directly proportional to precipitation.


# NULL HYPOTHESIS
## There is no relationship between the number of bikes crossing the Brooklyn Bridge and precipitation.
### The test is performed with an alpha value of 0.05. Hence, a p-value less than 0.05 will result in the null hypothesis being rejected.

In [6]:
# Regress Brooklyn Bridge crossings on precipitation with ordinary least squares.
independent_variable = bikecounts_df.loc[:, 'Precipitation']
dependent_variable = bikecounts_df.loc[:, 'Brooklyn Bridge']

# OLS does not include an intercept by default, so add a constant column.
X = sm.add_constant(independent_variable)

regres_model = sm.OLS(endog=dependent_variable, exog=X)
outcome = regres_model.fit()

In [7]:
# Show the fitted model's regression summary. Leaving it as the cell's last
# expression lets Jupyter use the rich table display instead of plain text.
outcome.summary()

0,1,2,3
Dep. Variable:,Brooklyn Bridge,R-squared:,0.359
Model:,OLS,Adj. R-squared:,0.337
Method:,Least Squares,F-statistic:,15.71
Date:,"Sat, 27 Mar 2021",Prob (F-statistic):,0.000463
Time:,21:12:40,Log-Likelihood:,-242.48
No. Observations:,30,AIC:,489.0
Df Residuals:,28,BIC:,491.8
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,2571.0184,166.446,15.447,0.000,2230.068,2911.968
Precipitation,-5758.9508,1452.956,-3.964,0.000,-8735.195,-2782.706

0,1,2,3
Omnibus:,3.089,Durbin-Watson:,0.757
Prob(Omnibus):,0.213,Jarque-Bera (JB):,1.919
Skew:,-0.391,Prob(JB):,0.383
Kurtosis:,2.038,Cond. No.,9.84


In [8]:
# Alternative summary layout, which reports the coefficient statistics
# (including P>|t|) to four decimal places. Left as the last expression so
# Jupyter uses the rich display instead of plain text.
outcome.summary2()

                    Results: Ordinary least squares
Model:                OLS               Adj. R-squared:      0.337     
Dependent Variable:   Brooklyn Bridge   AIC:                 488.9607  
Date:                 2021-03-27 21:21  BIC:                 491.7631  
No. Observations:     30                Log-Likelihood:      -242.48   
Df Model:             1                 F-statistic:         15.71     
Df Residuals:         28                Prob (F-statistic):  0.000463  
R-squared:            0.359             Scale:               6.5768e+05
-----------------------------------------------------------------------
                Coef.     Std.Err.    t    P>|t|    [0.025     0.975]  
-----------------------------------------------------------------------
const          2571.0184  166.4464 15.4465 0.0000  2230.0684  2911.9684
Precipitation -5758.9508 1452.9556 -3.9636 0.0005 -8735.1954 -2782.7061
-----------------------------------------------------------------------
Omnibus:    

### The p-value of the regression (Prob (F-statistic)) is 0.000463, which is less than the alpha value of 0.05. Therefore, the null hypothesis is rejected.

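### The R-squared value is 0.359 (adjusted R-squared 0.337), so precipitation explains roughly 36% of the variation in the number of bikes crossing the Brooklyn Bridge. R-squared is never negative and does not indicate the direction of a relationship; the direction comes from the sign of the Precipitation coefficient, which is negative (-5758.95). The relationship is therefore inverse: the number of bikes crossing the Brooklyn Bridge decreases as precipitation increases, which contradicts the hypothesised direct relationship.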