In [23]:
# Dependencies
import pandas as pd
import plotly.express as plt
import os
import statsmodels.api as sm

In [2]:
!pip install plotly

Collecting plotly
  Downloading plotly-4.14.3-py2.py3-none-any.whl (13.2 MB)
Collecting retrying>=1.3.3
  Downloading retrying-1.3.3.tar.gz (10 kB)
Building wheels for collected packages: retrying
  Building wheel for retrying (setup.py): started
  Building wheel for retrying (setup.py): finished with status 'done'
  Created wheel for retrying: filename=retrying-1.3.3-py3-none-any.whl size=11434 sha256=1f227db4a30a0905c426c95b07aa5962b7cbdd35c785842c5c5b1797ab00e0d8
  Stored in directory: c:\users\ruthi\appdata\local\pip\cache\wheels\c4\a7\48\0a434133f6d56e878ca511c0e6c38326907c0792f67b476e56
Successfully built retrying
Installing collected packages: retrying, plotly
Successfully installed plotly-4.14.3 retrying-1.3.3


In [16]:
# Load the bike-count CSV and convert the 'Day' column to datetimes
bikecounts_file = os.path.join('../','Resources', 'bikecounts.csv')
bikecounts_df = pd.read_csv(bikecounts_file).assign(
    Day=lambda frame: pd.to_datetime(frame['Day'])
)
bikecounts_df.head()

Unnamed: 0.1,Unnamed: 0,Date,Day,High Temp (°F),Low Temp (°F),Precipitation,Brooklyn Bridge,Manhattan Bridge,Williamsburg Bridge,Queensboro Bridge,Total
0,0,2016-04-01,2016-04-01,78.1,66.0,0.01,1704.0,3126,4115.0,2552.0,11497
1,1,2016-04-02,2016-04-02,55.0,48.9,0.15,827.0,1646,2565.0,1884.0,6922
2,2,2016-04-03,2016-04-03,39.9,34.0,0.09,526.0,1232,1695.0,1306.0,4759
3,3,2016-04-04,2016-04-04,44.1,33.1,0.47 (S),521.0,1067,1440.0,1307.0,4335
4,4,2016-04-05,2016-04-05,42.1,26.1,0,1416.0,2617,3081.0,2357.0,9471


In [17]:
# Remove the leftover index column. Reassigning (instead of inplace=True) with
# errors="ignore" keeps this cell safe to re-run once the column is already gone.
bikecounts_df = bikecounts_df.drop(columns=["Unnamed: 0"], errors="ignore")

# Midpoint of the daily high and low temperature. NOTE: this is the average of
# the two readings, not a statistical median; the column name is kept because
# the regression cell selects it by this exact label.
bikecounts_df['Median Temp (°F)'] = (
    bikecounts_df['High Temp (°F)'] + bikecounts_df['Low Temp (°F)']
) / 2
bikecounts_df.head()

Unnamed: 0,Date,Day,High Temp (°F),Low Temp (°F),Precipitation,Brooklyn Bridge,Manhattan Bridge,Williamsburg Bridge,Queensboro Bridge,Total,Median Temp (°F)
0,2016-04-01,2016-04-01,78.1,66.0,0.01,1704.0,3126,4115.0,2552.0,11497,72.05
1,2016-04-02,2016-04-02,55.0,48.9,0.15,827.0,1646,2565.0,1884.0,6922,51.95
2,2016-04-03,2016-04-03,39.9,34.0,0.09,526.0,1232,1695.0,1306.0,4759,36.95
3,2016-04-04,2016-04-04,44.1,33.1,0.47 (S),521.0,1067,1440.0,1307.0,4335,38.6
4,2016-04-05,2016-04-05,42.1,26.1,0,1416.0,2617,3081.0,2357.0,9471,34.1


In [18]:
# Get rid of symbols/characters that are not needed and change precipitation to float
def clear_characters(number):
    """Strip the non-numeric annotations from a raw precipitation reading.

    Every 'T', '(S)' and space is removed, so '0.47 (S)' becomes '0.47' and a
    bare 'T' becomes the empty string.

    Parameters
    ----------
    number : str or any value with a string form
        Raw cell value. Non-string values (for example a float from a column
        that has already been converted) are passed through ``str()`` first
        instead of raising ``AttributeError``.

    Returns
    -------
    str
        The cleaned text; empty when the input contained only markers.
    """
    text = number if isinstance(number, str) else str(number)
    for chara in ('T', '(S)', ' '):
        text = text.replace(chara, '')
    # Spaces are already gone; strip() still trims tabs/newlines at the ends.
    return text.strip()

In [19]:
# Clean the precipitation text and convert it to float64.
# Mapping through str first keeps this cell re-runnable: on a second run the
# column already holds floats, which have no .replace() for clear_characters.
precipitation_clean = (
    bikecounts_df['Precipitation']
    .map(str)
    .apply(clear_characters)
    # Readings that were only a marker come back empty; treat them as zero.
    .replace('', '0')
    .astype('float64')
)
bikecounts_df['Precipitation'] = precipitation_clean
bikecounts_df.head()

Unnamed: 0,Date,Day,High Temp (°F),Low Temp (°F),Precipitation,Brooklyn Bridge,Manhattan Bridge,Williamsburg Bridge,Queensboro Bridge,Total,Median Temp (°F)
0,2016-04-01,2016-04-01,78.1,66.0,0.01,1704.0,3126,4115.0,2552.0,11497,72.05
1,2016-04-02,2016-04-02,55.0,48.9,0.15,827.0,1646,2565.0,1884.0,6922,51.95
2,2016-04-03,2016-04-03,39.9,34.0,0.09,526.0,1232,1695.0,1306.0,4759,36.95
3,2016-04-04,2016-04-04,44.1,33.1,0.47,521.0,1067,1440.0,1307.0,4335,38.6
4,2016-04-05,2016-04-05,42.1,26.1,0.0,1416.0,2617,3081.0,2357.0,9471,34.1


# HYPOTHESIS

## The number of bikes crossing the Brooklyn Bridge in a day depends on precipitation and temperature.

## The number of bikes crossing the Brooklyn Bridge decreases as temperature falls and as precipitation increases.

# NULL HYPOTHESIS
## The number of bikes crossing the Brooklyn Bridge is independent of precipitation and temperature: an increase in precipitation or a decrease in temperature does not lower the number of bikes crossing the bridge.

In [24]:
# Predictors: precipitation and temperature; response: Brooklyn Bridge bike count
independent_variable = bikecounts_df.loc[:, ['Precipitation', 'Median Temp (°F)']]
dependent_variable = bikecounts_df.loc[:, 'Brooklyn Bridge']

# add_constant supplies the intercept term (the 'const' row in the summary)
X = sm.add_constant(independent_variable)
regres_model = sm.OLS(endog=dependent_variable, exog=X)
outcome = regres_model.fit()

In [28]:
# Render the fitted OLS results tables (fit statistics, coefficients, diagnostics).
outcome.summary()

0,1,2,3
Dep. Variable:,Brooklyn Bridge,R-squared:,0.605
Model:,OLS,Adj. R-squared:,0.576
Method:,Least Squares,F-statistic:,20.7
Date:,"Sat, 27 Mar 2021",Prob (F-statistic):,3.55e-06
Time:,01:06:00,Log-Likelihood:,-235.22
No. Observations:,30,AIC:,476.4
Df Residuals:,27,BIC:,480.6
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-237.9800,697.754,-0.341,0.736,-1669.653,1193.693
Precipitation,-4395.3591,1208.114,-3.638,0.001,-6874.204,-1916.514
Median Temp (°F),51.1740,12.478,4.101,0.000,25.571,76.777

0,1,2,3
Omnibus:,3.614,Durbin-Watson:,1.016
Prob(Omnibus):,0.164,Jarque-Bera (JB):,2.743
Skew:,-0.741,Prob(JB):,0.254
Kurtosis:,3.026,Cond. No.,571.0


In [29]:
# Print the alternate summary layout, which shows p-values to four decimal places.
print(outcome.summary2())

                     Results: Ordinary least squares
Model:                 OLS                Adj. R-squared:       0.576     
Dependent Variable:    Brooklyn Bridge    AIC:                  476.4342  
Date:                  2021-03-27 01:06   BIC:                  480.6378  
No. Observations:      30                 Log-Likelihood:       -235.22   
Df Model:              2                  F-statistic:          20.70     
Df Residuals:          27                 Prob (F-statistic):   3.55e-06  
R-squared:             0.605              Scale:                4.2026e+05
--------------------------------------------------------------------------
                   Coef.     Std.Err.    t    P>|t|    [0.025     0.975]  
--------------------------------------------------------------------------
const             -237.9800  697.7538 -0.3411 0.7357 -1669.6526  1193.6925
Precipitation    -4395.3591 1208.1137 -3.6382 0.0011 -6874.2036 -1916.5145
Median Temp (°F)    51.1740   12.4783  4.1010 0

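## The p-values for Precipitation (0.0011) and Median Temp (0.0003) are both close to 0, well below the conventional 0.05 significance level. For this reason, we reject the null hypothesis.
## Hence, there is a statistically significant relationship between the number of bikes crossing the Brooklyn Bridge and both precipitation and temperature. The coefficient signs (negative for precipitation, positive for temperature) match the direction stated in the hypothesis.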