In [1]:
# Dependencies
import pandas as pd
import plotly.express as plt
import os
import statsmodels.api as sm

In [2]:
bikecounts_file = os.path.join('../','Resources', 'bikecounts.csv')
bikecounts_df = pd.read_csv(bikecounts_file)
bikecounts_df['Day'] = pd.to_datetime(bikecounts_df['Day'])
bikecounts_df.head()

Unnamed: 0.1,Unnamed: 0,Date,Day,High Temp (°F),Low Temp (°F),Precipitation,Brooklyn Bridge,Manhattan Bridge,Williamsburg Bridge,Queensboro Bridge,Total
0,0,2016-04-01,2016-04-01,78.1,66.0,0.01,1704.0,3126,4115.0,2552.0,11497
1,1,2016-04-02,2016-04-02,55.0,48.9,0.15,827.0,1646,2565.0,1884.0,6922
2,2,2016-04-03,2016-04-03,39.9,34.0,0.09,526.0,1232,1695.0,1306.0,4759
3,3,2016-04-04,2016-04-04,44.1,33.1,0.47 (S),521.0,1067,1440.0,1307.0,4335
4,4,2016-04-05,2016-04-05,42.1,26.1,0,1416.0,2617,3081.0,2357.0,9471


In [3]:
# Discarding the "Unnamed" Column
bikecounts_df.drop(columns = ["Unnamed: 0"], inplace =True)

In [4]:
Median_Temp = (bikecounts_df["High Temp (°F)"] + bikecounts_df["Low Temp (°F)"])/2
bikecounts_df["Median Temp (°F)"] = Median_Temp
bikecounts_df.head()

Unnamed: 0,Date,Day,High Temp (°F),Low Temp (°F),Precipitation,Brooklyn Bridge,Manhattan Bridge,Williamsburg Bridge,Queensboro Bridge,Total,Median Temp (°F)
0,2016-04-01,2016-04-01,78.1,66.0,0.01,1704.0,3126,4115.0,2552.0,11497,72.05
1,2016-04-02,2016-04-02,55.0,48.9,0.15,827.0,1646,2565.0,1884.0,6922,51.95
2,2016-04-03,2016-04-03,39.9,34.0,0.09,526.0,1232,1695.0,1306.0,4759,36.95
3,2016-04-04,2016-04-04,44.1,33.1,0.47 (S),521.0,1067,1440.0,1307.0,4335,38.6
4,2016-04-05,2016-04-05,42.1,26.1,0,1416.0,2617,3081.0,2357.0,9471,34.1


In [5]:
# Creating a function to discard unneeded letters attached to Precipitation values
def discard_characters(real_value):
    for x in ['T', '(S)', ' ']:
        real_value = real_value.replace(x, '')
    return real_value.strip()

bikecounts_df["Precipitation"] = bikecounts_df["Precipitation"].apply(discard_characters)
bikecounts_df["Precipitation"] = bikecounts_df["Precipitation"].replace('',0).astype('float64')
bikecounts_df.head(15)

Unnamed: 0,Date,Day,High Temp (°F),Low Temp (°F),Precipitation,Brooklyn Bridge,Manhattan Bridge,Williamsburg Bridge,Queensboro Bridge,Total,Median Temp (°F)
0,2016-04-01,2016-04-01,78.1,66.0,0.01,1704.0,3126,4115.0,2552.0,11497,72.05
1,2016-04-02,2016-04-02,55.0,48.9,0.15,827.0,1646,2565.0,1884.0,6922,51.95
2,2016-04-03,2016-04-03,39.9,34.0,0.09,526.0,1232,1695.0,1306.0,4759,36.95
3,2016-04-04,2016-04-04,44.1,33.1,0.47,521.0,1067,1440.0,1307.0,4335,38.6
4,2016-04-05,2016-04-05,42.1,26.1,0.0,1416.0,2617,3081.0,2357.0,9471,34.1
5,2016-04-06,2016-04-06,45.0,30.0,0.0,1885.0,3329,3856.0,2849.0,11919,37.5
6,2016-04-07,2016-04-07,57.0,53.1,0.09,1276.0,2581,3282.0,2457.0,9596,55.05
7,2016-04-08,2016-04-08,46.9,44.1,0.01,1982.0,3455,4113.0,3194.0,12744,45.5
8,2016-04-09,2016-04-09,43.0,37.9,0.09,504.0,997,1507.0,1502.0,4510,40.45
9,2016-04-10,2016-04-10,48.9,30.9,0.0,1447.0,2387,3132.0,2160.0,9126,39.9


# Hypothesis

From the investigation of the dataset, it can be hypothesized that:

a. There is a dependency of number of bikes crossing the brooklyn bridge on both temperature and precipitation.

b. As precipitation rises with a corresponding decline in temperature, the number of bikes crossing the brooklyn bridge reduces.

# Null Hypothesis

If the number of bikers that cross the bridge is independent on the precipitation and temperature, then a decline in temperature and a rise in precipitation would have no effect in the reduction of the number of bikers crossing the brooklyn bridge.

In [6]:
# Linear Regression Model
Independent = bikecounts_df[["Median Temp (°F)", "Precipitation"]]
Dependent = bikecounts_df["Brooklyn Bridge"]
Z = sm.add_constant(Independent)
Model_for_Regression = sm.OLS(Dependent, Z)
final = Model_for_Regression.fit()

In [7]:
print(final.summary())

                            OLS Regression Results                            
Dep. Variable:        Brooklyn Bridge   R-squared:                       0.605
Model:                            OLS   Adj. R-squared:                  0.576
Method:                 Least Squares   F-statistic:                     20.70
Date:                Sat, 09 Oct 2021   Prob (F-statistic):           3.55e-06
Time:                        01:31:28   Log-Likelihood:                -235.22
No. Observations:                  30   AIC:                             476.4
Df Residuals:                      27   BIC:                             480.6
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const             -237.9800    697.754  

In [8]:
print(final.summary(2))

                            OLS Regression Results                            
Dep. Variable:                      2   R-squared:                       0.605
Model:                            OLS   Adj. R-squared:                  0.576
Method:                 Least Squares   F-statistic:                     20.70
Date:                Sat, 09 Oct 2021   Prob (F-statistic):           3.55e-06
Time:                        01:31:34   Log-Likelihood:                -235.22
No. Observations:                  30   AIC:                             476.4
Df Residuals:                      27   BIC:                             480.6
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const             -237.9800    697.754  

# Conclusion

With the P-value getting close to zero (0) in relation to both Median Temperature and Precipitation, it disproves the null hypothesis. We find a considerable relationship between the number of bikes crossing the brooklyn bridge and Temperature and Precipitation.