In [28]:
# Dependencies
import pandas as pd
import plotly.express as plt
from scipy.stats import linregress
import os
import statsmodels.api as sm
import re

In [35]:
def cleanval(string):
    """Extract the first decimal number found in ``string`` as a float.

    The pattern matches an optional run of digits, an optional decimal
    point, and at least one trailing digit (e.g. ``"0.47"``, ``".5"``,
    ``"12"``). A leading minus sign is not part of the pattern, so the
    sign of a negative number is not captured.

    Parameters
    ----------
    string : str
        Text to search for a number.

    Returns
    -------
    float
        The first number found, or ``0.0`` when the text contains no digits.
    """
    # Raw string: '\d' and '\.' are regex escapes, not Python string escapes;
    # a non-raw literal triggers an invalid-escape warning on modern Python.
    match = re.search(r'\d*\.?\d+', string)
    if match is None:
        # Return a float here too so the return type does not depend on input.
        return 0.0
    return float(match.group(0))

In [36]:
# Load the bike-count CSV from the Resources folder.
bikecounts_file = os.path.join('Resources/', 'bikecounts.csv')
bikecounts_df = pd.read_csv(bikecounts_file)

# Parse the 'Day' column into pandas datetimes.
bikecounts_df['Day'] = pd.to_datetime(bikecounts_df['Day'])

# Coerce every precipitation value to float: entries that were read as
# strings go through cleanval (first number in the text, 0 if none);
# everything else is converted with float() directly.
bikecounts_df["Precipitation"] = bikecounts_df["Precipitation"].apply(
    lambda x: float(cleanval(x)) if isinstance(x, str) else float(x)
)
bikecounts_df.head(20)

Unnamed: 0.1,Unnamed: 0,Date,Day,High Temp (°F),Low Temp (°F),Precipitation,Brooklyn Bridge,Manhattan Bridge,Williamsburg Bridge,Queensboro Bridge,Total
0,0,2016-04-01,2016-04-01,78.1,66.0,0.01,1704.0,3126,4115.0,2552.0,11497
1,1,2016-04-02,2016-04-02,55.0,48.9,0.15,827.0,1646,2565.0,1884.0,6922
2,2,2016-04-03,2016-04-03,39.9,34.0,0.09,526.0,1232,1695.0,1306.0,4759
3,3,2016-04-04,2016-04-04,44.1,33.1,0.47,521.0,1067,1440.0,1307.0,4335
4,4,2016-04-05,2016-04-05,42.1,26.1,0.0,1416.0,2617,3081.0,2357.0,9471
5,5,2016-04-06,2016-04-06,45.0,30.0,0.0,1885.0,3329,3856.0,2849.0,11919
6,6,2016-04-07,2016-04-07,57.0,53.1,0.09,1276.0,2581,3282.0,2457.0,9596
7,7,2016-04-08,2016-04-08,46.9,44.1,0.01,1982.0,3455,4113.0,3194.0,12744
8,8,2016-04-09,2016-04-09,43.0,37.9,0.09,504.0,997,1507.0,1502.0,4510
9,9,2016-04-10,2016-04-10,48.9,30.9,0.0,1447.0,2387,3132.0,2160.0,9126


In [47]:
# Build a copy of the loaded frame without the 'Unnamed: 0' column
# (presumably a saved row index from an earlier export - TODO confirm),
# then print the resulting schema.
cleanBikes_df = bikecounts_df.drop(labels=['Unnamed: 0'], axis='columns')
cleanBikes_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 10 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   Date                 30 non-null     object        
 1   Day                  30 non-null     datetime64[ns]
 2   High Temp (°F)       30 non-null     float64       
 3   Low Temp (°F)        30 non-null     float64       
 4   Precipitation        30 non-null     float64       
 5   Brooklyn Bridge      30 non-null     float64       
 6   Manhattan Bridge     30 non-null     int64         
 7   Williamsburg Bridge  30 non-null     float64       
 8   Queensboro Bridge    30 non-null     float64       
 9   Total                30 non-null     int64         
dtypes: datetime64[ns](1), float64(6), int64(2), object(1)
memory usage: 2.5+ KB


In [49]:
# Pairwise correlations between the numeric columns only.
# cleanBikes_df still holds 'Date' (object) and 'Day' (datetime64); newer
# pandas versions no longer drop non-numeric columns silently in corr() and
# raise on the string column, so restrict to numeric dtypes explicitly.
cleanBikes_df.select_dtypes(include='number').corr()

Unnamed: 0,High Temp (°F),Low Temp (°F),Precipitation,Brooklyn Bridge,Manhattan Bridge,Williamsburg Bridge,Queensboro Bridge,Total
High Temp (°F),1.0,0.823853,-0.345898,0.739377,0.720175,0.764336,0.727825,0.743344
Low Temp (°F),0.823853,1.0,-0.165441,0.46455,0.465097,0.534212,0.475725,0.492124
Precipitation,-0.345898,-0.165441,1.0,-0.599514,-0.588419,-0.576076,-0.596254,-0.591079
Brooklyn Bridge,0.739377,0.46455,-0.599514,1.0,0.983148,0.980463,0.976991,0.989339
Manhattan Bridge,0.720175,0.465097,-0.588419,0.983148,1.0,0.989805,0.985867,0.996323
Williamsburg Bridge,0.764336,0.534212,-0.576076,0.980463,0.989805,1.0,0.988852,0.996639
Queensboro Bridge,0.727825,0.475725,-0.596254,0.976991,0.985867,0.988852,1.0,0.993391
Total,0.743344,0.492124,-0.591079,0.989339,0.996323,0.996639,0.993391,1.0


In [51]:
# Null hypothesis:
# there is no relationship between the temperature and the number of people
# riding across the Brooklyn Bridge.
predictor_columns = ["High Temp (°F)", "Low Temp (°F)", "Precipitation"]

# Design matrix: the three predictors plus a constant (intercept) column.
X = sm.add_constant(bikecounts_df[predictor_columns])
# Response: Brooklyn Bridge counts, kept as a single-column DataFrame.
y = bikecounts_df[["Brooklyn Bridge"]]

# Ordinary least squares fit of y on X; print the full regression summary.
model = sm.OLS(y, X)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:        Brooklyn Bridge   R-squared:                       0.712
Model:                            OLS   Adj. R-squared:                  0.679
Method:                 Least Squares   F-statistic:                     21.46
Date:                Fri, 22 Oct 2021   Prob (F-statistic):           3.31e-07
Time:                        10:37:13   Log-Likelihood:                -230.47
No. Observations:                  30   AIC:                             468.9
Df Residuals:                      26   BIC:                             474.5
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
const           -718.7735    626.377     -1.

In [None]:
#Analysis:
# We reject the null hypothesis: the p-value for Low Temp (°F) is 0.043, which is below the 0.05 significance level.
# This is evidence that the null hypothesis of no relationship between temperature and Brooklyn Bridge ridership does not hold.
# Also note that the model fits the data moderately well, with an adjusted R^2 of 0.679 in the regression summary above.