In [2]:
import pandas as pd
from datetime import datetime as dt
from scipy import stats

In [3]:
# Load the measurements CSV into a DataFrame and preview the first rows.
measurements_path = '../Resources/hawaii_measurements.csv'
hawaii_analysis = pd.read_csv(measurements_path)
hawaii_analysis.head()

Unnamed: 0,station,date,prcp,tobs
0,USC00519397,2010-01-01,0.08,65
1,USC00519397,2010-01-02,0.0,63
2,USC00519397,2010-01-03,0.0,74
3,USC00519397,2010-01-04,0.0,76
4,USC00519397,2010-01-06,,73


In [4]:
# Inspect column dtypes; 'date' is loaded as object (string), not datetime.
hawaii_analysis.dtypes

station     object
date        object
prcp       float64
tobs         int64
dtype: object

In [5]:
# 'date' was read as strings (object dtype), so parse it into datetime64
# using the explicit YYYY-MM-DD format.
# https://datatofish.com/strings-to-datetime-pandas/
parsed_dates = pd.to_datetime(hawaii_analysis['date'], format='%Y-%m-%d')
hawaii_analysis = hawaii_analysis.assign(date=parsed_dates)
hawaii_analysis.dtypes

station            object
date       datetime64[ns]
prcp              float64
tobs                int64
dtype: object

In [6]:
# Index the frame by date so rows can be selected by month later on.
# drop=False keeps the 'date' column alongside the new index.
hawaii_analysis = hawaii_analysis.set_index('date', drop=False)
hawaii_analysis.head()

Unnamed: 0_level_0,station,date,prcp,tobs
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-01,USC00519397,2010-01-01,0.08,65
2010-01-02,USC00519397,2010-01-02,0.0,63
2010-01-03,USC00519397,2010-01-03,0.0,74
2010-01-04,USC00519397,2010-01-04,0.0,76
2010-01-06,USC00519397,2010-01-06,,73


In [7]:
# Remove the 'date' column, which is now redundant with the index.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps
# this cell idempotent: re-running it after the column is already gone no
# longer raises KeyError.
hawaii_analysis = hawaii_analysis.drop(columns='date', errors='ignore')
hawaii_analysis.head()

Unnamed: 0_level_0,station,prcp,tobs
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-01,USC00519397,0.08,65
2010-01-02,USC00519397,0.0,63
2010-01-03,USC00519397,0.0,74
2010-01-04,USC00519397,0.0,76
2010-01-06,USC00519397,,73


### Identify Avg Temp in June and December across all years and run t-test

In [8]:
# Split observations by calendar month of the date index, pooled over all
# years: month 6 = June, month 12 = December.
obs_month = hawaii_analysis.index.month
june_data = hawaii_analysis.loc[obs_month == 6]
dec_data = hawaii_analysis.loc[obs_month == 12]

In [9]:
# Column means over all June observations.
# numeric_only=True restricts the reduction to the numeric columns (prcp, tobs);
# without it pandas >= 2.0 raises TypeError on the string 'station' column.
june_data.mean(numeric_only=True)

prcp     0.136360
tobs    74.944118
dtype: float64

In [10]:
# Column means over all December observations.
# numeric_only=True restricts the reduction to the numeric columns (prcp, tobs);
# without it pandas >= 2.0 raises TypeError on the string 'station' column.
dec_data.mean(numeric_only=True)

prcp     0.216819
tobs    71.041529
dtype: float64

In [11]:
# Pull out just the temperature observations (tobs) for each month.
jun_temps = june_data['tobs']
dec_temps = dec_data['tobs']

In [12]:
# https://docs.scipy.org/doc/scipy/reference/stats.html
# ttest_ind is the t-test for the means of two independent samples:
# sample 1 = June temperatures, sample 2 = December temperatures.
# The two samples are treated as independent (unpaired) observations.
# equal_var=True is scipy's default (pooled-variance Student's t-test), spelled out here.
# NOTE(review): consider equal_var=False (Welch) if the two variances differ noticeably.
stats.ttest_ind(jun_temps, dec_temps, equal_var=True)

Ttest_indResult(statistic=31.60372399000329, pvalue=3.9025129038616655e-191)

Across all weather stations in Hawaii, the average temperatures in June and December differ by about 3.9 degrees Fahrenheit (74.9 vs. 71.0). The very low p-value (3.9e-191) means we can reject the null hypothesis that the mean temperatures in June and December are equal, i.e. the difference is statistically significant.