# Bonus: Temperature Analysis I

In [1]:
import pandas as pd
from datetime import datetime as dt

In [3]:
# "tobs" is "temperature observations"
# Load the raw measurements CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer='./Resources/hawaii_measurements.csv')
# Preview the first five rows as the cell output.
df.head(n=5)

Unnamed: 0,station,date,prcp,tobs
0,USC00519397,2010-01-01,0.08,65
1,USC00519397,2010-01-02,0.0,63
2,USC00519397,2010-01-03,0.0,74
3,USC00519397,2010-01-04,0.0,76
4,USC00519397,2010-01-06,,73


In [4]:
# Parse the string dates into datetime64[ns] values, replacing the
# existing "date" column, then show the converted column.
df = df.assign(date=df['date'].astype('datetime64[ns]'))
df['date']

0       2010-01-01
1       2010-01-02
2       2010-01-03
3       2010-01-04
4       2010-01-06
           ...    
19545   2017-08-19
19546   2017-08-20
19547   2017-08-21
19548   2017-08-22
19549   2017-08-23
Name: date, Length: 19550, dtype: datetime64[ns]

In [5]:
# Promote the "date" column to be the DataFrame's index
# (it is removed from the regular columns in the process).
df = df.set_index(keys="date")
df

Unnamed: 0_level_0,station,prcp,tobs
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-01,USC00519397,0.08,65
2010-01-02,USC00519397,0.00,63
2010-01-03,USC00519397,0.00,74
2010-01-04,USC00519397,0.00,76
2010-01-06,USC00519397,,73
...,...,...,...
2017-08-19,USC00516128,0.09,71
2017-08-20,USC00516128,,78
2017-08-21,USC00516128,0.56,76
2017-08-22,USC00516128,0.50,76


In [6]:
# Discard the date index: drop=True throws the index values away instead of
# restoring them as a column, leaving a default 0..n-1 integer index.
df = df.reset_index(drop=True)
df

Unnamed: 0,station,prcp,tobs
0,USC00519397,0.08,65
1,USC00519397,0.00,63
2,USC00519397,0.00,74
3,USC00519397,0.00,76
4,USC00519397,,73
...,...,...,...
19545,USC00516128,0.09,71
19546,USC00516128,,78
19547,USC00516128,0.56,76
19548,USC00516128,0.50,76


### Compare June and December data across all years 

In [14]:
from scipy import stats
import numpy as np
import pandas as pd
import datetime as dt

# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func, inspect, extract

In [8]:
# Filter data for desired months

In [15]:
# Create the engine for the SQLite database file under Resources/.
engine = create_engine("sqlite:///Resources/hawaii.sqlite")

# Reflect the existing database tables into automap model classes.
# `autoload_with=engine` is the supported spelling (SQLAlchemy 1.4+); the
# older `prepare(engine, reflect=True)` form is deprecated in 1.4 and is
# rejected by SQLAlchemy 2.0.
Base = automap_base()
Base.prepare(autoload_with=engine)

# List the class names automap found. Jupyter only displays this when it is
# the last expression of the cell; here it is kept as a quick sanity hook.
Base.classes.keys()

# Save references to each mapped table class.
Measurement = Base.classes.measurement
Station = Base.classes.station

# Create our session (link) from Python to the DB.
session = Session(engine)

In [33]:
# Fetch the raw June observations, across every year in the table.
# strftime("%m", ...) extracts the two-digit month, so "06" selects June.
# This cell only collects (date, tobs) rows; no average is computed here.
june_temp = (
    session.query(Measurement.date, Measurement.tobs)
    .filter(func.strftime("%m", Measurement.date) == "06")
    .all()
)

In [34]:
# Fetch the raw December observations, across every year in the table.
# strftime("%m", ...) extracts the two-digit month, so '12' selects December.
# This cell only collects (date, tobs) rows; no average is computed here.
dec_temp = (
    session.query(Measurement.date, Measurement.tobs)
    .filter(func.strftime("%m", Measurement.date) == '12')
    .all()
)

In [35]:
# Turn each month's raw query rows into a labelled DataFrame, then stack the
# two frames into one long table with a "Month" column identifying the source.
june_df = pd.DataFrame(june_temp, columns=["Date", "Temperature"])
dec_df = pd.DataFrame(dec_temp, columns=["Date", "Temperature"])
june_df["Month"] = "June"
dec_df["Month"] = "December"
temp_df = pd.concat([june_df, dec_df], ignore_index=True)

# Average temperature for June and for December.
# The Temperature column is selected explicitly: "Date" is not a numeric
# column, and pandas >= 2.0 raises a TypeError when mean() is asked to
# aggregate it instead of silently dropping it as older versions did.
# The double brackets keep the result a one-column DataFrame.
avg_temp_month = temp_df.groupby("Month")[["Temperature"]].mean()
avg_temp_month

Unnamed: 0_level_0,Temperature
Month,Unnamed: 1_level_1
December,71.041529
June,74.944118


In [45]:
# Create collections of temperature data.
# Each collection is the month's Temperature column itself (one value per
# observation), so it can be handed directly to numpy/scipy routines.
# Wrapping the column in a list literal would instead give a one-element
# list whose only item is the whole Series.

june = june_df["Temperature"]
dec = dec_df["Temperature"]

In [47]:
# Run an independent (unpaired) two-sample t-test comparing the June and
# December temperature observations. ttest_ind treats the two samples as
# unrelated groups; it is not a paired test.

from scipy import stats
stats.ttest_ind(a=june_df["Temperature"], b=dec_df["Temperature"])

Ttest_indResult(statistic=31.60372399000329, pvalue=3.9025129038616655e-191)

### Analysis



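An independent (unpaired) two-sample t-test (`scipy.stats.ttest_ind`) is used to determine whether the average June and December temperatures in Honolulu, Hawaii differ over the period between 2010 and 2017. Both samples of temperature observations come from the same location and contrast summer temperature (after a cold season is over) with winter temperature (after a warm season is over). However, the June and December readings are two separate groups of observations rather than matched pairs, so an unpaired test is the appropriate choice; a paired test would require each June observation to be matched with exactly one December observation.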

The t-statistic is 31.604; together with the degrees of freedom, it is used to calculate the p-value.

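The p-value (about 3.9e-191) is far less than 0.05. It can be concluded that the difference in means is statistically significant and that there is a meaningful difference in temperature between June and December: the June average (74.94) is about 3.9 degrees higher than the December average (71.04).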