In [1]:
import numpy as np
import pandas as pd
from datetime import datetime, date, time, timedelta
import matplotlib.pyplot as plt
from scipy import stats,optimize
from sklearn import linear_model
import statsmodels.api as sm
from sklearn.metrics import r2_score,mean_squared_error
import math
from noaa_sdk import noaa



In [38]:
# Load the spirometry log and key it by (Date, Hour); the 'Time' column is
# dropped once the frame is indexed.
raw_spiro = (
    pd.read_csv('raw_spiro.csv', parse_dates=['Date'])
    .set_index(['Date', 'Hour'])
    .drop(columns=['Time'])
)
raw_spiro.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,PEF(l/min),FEV1(l),unixDate,unixTime
Date,Hour,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-02-21,10,219,1.72,18313.0,36720.0
2020-02-21,11,253,1.78,18313.0,42300.0
2020-02-22,3,388,2.54,18314.0,12660.0
2020-02-22,0,415,2.89,18314.0,480.0
2020-02-23,19,476,2.78,18315.0,70320.0


In [39]:
# Exploratory check of the NOAA client: request observations for one
# zip code / country over a single day and print every record exactly as
# it is returned, so the available fields can be inspected by eye.
# NOTE(review): presumably this performs a live network request through
# noaa_sdk on every run — confirm before including it in a Run-All.
n = noaa.NOAA()
observations = n.get_observations('11432','US',start='2020-01-01',end='2020-01-02')
for observation in observations:
    print(observation)

In [40]:
def get_weather_data(zip_code, country, factors, start=None, end=None):
    """Fetch NOAA observations and return the requested factors as a DataFrame.

    Parameters
    ----------
    zip_code, country
        Passed straight through to ``noaa.NOAA().get_observations``.
    factors
        Container of observation keys to keep (tested with ``k in factors``).
        Each kept entry must be a mapping with a ``'value'`` item.
    start, end
        Date strings formatted ``'%Y-%m-%d'``. When omitted they default to
        the earliest / latest ``"Date"`` index level of the module-level
        ``raw_spiro`` frame. The lookup happens at call time, so re-loading
        ``raw_spiro`` is picked up without re-running this definition.

    Returns
    -------
    pandas.DataFrame
        One row per observation: a column for every requested factor present
        in the observation, plus ``'time'`` (the observation's
        ``'timestamp'`` parsed with ``pd.to_datetime``). Empty when the
        client yields no observations.
    """
    if start is None or end is None:
        spiro_dates = raw_spiro.index.get_level_values("Date")
        if start is None:
            start = spiro_dates.min().strftime('%Y-%m-%d')
        if end is None:
            end = spiro_dates.max().strftime('%Y-%m-%d')

    n = noaa.NOAA()
    observations = n.get_observations(zip_code, country, start=start, end=end)

    # Collect plain dicts and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the whole frame on every iteration.
    records = []
    for observation in observations:
        record = {k: v['value'] for k, v in observation.items() if k in factors}
        # TODO: also split into separate date and hour fields here.
        record['time'] = pd.to_datetime(observation['timestamp'])
        records.append(record)
    return pd.DataFrame(records)

In [41]:
# Weather factors to pull from each NOAA observation.
factors = ['barometricPressure','precipitationLastHour','temperature','relativeHumidity']

# Build the weather frame in one chain instead of mutating it in place:
#  * missing precipitation readings are filled with 0 by explicit
#    assignment — the former `weather[col].fillna(0, inplace=True)` acted
#    on a column selection and does not update the frame under pandas
#    Copy-on-Write;
#  * Date / Hour are derived from each observation's 'time' and become the
#    index.
# NOTE(review): 'Date' holds python `date` objects here while raw_spiro is
# read with parse_dates=['Date']; confirm the two index levels still match
# on join under the pandas version in use.
weather = (
    get_weather_data('11432', 'US', factors)
    .assign(
        precipitationLastHour=lambda df: df['precipitationLastHour'].fillna(0),
        Date=lambda df: [ts.date() for ts in df['time']],
        Hour=lambda df: [ts.hour for ts in df['time']],
    )
    .set_index(['Date', 'Hour'])
)
weather.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,barometricPressure,precipitationLastHour,relativeHumidity,temperature,time
Date,Hour,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-04-06,23,101420.0,0.0,74.11363,10.0,2020-04-06 23:51:00
2020-04-06,22,101420.0,0.0,68.877362,11.1,2020-04-06 22:51:00
2020-04-06,21,101390.0,0.0,54.823569,13.3,2020-04-06 21:51:00
2020-04-06,20,101420.0,0.0,39.183839,16.1,2020-04-06 20:51:00
2020-04-06,19,101460.0,0.0,37.715838,16.7,2020-04-06 19:51:00


In [42]:
# Keep only the (Date, Hour) keys present in both frames, discard rows with
# any missing value, and flatten the index back into ordinary columns.
weather_spiro = (
    weather
    .join(raw_spiro, how='inner')
    .dropna()
    .reset_index()
)
# Persist the merged table so it can be reused without the index.
weather_spiro.to_csv('weather_spiro.csv', index=False)
weather_spiro.head()


Unnamed: 0,Date,Hour,barometricPressure,precipitationLastHour,relativeHumidity,temperature,time,PEF(l/min),FEV1(l),unixDate,unixTime
0,2020-02-23,19,102030.0,0.0,30.412102,10.6,2020-02-23 19:51:00,476,2.78,18315.0,70320.0
1,2020-02-23,19,102030.0,0.0,33.061975,10.6,2020-02-23 19:27:00,476,2.78,18315.0,70320.0
2,2020-02-24,5,101970.0,0.0,72.888442,3.3,2020-02-24 05:51:00,294,1.62,18316.0,18780.0
3,2020-02-24,15,101830.0,0.0,60.763303,9.4,2020-02-24 15:51:00,384,2.49,18316.0,56220.0
4,2020-02-25,4,101420.0,0.0,88.909044,6.7,2020-02-25 04:51:00,382,2.93,18317.0,15240.0


In [46]:
# Ordinary least squares: peak expiratory flow regressed on the weather
# factors plus the two unix time columns, with an intercept term.
Y = weather_spiro['PEF(l/min)']
X = sm.add_constant(
    weather_spiro.loc[:, [
        'barometricPressure',
        'precipitationLastHour',
        'relativeHumidity',
        'temperature',
        'unixDate',
        'unixTime',
    ]]
)

mod = sm.OLS(Y, X)
fit = mod.fit()
summary = fit.summary()
summary

0,1,2,3
Dep. Variable:,PEF(l/min),R-squared:,0.112
Model:,OLS,Adj. R-squared:,0.091
Method:,Least Squares,F-statistic:,5.321
Date:,"Tue, 07 Apr 2020",Prob (F-statistic):,3.44e-05
Time:,14:43:27,Log-Likelihood:,-1523.5
No. Observations:,261,AIC:,3061.0
Df Residuals:,254,BIC:,3086.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.178e+04,8893.537,-4.698,0.000,-5.93e+04,-2.43e+04
barometricPressure,0.0019,0.006,0.304,0.761,-0.011,0.015
precipitationLastHour,5980.9941,9435.445,0.634,0.527,-1.26e+04,2.46e+04
relativeHumidity,-0.0762,0.258,-0.295,0.768,-0.584,0.432
temperature,-0.2404,1.643,-0.146,0.884,-3.476,2.995
unixDate,2.2885,0.483,4.741,0.000,1.338,3.239
unixTime,0.0004,0.000,1.686,0.093,-6.25e-05,0.001

0,1,2,3
Omnibus:,3.37,Durbin-Watson:,1.599
Prob(Omnibus):,0.185,Jarque-Bera (JB):,2.745
Skew:,0.137,Prob(JB):,0.253
Kurtosis:,2.578,Cond. No.,206000000.0
