In [1]:
import numpy as np
import pandas as pd
from datetime import datetime, date, time, timedelta
import matplotlib.pyplot as plt
from scipy import stats,optimize
from sklearn import linear_model
import statsmodels.api as sm
from sklearn.metrics import r2_score,mean_squared_error
import math
from noaa_sdk import noaa
from scipy.stats import zscore



In [2]:
# Load the raw spirometry readings, parsing 'Date' as datetimes on read.
# The frame is re-keyed on (Date, Hour) and the 'Time' column is discarded.
raw_spiro = (
    pd.read_csv('raw_spiro.csv', parse_dates=['Date'])
    .set_index(['Date', 'Hour'])
    .drop(columns=['Time'])
)
raw_spiro.head()
# same data as the CSV, new (Date, Hour) index

Unnamed: 0_level_0,Unnamed: 1_level_0,PEF(l/min),FEV1(l),unixDate,unixTime
Date,Hour,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-02-27,15,350,2.57,18319.0,55020.0
2020-02-28,16,357,2.35,18320.0,60360.0
2020-02-29,4,568,3.61,18321.0,16680.0
2020-02-29,19,469,2.79,18321.0,71940.0
2020-02-29,21,511,3.42,18321.0,78900.0


In [3]:
def get_weather_data(zip_code,country,factors,
                     start=raw_spiro.index.get_level_values("Date").min().strftime('%Y-%m-%d'),
                     end=raw_spiro.index.get_level_values("Date").max().strftime('%Y-%m-%d')):
    """Fetch NOAA observations and tabulate the requested factors.

    Parameters
    ----------
    zip_code, country
        Passed through unchanged to ``NOAA.get_observations``.
    factors : container of str
        Observation keys to keep. Each kept entry contributes its ``'value'`` field.
    start, end : str
        Date bounds formatted as 'YYYY-MM-DD'. The defaults are the first and last
        'Date' in ``raw_spiro`` and are computed once, when this ``def`` executes;
        re-run this cell after reloading ``raw_spiro`` to refresh them.

    Returns
    -------
    pandas.DataFrame
        One row per observation, with a column for each requested factor found in
        the observation plus a ``time`` column parsed from ``'timestamp'``.
        Empty when no observations are returned.
    """
    n = noaa.NOAA()
    observations = n.get_observations(zip_code,country,start=start,end=end)
    # Collect plain dicts and build the frame once at the end: DataFrame.append
    # copied the whole frame on every iteration and no longer exists in pandas 2.x.
    records = []
    for observation in observations:
        record = {k: v['value'] for (k, v) in observation.items() if k in factors}
        # Kept as a full timestamp here; callers split it into date and hour.
        record['time'] = pd.to_datetime(observation['timestamp'])
        records.append(record)
    return pd.DataFrame(records)

In [4]:
# Weather factors to pull for every observation in the spirometry date range.
factors = ['barometricPressure','precipitationLastHour','temperature','relativeHumidity']
weather = get_weather_data('11432','US',factors)
# Missing precipitation readings are treated as 0. The filled column is assigned back:
# calling fillna(..., inplace=True) on a column selection works on a temporary object
# under pandas copy-on-write and would leave `weather` unchanged.
weather['precipitationLastHour'] = weather['precipitationLastHour'].fillna(0)
# Split each observation timestamp into the (Date, Hour) key that raw_spiro is indexed by.
# NOTE(review): date/hour are taken from the timestamp as returned by the service;
# confirm it is in the same timezone as the spirometry 'Hour' values.
weather['Date'] = [d.date() for d in weather['time']]
weather['Hour'] = [d.hour for d in weather['time']]
weather = weather.set_index(['Date','Hour'])
weather.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,barometricPressure,precipitationLastHour,relativeHumidity,temperature,time
Date,Hour,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-05-02,17,101460.0,0.0,34.901768,21.1,2020-05-02 17:51:00
2020-05-02,16,101520.0,0.0,34.515809,20.6,2020-05-02 16:51:00
2020-05-02,15,101560.0,0.0,38.940284,20.0,2020-05-02 15:51:00
2020-05-02,14,101590.0,0.0,41.092708,17.8,2020-05-02 14:51:00
2020-05-02,13,101590.0,0.0,44.190555,16.1,2020-05-02 13:51:00


In [5]:
# Daily means of the weather readings. reset_index() already returns a new frame, so
# `weather` itself is untouched. numeric_only=True keeps the non-numeric 'time' column
# out of the average explicitly instead of relying on pandas dropping it silently
# (recent pandas versions no longer do). 'Hour' is numeric, so its daily mean is
# carried along as well.
weather_copy = weather.reset_index().groupby('Date').mean(numeric_only=True)

# Inhaler readings keyed by day; the CSV's unnamed first column holds the date.
inhaler_days = (
    pd.read_csv('inhaler_days.csv')
    .rename(columns={'Unnamed: 0':'Date'})
    .set_index('Date')
)
# Right join keeps one row per inhaler day; dropna() then discards days with any
# missing value (e.g. no matching weather), and exact duplicate rows are removed.
inhaler_days = weather_copy.join(inhaler_days,how='right').dropna().drop_duplicates()
inhaler_days.head()

Unnamed: 0_level_0,Hour,barometricPressure,precipitationLastHour,relativeHumidity,temperature,first_reading,second_reading,difference
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-03-19,15.1,102425.0,0.001395,91.825304,8.51,384,370,-14
2020-03-20,11.894737,101644.473684,0.0,92.148375,9.868421,317,359,42
2020-03-21,11.5,102172.083333,0.0,41.895242,11.8625,226,407,181
2020-03-27,11.5,101055.0,0.0,69.252372,12.083333,357,397,40
2020-04-01,10.740741,101050.740741,0.0,63.813009,6.859259,319,486,167


In [6]:
# Standardise each weather column of inhaler_days to z-scores, rounded to 3 decimals.
# The printed means are taken after rounding, so they are only approximately zero.
zscore_columns = ('barometricPressure', 'precipitationLastHour',
                  'relativeHumidity', 'temperature')

for column in zscore_columns:
    inhaler_days[column] = [round(score, 3) for score in zscore(inhaler_days[column])]

for column in zscore_columns:
    print(column + " mean :", inhaler_days[column].mean())

inhaler_days

barometricPressure mean : -6.661338147750939e-17
precipitationLastHour mean : -0.0003000000000000502
relativeHumidity mean : 0.0001000000000000334
temperature mean : -4.4408920985006264e-17


Unnamed: 0_level_0,Hour,barometricPressure,precipitationLastHour,relativeHumidity,temperature,first_reading,second_reading,difference
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-03-19,15.1,1.562,2.84,1.049,-1.102,384,370,-14
2020-03-20,11.894737,0.552,-0.511,1.066,-0.37,317,359,42
2020-03-21,11.5,1.234,-0.511,-1.463,0.705,226,407,181
2020-03-27,11.5,-0.211,-0.511,-0.086,0.824,357,397,40
2020-04-01,10.740741,-0.217,-0.511,-0.36,-1.992,319,486,167
2020-04-02,11.5,-0.833,-0.511,-1.147,-0.672,381,371,-10
2020-04-06,11.5,0.493,-0.351,0.006,0.505,532,553,21
2020-04-07,11.5,0.158,-0.511,-1.272,1.026,398,532,134
2020-04-09,12.0,-2.051,0.082,0.862,-0.227,390,499,109
2020-05-01,10.676471,-0.687,0.492,1.346,1.303,286,418,132


In [7]:
# Pair every spirometry reading with the weather observed in the same (Date, Hour),
# keep only complete rows, and flatten the index back into ordinary columns.
# NOTE(review): an hour with several weather observations repeats the same spirometry
# reading once per observation (see the two 16:00 rows in the output below).
weather_spiro = (
    weather
    .join(raw_spiro, how='inner')
    .dropna()
    .reset_index()
)
# Persist the merged table for reuse outside this notebook.
weather_spiro.to_csv('weather_spiro.csv', index=False)
weather_spiro.head()

Unnamed: 0,Date,Hour,barometricPressure,precipitationLastHour,relativeHumidity,temperature,time,PEF(l/min),FEV1(l),unixDate,unixTime
0,2020-03-19,16,102410.0,0.0,82.744137,10.0,2020-03-19 16:51:00,505,3.14,18340.0,57960.0
1,2020-03-19,16,102370.0,0.0,89.741574,9.4,2020-03-19 16:20:00,505,3.14,18340.0,57960.0
2,2020-03-19,18,102340.0,0.0,73.905076,11.7,2020-03-19 18:51:00,444,3.04,18340.0,65940.0
3,2020-03-19,20,102340.0,0.0,79.496095,10.6,2020-03-19 20:51:00,496,3.25,18340.0,74220.0
4,2020-03-19,23,102440.0,0.0,92.815182,8.9,2020-03-19 23:51:00,299,1.97,18340.0,83940.0


In [8]:
# Ordinary least squares: peak expiratory flow regressed on the weather readings
# plus the date and time-of-day of each spirometry reading.
predictor_columns = [
    'barometricPressure',
    'precipitationLastHour',
    'relativeHumidity',
    'temperature',
    'unixDate',
    'unixTime',
]
Y = weather_spiro['PEF(l/min)']
# add_constant supplies the intercept column reported as `const` in the summary.
X = sm.add_constant(weather_spiro[predictor_columns])

mod = sm.OLS(Y, X)
fit = mod.fit()
summary = fit.summary()
summary

0,1,2,3
Dep. Variable:,PEF(l/min),R-squared:,0.087
Model:,OLS,Adj. R-squared:,0.06
Method:,Least Squares,F-statistic:,3.224
Date:,"Sat, 02 May 2020",Prob (F-statistic):,0.00479
Time:,14:27:39,Log-Likelihood:,-1213.6
No. Observations:,209,AIC:,2441.0
Df Residuals:,202,BIC:,2465.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-2.779e+04,9999.169,-2.779,0.006,-4.75e+04,-8074.541
barometricPressure,0.0093,0.007,1.388,0.167,-0.004,0.023
precipitationLastHour,5490.1911,6003.903,0.914,0.362,-6348.169,1.73e+04
relativeHumidity,-0.0184,0.271,-0.068,0.946,-0.552,0.515
temperature,1.8697,1.827,1.024,0.307,-1.732,5.471
unixDate,1.4835,0.531,2.794,0.006,0.436,2.530
unixTime,0.0006,0.000,2.376,0.018,0.000,0.001

0,1,2,3
Omnibus:,1.26,Durbin-Watson:,1.622
Prob(Omnibus):,0.533,Jarque-Bera (JB):,1.34
Skew:,-0.14,Prob(JB):,0.512
Kurtosis:,2.725,Cond. No.,205000000.0
