In [1]:
import numpy as np
import pandas as pd
from datetime import datetime, date, time, timedelta
import matplotlib.pyplot as plt
from scipy import stats,optimize
from sklearn import linear_model
import statsmodels.api as sm
from sklearn.metrics import r2_score,mean_squared_error
import math
from noaa_sdk import noaa



In [2]:
raw_spiro = pd.read_csv('raw_spiro.csv',parse_dates=['Date'])
raw_spiro.set_index(['Date','Hour'],inplace=True)
raw_spiro = raw_spiro.drop(['Time'], axis=1)
raw_spiro.head()
#old data, new index

Unnamed: 0_level_0,Unnamed: 1_level_0,PEF(l/min),FEV1(l),unixDate,unixTime
Date,Hour,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-02-21,10,219,1.72,18313.0,36720.0
2020-02-21,11,253,1.78,18313.0,42300.0
2020-02-22,3,388,2.54,18314.0,12660.0
2020-02-22,0,415,2.89,18314.0,480.0
2020-02-23,19,476,2.78,18315.0,70320.0


In [4]:
def get_weather_data(zip_code,country,factors,
                     start=raw_spiro.index.get_level_values("Date").min().strftime('%Y-%m-%d'),
                     end=raw_spiro.index.get_level_values("Date").max().strftime('%Y-%m-%d')):
    n = noaa.NOAA()
    observations = n.get_observations(zip_code,country,start=start,end=end)
    df = pd.DataFrame()
    for observation in observations:
        observation_data = {k:v for (k,v) in observation.items() if k in factors}
        observation_values = {k:v['value'] for (k,v) in observation_data.items()}
        observation_values['time'] = pd.to_datetime(observation['timestamp'])#separate into date and hour
        df = df.append(observation_values, ignore_index=True)
    return df

In [5]:
factors = ['barometricPressure','precipitationLastHour','temperature','relativeHumidity']
weather = get_weather_data('11432','US',factors)
weather['precipitationLastHour'].fillna(0, inplace=True)
weather['Date'] = [d.date() for d in weather['time']]
weather['Hour'] = [d.hour for d in weather['time']]
weather.set_index(['Date','Hour'],inplace=True)
weather.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,barometricPressure,precipitationLastHour,relativeHumidity,temperature,time
Date,Hour,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-04-13,23,100410.0,0.0,80.944625,17.2,2020-04-13 23:51:00
2020-04-13,22,100170.0,0.0003,100.0,15.0,2020-04-13 22:51:00
2020-04-13,22,100070.0,0.0003,100.0,13.9,2020-04-13 22:20:00
2020-04-13,21,99970.0,0.0,100.0,13.3,2020-04-13 21:51:00
2020-04-13,20,99830.0,0.0,100.0,13.3,2020-04-13 20:51:00


In [19]:
weather_copy = weather.copy()
weather_copy = weather_copy.reset_index()
weather_copy = weather_copy.groupby('Date').mean()

inhaler_days = pd.read_csv('inhaler_days.csv')
inhaler_days = inhaler_days.rename(columns={'Unnamed: 0':'Date'})
inhaler_days = inhaler_days.set_index('Date')
inhaler_days = weather_copy.join(inhaler_days,how='right').dropna()
inhaler_days = inhaler_days.drop_duplicates()
inhaler_days.head()

Unnamed: 0_level_0,Hour,barometricPressure,precipitationLastHour,relativeHumidity,temperature,first_reading,second_reading,difference
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-03-02,11.5,101741.25,0.0,71.39447,5.920833,218,416,198
2020-03-03,13.310345,100566.206897,0.000203,90.363344,8.727586,248,301,53
2020-03-04,9.451613,100336.451613,0.000235,67.905636,9.86129,218,274,56
2020-03-05,11.2,101718.0,0.0,54.062605,7.58,317,439,122
2020-03-07,11.590909,101884.090909,1.4e-05,54.477525,4.636364,280,441,161


In [20]:
inhaler_days['barometricPressure'] = list(map(lambda x: round(x,3),zscore(inhaler_days['barometricPressure'])))
inhaler_days['precipitationLastHour'] = list(map(lambda x: round(x,3),zscore(inhaler_days['precipitationLastHour'])))
inhaler_days['relativeHumidity'] = list(map(lambda x: round(x,3),zscore(inhaler_days['relativeHumidity'])))
inhaler_days['temperature'] = list(map(lambda x: round(x,3),zscore(inhaler_days['temperature'])))

print("barometricPressure mean :" ,inhaler_days['barometricPressure'].mean())
print("precipitationLastHour mean :" ,inhaler_days['precipitationLastHour'].mean())
print("relativeHumidity mean :" ,inhaler_days['relativeHumidity'].mean())
print("temperature mean :" ,inhaler_days['temperature'].mean())

inhaler_days

NameError: name 'zscore' is not defined

Unnamed: 0_level_0,Unnamed: 1_level_0,barometricPressure,precipitationLastHour,relativeHumidity,temperature,time,first_reading,second_reading,difference
Date,Hour,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-04-07,23,100780.0,0.0,76.720924,10.0,2020-04-07 23:51:00,398,532,134
2020-04-07,22,100780.0,0.0,56.605629,11.1,2020-04-07 22:51:00,398,532,134
2020-04-07,21,100850.0,0.0,43.240314,12.8,2020-04-07 21:51:00,398,532,134
2020-04-07,20,100920.0,0.0,31.704834,15.0,2020-04-07 20:51:00,398,532,134
2020-04-07,19,101020.0,0.0,39.037995,15.6,2020-04-07 19:51:00,398,532,134


In [10]:
weather_spiro = weather.join(raw_spiro,how='inner').dropna()
weather_spiro = weather_spiro.reset_index()
weather_spiro.to_csv('weather_spiro.csv',index=False)
weather_spiro.head()

Unnamed: 0,Date,Hour,barometricPressure,precipitationLastHour,relativeHumidity,temperature,time,PEF(l/min),FEV1(l),unixDate,unixTime
0,2020-02-29,19,101120.0,0.0,37.878208,1.1,2020-02-29 19:51:00,469,2.79,18321.0,71940.0
1,2020-02-29,21,101290.0,0.0,36.0473,0.0,2020-02-29 21:51:00,511,3.42,18321.0,78900.0
2,2020-03-01,22,102000.0,0.0,30.487836,6.1,2020-03-01 22:51:00,482,3.15,18322.0,79860.0
3,2020-03-02,1,102200.0,0.0,49.955326,3.9,2020-03-02 01:51:00,384,2.81,18323.0,7140.0
4,2020-03-02,4,102200.0,0.0,69.291521,1.1,2020-03-02 04:51:00,218,1.94,18323.0,17580.0


In [7]:
Y = weather_spiro['PEF(l/min)']
X = weather_spiro[['barometricPressure','precipitationLastHour','relativeHumidity','temperature',
                   'unixDate','unixTime']]
X = sm.add_constant(X)

mod = sm.OLS(Y,X)
fit = mod.fit()
summary = fit.summary()
summary

0,1,2,3
Dep. Variable:,PEF(l/min),R-squared:,0.185
Model:,OLS,Adj. R-squared:,0.168
Method:,Least Squares,F-statistic:,10.73
Date:,"Tue, 14 Apr 2020",Prob (F-statistic):,9.43e-11
Time:,09:13:54,Log-Likelihood:,-1686.5
No. Observations:,290,AIC:,3387.0
Df Residuals:,283,BIC:,3413.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-5.734e+04,8475.134,-6.766,0.000,-7.4e+04,-4.07e+04
barometricPressure,0.0112,0.005,2.048,0.041,0.000,0.022
precipitationLastHour,7659.7635,8493.950,0.902,0.368,-9059.573,2.44e+04
relativeHumidity,0.0432,0.235,0.184,0.854,-0.419,0.505
temperature,1.9631,1.517,1.294,0.197,-1.023,4.949
unixDate,3.0843,0.451,6.846,0.000,2.198,3.971
unixTime,0.0002,0.000,1.047,0.296,-0.000,0.001

0,1,2,3
Omnibus:,2.972,Durbin-Watson:,1.579
Prob(Omnibus):,0.226,Jarque-Bera (JB):,2.262
Skew:,0.052,Prob(JB):,0.323
Kurtosis:,2.58,Cond. No.,207000000.0
