# Weather Data

In [1]:
#!pip install meteostat
#!pip install pyairnow

Data Structure
Each hour is represented by a Pandas DataFrame row which provides the weather data recorded at that time. These are the different columns:

Column	Description	Type
station	The Meteostat ID of the weather station (only if query refers to multiple stations)	String
time	The datetime of the observation	Datetime64
temp	The air temperature in °C	Float64
dwpt	The dew point in °C	Float64
rhum	The relative humidity in percent (%)	Float64
prcp	The one hour precipitation total in mm	Float64
snow	The snow depth in mm	Float64
wdir	The average wind direction in degrees (°)	Float64
wspd	The average wind speed in km/h	Float64
wpgt	The peak wind gust in km/h	Float64
pres	The average sea-level air pressure in hPa	Float64
tsun	The one hour sunshine total in minutes (m)	Float64
coco	The weather condition code	Float64


In [45]:
# Import Meteostat library and dependencies
from datetime import datetime
import matplotlib.pyplot as plt
from meteostat import Point, Daily, Stations, Hourly

# Get weather stations ordered by distance to Austin, TX 
#(30.4895484046493" N, 97.7249632541566" W) || Latitude of Austin (TX) 30.267153. Longitude of Austin (TX) -97.7430608.
Aus_lat = 30.267153
Aus_long = -97.7430608

# Set time period
start = datetime(2005, 1, 1)
end = datetime(2021, 12, 31, 23, 59)

#Fetch closest station (limit = 1)
stations = Stations()
stations = stations.nearby(Aus_lat, Aus_long)
stations = stations.inventory('daily', (start, end))
location = stations.fetch(1)

# Create Point for Austin, TX
#location = Point(Aus_lat, Aus_long, 70)

# Get daily data for 2005 to 2021
data = Daily(location, start, end)
#data_h = Hourly(location, start, end) 

# Plot line chart including average, minimum and maximum temperature
#data.plot(y=['tavg', 'tmin', 'tmax'])
#plt.show()

The normalize method makes sure that gaps in the time series are filled. necessary before interpolation

Aggregate Functions
Meteostat uses the following aggregate functions:

temp => mean
dwpt => mean
rhum => mean
tavg => mean
tmin => min
tmax => max
prcp => sum
snow => max
wdir => meteostat.utilities.aggregations.degree_mean
wspd => mean
wpgt => max
pres => mean
tsun => sum
coco => max

In [46]:
data = data.normalize()
data = data.interpolate()
data_w = data.aggregate('1W') # Weekly aggregation according to above function
data_w = data_w.fetch()

data = data.aggregate('1D')
data = data.fetch()

data.head()

Unnamed: 0_level_0,tavg,tmin,tmax,prcp,snow,wdir,wspd,wpgt,pres,tsun
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2005-01-01,21.1,17.8,23.9,0.0,0.0,,11.5,,1019.3,0.0
2005-01-02,20.6,18.3,22.2,8.1,0.0,,11.2,,1019.7,0.0
2005-01-03,22.8,20.0,25.0,1.8,0.0,,13.3,,1020.2,0.0
2005-01-04,22.8,19.4,25.6,0.5,0.0,,15.1,,1020.6,0.0
2005-01-05,12.8,3.9,21.1,2.5,0.0,,14.0,,1021.1,0.0


In [47]:
print(data.shape)
print(data_w.shape)

(6209, 10)
(888, 10)


In [48]:
data.to_csv("WeatherData_weekly.csv")
data_w.to_csv("WeatherData_weekly.csv")

In [49]:
data_w.describe()

Unnamed: 0,tavg,tmin,tmax,prcp,snow,wdir,wspd,wpgt,pres,tsun
count,888.0,888.0,888.0,888.0,888.0,251.0,888.0,0.0,888.0,888.0
mean,21.003941,11.477815,31.517568,17.089302,0.230856,158.891235,7.953604,,1016.44482,506.817568
std,7.251894,8.950844,5.850357,28.520209,5.199262,106.643329,2.0933,,4.613467,1190.330275
min,-0.1,-13.8,10.6,0.0,0.0,0.4,2.2,,1005.7,0.0
25%,15.55,3.9,27.2,0.0,0.0,65.95,6.5,,1013.175,0.0
50%,21.55,11.1,31.7,3.45,0.0,153.5,7.9,,1015.5,0.0
75%,27.4,20.6,36.1,23.725,0.0,209.3,9.3,,1019.4,0.0
max,33.3,25.6,44.4,201.6,150.0,359.1,16.2,,1033.7,5069.0


In [50]:
data.describe()

Unnamed: 0,tavg,tmin,tmax,prcp,snow,wdir,wspd,wpgt,pres,tsun
count,6209.0,6209.0,6209.0,6209.0,6209.0,1542.0,6209.0,0.0,6179.0,6209.0
mean,21.005637,15.45207,27.520551,2.444081,0.102271,163.520817,7.952311,,1016.457,72.484136
std,7.80713,8.063366,8.357006,9.461687,3.257131,109.044366,3.534772,,5.891307,189.689991
min,-9.2,-13.8,-3.8,0.0,0.0,0.0,0.4,,998.3,0.0
25%,15.4,8.9,22.2,0.0,0.0,73.25,5.4,,1012.5,0.0
50%,22.2,17.2,28.3,0.0,0.0,161.9,7.6,,1015.3,0.0
75%,27.6,22.8,34.4,0.0,0.0,211.0,10.4,,1019.7,0.0
max,35.5,28.9,44.4,178.8,150.0,359.0,25.9,,1045.3,841.0


# Pollution data

In [144]:
'''import requests 
from six.moves import urllib

APIkey = 'C0944E2A-D5D0-4231-ABAB-5F7530450F2A'

#&zipCode=78708&date=2022-03-18T00-0000&distance=50&API_KEY=C0944E2A-D5D0-4231-ABAB-5F7530450F2A
url = 'https://www.airnowapi.org/aq/observation/latLong/historical/?format=text/csv&latitude={}&longitude={}&date={}T00-0000&distance=50&API_KEY={}'.format(Aus_lat,Aus_long,start,APIkey)

Pdata = requests.get(url)
airdata = Pdata.json()
print(airdata)
'''


"import requests \nfrom six.moves import urllib\n\nAPIkey = 'C0944E2A-D5D0-4231-ABAB-5F7530450F2A'\n\n#&zipCode=78708&date=2022-03-18T00-0000&distance=50&API_KEY=C0944E2A-D5D0-4231-ABAB-5F7530450F2A\nurl = 'https://www.airnowapi.org/aq/observation/latLong/historical/?format=text/csv&latitude={}&longitude={}&date={}T00-0000&distance=50&API_KEY={}'.format(Aus_lat,Aus_long,start,APIkey)\n\nPdata = requests.get(url)\nairdata = Pdata.json()\nprint(airdata)\n"

In [57]:
import pandas as pd 

PMdata = pd.read_csv('./austin-texas-air-quality.csv')
PMdata.head()

Unnamed: 0,date,pm25,o3,no2,so2
0,2022/3/1,45,,,
1,2022/3/2,49,,,
2,2022/3/3,39,,,
3,2022/3/4,33,,,
4,2022/3/5,33,,,


In [59]:
PMdata.tail()

Unnamed: 0,time,pm25,o3,no2,so2
2695,2014-03-29,21.0,,,
2696,2014-03-30,29.0,,,
2697,2014-03-31,35.0,,,
2698,2016-10-29,,26.0,,
2699,2016-10-30,,14.0,,


In [58]:
PMdata['date'] = pd.to_datetime(PMdata["date"].astype(str), format='%Y-%m-%d')
PMdata.rename(columns = {'date':'time'}, inplace = True)
PMdata.set_index('time')
print(PMdata.shape)
PMdata.head()

(2700, 5)


Unnamed: 0,time,pm25,o3,no2,so2
0,2022-03-01,45,,,
1,2022-03-02,49,,,
2,2022-03-03,39,,,
3,2022-03-04,33,,,
4,2022-03-05,33,,,


In [145]:
Weather_merged_daily = pd.merge(data, PMdata, on=['time'])
Weather_merged_daily.to_csv('Weather_merged_daily.csv')
print(Weather_merged_daily.shape)
Weather_merged_daily.head()

(2624, 15)


Unnamed: 0,time,tavg,tmin,tmax,prcp,snow,wdir,wspd,wpgt,pres,tsun,pm25,o3,no2,so2
0,2014-01-01,10.9,3.9,20.6,0.0,0.0,,6.1,,1020.2,0.0,71,,,
1,2014-01-02,6.8,0.6,12.8,0.0,0.0,,12.2,,1024.2,0.0,17,,,
2,2014-01-03,5.6,-2.1,14.4,0.0,0.0,,6.8,,1028.2,0.0,26,,,
3,2014-01-04,13.8,5.6,21.7,0.0,0.0,,10.4,,1014.6,0.0,26,,,
4,2014-01-05,8.2,1.1,15.0,0.0,0.0,,15.5,,1026.6,0.0,10,,,


In [60]:
PMdata.describe()



Unnamed: 0,time,pm25,o3,no2,so2
count,2700,2700.0,2700.0,2700.0,2700.0
unique,2700,88.0,11.0,6.0,1.0
top,2022-03-01 00:00:00,33.0,,,
freq,1,81.0,2687.0,2694.0,2700.0
first,2014-01-01 00:00:00,,,,
last,2022-03-19 00:00:00,,,,


In [139]:
import datetime as dt

PMdata_w = PMdata.copy()

PMdata_w['WeekDate'] = PMdata_w.apply(lambda row: row['time'] - dt.timedelta(days=row['time'].weekday()+1),axis=1)
PMdata_w[' pm25']= pd.to_numeric(PMdata_w[' pm25'],errors = 'coerce',downcast="integer")
PMdata_w[' o3']= pd.to_numeric(PMdata_w[' o3'],errors = 'coerce',downcast="integer")
PMdata_w[' no2']= pd.to_numeric(PMdata_w[' no2'],errors = 'coerce',downcast="integer")
PMdata_w[' so2']= pd.to_numeric(PMdata_w[' so2'],errors = 'coerce',downcast="integer")
#print(PMdata_w.dtypes)
perweek = PMdata_w[' pm25'].groupby(PMdata_w['WeekDate']).mean()
perweek

WeekDate
2013-12-29    30.000000
2014-01-05    26.000000
2014-01-12    29.571429
2014-01-19    28.714286
2014-01-26    23.142857
                ...    
2022-02-13    30.857143
2022-02-20    34.857143
2022-02-27    40.428571
2022-03-06    32.857143
2022-03-13    34.500000
Name:  pm25, Length: 398, dtype: float64

In [141]:
#perweek.rename(columns = {'WeekDate':'time'}, inplace = True)
perweek_df = pd.DataFrame(perweek)
#perweek_df = perweek_df.rename(columns = {'WeekDate':'time'})
perweek_df.index.names = ['time']
perweek_df

Unnamed: 0_level_0,pm25
time,Unnamed: 1_level_1
2013-12-29,30.000000
2014-01-05,26.000000
2014-01-12,29.571429
2014-01-19,28.714286
2014-01-26,23.142857
...,...
2022-02-13,30.857143
2022-02-20,34.857143
2022-02-27,40.428571
2022-03-06,32.857143


In [142]:
Weather_merged_weekly = pd.merge(data_w, perweek_df, on=['time'])
print(Weather_merged_weekly.shape)
Weather_merged_weekly.head()

(388, 11)


Unnamed: 0_level_0,tavg,tmin,tmax,prcp,snow,wdir,wspd,wpgt,pres,tsun,pm25
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2013-12-29,8.9,-0.5,17.8,0.0,0.0,,4.7,,1026.9,0.0,30.0
2014-01-05,8.0,-2.1,21.7,0.0,0.0,,8.7,,1023.5,0.0,26.0
2014-01-12,10.5,-5.5,23.9,6.9,0.0,,6.6,,1027.3,0.0,29.571429
2014-01-19,13.0,2.8,24.4,0.0,0.0,,7.0,,1022.2,0.0,28.714286
2014-01-26,9.9,-3.2,27.8,2.8,0.0,,7.7,,1023.2,0.0,23.142857


In [143]:
Weather_merged_weekly.to_csv('Weather_merged_weekly.csv')