<a href="https://colab.research.google.com/github/ByronHsu/ds-100-final/blob/master/ds_final_data_gen.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import re
import os
import requests, zipfile, io
import warnings
warnings.filterwarnings('ignore')


# Colab-only: mount Google Drive so the project's data folder is reachable
# from this runtime.
from google.colab import drive
drive.mount('/content/drive')

# Root folder of the project data on the mounted drive. Later cells build
# paths by plain string concatenation onto this value, so the trailing slash
# must stay.
project_path = '/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/'

Mounted at /content/drive


In [None]:
# Load the California wildfire records and preview the first rows.
wf = pd.read_csv(project_path + 'data_extended/California_Wildfire.csv')
display(wf.head())

Unnamed: 0,OBJECTID,YEAR_,STATE,AGENCY,UNIT_ID,FIRE_NAME,INC_NUM,ALARM_DATE,CONT_DATE,CAUSE,COMMENTS,REPORT_AC,GIS_ACRES,C_METHOD,OBJECTIVE,FIRE_NUM,SHAPE_Length,SHAPE_Area
0,21440,2020.0,CA,CDF,NEU,NELSON,13212,2020/06/18 00:00:00+00,2020/06/23 00:00:00+00,11.0,,110.0,109.6025,1.0,1,,4179.743142,-733134.7
1,21441,2020.0,CA,CDF,NEU,AMORUSO,11799,2020/06/01 00:00:00+00,2020/06/04 00:00:00+00,2.0,,670.0,685.58502,1.0,1,,12399.375391,-4578172.0
2,21442,2020.0,CA,CDF,NEU,ATHENS,18493,2020/08/10 00:00:00+00,2020/03/01 00:00:00+00,14.0,,26.0,27.30048,1.0,1,,2119.19412,-182387.6
3,21443,2020.0,CA,CDF,NEU,FLEMING,7619,2020/03/31 00:00:00+00,2020/04/01 00:00:00+00,9.0,,13.0,12.93155,1.0,1,,2029.524881,-86679.42
4,21444,2020.0,CA,CDF,NEU,MELANESE,8471,2020/04/14 00:00:00+00,2020/04/19 00:00:00+00,18.0,,10.3,10.31596,1.0,1,,1342.742903,-70179.12


In [None]:
"""
Added cell.
This cell has two purposes:
1. Merge the 2020 data with the data of the previous N (4) years, i.e. 2016-2020.
2. Add external datasets: barometric pressure, RH and dewpoint, and PM2.5 (for the same 5 years).
"""

def lambda_county_state(x):
    """Build the ``"<state>_<county>"`` key for one row.

    ``x`` is any mapping-like row exposing ``'State Code'`` and
    ``'County Code'``. The state code is rendered as-is; the county code is
    converted to ``int`` and zero-padded to at least three digits.
    """
    state_code = x['State Code']
    county_code = int(x['County Code'])
    return "{}_{:03d}".format(state_code, county_code)

# features: "ozone", "so2", "co", "no2", "temp", "wind"
# features: "ozone", "so2", "co", "no2", "temp", "wind"
def add_features(df, df2, features):
    """Left-join one daily-mean column per feature onto ``df``.

    Parameters
    ----------
    df : DataFrame
        Base frame; must contain ``'state_county'`` and ``'Date'`` columns.
        It is not modified (a copy is returned).
    df2 : mapping of str -> DataFrame
        Raw frames keyed by ``f'daily_{feature}'``. Each must contain the
        columns ``'State Code'``, ``'County Code'``, ``'Date Local'`` and
        ``'Arithmetic Mean'``.
    features : iterable of str
        Feature names; each becomes a new column of the same name.

    Returns
    -------
    DataFrame
        Copy of ``df`` with one extra column per feature. Rows of ``df``
        without a matching (state_county, Date) reading get NaN.

    Raises
    ------
    KeyError
        If ``df2`` has no entry for ``f'daily_{feature}'``.
    """
    df = df.copy()
    for feature in features:
        print(feature)
        readings = df2[f'daily_{feature}'] \
                .rename(columns={'Date Local': 'Date', 'Arithmetic Mean': feature})
        # Explicit copy: the column assignment below must not write into a
        # view of the caller's raw frame.
        readings = readings[['State Code', 'County Code', 'Date', feature]].copy()
        # Vectorised form of the "<state>_<county:03d>" key used elsewhere
        # in this notebook (avoids a Python-level call per row).
        readings['state_county'] = (
            readings['State Code'].astype(str)
            + '_'
            + readings['County Code'].astype(int).astype(str).str.zfill(3)
        )
        # `columns=` instead of a positional axis: pandas 2.0 made `axis`
        # keyword-only for DataFrame.drop.
        readings = readings.drop(columns=['State Code', 'County Code'])
        # Several rows can share the same county and day; collapse them to a
        # single mean so the left merge cannot duplicate rows of `df`.
        readings = readings.groupby(['state_county', 'Date'], as_index=False).mean()
        df = df.merge(right=readings, on=['state_county', 'Date'], how="left")
    return df

epa_weburl = "https://aqs.epa.gov/aqsweb/airdata/"
raw_df = pd.read_csv(project_path + "data_extended/epa_filenames_ext.csv")

# display(raw_df.head())
from collections import defaultdict

# 2-fold dict: year - name - (epa_filename, feature_name)
year_dict = defaultdict(lambda :defaultdict(tuple))
for name, filename, feature, year in zip(raw_df["name"], raw_df["epa_filename"], raw_df["feature_name"], raw_df["year"]):
    year_dict[year][name] = (filename, feature)

# NOTE(review): the paths printed by the loop below pair the `daily_temp`
# folder with a `daily_WIND_<year>` file and `daily_wind` with
# `daily_TEMP_<year>`, so the resulting `temp` / `wind` columns look swapped.
# That pairing comes from epa_filenames_ext.csv; verify and correct it there.

# print(year_dict[2020], year_dict[2019])
aqi_list = []

for year in range(2016, 2020+1):
    data_in_year = {}   # dataset name -> raw frame for this year
    all_feature = []    # feature columns to join onto the AQI frame

    for name, (filename, feature) in year_dict[year].items():
        path_name = project_path + 'data_extended/{}'.format(name)
        # Single source of truth for the extracted file's location, used by
        # both the cache check and the read.
        csv_path = f"{path_name}/{filename}.csv"
        print(csv_path)
        # Download only when the extracted CSV is missing. The check must
        # include the ".csv" suffix, otherwise it never matches and every
        # archive is fetched again on each run.
        if not os.path.isfile(csv_path):
            data_url = '{}{}.zip'.format(epa_weburl, filename)
            req = requests.get(data_url, timeout=60)
            # Fail here on an HTTP error instead of later with a confusing
            # BadZipFile on an error page.
            req.raise_for_status()
            with zipfile.ZipFile(io.BytesIO(req.content)) as z:
                z.extractall(path_name)
        data = pd.read_csv(csv_path)
        data_in_year[name] = data
        if feature != "aqi": all_feature.append(feature)

    aqi = data_in_year['daily_county_aqi']
    # Copy the column subset so the assignments below do not write into a
    # view of the raw frame.
    aqi = aqi[['County Code', 'State Code', 'Date', 'Category', 'AQI']].copy()
    aqi['Category'] = aqi['Category'].str.lower()
    aqi['state_county'] = aqi.apply(lambda_county_state, axis=1)
    # `columns=` instead of a positional axis (keyword-only since pandas 2.0).
    aqi = aqi.drop(columns=['County Code', 'State Code'])

    aqi = add_features(aqi, data_in_year, all_feature)
    display(aqi)
    aqi_list.append(aqi)

# Stack the per-year frames into one frame with a fresh 0..n-1 index.
final = pd.concat(aqi_list, ignore_index = True)
final.head()

/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_county_aqi/daily_aqi_by_county_2016
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_ozone/daily_44201_2016
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_so2/daily_42401_2016
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_co/daily_42101_2016
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_no2/daily_42602_2016
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_temp/daily_WIND_2016
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_wind/daily_TEMP_2016
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_pm2.5/daily_88101_2016
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_barometric_pressure/daily_PRESS_2016
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_ex

Unnamed: 0,Date,Category,AQI,state_county,ozone,so2,co,no2,temp,wind,pm2.5,barometric_pressure,rh_and_dewpoint
0,2016-01-01,good,33,1_003,,,,,,,7.9,,
1,2016-01-04,good,22,1_003,,,,,,,5.3,,
2,2016-01-07,good,28,1_003,,,,,,,6.7,,
3,2016-01-10,good,13,1_003,,,,,,,3.0,,
4,2016-01-13,good,36,1_003,,,,,,,8.7,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
330469,2016-12-27,good,43,56_045,0.045647,0.062500,,,,21.562500,,858.208333,62.979166
330470,2016-12-28,good,42,56_045,0.041000,0.168750,,,,23.541666,,860.087500,57.833333
330471,2016-12-29,good,39,56_045,0.041235,-0.060417,,,,22.750000,,867.689583,43.645833
330472,2016-12-30,good,40,56_045,0.040941,0.293750,,,,31.208333,,853.760417,56.854166


/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_county_aqi/daily_aqi_by_county_2017
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_ozone/daily_44201_2017
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_so2/daily_42401_2017
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_co/daily_42101_2017
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_no2/daily_42602_2017
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_temp/daily_WIND_2017
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_wind/daily_TEMP_2017
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_pm2.5/daily_88101_2017
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_barometric_pressure/daily_PRESS_2017
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_ex

Unnamed: 0,Date,Category,AQI,state_county,ozone,so2,co,no2,temp,wind,pm2.5,barometric_pressure,rh_and_dewpoint
0,2017-01-01,good,21,1_003,,,,,,,5.0,,
1,2017-01-04,good,22,1_003,,,,,,,5.3,,
2,2017-01-10,good,19,1_003,,,,,,,4.5,,
3,2017-01-13,good,30,1_003,,,,,,,7.2,,
4,2017-01-16,good,16,1_003,,,,,,,3.9,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
341068,2017-12-27,good,39,56_045,0.039412,0.381250,,,,13.958334,,861.381250,69.479167
341069,2017-12-28,good,36,56_045,0.032765,0.100000,,,,21.500000,,862.833333,83.354166
341070,2017-12-29,good,31,56_045,0.028059,0.114584,,,,10.583334,,861.487500,83.270833
341071,2017-12-30,good,31,56_045,0.031882,0.089584,,,,-0.875000,,863.004166,84.666666


/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_county_aqi/daily_aqi_by_county_2018
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_ozone/daily_44201_2018
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_so2/daily_42401_2018
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_co/daily_42101_2018
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_no2/daily_42602_2018
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_temp/daily_WIND_2018
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_wind/daily_TEMP_2018
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_pm2.5/daily_88101_2018
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_barometric_pressure/daily_PRESS_2018
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_ex

Unnamed: 0,Date,Category,AQI,state_county,ozone,so2,co,no2,temp,wind,pm2.5,barometric_pressure,rh_and_dewpoint
0,2018-01-02,good,32,1_003,,,,,,,7.6,,
1,2018-01-05,good,34,1_003,,,,,,,8.1,,
2,2018-01-08,good,15,1_003,,,,,,,3.6,,
3,2018-01-11,good,19,1_003,,,,,,,4.5,,
4,2018-01-14,good,25,1_003,,,,,,,6.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
343410,2018-12-27,good,36,56_045,0.036353,1.075000,,,,15.062500,,856.781250,74.104166
343411,2018-12-28,good,35,56_045,0.034706,0.970834,,,,7.437500,,863.045833,74.812500
343412,2018-12-29,good,35,56_045,0.036471,1.195833,,,,16.500000,,856.862500,65.708333
343413,2018-12-30,good,31,56_045,0.030000,1.458333,,,,27.579710,,848.481703,84.750906


/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_county_aqi/daily_aqi_by_county_2019
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_ozone/daily_44201_2019
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_so2/daily_42401_2019
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_co/daily_42101_2019
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_no2/daily_42602_2019
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_temp/daily_WIND_2019
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_wind/daily_TEMP_2019
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_pm2.5/daily_88101_2019
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_barometric_pressure/daily_PRESS_2019
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_ex

Unnamed: 0,Date,Category,AQI,state_county,ozone,so2,co,no2,temp,wind,pm2.5,barometric_pressure,rh_and_dewpoint
0,2019-01-03,good,18,1_003,,,,,,,4.3,,
1,2019-01-06,good,35,1_003,,,,,,,8.5,,
2,2019-01-09,good,14,1_003,,,,,,,3.3,,
3,2019-01-12,good,36,1_003,,,,,,,8.6,,
4,2019-01-15,good,38,1_003,,,,,,,9.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
341935,2019-12-27,good,36,56_045,0.032235,0.152083,,,,26.854167,,857.300000,73.791666
341936,2019-12-28,good,37,56_045,0.036882,0.212500,,,,25.229167,,854.533334,68.354167
341937,2019-12-29,good,34,56_045,0.036412,0.195833,,,,22.083334,,855.735417,72.916666
341938,2019-12-30,good,36,56_045,0.038647,0.031250,,,,20.875000,,860.814584,62.083333


/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_county_aqi/daily_aqi_by_county_2020
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_ozone/daily_44201_2020
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_so2/daily_42401_2020
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_co/daily_42101_2020
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_no2/daily_42602_2020
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_temp/daily_WIND_2020
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_wind/daily_TEMP_2020
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_pm2.5/daily_88101_2020
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_extended/daily_barometric_pressure/daily_PRESS_2020
/content/drive/MyDrive/Data_C200_Final_Project_Part_2/data/data_ex

Unnamed: 0,Date,Category,AQI,state_county,ozone,so2,co,no2,temp,wind,pm2.5,barometric_pressure,rh_and_dewpoint
0,2020-01-01,good,48,1_003,,,,,,,11.6,,
1,2020-01-04,good,13,1_003,,,,,,,3.2,,
2,2020-01-07,good,14,1_003,,,,,,,3.3,,
3,2020-01-10,good,39,1_003,,,,,,,9.3,,
4,2020-01-13,good,29,1_003,,,,,,,6.9,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
338190,2020-12-27,good,32,56_045,0.033176,1.743750,,,,29.791667,,858.191667,81.625000
338191,2020-12-28,good,30,56_045,0.030471,1.543750,,,,25.666666,,863.412500,81.250000
338192,2020-12-29,good,33,56_045,0.033765,1.787500,,,,22.583333,,857.008333,85.020833
338193,2020-12-30,good,33,56_045,0.034588,1.887500,,,,16.937500,,859.272917,74.416666


Unnamed: 0,Date,Category,AQI,state_county,ozone,so2,co,no2,temp,wind,pm2.5,barometric_pressure,rh_and_dewpoint
0,2016-01-01,good,33,1_003,,,,,,,7.9,,
1,2016-01-04,good,22,1_003,,,,,,,5.3,,
2,2016-01-07,good,28,1_003,,,,,,,6.7,,
3,2016-01-10,good,13,1_003,,,,,,,3.0,,
4,2016-01-13,good,36,1_003,,,,,,,8.7,,


In [None]:
def add_gas(df):
  """Attach a yearly per-county ``gas`` column to ``df``.

  Reads ``GasByCounty.csv`` (one row per county, one column per year
  2016-2020) and ``CA_county_code_updated.csv`` (county name -> county
  number) from ``project_path + "data_extended/"``. County names are turned
  into ``"6_<number:03d>"`` keys and joined on ``state_county`` plus the
  leading digits of ``Date``.

  Parameters
  ----------
  df : DataFrame
      Must contain a ``state_county`` column and a string ``Date`` column
      that starts with the year. Not modified.

  Returns
  -------
  DataFrame
      Copy of ``df`` with an extra ``gas`` column; rows without a matching
      county/year get NaN.
  """
  df = df.copy()
  gas_df = pd.read_csv(project_path + "data_extended/GasByCounty.csv")
  gas_df["County"] = gas_df["County"].str.strip().str.lower()
  # Wide (one column per year) -> long (County, Year, value).
  gas_df = gas_df.melt(id_vars=["County"], value_vars=["2020", "2019", "2018", "2017", "2016"], var_name="Year")
  county_code_df = pd.read_csv(project_path + "data_extended/CA_county_code_updated.csv")
  # map county_name to county_number; int() guards against the number column
  # having been parsed as float, which the "03d" format would reject.
  county_map = {
    name.strip().lower(): f"6_{int(num):03d}"
    for name, num in zip(county_code_df["county_name"], county_code_df["county_number"])
  }
  # map() leaves unknown counties as NaN (replace() would keep the raw name
  # in the key column); those rows can never match and are dropped.
  gas_df["state_county"] = gas_df["County"].map(county_map)
  gas_df = (
    gas_df.dropna(subset=["state_county"])
          .rename(columns={"value": "gas"})
          .drop(columns="County")
  )
  # Raw string for the regex; expand=False returns a Series, not a 1-column frame.
  df["Year"] = df["Date"].str.extract(r"^(\d+)", expand=False)
  df = df.merge(right=gas_df, on=["state_county", "Year"], how="left").drop(columns="Year")
  return df

# Attach the per-county gas values to the combined frame and preview the result.
final_gas = add_gas(final)
display(final_gas.head())

Unnamed: 0,Date,Category,AQI,state_county,ozone,so2,co,no2,temp,wind,pm2.5,barometric_pressure,rh_and_dewpoint,gas
0,2016-01-01,good,33,1_003,,,,,,,7.9,,,
1,2016-01-04,good,22,1_003,,,,,,,5.3,,,
2,2016-01-07,good,28,1_003,,,,,,,6.7,,,
3,2016-01-10,good,13,1_003,,,,,,,3.0,,,
4,2016-01-13,good,36,1_003,,,,,,,8.7,,,


In [None]:
def add_climate_type(df):
  """Attach a ``climate`` label to every California row of ``df``.

  County names are translated to ``"6_<number:03d>"`` keys using
  ``CA_county_code_updated.csv`` (read from
  ``project_path + "data_extended/"``) and joined on ``state_county``.

  Parameters
  ----------
  df : DataFrame
      Must contain a ``state_county`` column. Not modified.

  Returns
  -------
  DataFrame
      Copy of ``df`` with an extra ``climate`` column; rows whose
      ``state_county`` is not one of the counties listed below get NaN.

  Raises
  ------
  KeyError
      If a county listed below is missing from the county-code file.
  """
  df = df.copy()
  county_code_df = pd.read_csv(project_path + "data_extended/CA_county_code_updated.csv")
  # map county_name to county_number; int() guards against the number column
  # having been parsed as float, which the "03d" format would reject.
  county_map = {
    name.strip().lower(): f"6_{int(num):03d}"
    for name, num in zip(county_code_df["county_name"], county_code_df["county_number"])
  }

  # County name -> climate label.
  county_to_ctype = {'Alameda': 'mediterranean', 'Alpine': 'continental', 'Amador': 'mediterranean', 'Butte': 'mediterranean', 'Calaveras': 'mediterranean',
    'Colusa': 'mediterranean', 'Contra Costa':'mediterranean', 'Del Norte': 'mediterranean', 'El Dorado': 'mediterranean', 'Fresno': 'semi-arid',
    'Glenn': 'mediterranean', 'Humboldt':'mediterranean', 'Imperial': 'arid', 'Inyo': 'semi-arid', 'Kern': 'semi-arid', 'Kings': 'semi-arid', 'Lake': 'mediterranean',
    'Lassen': 'continental', 'Los Angeles': 'mediterranean', 'Madera': 'semi-arid', 'Marin': 'mediterranean', 'Mariposa': 'mediterranean', 'Mendocino': 'mediterranean',
    'Merced': 'semi-arid', 'Modoc': 'continental', 'Mono': 'continental', 'Monterey': 'mediterranean', 'Napa': 'mediterranean', 'Nevada': 'mediterranean', 'Orange': 'semi-arid',
    'Placer': 'mediterranean', 'Plumas': 'continental', 'Riverside': 'arid', 'Sacramento': 'mediterranean', 'San Benito': 'semi-arid', 'San Bernardino': 'arid', 'San Diego': 'mediterranean',
    'San Francisco': 'mediterranean', 'San Joaquin': 'mediterranean', 'San Luis Obispo': 'mediterranean', 'San Mateo': 'mediterranean', 'Santa Barbara': 'mediterranean', 'Santa Clara': 'mediterranean',
    'Santa Cruz': 'mediterranean', 'Shasta': 'mediterranean', 'Sierra': 'continental', 'Siskiyou': 'continental', 'Solano': 'mediterranean', 'Sonoma': 'mediterranean', 'Stanislaus': 'semi-arid',
    'Sutter': 'mediterranean', 'Tehama': 'mediterranean', 'Trinity': 'mediterranean', 'Tulare': 'semi-arid', 'Tuolumne': 'mediterranean', 'Ventura': 'mediterranean', 'Yolo': 'mediterranean', 'Yuba': 'mediterranean'}

  # Two-column lookup table (state_county key, climate label) for the merge.
  climate_df = pd.DataFrame({
    'state_county': [county_map[county.lower()] for county in county_to_ctype],
    'climate': list(county_to_ctype.values()),
  })

  df = df.merge(right=climate_df, on=['state_county'], how='left')
  return df

# Add the climate label on top of the gas-augmented frame.
final_gas_climate = add_climate_type(final_gas)

{'alameda': '6_001',
 'alpine': '6_003',
 'amador': '6_005',
 'butte': '6_007',
 'calaveras': '6_009',
 'colusa': '6_011',
 'contra costa': '6_013',
 'del norte': '6_015',
 'el dorado': '6_017',
 'fresno': '6_019',
 'glenn': '6_021',
 'humboldt': '6_023',
 'imperial': '6_025',
 'inyo': '6_027',
 'kern': '6_029',
 'kings': '6_031',
 'lake': '6_033',
 'lassen': '6_035',
 'los angeles': '6_037',
 'madera': '6_039',
 'marin': '6_041',
 'mariposa': '6_043',
 'mendocino': '6_045',
 'merced': '6_047',
 'modoc': '6_049',
 'mono': '6_051',
 'monterey': '6_053',
 'napa': '6_055',
 'nevada': '6_057',
 'orange': '6_059',
 'placer': '6_061',
 'plumas': '6_063',
 'riverside': '6_065',
 'sacramento': '6_067',
 'san benito': '6_069',
 'san bernardino': '6_071',
 'san diego': '6_073',
 'san francisco': '6_075',
 'san joaquin': '6_077',
 'san luis obispo': '6_079',
 'san mateo': '6_081',
 'santa barbara': '6_083',
 'santa clara': '6_085',
 'santa cruz': '6_087',
 'shasta': '6_089',
 'sierra': '6_091',
 

In [None]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import Normalizer

# Mean imputer: NaN entries are replaced by the mean of their column, as
# computed when the imputer is fitted.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
# NOTE(review): sklearn's Normalizer rescales each *row* (sample) to unit
# norm; it does not scale each feature column independently. Confirm that
# row-wise scaling is really what "normalize" is meant to be here.
normalizer = Normalizer()
    
def fillna_normalize(df, cols):
    """Mean-impute and standardise the given feature columns.

    For every column in ``cols``, missing values are replaced by that
    column's mean, then the column is rescaled to zero mean and unit
    (population) standard deviation.

    Scaling is done per column. Rescaling each row to unit norm instead would
    make every feature value depend on the other features in the same row,
    and the largest-magnitude column would dominate the result.

    Parameters
    ----------
    df : DataFrame
        Input frame. Not modified; a copy is returned, so re-running the
        calling cell does not transform already-transformed data in place.
    cols : list of str
        Numeric columns to impute and scale.

    Returns
    -------
    DataFrame
        Copy of ``df`` with ``cols`` replaced by their imputed, standardised
        values; all other columns are untouched. A constant column becomes
        all zeros; a column with no observed values stays NaN.
    """
    df = df.copy()
    cols = list(cols)
    if not cols:
        return df
    features = df.loc[:, cols].astype(float)
    # Filling with the column mean leaves the mean itself unchanged.
    features = features.fillna(features.mean())
    # A zero spread would divide by zero; use 1 so constant columns map to 0.
    spread = features.std(ddof=0).replace(0, 1.0)
    standardised = (features - features.mean()) / spread
    # Assign the raw array so no index alignment is involved.
    df[cols] = standardised.to_numpy()
    # display(df.groupby('state_county').filter(lambda g: (g['Date'].nunique() == 1827))['state_county'].unique())
    return df

# Fill missing values and normalize the numeric feature columns. The result
# is bound back to the same name, so later cells see the transformed frame.
final_gas_climate = fillna_normalize(final_gas_climate, ['ozone', 'so2', 'co', 'no2', 'temp', 'wind', 'pm2.5', 'barometric_pressure', 'rh_and_dewpoint', 'gas'])
display(final_gas_climate.head())

Unnamed: 0,Date,Category,AQI,state_county,ozone,so2,co,no2,temp,wind,pm2.5,barometric_pressure,rh_and_dewpoint,gas,climate
0,2016-01-01,good,33,1_003,3e-05,0.000728,0.00026,0.006663,0.093142,0.053429,0.007576,0.955603,0.059777,0.267614,
1,2016-01-04,good,22,1_003,3e-05,0.000728,0.00026,0.006663,0.093144,0.05343,0.005083,0.955618,0.059778,0.267618,
2,2016-01-07,good,28,1_003,3e-05,0.000728,0.00026,0.006663,0.093143,0.05343,0.006425,0.955611,0.059777,0.267616,
3,2016-01-10,good,13,1_003,3e-05,0.000728,0.00026,0.006663,0.093144,0.053431,0.002877,0.955626,0.059778,0.26762,
4,2016-01-13,good,36,1_003,3e-05,0.000728,0.00026,0.006663,0.093141,0.053429,0.008343,0.955597,0.059776,0.267612,


In [None]:
# Preview the final frame, then write it to the project's data_extended
# folder; index=False keeps the row index out of the CSV.
display(final_gas_climate.head())
final_gas_climate.to_csv(project_path+'data_extended/5_years.csv', index=False)

Unnamed: 0,Date,Category,AQI,state_county,ozone,so2,co,no2,temp,wind,pm2.5,barometric_pressure,rh_and_dewpoint,gas,climate
0,2016-01-01,good,33,1_003,3e-05,0.000728,0.00026,0.006663,0.093142,0.053429,0.007576,0.955603,0.059777,0.267614,
1,2016-01-04,good,22,1_003,3e-05,0.000728,0.00026,0.006663,0.093144,0.05343,0.005083,0.955618,0.059778,0.267618,
2,2016-01-07,good,28,1_003,3e-05,0.000728,0.00026,0.006663,0.093143,0.05343,0.006425,0.955611,0.059777,0.267616,
3,2016-01-10,good,13,1_003,3e-05,0.000728,0.00026,0.006663,0.093144,0.053431,0.002877,0.955626,0.059778,0.26762,
4,2016-01-13,good,36,1_003,3e-05,0.000728,0.00026,0.006663,0.093141,0.053429,0.008343,0.955597,0.059776,0.267612,
