In [7]:
'''
This notebook reads each of the clean annual data CSV files and writes 6 new split data files (wind, sky,
visibility, air temperature, dew point, and sea level pressure) so that the different climate tables can be
loaded into a relational database.
'''
import pandas as pd
import os
import glob

# Folder that holds the cleaned per-year CSV files (relative to the notebook's working directory).
dir_path = r'resources'

# glob.glob returns paths in an arbitrary, file-system-dependent order. Sort them so the
# rows of the combined DataFrame (and of every CSV exported from it) come out in the same
# order on every run and on every machine.
# NOTE(review): this is chronological only if the file names sort by year -- confirm.
all_files = sorted(glob.glob(os.path.join(dir_path, "*.csv")))

# pd.concat raises an opaque "No objects to concatenate" on an empty list, so fail early
# with a message that names the actual cause (wrong working directory / missing data).
if not all_files:
    raise FileNotFoundError(
        "No CSV files found in {!r}".format(os.path.abspath(dir_path))
    )

# Read every annual file into its own DataFrame; the first row of each file is the header.
file_list = []
for filename in all_files:
    all_data_df = pd.read_csv(filename, index_col=None, header=0)
    file_list.append(all_data_df)

# Create one large DataFrame of all years of the data; ignore_index=True renumbers the rows
# 0..n-1 so the per-file indexes do not repeat.
entire_df = pd.concat(file_list, ignore_index=True)
entire_df.tail()

Unnamed: 0,DATE,WND_direction,WND_dQC,WND_type,WND_speed,WND_speedQC,SKY_ceiling,SKY_ceilingQC,SKY_ceilingDC,SKY_CAVOK,VIS_distance,VIS_distanceQC,VIS_variability,VIS_variabilityQC,AIR_temp,AIR_tempQC,DEW_temp,DEW_tempQC,SLP_pressure,SLP_pressureQC
56142,2023-04-15 04:54:00,80,5,N,77,5,22000,5,C,N,16093,5,N,5,310,7,230,7,230,7
56143,2023-04-15 11:54:00,60,5,N,57,5,22000,5,C,N,16093,5,N,5,280,7,230,7,230,7
56144,2023-04-15 12:54:00,60,5,N,57,5,1524,5,M,N,16093,5,N,5,280,7,230,7,230,7
56145,2023-04-15 15:54:00,70,5,N,62,5,1676,5,M,N,16093,5,N,5,272,5,222,5,222,5
56146,2023-04-15 23:54:00,80,5,N,62,5,1158,5,M,N,16093,5,N,5,280,7,240,7,240,7


In [10]:
# Split the combined data into one DataFrame per climate variable (6 in total) so that
# each can be loaded into its own database table. Every split keeps DATE alongside its
# own measurement columns.
# Wind comes first.
wnd_df = entire_df.filter(
    items=[
        'DATE',
        'WND_direction',
        'WND_dQC',
        'WND_type',
        'WND_speed',
        'WND_speedQC',
    ]
)
wnd_df.tail(20)

Unnamed: 0,DATE,WND_direction,WND_dQC,WND_type,WND_speed,WND_speedQC
56127,2023-04-12 12:26:00,90,5,N,46,5
56128,2023-04-12 14:54:00,80,5,N,46,5
56129,2023-04-12 18:54:00,90,5,N,36,5
56130,2023-04-13 14:54:00,60,5,N,57,5
56131,2023-04-13 15:54:00,90,5,N,31,5
56132,2023-04-13 16:54:00,50,5,N,31,5
56133,2023-04-14 13:54:00,50,5,N,36,5
56134,2023-04-14 14:54:00,70,5,N,62,5
56135,2023-04-14 16:54:00,60,5,N,41,5
56136,2023-04-14 19:54:00,50,5,N,41,5


In [19]:
# Write out wind data to csv format (index=False: the row index is not written as a column).
# DataFrame.to_csv does not create missing parent directories, so make sure the target
# folder exists first; exist_ok=True makes this a no-op when it is already there.
os.makedirs('resources/WND', exist_ok=True)
wnd_df.to_csv('resources/WND/wnd_data.csv', index=False)

In [20]:
# Sky conditions: DATE plus the ceiling columns and the CAVOK column.
sky_df = entire_df.filter(
    items=[
        'DATE',
        'SKY_ceiling',
        'SKY_ceilingQC',
        'SKY_ceilingDC',
        'SKY_CAVOK',
    ]
)
sky_df.tail(20)

Unnamed: 0,DATE,SKY_ceiling,SKY_ceilingQC,SKY_ceilingDC,SKY_CAVOK
56127,2023-04-12 12:26:00,671,5,M,N
56128,2023-04-12 14:54:00,1433,5,M,N
56129,2023-04-12 18:54:00,1829,5,M,N
56130,2023-04-13 14:54:00,1341,5,M,N
56131,2023-04-13 15:54:00,1829,5,M,N
56132,2023-04-13 16:54:00,2134,5,M,N
56133,2023-04-14 13:54:00,1341,5,M,N
56134,2023-04-14 14:54:00,1341,5,M,N
56135,2023-04-14 16:54:00,22000,5,C,N
56136,2023-04-14 19:54:00,1494,5,M,N


In [21]:
# Write out sky data into csv file (index=False: the row index is not written as a column).
# DataFrame.to_csv does not create missing parent directories, so make sure the target
# folder exists first; exist_ok=True makes this a no-op when it is already there.
os.makedirs('resources/SKY', exist_ok=True)
sky_df.to_csv('resources/SKY/sky_data.csv', index=False)

In [22]:
# Visibility: DATE plus the distance and variability columns with their QC columns.
vis_df = entire_df.filter(
    items=[
        'DATE',
        'VIS_distance',
        'VIS_distanceQC',
        'VIS_variability',
        'VIS_variabilityQC',
    ]
)
vis_df.head(20)

Unnamed: 0,DATE,VIS_distance,VIS_distanceQC,VIS_variability,VIS_variabilityQC
0,2013-01-01 00:10:00,4828,5,N,5
1,2013-01-01 00:54:00,6437,5,N,5
2,2013-01-01 01:32:00,6437,5,N,5
3,2013-01-01 02:43:00,4828,5,N,5
4,2013-01-01 02:54:00,16093,5,N,5
5,2013-01-01 03:25:00,16093,5,N,5
6,2013-01-01 03:40:00,16093,5,N,5
7,2013-01-01 04:49:00,16093,5,N,5
8,2013-01-01 04:54:00,16093,5,N,5
9,2013-01-01 06:51:00,16093,5,N,5


In [23]:
# Write out visibility data into csv file (index=False: the row index is not written as a column).
# DataFrame.to_csv does not create missing parent directories, so make sure the target
# folder exists first; exist_ok=True makes this a no-op when it is already there.
os.makedirs('resources/VIS', exist_ok=True)
vis_df.to_csv('resources/VIS/vis_data.csv', index=False)

In [24]:
# Air temperature: DATE plus the temperature column and its QC column.
air_temp_df = entire_df.filter(
    items=[
        'DATE',
        'AIR_temp',
        'AIR_tempQC',
    ]
)
air_temp_df.head(20)

Unnamed: 0,DATE,AIR_temp,AIR_tempQC
0,2013-01-01 00:10:00,280,5
1,2013-01-01 00:54:00,278,5
2,2013-01-01 01:32:00,280,5
3,2013-01-01 02:43:00,280,5
4,2013-01-01 02:54:00,283,5
5,2013-01-01 03:25:00,290,5
6,2013-01-01 03:40:00,290,5
7,2013-01-01 04:49:00,290,5
8,2013-01-01 04:54:00,289,5
9,2013-01-01 06:51:00,290,5


In [25]:
# Write out air temp data into csv file (index=False: the row index is not written as a column).
# DataFrame.to_csv does not create missing parent directories, so make sure the target
# folder exists first; exist_ok=True makes this a no-op when it is already there.
os.makedirs('resources/TMP', exist_ok=True)
air_temp_df.to_csv('resources/TMP/temp_data.csv', index=False)

In [29]:
# Dew point: DATE plus the dew point column and its QC column.
dew_point_df = entire_df.filter(
    items=[
        'DATE',
        'DEW_temp',
        'DEW_tempQC',
    ]
)
dew_point_df.tail()

Unnamed: 0,DATE,DEW_temp,DEW_tempQC
56142,2023-04-15 04:54:00,230,7
56143,2023-04-15 11:54:00,230,7
56144,2023-04-15 12:54:00,230,7
56145,2023-04-15 15:54:00,222,5
56146,2023-04-15 23:54:00,240,7


In [30]:
# Write out dew point data into csv file (index=False: the row index is not written as a column).
# DataFrame.to_csv does not create missing parent directories, so make sure the target
# folder exists first; exist_ok=True makes this a no-op when it is already there.
os.makedirs('resources/DEW', exist_ok=True)
dew_point_df.to_csv('resources/DEW/dew_point.csv', index=False)

In [32]:
# Sea level pressure: DATE plus the pressure column and its QC column.
# NOTE(review): in the entire_df.tail() output at the top of this notebook, the
# SLP_pressure / SLP_pressureQC values are identical to DEW_temp / DEW_tempQC on every
# displayed row -- verify that the upstream cleaning step did not copy the dew point
# columns into the pressure columns.
sea_level_pressure_df = entire_df.filter(
    items=[
        'DATE',
        'SLP_pressure',
        'SLP_pressureQC',
    ]
)
sea_level_pressure_df.head(25)

Unnamed: 0,DATE,SLP_pressure,SLP_pressureQC
0,2013-01-01 00:10:00,260,5
1,2013-01-01 00:54:00,261,5
2,2013-01-01 01:32:00,260,5
3,2013-01-01 02:43:00,260,5
4,2013-01-01 02:54:00,261,5
5,2013-01-01 03:25:00,260,5
6,2013-01-01 03:40:00,260,5
7,2013-01-01 04:49:00,260,5
8,2013-01-01 04:54:00,256,5
9,2013-01-01 06:51:00,250,5


In [33]:
# Write out sea level pressure data into csv file (index=False: the row index is not written as a column).
# DataFrame.to_csv does not create missing parent directories, so make sure the target
# folder exists first; exist_ok=True makes this a no-op when it is already there.
os.makedirs('resources/SLP', exist_ok=True)
sea_level_pressure_df.to_csv('resources/SLP/sea_level_pressure.csv', index=False)

In [35]:
# Completion marker: reaching this line means every cell above ran without raising.
print("all done!")

all done!
