In [1]:
# Library to suppress warnings or deprecation notes 
import warnings
warnings.filterwarnings('ignore')

import os
import math

# Libraries to help with reading and manipulating data
import numpy as np
import pandas as pd
import glob
from tqdm import tqdm

## Prerequisite: rukshar_weather_data_preprocessing.ipynb must already have been run, since it creates the Combined_<weather element>*.csv files that this notebook merges

In [2]:
def create_combined_merged_weather_file(root_directory, output_directory):
    '''Merge all per-element combined weather files into one CSV file.

        Input files are matched with the mask Combined*.csv and are expected to be
        named Combined_<element code>_ECA_<element name>.csv
        (e.g. Combined_FG_ECA_wind_speed.csv). The first file (in sorted order) is
        read in full; from every other file only STAID, SOUNAME, DATE,
        <element code> and Q_<element code> are read. Files are inner-joined on
        STAID, SOUNAME and DATE, so only rows present in every file survive.
        The result is written to Combined_Merged_Weather_Data.csv.

        :root_directory: Root directory containing the combined weather files
        :output_directory: Directory to write the merged output file to (created if missing)
        :raises FileNotFoundError: if root_directory contains no Combined*.csv input file
    '''
    output_name = 'Combined_Merged_Weather_Data.csv'
    join_keys = ['STAID', 'SOUNAME', 'DATE']

    directory_mask = os.path.join(root_directory, 'Combined*.csv')
    # glob returns matches in arbitrary order, so sort to make the base file and the
    # output column order reproducible. The output file also matches the mask; skip it
    # so a rerun with output_directory == root_directory does not ingest its own result.
    filenames = sorted(
        filename for filename in glob.glob(directory_mask)
        if os.path.normcase(os.path.basename(filename)) != os.path.normcase(output_name)
    )
    if not filenames:
        raise FileNotFoundError('No input files matching {}'.format(directory_mask))

    # The first file seeds the merge and keeps all of its columns
    combined_merged_df = pd.read_csv(filenames[0])

    for filename in filenames[1:]:
        filename_base = os.path.basename(filename)
        # Expected format is Combined_<element code>_ECA_<element name>.csv,
        # so keys[1] is the element code used as the value column name
        keys = filename_base.split('_')
        print('Processing {}'.format(filename_base))
        df = pd.read_csv(filename, usecols=join_keys + [keys[1], 'Q_' + keys[1]])
        combined_merged_df = combined_merged_df.merge(df, how='inner', on=join_keys)

    # Make sure the target directory exists before writing
    os.makedirs(output_directory, exist_ok=True)
    combined_merged_df.to_csv(os.path.join(output_directory, output_name), index=False)

In [3]:
# Directory searched for the Combined*.csv input files
# NOTE(review): both paths below are absolute and specific to one Windows machine - adjust before running elsewhere
root_directory = 'C:/Users/thayes/omdena/warsaw-poland-chapter-air-pollution/src/data/data_raw/merged_weather_data_CSV'
# Directory that receives Combined_Merged_Weather_Data.csv
output_directory = 'c:/temp/processed_weather_data'

# Build the merged weather file; one 'Processing <file>' line is printed for every file after the first
create_combined_merged_weather_file(root_directory, output_directory)

Processing Combined_FG_ECA_wind_speed.csv
Processing Combined_HU_ECA_humidity.csv
Processing Combined_PP_ECA_sea_level_pressure.csv
Processing Combined_QQ_ECA_global_radiation.csv
Processing Combined_RR_ECA_precipitation.csv
Processing Combined_SD_ECA_snow depth.csv
Processing Combined_SS_ECA_sunshine.csv
Processing Combined_TG_ECA_mean_temperature.csv
