In [1]:
import csv
import datetime 

In [2]:
def read_raw_csv_data(file_path):
    """Reads raw CSV data from the file specified by file_path.

    The first line of the file is used as the header; every following
    record becomes one dictionary mapping column name to the raw string
    value.

    Args:
        file_path: path of the CSV file to read.

    Returns:
        A list with one dictionary per data row, in file order.

    Raises:
        OSError: if the file cannot be opened.
    """
    # newline="" lets the csv module do its own line-ending handling, so
    # line breaks embedded in quoted fields are preserved unchanged.
    # The with-block closes the file even when parsing raises.
    with open(file_path, newline="") as read_file:
        return list(csv.DictReader(read_file))

In [3]:
# Load the raw rows for both fuels: diesel first, then gasoline.
diesel_prices_raw_data, gasoline_prices_raw_data = (
    read_raw_csv_data("./data/dieselPricesInRomania.csv"),
    read_raw_csv_data("./data/gasolinePricesInRomania.csv"),
)

In [4]:
def extract_datetime_value(raw_value, date_formats=("%d-%m-%Y", "%d/%m/%Y")):
    """Extracts the datetime value from a string data value.

    The value is tried against each format in date_formats, in order, and
    the first successful parse is returned. Surrounding whitespace is
    ignored.

    Args:
        raw_value: the date string, e.g. "25-12-2020" or "25/12/2020".
        date_formats: strptime format strings to try, in order. Defaults
            to day-month-year separated by "-" and then by "/".

    Returns:
        The parsed datetime.datetime.

    Raises:
        ValueError: if raw_value matches none of the formats.
        TypeError: if raw_value is not a string (raised by strptime).
    """
    # Only strip real strings so non-string input still reaches strptime
    # and fails with its usual TypeError.
    candidate = raw_value.strip() if isinstance(raw_value, str) else raw_value

    for date_format in date_formats:
        try:
            return datetime.datetime.strptime(candidate, date_format)
        except ValueError:
            continue

    # Report every accepted format, not just the last one attempted.
    raise ValueError(
        "time data {!r} does not match any supported format: {}".format(
            raw_value, ", ".join(date_formats)
        )
    )

In [5]:
def extract_dictionary_values(raw_data):
    """Builds a date -> price mapping from raw data rows.

    For every row, the "date" field is parsed with extract_datetime_value
    and the "price" field is converted to float. If several rows share the
    same date, the last one wins.
    """
    return dict(
        (extract_datetime_value(row["date"]), float(row["price"]))
        for row in raw_data
    )

In [6]:
# Convert each list of raw rows into its date -> price dictionary
# (diesel first, then gasoline).
diesel_data, gasoline_data = map(
    extract_dictionary_values,
    (diesel_prices_raw_data, gasoline_prices_raw_data),
)

In [7]:
# create the consolidated data
# The date axis is the union of both datasets, so a date may be present in
# only one of them. Sorting gives chronological order.
all_dates = sorted(set(diesel_data) | set(gasoline_data))

consolidated_data = []

for date_item in all_dates:
    # .get() returns None when a fuel has no price for this date, instead of
    # raising KeyError; csv.DictWriter writes None as an empty field.
    current_item = {
        "date": date_item,
        "diesel_price": diesel_data.get(date_item),
        "gasoline_price": gasoline_data.get(date_item),
    }

    consolidated_data.append(current_item)

In [8]:
# Write the consolidated rows to CSV. The with-block guarantees the file is
# flushed and closed even if writing the header or a row raises.
with open("./output_data/consolidated_prices.csv", "w", newline="") as consolidated_dataset_file:
    dict_writer = csv.DictWriter(
        consolidated_dataset_file,
        fieldnames=["date", "diesel_price", "gasoline_price"],
    )
    dict_writer.writeheader()
    dict_writer.writerows(consolidated_data)