In [1]:
import csv
import datetime 

In [2]:
def read_raw_csv_data(file_path):
    """Read every row of a CSV file into a list of dictionaries.

    The first line of the file is used as the header; each following
    line becomes one dictionary keyed by the header names. Values are
    returned exactly as ``csv.DictReader`` yields them (strings).

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file to read.

    Returns
    -------
    list of dict
        One dictionary per data row, in file order. Empty if the file
        contains no data rows.

    Raises
    ------
    OSError
        If the file cannot be opened.
    """
    # `with` closes the file even if parsing raises part-way through;
    # newline="" lets the csv module do its own line-ending handling.
    with open(file_path, newline="") as read_file:
        return list(csv.DictReader(read_file))

In [3]:
# load every row of the consolidated price CSV as a list of dictionaries
consolidated_prices_path = "./output_data/consolidated_prices.csv"
consolidated_prices_raw_data = read_raw_csv_data(consolidated_prices_path)

In [4]:
def extract_dictionary_values(raw_data):
    """Convert raw price rows into typed rows without touching the input.

    For every row, ``"date"`` is parsed with
    ``datetime.datetime.fromisoformat`` and ``"diesel_price"`` /
    ``"gasoline_price"`` are converted with ``float``. Any other keys
    are carried over unchanged.

    Parameters
    ----------
    raw_data : iterable of dict
        Rows holding at least the keys ``"date"``, ``"diesel_price"``
        and ``"gasoline_price"``.

    Returns
    -------
    list of dict
        Newly created dictionaries, one per input row, in input order.
        The dictionaries in ``raw_data`` are left unmodified, so the
        function can safely be called again on the same raw data.

    Raises
    ------
    KeyError
        If a row lacks one of the three required keys.
    ValueError
        If a date is not in ISO format or a price is not a number.
    """
    prices_data = []
    for raw_item in raw_data:
        # Copy the row first: a shallow copy of the outer list alone would
        # still share (and therefore overwrite) the caller's dictionaries.
        price_data_item = dict(raw_item)
        price_data_item["date"] = datetime.datetime.fromisoformat(raw_item["date"])
        price_data_item["diesel_price"] = float(raw_item["diesel_price"])
        price_data_item["gasoline_price"] = float(raw_item["gasoline_price"])
        prices_data.append(price_data_item)

    return prices_data

In [5]:
# turn the raw string fields into a datetime (date) and floats (prices)
consolidated_prices_data = extract_dictionary_values(
    raw_data=consolidated_prices_raw_data
)

In [6]:
def extract_dates_dictionary(consolidated_data):
    """Index the consolidated rows by their date.

    Builds a dictionary that maps each row's ``"date"`` value to the
    row itself (the same object, not a copy). When several rows share
    a date, the one appearing last in ``consolidated_data`` is kept.
    """
    return {row["date"]: row for row in consolidated_data}

In [7]:
# index the typed rows by date so each day can be looked up directly
date_dictionary = extract_dates_dictionary(
    consolidated_data=consolidated_prices_data
)

In [8]:
# Walk the date dictionary in ascending date order and add, to every row,
# the change in each fuel price relative to the previous available date.
sorted_dates = sorted(date_dictionary)

# The earliest date has nothing to be compared with, so its differences are
# set to zero. The guard keeps an empty dataset from raising IndexError.
if sorted_dates:
    first_entry = date_dictionary[sorted_dates[0]]
    first_entry["diesel_price_difference"] = 0
    first_entry["gasoline_price_difference"] = 0
    first_entry["diesel_price_difference_percent"] = 0
    first_entry["gasoline_price_difference_percent"] = 0

# Pair every date with the one immediately before it in sorted order.
for previous_date, current_date in zip(sorted_dates, sorted_dates[1:]):
    previous_entry = date_dictionary[previous_date]
    current_entry = date_dictionary[current_date]

    diesel_price_change = current_entry["diesel_price"] - previous_entry["diesel_price"]
    gasoline_price_change = current_entry["gasoline_price"] - previous_entry["gasoline_price"]

    current_entry["diesel_price_difference"] = diesel_price_change
    current_entry["gasoline_price_difference"] = gasoline_price_change
    # NOTE: despite the "_percent" suffix, these hold the change as a
    # fraction of the previous price (it is not multiplied by 100).
    current_entry["diesel_price_difference_percent"] = diesel_price_change / previous_entry["diesel_price"]
    current_entry["gasoline_price_difference_percent"] = gasoline_price_change / previous_entry["gasoline_price"]

In [9]:
# Write the augmented rows to a new CSV file. The `with` block guarantees the
# file is flushed and closed even if writing one of the rows raises.
with open("./output_data/augmented_prices.csv", "w", newline="") as augmented_dataset_file:
    # Column order of the output file; DictWriter raises ValueError by default
    # if a row contains a key that is not listed here.
    dict_writer = csv.DictWriter(augmented_dataset_file, fieldnames=[
        "date",
        "diesel_price",
        "gasoline_price",
        "diesel_price_difference",
        "gasoline_price_difference",
        "diesel_price_difference_percent",
        "gasoline_price_difference_percent",
    ])
    dict_writer.writeheader()
    dict_writer.writerows(date_dictionary.values())