In [11]:
import csv
import datetime
import json

In [12]:
def read_raw_csv_data(file_path, encoding=None):
    """Read every row of a CSV file into a list of dictionaries.

    The first line of the file is used as the header; each following
    row becomes one dictionary keyed by the header names, with all
    values left as strings.

    Args:
        file_path: Path of the CSV file to read.
        encoding: Optional text encoding passed to ``open``. The default
            (``None``) keeps the platform default encoding.

    Returns:
        A list with one dictionary per data row (empty if the file has
        no data rows).

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline="" lets the csv module handle line endings itself, which
    # is required for quoted fields that contain embedded newlines.
    # The context manager guarantees the file is closed even if
    # parsing raises.
    with open(file_path, newline="", encoding=encoding) as read_file:
        return list(csv.DictReader(read_file))

In [13]:
# load the raw (still string-valued) consolidated price rows from disk
consolidated_prices_raw_data = read_raw_csv_data(
    file_path="./data/augmented_prices.csv"
)

In [14]:
def extract_dictionary_values(raw_data):
    """Convert raw string-valued price records into typed records.

    For every record the ``date`` value is parsed from its ISO format
    string into a ``datetime.datetime``, the price-related values are
    converted to ``float``, and ``year`` / ``month`` entries derived
    from the parsed date are added. Any other keys are carried over
    unchanged.

    The input records are not modified: each one is copied before
    conversion, so the function can safely be called again on the same
    raw data.

    Args:
        raw_data: Iterable of dictionaries with string values, as
            produced by ``csv.DictReader``.

    Returns:
        A new list of new dictionaries holding the converted values.

    Raises:
        KeyError: If a record lacks one of the expected keys.
        ValueError: If a date or numeric value cannot be parsed.
    """
    float_fields = (
        "diesel_price",
        "gasoline_price",
        "diesel_price_difference",
        "gasoline_price_difference",
        "diesel_price_difference_percent",
        "gasoline_price_difference_percent",
    )

    prices_data = []
    for raw_item in raw_data:
        # copy the record so the caller's dictionaries stay untouched
        price_data_item = dict(raw_item)

        record_date = datetime.datetime.fromisoformat(price_data_item["date"])
        price_data_item["date"] = record_date
        for field_name in float_fields:
            price_data_item[field_name] = float(price_data_item[field_name])
        price_data_item["year"] = record_date.year
        price_data_item["month"] = record_date.month

        prices_data.append(price_data_item)

    return prices_data

In [15]:
# turn the raw string records into typed records (dates, floats, year/month)
consolidated_prices_data = extract_dictionary_values(
    raw_data=consolidated_prices_raw_data
)

In [16]:
def extract_reporting_data_dictionary(consolidated_data):
    """Group consolidated price records by year and month.

    The result is a nested dictionary: the outer key is the record's
    ``year``, the inner key is its ``month``, and the innermost value
    is a dictionary mapping each price field name to the list of that
    field's values, in the order the records were encountered.
    """
    tracked_fields = (
        "diesel_price",
        "gasoline_price",
        "diesel_price_difference",
        "gasoline_price_difference",
        "diesel_price_difference_percent",
        "gasoline_price_difference_percent",
    )
    grouped = {}

    for record in consolidated_data:
        year = record["year"]
        month = record["month"]

        # fetch (or create) the per-year bucket, then the per-month one
        months_of_year = grouped.setdefault(year, {})
        if month not in months_of_year:
            months_of_year[month] = {name: [] for name in tracked_fields}
        month_series = months_of_year[month]

        # append this record's values to the matching series
        for name in tracked_fields:
            month_series[name].append(record[name])

    return grouped

In [17]:
# group the typed records into per-year / per-month value lists
reporting_dictionary = extract_reporting_data_dictionary(consolidated_data=consolidated_prices_data)

In [18]:
def get_year_month_summary_data(reporting_data_dictionary):
    """Summarize diesel and gasoline prices per year and month.

    Takes a nested ``{year: {month: {field: [values]}}}`` dictionary
    and returns a ``{year: {month: summary}}`` dictionary in which each
    summary holds the minimum, arithmetic mean and maximum of the
    ``diesel_price`` and ``gasoline_price`` lists for that month.
    """
    summary_by_year = {}

    for year, months in reporting_data_dictionary.items():
        summary_by_month = {}
        summary_by_year[year] = summary_by_month

        for month, series in months.items():
            stats = {}
            summary_by_month[month] = stats

            # diesel figures first, then gasoline
            diesel_values = series["diesel_price"]
            stats["min_diesel_price"] = min(diesel_values)
            stats["avg_diesel_price"] = sum(diesel_values) / len(diesel_values)
            stats["max_diesel_price"] = max(diesel_values)

            gasoline_values = series["gasoline_price"]
            stats["min_gasoline_price"] = min(gasoline_values)
            stats["avg_gasoline_price"] = sum(gasoline_values) / len(gasoline_values)
            stats["max_gasoline_price"] = max(gasoline_values)

    return summary_by_year

In [19]:
# compute the min / average / max price summary for every year and month
year_month_summary_data = get_year_month_summary_data(
    reporting_data_dictionary=reporting_dictionary
)

In [20]:
# save the created reporting data into the JSON storage file; the
# context manager closes the file even if serialization raises
with open("./output_data/year_month_summarization.json", "w") as json_storage:
    json.dump(year_month_summary_data, json_storage, indent=2)