# Real Impact Of Inflation

The goal of this project is to uncover the difference between reported and actual urban inflation. To answer this question, we use CPI data from the Bureau of Labor Statistics, both with and without food and energy costs. We also analyze how inflation in the various US regions differs from the national average over time.

## The Data
@TODO: Insert description of the BLS data

In [182]:
# Importing dependencies
import pandas as pd
import plotly.express as px
import panel as pn
import hvplot.pandas
import geoviews as gv
import cartopy.crs as ccrs
from pathlib import Path
import datetime

### Reading/Pre-Processing the Data

In [183]:
# Titles (file names without the .csv extension) of the non-regional data sets
non_regional_csv_titles = [
    'All',
    'Energy',
    'Food',
    'Gasoline',
    'Less-Food-Energy',
]
# Titles (file names without the .csv extension) of the regional data sets
regional_csv_titles = [
    'Midwest',
    'Northeast',
    'South',
    'West',
]

In [184]:
class Df_Processor:
    """Load CSV files by title and reshape each into a date-indexed price table.

    Each title is read from ``./Data/<title>.csv``. The file is expected to
    hold a ``Year`` column, one column per month name (``Jan`` .. ``Dec``) and
    the ``HALF1``/``HALF2`` columns; regional files additionally carry an
    ``Annual`` column. All processing happens at construction time, and the
    results are available through ``get_processed_dataframes``.
    """

    def __init__(self, titles, is_regional=False):
        """Store the configuration and immediately process every CSV.

        titles      -- iterable of CSV titles (file names without extension)
        is_regional -- when truthy, the ``Annual`` column is dropped as well
        """
        self.titles = titles
        self.is_regional = is_regional

        # Filled by process_dfs(), one entry per title, in title order
        self.processed_dataframes = []

        self.process_dfs()

    def get_dfs(self):
        """Read every CSV and return the long-format (Year, Month, Price) frames."""
        # HALF1/HALF2 are always removed; Annual only exists in regional files
        unwanted_columns = ['HALF1', 'HALF2']
        if self.is_regional:
            unwanted_columns.append('Annual')

        long_frames = []
        for title in self.titles:
            wide = pd.read_csv(Path(f'./Data/{title}.csv'))
            trimmed = wide.drop(columns=unwanted_columns)
            # One row per (Year, Month) pair instead of one column per month
            long_frames.append(
                trimmed.melt(id_vars=['Year'], var_name='Month', value_name='Price')
            )

        return long_frames

    def process_dfs(self):
        """Turn each long-format frame into a ``Price`` column indexed by ``Date``.

        Every row is dated to the first day of its month, rows are sorted
        chronologically, and rows without a price are discarded. The results
        are appended to ``self.processed_dataframes``.
        """
        # Month-name column headers mapped to calendar month numbers
        month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
        month_numbers = {name: number for number, name in enumerate(month_names, start=1)}

        for long_frame in self.get_dfs():
            # Assemble a timestamp from year / month number / day 1
            dates = pd.to_datetime({
                'year': long_frame['Year'],
                'month': long_frame['Month'].map(month_numbers),
                'day': 1,
            })

            tidy = (
                long_frame.assign(Date=dates)
                .sort_values('Date')
                .set_index('Date')
                .drop(columns=['Year', 'Month'])
                .dropna()
            )

            self.processed_dataframes.append(tidy)

    def get_processed_dataframes(self):
        """Return the list of processed DataFrames, in the order of ``titles``."""
        return self.processed_dataframes


In [185]:
# Load and pre-process the non-regional CSVs, then bind each frame to its own name
non_regional_dfs = Df_Processor(non_regional_csv_titles).get_processed_dataframes()
all_df, energy_df, food_df, gasoline_df, less_food_energy_df = non_regional_dfs
# Same for the regional CSVs (is_regional=True additionally drops the Annual column)
regional_dfs = Df_Processor(regional_csv_titles, is_regional=True).get_processed_dataframes()
midwest_df, northeast_df, south_df, west_df = regional_dfs

In [186]:
# Method to display dataframes and titles
def display_data(df, titles):
    """Print a header for each CSV title and display the matching DataFrame.

    df     -- indexable collection of DataFrames, in the same order as ``titles``
    titles -- iterable of CSV titles (file names without the .csv extension)

    The DataFrame variable name shown in the header is derived from the title
    by lower-casing it, replacing hyphens with underscores and appending
    ``_df`` (e.g. ``Less-Food-Energy`` -> ``less_food_energy_df``).

    NOTE: relies on the ``display`` helper being available in the running
    environment (presumably provided by the Jupyter/IPython session).
    """
    for index, title in enumerate(titles):
        # Hyphens are not valid in identifiers, so map them to underscores.
        # The '_df' suffix is added exactly once, in the f-string below.
        df_title = title.lower().replace('-', '_')
        # \033[1m ... \033[0m renders the labels in bold on ANSI-aware outputs
        print(f'\033[1mCSV Title\033[0m: {title}.csv\n\033[1mDataFrame Name\033[0m: {df_title}_df')
        # Displaying DataFrame
        display(df[index])
        # Blank lines to separate consecutive tables
        print('\n')

In [187]:
# Show every non-regional DataFrame under its CSV title
display_data(df=non_regional_dfs, titles=non_regional_csv_titles)

[1mCSV Title[0m: All.csv
[1mDataFrame Name[0m: all_df


Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2012-01-01,226.665
2012-02-01,227.663
2012-03-01,229.392
2012-04-01,230.085
2012-05-01,229.815
...,...
2022-05-01,292.296
2022-06-01,296.311
2022-07-01,296.276
2022-08-01,296.171




[1mCSV Title[0m: Energy.csv
[1mDataFrame Name[0m: energy_df


Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2012-01-01,236.942
2012-02-01,242.663
2012-03-01,253.599
2012-04-01,255.736
2012-05-01,250.306
...,...
2022-05-01,316.761
2022-06-01,340.917
2022-07-01,325.407
2022-08-01,305.372




[1mCSV Title[0m: Food.csv
[1mDataFrame Name[0m: food_df


Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2012-01-01,232.666
2012-02-01,232.486
2012-03-01,232.792
2012-04-01,233.234
2012-05-01,233.339
...,...
2022-05-01,302.038
2022-06-01,305.041
2022-07-01,308.532
2022-08-01,310.875




[1mCSV Title[0m: Gasoline.csv
[1mDataFrame Name[0m: gasoline_df


Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2012-01-01,290.762
2012-02-01,305.076
2012-03-01,329.780
2012-04-01,335.742
2012-05-01,323.604
...,...
2022-05-01,391.262
2022-06-01,430.142
2022-07-01,396.952
2022-08-01,348.593




[1mCSV Title[0m: Less-Food-Energy.csv
[1mDataFrame Name[0m: less_food_energy_df_df


Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2012-01-01,227.237
2012-02-01,227.865
2012-03-01,228.735
2012-04-01,229.303
2012-05-01,229.602
...,...
2022-05-01,292.506
2022-06-01,294.680
2022-07-01,295.646
2022-08-01,297.178






In [188]:
# Show every regional DataFrame under its CSV title
display_data(df=regional_dfs, titles=regional_csv_titles)

[1mCSV Title[0m: Midwest.csv
[1mDataFrame Name[0m: midwest_df


Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2012-01-01,216.368
2012-02-01,216.855
2012-03-01,218.975
2012-04-01,219.405
2012-05-01,219.145
...,...
2022-05-01,272.673
2022-06-01,277.072
2022-07-01,276.525
2022-08-01,275.875




[1mCSV Title[0m: Northeast.csv
[1mDataFrame Name[0m: northeast_df


Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2012-01-01,242.879
2012-02-01,243.850
2012-03-01,245.125
2012-04-01,245.850
2012-05-01,245.709
...,...
2022-05-01,302.939
2022-06-01,306.453
2022-07-01,305.916
2022-08-01,306.855




[1mCSV Title[0m: South.csv
[1mDataFrame Name[0m: south_df


Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2012-01-01,220.497
2012-02-01,221.802
2012-03-01,223.314
2012-04-01,224.275
2012-05-01,223.356
...,...
2022-05-01,283.307
2022-06-01,287.427
2022-07-01,287.608
2022-08-01,287.168




[1mCSV Title[0m: West.csv
[1mDataFrame Name[0m: west_df


Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2012-01-01,228.980
2012-02-01,229.995
2012-03-01,232.039
2012-04-01,232.561
2012-05-01,233.053
...,...
2022-05-01,309.645
2022-06-01,313.496
2022-07-01,313.951
2022-08-01,314.013




