# Real Impact Of Inflation

Our project is to uncover the difference between the reported and actual urban inflation. We will be using the CPI data, from the Bureau of Labor Statistics, with and without food and energy costs to answer this question. Additionally, we will also be analyzing the differences in inflation between the various US regions, compared to the national average, over time.

## The Data
@TODO: Insert description of the BLS data

In [122]:
import pandas as pd
import plotly.express as px
import panel as pn
import hvplot.pandas
import geoviews as gv
import cartopy.crs as ccrs
from pathlib import Path
import datetime

### Reading/Pre-Processing the Data

In [123]:
# List of csv titles
non_regional_titles = ['All','Energy','Food','Gasoline', 'Less-Food-Energy']
regional_title = ['Midwest', 'Northeast', 'South', 'West']

In [124]:
def get_non_regional_dfs():
    # Empty list to hold all dataframes
    dataframes = []
    months = {'Jan':1,'Feb':2,'Mar':3,'Apr':4,'May':5,'Jun':6,'Jul':7,'Aug':8,'Sep':9,'Oct':10,'Nov':11,'Dec':12}
    
    # Looping over csv titles list
    for title in non_regional_titles:
        # Saving the csv (by title) as DataFrame, making Year the index column and dropping the HALF1 and HALF2 columns
        df = pd.read_csv(Path(f'./Data/{title}.csv')).drop(columns=['HALF1','HALF2']).melt(id_vars=['Year'], var_name='Month', value_name='Price')
        df['Day']=1
        df['Month'] = df['Month'].map(months)
        df['Date']=pd.to_datetime(dict(year=df.Year, month=df.Month, day=df.Day))
        df = df.sort_values('Date')
        df.index = df['Date']
        df = df.drop(columns=['Year','Month','Day','Date'])
        df = df.dropna()
        dataframes.append(df)
    
    # Returning all the dataframes
    return dataframes
def get_dfs():
    # Empty list to hold all dataframes
    dataframes = []
    months = {'Jan':1,'Feb':2,'Mar':3,'Apr':4,'May':5,'Jun':6,'Jul':7,'Aug':8,'Sep':9,'Oct':10,'Nov':11,'Dec':12}
    
    # Looping over csv titles list
    for title in regional_title:
        # Saving the csv (by title) as DataFrame, making Year the index column and dropping the HALF1 and HALF2 columns
        df = pd.read_csv(Path(f'./Data/{title}.csv')).drop(columns=['HALF1','HALF2','Annual']).melt(id_vars=['Year'], var_name='Month', value_name='Price')
        df['Day']=1
        df['Month'] = df['Month'].map(months)
        df['Date']=pd.to_datetime(dict(year=df.Year, month=df.Month, day=df.Day))
        df = df.sort_values('Date')
        df.index = df['Date']
        df = df.drop(columns=['Year','Month','Day','Date'])
        df = df.dropna()
        dataframes.append(df)
    
    # Returning all the dataframes
    return dataframes

In [125]:
# Getting and storing each DataFrame from get_dfs method
non_regional_dfs = [all_df, energy_df, food_df, gasoline_df, less_food_energy_df] = get_non_regional_dfs()
regional_dfs = [midwest_df, northeast_df, south_df, west_df] = get_dfs()
# Looping over csv titles and indexes
for index, title in enumerate(non_regional_titles):
    # Printing CSV Title and DataFrame Name
    df_title = 'less_food_energy_df' if title == 'Less-Food-Energy' else title.lower()
    print(f'\033[1mCSV Title\033[0m: {title}.csv\n\033[1mDataFrame Name\033[0m: {df_title}_df')
    # Displaying DataFrame
    display(non_regional_dfs[index])
    # Printing new line
    print('\n')

for index, title in enumerate(regional_titles):
    # Printing CSV Title and DataFrame Name
    df_title = 'less_food_energy_df' if title == 'Less-Food-Energy' else title.lower()
    print(f'\033[1mCSV Title\033[0m: {title}.csv\n\033[1mDataFrame Name\033[0m: {df_title}_df')
    # Displaying DataFrame
    display(regional_dfs[index])
    # Printing new line
    print('\n')

[1mCSV Title[0m: All.csv
[1mDataFrame Name[0m: all_df


Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2012-01-01,226.665
2012-02-01,227.663
2012-03-01,229.392
2012-04-01,230.085
2012-05-01,229.815
...,...
2022-05-01,292.296
2022-06-01,296.311
2022-07-01,296.276
2022-08-01,296.171




[1mCSV Title[0m: Energy.csv
[1mDataFrame Name[0m: energy_df


Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2012-01-01,236.942
2012-02-01,242.663
2012-03-01,253.599
2012-04-01,255.736
2012-05-01,250.306
...,...
2022-05-01,316.761
2022-06-01,340.917
2022-07-01,325.407
2022-08-01,305.372




[1mCSV Title[0m: Food.csv
[1mDataFrame Name[0m: food_df


Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2012-01-01,232.666
2012-02-01,232.486
2012-03-01,232.792
2012-04-01,233.234
2012-05-01,233.339
...,...
2022-05-01,302.038
2022-06-01,305.041
2022-07-01,308.532
2022-08-01,310.875




[1mCSV Title[0m: Gasoline.csv
[1mDataFrame Name[0m: gasoline_df


Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2012-01-01,290.762
2012-02-01,305.076
2012-03-01,329.780
2012-04-01,335.742
2012-05-01,323.604
...,...
2022-05-01,391.262
2022-06-01,430.142
2022-07-01,396.952
2022-08-01,348.593




[1mCSV Title[0m: Less-Food-Energy.csv
[1mDataFrame Name[0m: less_food_energy_df_df


Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2012-01-01,227.237
2012-02-01,227.865
2012-03-01,228.735
2012-04-01,229.303
2012-05-01,229.602
...,...
2022-05-01,292.506
2022-06-01,294.680
2022-07-01,295.646
2022-08-01,297.178






NameError: name 'regional_titles' is not defined

In [None]:
all_df2.plot()