## Library Import

In [None]:
import pandas as pd
from matplotlib import pyplot
from pandas.plotting import lag_plot
from IPython.display import display,Markdown
import plotly
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import statistics as stats
import warnings
warnings.simplefilter('ignore')
import datetime as dt

'''
import fbprophet
from fbprophet import Prophet
from fbprophet.plot import add_changepoints_to_plot
from fbprophet.plot import plot_forecast_component
from prophet_plots_plotly import plot_model_components, plot_model
'''
# Mode-bar buttons that are hidden on every chart using scatter_config.
buttons_to_remove = [
    'zoom2d', 'zoomIn2d', 'zoomOut2d',
    'sendDataToCloud',
    'editInChartStudio',
    'select2d',
    'lasso2d',
]

# Options handed to fig.show(config=...) by the plotting cells.
# NOTE(review): 'Always' and the plain string 'toggleHover ' do not look like
# values Plotly documents for these two options - confirm they have the
# intended effect before relying on them.
scatter_config = dict(
    displayModeBar='Always',
    displaylogo=False,
    modeBarButtons='toggleHover ',
    modeBarButtonsToRemove=buttons_to_remove,
)

# Horizontal legend anchored just above the top-left corner of the plot area.
legend_config = {
    'orientation': 'h',
    'yanchor': 'bottom',
    'y': 1.05,
    'xanchor': 'left',
    'x': 0.01,
}


## Import & Modify CPI Data

In [None]:
def percent_change(series):
    """Return the row-over-row change of ``series`` in percent.

    Computed as ``diff() / shift(1) * 100``, so the first row (and any row
    whose predecessor is missing) is NaN.
    """
    return (series.diff() / series.shift(1)) * 100


def add_rolling_stats(frame, column, window):
    """Return a copy of ``frame`` with rolling statistics of ``column`` appended.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame containing ``column``; it is not modified.
    column : str
        Name of the column to summarise.
    window : int
        Number of trailing rows in each rolling window.

    Returns
    -------
    pandas.DataFrame
        ``frame`` plus ``RollingAvg<column>`` (mean), ``RollingAvg<column>_low``
        (min) and ``RollingAvg<column>_high`` (max), appended in that order.
    """
    rolling = frame[column].rolling(window)
    prefix = f'RollingAvg{column}'
    return frame.assign(**{
        prefix: rolling.mean(),
        f'{prefix}_low': rolling.min(),
        f'{prefix}_high': rolling.max(),
    })


# Raw CPI data: one row per year, with monthly, half-year and annual columns.
cpi_raw = pd.read_excel('CPIRawData.xlsx',engine='openpyxl',sheet_name = 'BLS Data Series',skiprows=11)

# Yearly view: keep only the annual figure, derive inflation, and add rolling
# mean/min/max of both CPI and inflation.
window_yearly = 3

cpi_yearly = (
    cpi_raw[['Year', 'Annual']]
    .rename(columns={'Annual': 'CPI', 'Year': 'Date'})
    .pipe(add_rolling_stats, 'CPI', window_yearly)
    .assign(Inflation=lambda df: percent_change(df['CPI']))
    .pipe(add_rolling_stats, 'Inflation', window_yearly)
)

display(Markdown('<h2>Yearly CPI and Inflation Data</h2>'))
display(cpi_yearly.head())


# Monthly view: melt the month columns into rows, build a real date from
# "<Month> <Year>", sort chronologically, then derive the same statistics.
window_monthly = 6

cpi_long = pd.melt(
    cpi_raw.drop(columns=['Annual', 'HALF1', 'HALF2']),
    id_vars=['Year'], var_name='Month', value_name='CPI',
)
month_labels = cpi_long['Month'] + ' ' + cpi_long['Year'].map(str)

cpi_monthly = (
    pd.DataFrame({
        # The explicit format fully determines parsing, so no dayfirst flag.
        'Date': pd.to_datetime(month_labels, format='%b %Y'),
        'CPI': cpi_long['CPI'],
    })
    .sort_values('Date')
    # Plain date objects (no time component) are what the plots use on x.
    .assign(Date=lambda df: df['Date'].dt.date)
    # Inflation is added before the rolling CPI columns to keep column order.
    .assign(Inflation=lambda df: percent_change(df['CPI']))
    .pipe(add_rolling_stats, 'CPI', window_monthly)
    .pipe(add_rolling_stats, 'Inflation', window_monthly)
)

display(Markdown('<h2>Monthly CPI and Inflation Data</h2>'))
display(cpi_monthly.head())

<h2>Yearly CPI and Inflation Data</h2>

Unnamed: 0,Date,CPI,RollingAvgCPI,RollingAvgCPI_low,RollingAvgCPI_high,Inflation,RollingAvgInflation,RollingAvgInflation_low,RollingAvgInflation_high
0,1913,9.9,,,,,,,
1,1914,10.0,,,,1.010101,,,
2,1915,10.1,10.0,9.9,10.1,1.0,,,
3,1916,10.9,10.333333,10.0,10.9,7.920792,3.310298,1.0,7.920792
4,1917,12.8,11.266667,10.1,12.8,17.431193,8.783995,1.0,17.431193


<h2>Monthly CPI and Inflation Data</h2>

Unnamed: 0,Date,CPI,Inflation,RollingAvgCPI,RollingAvgCPI_low,RollingAvgCPI_high,RollingAvgInflation,RollingAvgInflation_low,RollingAvgInflation_high
0,1913-01-01,9.8,,,,,,,
110,1913-02-01,9.8,0.0,,,,,,
220,1913-03-01,9.8,0.0,,,,,,
330,1913-04-01,9.8,0.0,,,,,,
440,1913-05-01,9.7,-1.020408,,,,,,


## Plot CPI and inflation data with rolling avg

In [None]:
def plot_rolling_band(frame, value_col, window, period_label, period_unit):
    """Show one series with its rolling average and a shaded rolling low/high band.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain ``Date``, ``value_col`` and the three columns
        ``RollingAvg<value_col>``, ``RollingAvg<value_col>_low`` and
        ``RollingAvg<value_col>_high``.
    value_col : str
        Column plotted as the main series (``'CPI'`` or ``'Inflation'``).
    window : int
        Rolling window length; only used in the legend labels.
    period_label : str
        Adjective used in the heading and main trace name (e.g. ``'Yearly'``).
    period_unit : str
        Unit used in the rolling trace names (e.g. ``'Year'``).

    Returns
    -------
    plotly.graph_objects.Figure
        The figure, after it has been displayed with ``scatter_config``.
    """
    dates = frame['Date']
    roll_col = f'RollingAvg{value_col}'
    band_line = dict(color='#bf9fa2', width=1)

    fig = go.Figure()

    # Main series.
    fig.add_trace(go.Scatter(x=dates, y=frame[value_col],
                             marker=dict(color='#005100', size=5),
                             line=dict(color='#005100', width=1),
                             mode='markers+lines',
                             name=f'{period_label} {value_col}'))

    # Rolling average.
    fig.add_trace(go.Scatter(x=dates, y=frame[roll_col],
                             line=dict(color='#FF3333', width=1),
                             mode='lines',
                             name=f'{window} {period_unit} Rolling Avg'))

    # Rolling low, then rolling high filled down to the previous trace, which
    # shades the band between the two.
    fig.add_trace(go.Scatter(x=dates, y=frame[f'{roll_col}_low'],
                             line=band_line,
                             mode='lines',
                             name=f'{window} {period_unit} Rolling Low'))
    fig.add_trace(go.Scatter(x=dates, y=frame[f'{roll_col}_high'],
                             line=band_line,
                             mode='lines',
                             fill='tonexty',
                             name=f'{window} {period_unit} Rolling High'))

    fig.update_xaxes(tickangle=-45, tickfont=dict(family='Arial', size=14, color='black'))
    fig.update_layout(hovermode="x unified", clickmode='select', height=500, width=1100,
                      legend=legend_config)

    display(Markdown(f'<h2>{period_label} {value_col} Data</h2>'))
    fig.show(config=scatter_config)
    return fig


# Same four charts, in the same order: CPI yearly, CPI monthly,
# inflation yearly, inflation monthly.
for value_col in ('CPI', 'Inflation'):
    fig = plot_rolling_band(cpi_yearly, value_col, window_yearly, 'Yearly', 'Year')
    fig = plot_rolling_band(cpi_monthly, value_col, window_monthly, 'Monthly', 'Month')

<h2>Yearly CPI Data</h2>

<h2>Monthly CPI Data</h2>

<h2>Yearly Inflation Data</h2>

<h2>Monthly Inflation Data</h2>

In [None]:
# Do not run: this auto-ARIMA attempt causes a memory error.
# The code is kept inside the string literal below so that it never executes.

'''import pmdarima as pm

model_data = cpi_monthly.copy()
model_data.set_index('Date',inplace = True)
model_data = model_data[['Inflation']].dropna()

display(model_data.head())
# Seasonal - fit stepwise auto-ARIMA
smodel = pm.auto_arima(model_data, start_p=1, start_q=1,
                       test='adf',
                       max_p=3, max_q=3, m=12,
                       start_P=0, seasonal=True,
                       d=None, D=1, trace=True,
                       error_action='ignore',  
                       suppress_warnings=True,
                       stepwise=True)

smodel.summary()'''




## Import item price data

<h1> STILL WORKING ON FILTERING THIS DATA; THE CODE IS MESSY AND INCOMPLETE </h1>

In [None]:
# Read both sheets in one call so the workbook is only opened and parsed once;
# a list for sheet_name returns a dict of DataFrames keyed by sheet name.
pricing_sheets = pd.read_excel('IndividualPricingData.xls', engine='xlrd',
                               sheet_name=['Monthly', 'MatchedMap'])
pricing = pricing_sheets['Monthly']
rename_map = pricing_sheets['MatchedMap']

pricing['DATE'] = pricing['DATE'].dt.date

# SourceID -> StripName2, keeping the first row for each SourceID.
name_by_source = {}
for source_id, full_name in zip(rename_map['SourceID'], rename_map['StripName2']):
    name_by_source.setdefault(source_id, full_name)

# Fail with a readable message instead of an IndexError from `.values[0]`.
unmapped = [col for col in pricing.columns
            if col != 'DATE' and col not in name_by_source]
if unmapped:
    raise KeyError(f"No 'SourceID' row in sheet 'MatchedMap' for column(s): {unmapped}")

# The readable name is the text before the first ' (' of StripName2.
readable_names = {col: name_by_source[col].split(' (')[0]
                  for col in pricing.columns if col != 'DATE'}

# Several source IDs can map to the same readable name, so the renamed frame
# may contain duplicated column labels.
pricing = pricing.rename(columns=readable_names).set_index('DATE')

display(Markdown('<h2>Pricing Data</h2>'))
display(pricing.head())

# Disabled: inflation for every raw column. Kept as comments rather than a
# trailing string literal, which the notebook echoed as the cell's output.
# for column in pricing.columns:
#     print(column)
#     inflation_name = column + ' INFLATION'
#     pricing[inflation_name] = (pricing[column].diff()/ pricing[column].shift(1))*100


<h2>Pricing Data</h2>

Unnamed: 0_level_0,"Flour, White, All Purpose","Rice, White, Long Grain, Precooked","Rice, White, Long Grain, Uncooked",Spaghetti,Spaghetti and Macaroni,"Bread, White, Pan","Bread, French","Bread, Rye, Pan","Bread, Whole Wheat, Pan","Bread, Wheat Blend, Pan",...,All Soft Drinks,"All Soft Drinks, 12 Pk, 12 Ounce, Cans","Butter, Stick",Beef Liver,"Steak, T-Bone, USDA Choice, Bone-In","Ice Cream, Prepackaged, Bulk, Regular, per One-Half","Gasoline, All Types",Electricity per Kilowatt-Hour in Size Class D,Utility,U.S. Natural Gas Liquid Composite Price
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1973-10-01,,,,,,,,,,,...,,,,,,,,,,
1973-11-01,,,,,,,,,,,...,,,,,,,,,,
1973-12-01,,,,,,,,,,,...,,,,,,,,,,
1974-01-01,,,,,,,,,,,...,,,,,,,,,,
1974-02-01,,,,,,,,,,,...,,,,,,,,,,


"\nfor column in pricing.columns:\n    print(column)\n    inflation_name = column + ' INFLATION'\n    pricing[inflation_name] = (pricing[column].diff()/ pricing[column].shift(1))*100\n"

In [None]:
# Total number of column labels, then the number of distinct labels; a gap
# between the two means some labels are repeated.
column_labels = list(pricing.columns.values)
print(len(column_labels))

print(len(set(column_labels)))

138
132


In [None]:
# Labels that occur more than once, listed in order of first appearance.
# keep=False flags every occurrence (including the first), so unique()
# returns each repeated label once, at the position where it first appears.
column_index = pricing.columns
dupe_columns = column_index[column_index.duplicated(keep=False)].unique().tolist()
display('The following column names are duplicated:')
display(dupe_columns)



'The following column names are dupliated:'

[' Steak, T-Bone, USDA Choice, Bone-In',
 ' Beef Liver',
 ' Ice Cream, Prepackaged, Bulk, Regular, per One-Half',
 ' Potatoes, White',
 ' Utility',
 ' Gasoline, All Types']

In [None]:
# Number of non-missing observations per item name.
# NOTE(review): where a label is duplicated, pricing[column] is a DataFrame,
# so dropna() drops every row missing in any of the same-named columns and
# the dict keeps a single entry for that label - confirm this is intended.
size_prices = {column: len(pricing[column].dropna()) for column in pricing.columns}

df_sizes = (
    pd.Series(size_prices, name='DataPoints')
    .rename_axis('ItemNames')
    .reset_index()
)

fig = px.bar(df_sizes, x='ItemNames', y='DataPoints')
fig.update_layout(height=700,width=1100,)
display(Markdown('<h2>Number of Data Points per Item</h2>'))
fig.show()


# Disabled alternative: keep every item with at least a fraction of the
# largest item's data points.
# item_most_data = max(size_prices, key=size_prices.get)
# size_most_data = size_prices[item_most_data]
# display(Markdown(f'Item with the most data: **{item_most_data} ({size_most_data} points)**'))
#
# pct_of_max = 0.6
# min_num_points = int(pct_of_max*size_most_data)
# display(Markdown(f'Removing Variables with < **{min_num_points} points**'))

mode_data = stats.mode(df_sizes['DataPoints'])

# Keep items whose count is the mode or at most this many points short of it.
# With the recorded mode of 506 this keeps exactly the counts 505 and 506.
max_missing_points = 1
min_points = mode_data - max_missing_points

display(Markdown(f'Mode of data points: **{mode_data} points**'))
display(Markdown(f'Keeping variables with **{min_points} to {mode_data} points**'))


ideal_data_keys = [item for item, n_points in size_prices.items()
                   if min_points <= n_points <= mode_data]


# Rows with any missing value are dropped so that all retained items cover
# the same dates.
pricing_filtered = pricing[ideal_data_keys].dropna()
pricing_filtered



<h2>Number of Data Points per Item</h2>

Mode of data points: **506 points**

Keeping variables with only data points = **506 points**

Unnamed: 0_level_0,"Flour, White, All Purpose","Bread, White, Pan","Cookies, Chocolate Chip","Ground Chuck, 100% Beef","Round Roast, USDA Choice, Boneless","Steak, Round, USDA Choice, Boneless","Bacon, Sliced","Chops, Center Cut, Bone-In","Chicken, Fresh, Whole","Chicken Legs, Bone-In","Eggs, Grade A, Large","Orange Juice, Frozen Concentrate, 12 Ounce Can","Sugar, White, All Sizes",Potato Chips
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1980-01-01,0.203,0.501,1.491,1.821,2.606,2.724,1.453,1.962,0.699,1.060,0.879,1.243,0.273,1.981
1980-02-01,0.205,0.507,1.495,1.843,2.621,2.725,1.410,1.906,0.673,1.024,0.774,1.271,0.301,1.994
1980-03-01,0.211,0.502,1.531,1.855,2.582,2.737,1.361,1.881,0.655,0.985,0.812,1.201,0.355,2.003
1980-04-01,0.206,0.507,1.537,1.818,2.574,2.730,1.323,1.870,0.638,0.967,0.797,1.199,0.368,2.006
1980-05-01,0.207,0.504,1.541,1.807,2.577,2.713,1.266,1.802,0.628,0.985,0.737,1.162,0.380,2.006
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-10-01,0.396,1.526,3.815,4.869,6.330,7.304,7.317,4.394,1.523,1.709,1.821,2.631,0.713,5.273
2021-11-01,0.386,1.547,3.999,4.810,6.426,7.404,7.265,4.356,1.583,1.733,1.718,2.651,0.687,5.246
2021-12-01,0.388,1.532,4.089,4.792,6.234,7.342,7.211,4.357,1.606,1.729,1.788,2.654,0.691,5.150
2022-01-01,0.424,1.555,4.223,4.765,6.158,7.378,7.222,4.392,1.622,1.731,1.929,2.621,0.706,5.258


In [None]:
# One line trace per retained item, all sharing the DATE index as x values.
date_values = pricing_filtered.index.values
price_traces = [
    go.Scatter(x=date_values, y=pricing_filtered[item],
               mode='lines',  # 'lines' or 'markers'
               name=item)
    for item in pricing_filtered.columns
]

fig = go.Figure(data=price_traces)
display(Markdown('<h2>Price of Goods with the Same Time Period</h2>'))
fig.show()

<h2>Price of Goods with the Same Time Period</h2>

In [None]:
pricing_filtered_inf = pricing_filtered.copy()

# Add a "<item> INFLATION" column (row-over-row change in percent) for each
# price column that exists before the loop starts.
price_columns = list(pricing_filtered_inf.columns)
for price_col in price_columns:
    prices = pricing_filtered_inf[price_col]
    pricing_filtered_inf[price_col + ' INFLATION'] = (prices.diff() / prices.shift(1)) * 100

# Plot only the columns whose label contains 'INFLATION'.
inflation_columns = [label for label in pricing_filtered_inf.columns if 'INFLATION' in label]
date_values = pricing_filtered_inf.index.values

fig = go.Figure()
for label in inflation_columns:
    fig.add_trace(go.Scatter(x=date_values, y=pricing_filtered_inf[label],
                             mode='lines',  # 'lines' or 'markers'
                             name=label))
display(Markdown('<h2>Inflation Rate of Goods with the Same Time Period</h2>'))
fig.show()

<h2>Inflation Rate of Goods with the Same Time Period</h2>

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=c5ceaca9-e365-4132-a867-b543164c96b0' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>