In [21]:
import pandas as pd
import numpy as np

In [22]:
file_path = "Food_CPI_data.csv"
CPI_df = pd.read_csv(file_path)
CPI_df

Unnamed: 0,DATE,CUSR0000SAF112
0,1967-01-01,38.100
1,1967-02-01,38.000
2,1967-03-01,37.800
3,1967-04-01,37.500
4,1967-05-01,37.400
...,...,...
668,2022-09-01,318.374
669,2022-10-01,319.917
670,2022-11-01,320.034
671,2022-12-01,322.507


In [23]:
#Rename the price column
CPI_df.rename(mapper={"CUSR0000SAF112" : "Index (1982-1984=100)"}, axis=1, inplace=True)
CPI_df.head()

Unnamed: 0,DATE,Index (1982-1984=100)
0,1967-01-01,38.1
1,1967-02-01,38.0
2,1967-03-01,37.8
3,1967-04-01,37.5
4,1967-05-01,37.4


In [24]:
#View datatypes to see what needs to be changed
CPI_df.dtypes

DATE                      object
Index (1982-1984=100)    float64
dtype: object

In [25]:
#Change DATE to datetime
CPI_df['DATE'] = pd.to_datetime(CPI_df["DATE"], format="%Y-%m-%d")
CPI_df.head()

Unnamed: 0,DATE,Index (1982-1984=100)
0,1967-01-01,38.1
1,1967-02-01,38.0
2,1967-03-01,37.8
3,1967-04-01,37.5
4,1967-05-01,37.4


In [26]:
#View datatypes to confirm change
CPI_df.dtypes

DATE                     datetime64[ns]
Index (1982-1984=100)           float64
dtype: object

In [27]:
#check if there are null values
CPI_df.isnull().sum()

DATE                     0
Index (1982-1984=100)    0
dtype: int64

In [28]:
# Remove all data before 1990-01-01 and store to new df
cleaned_CPI_df = CPI_df[CPI_df["DATE"] >= "1990-01-01"]
cleaned_CPI_df

Unnamed: 0,DATE,Index (1982-1984=100)
276,1990-01-01,126.100
277,1990-02-01,127.000
278,1990-03-01,127.400
279,1990-04-01,128.100
280,1990-05-01,128.200
...,...,...
668,2022-09-01,318.374
669,2022-10-01,319.917
670,2022-11-01,320.034
671,2022-12-01,322.507


In [29]:
# rename "DATE" column to "date_time" per team data process in README
cleaned_CPI_df.rename(mapper={"DATE" : "date_time"}, axis=1, inplace=True)
cleaned_CPI_df.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,date_time,Index (1982-1984=100)
276,1990-01-01,126.1
277,1990-02-01,127.0
278,1990-03-01,127.4
279,1990-04-01,128.1
280,1990-05-01,128.2


In [31]:
# Calculate percent change
cleaned_CPI_df["Pct_Change"] = cleaned_CPI_df["Index (1982-1984=100)"].pct_change()
cleaned_CPI_df.head(30)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,date_time,Index (1982-1984=100),Pct_Change
276,1990-01-01,126.1,
277,1990-02-01,127.0,0.007137
278,1990-03-01,127.4,0.00315
279,1990-04-01,128.1,0.005495
280,1990-05-01,128.2,0.000781
281,1990-06-01,130.2,0.015601
282,1990-07-01,130.3,0.000768
283,1990-08-01,130.9,0.004605
284,1990-09-01,131.8,0.006875
285,1990-10-01,132.7,0.006829


In [32]:
# Store new DF as new csv file
output_path = "Output/Food_CPI_cleaned.csv"
cleaned_CPI_df.to_csv(output_path)