# Average Coders

### Gathering and Cleaning the Data

In [1]:
import pandas as pd


Interest Rates

In [2]:
# Load the short-term interest rate series from CSV into a dataframe.
# Short-term rates are represented by the 3-month US Treasury Yield.
short_term_csv = "Interest_Rates/US_Treasury/GS3M.csv"
# Rename the source columns to the lowercase names used as merge keys later.
short_term_df = pd.read_csv(short_term_csv).rename(
    columns={"DATE": "date", "VALUE": "st_interest"}
)
print(f'The number of rows of data for 3-month Treasury is {len(short_term_df.index)}')
short_term_df.head()

The number of rows of data for 3-month Treasury is 444


Unnamed: 0,date,st_interest
0,1982-01-01,12.92
1,1982-02-01,14.28
2,1982-03-01,13.31
3,1982-04-01,13.34
4,1982-05-01,12.71


In [3]:
# Load the intermediate-term interest rate series from CSV into a dataframe.
# Intermediate-term rates are represented by the 2-year US Treasury Yield.
interm_term_csv = "Interest_Rates/US_Treasury/GS2.csv"
# Rename the source columns to the lowercase names used as merge keys later.
interm_term_df = pd.read_csv(interm_term_csv).rename(
    columns={"DATE": "date", "VALUE": "it_interest"}
)
print(f'The number of rows of data for 2-year Treasury is {len(interm_term_df.index)}')
interm_term_df.head()

The number of rows of data for 2-year Treasury is 511


Unnamed: 0,date,it_interest
0,1976-06-01,7.06
1,1976-07-01,6.85
2,1976-08-01,6.63
3,1976-09-01,6.42
4,1976-10-01,5.98


In [4]:
# Load the long-term interest rate series from CSV into a dataframe.
# Long-term rates are represented by the 10-year US Treasury Yield.
long_term_csv = "Interest_Rates/US_Treasury/GS10.csv"
# Rename the source columns to the lowercase names used as merge keys later.
long_term_df = pd.read_csv(long_term_csv).rename(
    columns={"DATE": "date", "VALUE": "lt_interest"}
)
print(f'The number of rows of data for 10-year Treasury is {len(long_term_df.index)}')
long_term_df.head()

The number of rows of data for 10-year Treasury is 789


Unnamed: 0,date,lt_interest
0,1953-04-01,2.83
1,1953-05-01,3.05
2,1953-06-01,3.11
3,1953-07-01,2.93
4,1953-08-01,2.95


In [5]:
# Combine the short, intermediate and long term Treasury yields into one
# dataframe, joining on the shared "date" column. The default join is inner,
# so only dates present in all three series are kept.
merged_si = short_term_df.merge(interm_term_df, on="date")
interest_rates_df = merged_si.merge(long_term_df, on="date")
print(f'The number of rows of data for all interest rates is {len(interest_rates_df.index)}')
interest_rates_df.head()

The number of rows of data for all interest rates is 444


Unnamed: 0,date,st_interest,it_interest,lt_interest
0,1982-01-01,12.92,14.57,14.59
1,1982-02-01,14.28,14.82,14.43
2,1982-03-01,13.31,14.19,13.86
3,1982-04-01,13.34,14.2,13.87
4,1982-05-01,12.71,13.78,13.62


Inflation

In [6]:
# Load the CPI inflation data from CSV into a dataframe.
inflation_csv = "CPI_Inflation/data/cpiai_csv.csv"
# Map the source column names onto the names used in the rest of the notebook.
inflation_columns = {
    "Date": "date",
    "Index": "CPI_Index",
    "Inflation": "one_month_inflation",
}
inflation_df = pd.read_csv(inflation_csv).rename(columns=inflation_columns)
print(f'The number of rows of data for CPI Inflation is {len(inflation_df.index)}')
inflation_df.head()

The number of rows of data for CPI Inflation is 1213


Unnamed: 0,date,CPI_Index,one_month_inflation
0,1913-01-01,9.8,
1,1913-02-01,9.8,0.0
2,1913-03-01,9.8,0.0
3,1913-04-01,9.8,0.0
4,1913-05-01,9.7,-1.02


In [7]:
# Merge the interest rates and inflation into one dataframe.
# pd.merge defaults to an inner join, so any month present in only one of the
# two frames is dropped from the result.
int_and_infl_df = pd.merge(interest_rates_df, inflation_df, on="date")

# Make the inner join's row loss visible instead of silent: count the
# interest-rate rows whose date has no counterpart in the CPI data.
unmatched_rate_rows = int((~interest_rates_df["date"].isin(inflation_df["date"])).sum())
if unmatched_rate_rows:
    print(f'Dropped {unmatched_rate_rows} interest rate rows with no matching CPI Inflation date')
print(f'The number of rows of data for interest rates and inflation is {len(int_and_infl_df.index)}')
int_and_infl_df.tail()

The number of rows of data for interest rates and inflation is 385


Unnamed: 0,date,st_interest,it_interest,lt_interest,CPI_Index,one_month_inflation
380,2013-09-01,0.02,0.4,2.81,234.149,0.12
381,2013-10-01,0.05,0.34,2.62,233.546,-0.26
382,2013-11-01,0.07,0.3,2.72,233.069,-0.2
383,2013-12-01,0.07,0.34,2.9,233.049,-0.01
384,2014-01-01,0.04,0.39,2.86,233.916,0.37


**TODO (note to Avg Coders):** I am going to try to find a more complete history of CPI inflation. The series I have appears to end in January 2014, so the merge above keeps only 385 of the 444 months of interest-rate data. We should have data through at least December 2018.