In [None]:
# Importing the necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Reading in the data from FRED: each CSV in the Resources folder becomes one dataframe.
# pd.read_csv already returns a DataFrame, so no extra pd.DataFrame(...) wrapper is needed.
Household_Income_Path = "Resources/Real Median Household Income.csv"
Price_of_a_Home_Path = "Resources/Utah State House Price Index.csv"
Population_Path = "Resources/Resident Population in Utah.csv"

Real_Median_Household_Income = pd.read_csv(Household_Income_Path)
Average_Price_of_a_Home = pd.read_csv(Price_of_a_Home_Path)
Population = pd.read_csv(Population_Path)

# Preview the first rows of each raw table:
display(Real_Median_Household_Income.head())
display(Average_Price_of_a_Home.head())
display(Population.head())

In [None]:
# Manipulating the Data:

# Parse the DATE column of every source table into datetime values:
for source_frame in (Real_Median_Household_Income, Average_Price_of_a_Home, Population):
    source_frame['DATE'] = pd.to_datetime(source_frame['DATE'])

# Outer-join the three tables on DATE, so a date present in any one table is kept
# (columns missing for that date are left as NaN):
merged_df_2_of_3 = Population.merge(Average_Price_of_a_Home, on="DATE", how="outer")
merged_df = merged_df_2_of_3.merge(Real_Median_Household_Income, on="DATE", how="outer")

# Rename the columns to descriptive labels (DATE becomes ds):
# NOTE(review): the units stated in these labels are not checked anywhere in this
# notebook -- confirm them against the FRED series notes (in particular whether
# MEHOINUSUTA672N is published in dollars rather than thousands of dollars).
column_labels = {
    "DATE": "ds",
    "UTPOP": "Population in Thousands",
    "UTSTHPI": "House Price Index in Percentage of 1980 Median Home Price",
    "MEHOINUSUTA672N": "Median Household Income Per Year in Thousands of Dollars",
}
merged_df = merged_df.rename(columns=column_labels)

# Preview the merged table:
display(merged_df.head())

# Keep only the rows in which every column has a value, renumbering the index from 0:
condensed_df = merged_df.dropna().reset_index(drop=True)

# Preview the condensed table:
display(condensed_df.head())

# Build a gap-filled copy of the merged data, restricted to 2000 onwards.

# Work on the value columns only (ds is set aside while the rolling mean is computed),
# renumbering the rows from 0:
filled_df = merged_df.drop(columns="ds").reset_index(drop=True)

# Display the dataframe:
display(filled_df.head())

# Fill each NaN with the mean of the non-NaN values in the 5-row window ending at that row.
# min_periods=1 means one value in the window is enough; a NaN whose window holds no
# values at all stays NaN. Values that are already present are left untouched.
filled_df = filled_df.fillna(filled_df.rolling(5, min_periods=1).mean())

# Add the ds column back as the first column. The dates are re-attached by row position
# (index reset on both sides), so the result does not depend on merged_df's index labels:
filled_df.insert(0, "ds", merged_df["ds"].reset_index(drop=True))

# Keep only the rows dated January 1, 2000 or later (rows without a date are excluded too):
filled_df = filled_df[filled_df["ds"] >= "2000-01-01"].reset_index(drop=True)

# Display the dataframe:
display(filled_df.head())

# Save the dataframe to a csv file (column names as header row, no index column):
filled_df.to_csv("Resources/filled_df.csv", index=False, header=True)

# Display the dataframe:
display(filled_df)


In [None]:
# Visualizing the Data:

def _scatter_over_time(frame, column, title, color):
    """Draw one 15x10 scatter plot of ``frame[column]`` against ``frame['ds']``.

    Parameters:
        frame: dataframe holding a ``ds`` column and the column to plot.
        column: name of the column to plot; also used as the y-axis label
            and as the legend entry.
        title: title shown above the figure.
        color: marker colour passed to matplotlib.
    """
    _, ax = plt.subplots(figsize=(15, 10))
    ax.scatter(frame['ds'], frame[column], color=color, label=column)
    ax.set_title(title)
    ax.set_xlabel('Year')
    ax.set_ylabel(column)
    ax.legend()
    plt.show()


# One scatter plot per series, described as (column, title, colour):
for series_column, series_title, series_color in (
    ('Population in Thousands',
     'Population in Utah Over Time',
     'blue'),
    ('House Price Index in Percentage of 1980 Median Home Price',
     'House Price Index in Utah Over Time',
     'red'),
    ('Median Household Income Per Year in Thousands of Dollars',
     'Median Household Income in Utah Over Time',
     'crimson'),
):
    _scatter_over_time(filled_df, series_column, series_title, series_color)