In [217]:
# Required Modules
import pandas as pd
import sqlite3
import os
import datetime as dt

# ML Input Data
input_path = os.path.abspath('../Resources/Clean_Data/Final_Data.csv')
input_df = pd.read_csv(input_path, header=0)

# ML VAR Model Output
VAR_path = os.path.abspath('../Resources/ML_Outputs/var_model_predict.csv')
VAR_df = pd.read_csv(VAR_path, header=0).drop(columns={'Unnamed: 0'})

# ML ARIMA Model Output
ARIMA_path = os.path.abspath('../Resources/ML_Outputs/April_2021_preds_by_zip.csv')
ARIMA_df = pd.read_csv(ARIMA_path, header=0)

# Tableau Data
tableau_path = os.path.abspath('../Resources/Clean_Data/Tableau_Data.csv')

In [218]:
# Set Input file column order
input_df['State'] = 'Florida'
input_df = input_df[['State','Zip_Code','Date','Date_Code','Sale_Price','Interest_Rate', 'Property_Tax', 'Rent_Price', 'Household_Income', 'Rent_Affordability', 'Total_Sales', 'FHA_Count', 'Mobility_Rate', 'School_Rating', 'School_Grade', 'Owner_Occupied', 'Renter_Occupied', 'Total_Vacant', 'Total_Dwellings', 'FTE_Employed', 'Unemployed', 'Expense_Index', 'Average_Commute', 'Crime_Index', 'Loan_Amount', 'Loan_Term', 'Loan_R', 'Loan_Payment', 'Home_Affordability']]
input_df['Date'] = pd.to_datetime(input_df['Date'])
input_df.dtypes

State                         object
Zip_Code                       int64
Date                  datetime64[ns]
Date_Code                      int64
Sale_Price                   float64
Interest_Rate                float64
Property_Tax                   int64
Rent_Price                     int64
Household_Income             float64
Rent_Affordability           float64
Total_Sales                    int64
FHA_Count                      int64
Mobility_Rate                float64
School_Rating                  int64
School_Grade                  object
Owner_Occupied                 int64
Renter_Occupied                int64
Total_Vacant                   int64
Total_Dwellings                int64
FTE_Employed                   int64
Unemployed                     int64
Expense_Index                  int64
Average_Commute                int64
Crime_Index                    int64
Loan_Amount                  float64
Loan_Term                    float64
Loan_R                       float64
L

In [219]:
# Set ARIMA model columns
ARIMA_df['Date_Code'] = 202104
ARIMA_df['Date'] = dt.datetime.strptime('2021-04-01','%Y-%m-%d')
ARIMA_df['State'] = 'Florida'
ARIMA_df = ARIMA_df[['State','Zip_Code','Date','Date_Code']]
ARIMA_df.dtypes

State                object
Zip_Code              int64
Date         datetime64[ns]
Date_Code             int64
dtype: object

In [220]:
# Set VAR model columns
VAR_df['Date_Code'] = 202101
VAR_df['Date'] = dt.datetime.strptime('2021-01-01','%Y-%m-%d')
VAR_df['State'] = 'Florida'
# VAR_df = VAR_df.rename(columns={})
VAR_df = VAR_df[['State','Zip_Code','Date','Date_Code',
                'Testing_Total_Sales', 'Testing_FHA_Count',
                'Testing_Sale_Price', 'Total_Sales_Prediction', 'FHA_Count_Prediction',
                'Sale_Price_Prediction', 'Sales_Mean_Error', 'FHA_Count_Mean_Error',
                'Sale_Price_Mean_Error']]
VAR_df.dtypes

State                             object
Zip_Code                           int64
Date                      datetime64[ns]
Date_Code                          int64
Testing_Total_Sales              float64
Testing_FHA_Count                float64
Testing_Sale_Price               float64
Total_Sales_Prediction           float64
FHA_Count_Prediction             float64
Sale_Price_Prediction            float64
Sales_Mean_Error                 float64
FHA_Count_Mean_Error             float64
Sale_Price_Mean_Error            float64
dtype: object

In [221]:
# Merge into one DataFrame
final_df = input_df.merge(ARIMA_df, how='outer' ,on=['State','Zip_Code','Date','Date_Code'], suffixes=('','_ARIMA'))
final_df = final_df.merge(VAR_df, how='outer' ,on=['State','Zip_Code','Date','Date_Code'], suffixes=('','_VAR'))
final_df.dtypes

State                             object
Zip_Code                           int64
Date                      datetime64[ns]
Date_Code                          int64
Sale_Price                       float64
Interest_Rate                    float64
Property_Tax                     float64
Rent_Price                       float64
Household_Income                 float64
Rent_Affordability               float64
Total_Sales                      float64
FHA_Count                        float64
Mobility_Rate                    float64
School_Rating                    float64
School_Grade                      object
Owner_Occupied                   float64
Renter_Occupied                  float64
Total_Vacant                     float64
Total_Dwellings                  float64
FTE_Employed                     float64
Unemployed                       float64
Expense_Index                    float64
Average_Commute                  float64
Crime_Index                      float64
Loan_Amount     

In [222]:
final_df.tail()

Unnamed: 0,State,Zip_Code,Date,Date_Code,Sale_Price,Interest_Rate,Property_Tax,Rent_Price,Household_Income,Rent_Affordability,...,Home_Affordability,Testing_Total_Sales,Testing_FHA_Count,Testing_Sale_Price,Total_Sales_Prediction,FHA_Count_Prediction,Sale_Price_Prediction,Sales_Mean_Error,FHA_Count_Mean_Error,Sale_Price_Mean_Error
26575,Florida,34994,2021-04-01,202104,,,,,,,...,,,,,,,,,,
26576,Florida,34996,2021-04-01,202104,,,,,,,...,,,,,,,,,,
26577,Florida,34997,2021-04-01,202104,,,,,,,...,,,,,,,,,,
26578,Florida,32680,2021-01-01,202101,,,,,,,...,,5.0,0.0,130000.0,9.510791,1.347094,62420.910488,0.970014,0.648398,0.567897
26579,Florida,32680,2021-01-01,202101,,,,,,,...,,2.0,2.0,50000.0,9.492123,1.143768,69327.495238,0.970014,0.648398,0.567897


In [223]:
# Write combined data to csv
final_df.to_csv(tableau_path, index=False)