In [None]:
# --- Standard library ---
# `sys` is needed for the sys.path manipulation below; without this import a
# fresh kernel fails with NameError before the helper module can be loaded.
import sys

# --- Third-party ---
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from statsmodels.miscmodels.ordinal_model import OrderedModel

# --- Project-local helpers ---
# Put the helper directory at the front of the module search path so that
# `ordinal_regression_model` can be imported from it.
sys.path.insert(0, './helper_scripts_for_model')
# NOTE(review): the star import is kept so the set of names in scope is unchanged;
# presumably it supplies convert_star_rating, split_train_test and train_models
# used below -- consider importing those names explicitly.
from ordinal_regression_model import *

# Labels for the five star-rating classes (used later to label the confusion matrices)
stars = ['1.0', '2.0', '3.0', '4.0', '5.0']

# Columns of the model data that are used as predictors
pred_attr = [
    'units',
    'median',
    'num_500k_to_1m',
    'num_1m_or_more',
    'median_listing_price_per_square_foot',
]

# Load the cleaned model data and hand it straight to the helper that
# converts the star ratings into a categorical data type
modelData = convert_star_rating(
    pd.read_csv("CleanedDataCSVs/cleaned_model_data.csv")
)

# Train/test split. Per the helper's description the result is a pair of pairs:
#   pred_targ[0] -> predictor arrays for the training and test data
#   pred_targ[1] -> target for the training and test data
pred_targ = split_train_test(modelData, pred_attr)

# Fit the logit and probit ordinal models. The result is again a pair of pairs:
#   models[0] -> fitted model for the logit and probit methods
#   models[1] -> predicted choice arrays for the logit and probit models
models = train_models(pred_targ)

In [None]:
# Proportion of observations whose predicted rating matches the reference rating.
# NOTE(review): the reference labels are read from pred_targ[0][1], while the
# setup cell describes pred_targ[0] as the predictor tuple and pred_targ[1] as
# the target tuple -- confirm against split_train_test which index holds the
# test-set target.
reference_ratings = pred_targ[0][1].values
predicted_choices = models[1]  # [0] = logit predictions, [1] = probit predictions

logit_accuracy = (reference_ratings == predicted_choices[0]).mean()
probit_accuracy = (reference_ratings == predicted_choices[1]).mean()

# Report both proportions
print('Logit Proportion of Correct Predictions: ', logit_accuracy)
print('Probit Proportion of Correct Predictions: ', probit_accuracy)

In [None]:
# Show the complete fit summary of each model
# (models[0] holds the fitted results: index 0 = logit, index 1 = probit)
fitted_results = models[0]
print('Logit Full Summary:\n', fitted_results[0].summary())
print('\nProbit Full Summary:\n', fitted_results[1].summary())

In [None]:
# Per-class precision / recall / F1 for each model.
# NOTE(review): pred_targ[0][1] is passed as the true labels here; the setup cell
# describes pred_targ[0] as the predictor tuple -- confirm the index against
# split_train_test.
logit_report = classification_report(pred_targ[0][1], models[1][0])
print("Logit Model Classification Report:\n", logit_report)

probit_report = classification_report(pred_targ[0][1], models[1][1])
print("Probit Model Classification Report:\n", probit_report)

In [None]:
# Confusion matrix for the logit model: first argument is treated as the actual
# labels (rows), second as the predicted labels (columns).
# NOTE(review): pred_targ[0][1] is used as the actual labels; the setup cell
# describes pred_targ[0] as the predictor tuple -- confirm against split_train_test.
logit_conf_matrix = confusion_matrix(pred_targ[0][1], models[1][0])

# Wrap the counts in a DataFrame so the heatmap axes carry the star-rating labels.
# NOTE(review): this assumes all five ratings occur in the data, so the matrix
# is 5x5; otherwise the DataFrame construction raises on the shape mismatch.
df_log_cm = pd.DataFrame(logit_conf_matrix, index=stars, columns=stars)

plt.figure(figsize=(10, 7))
sns.set_theme(font_scale=2.0)  # scales tick-label size
# fmt="d" prints the integer counts exactly; the heatmap default ('.2g') rounds
# to two significant digits and shows counts >= 100 in scientific notation.
sns.heatmap(df_log_cm, annot=True, fmt="d", annot_kws={"size": 30}, cmap="Blues")
plt.xlabel("Predicted Star Ratings", fontsize=24)
plt.ylabel("Actual Star Ratings", fontsize=24)
plt.title("Confusion Matrix for Logit Model", fontsize=28)
plt.show()

In [None]:
# Confusion matrix for the probit model: first argument is treated as the actual
# labels (rows), second as the predicted labels (columns).
# NOTE(review): pred_targ[0][1] is used as the actual labels; the setup cell
# describes pred_targ[0] as the predictor tuple -- confirm against split_train_test.
probit_conf_matrix = confusion_matrix(pred_targ[0][1], models[1][1])

# Wrap the counts in a DataFrame so the heatmap axes carry the star-rating labels.
# NOTE(review): this assumes all five ratings occur in the data, so the matrix
# is 5x5; otherwise the DataFrame construction raises on the shape mismatch.
df_prob_cm = pd.DataFrame(probit_conf_matrix, index=stars, columns=stars)

plt.figure(figsize=(10, 7))
sns.set_theme(font_scale=2.0)  # scales tick-label size
# fmt="d" prints the integer counts exactly; the heatmap default ('.2g') rounds
# to two significant digits and shows counts >= 100 in scientific notation.
sns.heatmap(df_prob_cm, annot=True, fmt="d", annot_kws={"size": 30}, cmap="Blues")
plt.xlabel("Predicted Star Ratings", fontsize=24)
plt.ylabel("Actual Star Ratings", fontsize=24)
plt.title("Confusion Matrix for Probit Model", fontsize=28)
plt.show()