In [3]:
# STEP 1: LOADING THE DATA FRAME

# importing relevant libraries
import pandas as pd
import seaborn as sns
import numpy as np
import statistics as stats
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import statsmodels.api as sn
import statsmodels.discrete.discrete_model as sm
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import roc_auc_score

# Read the raw data set; the first row of the CSV holds the column names.
data = pd.read_csv("House-Price.csv", header = 0)

# Replace the four distance columns with their row-wise arithmetic mean.
# The Series are added one by one so a missing value in any of them
# propagates to avg_dis, exactly as plain addition would.
dist_cols = ['dist1', 'dist2', 'dist3', 'dist4']
data['avg_dis'] = sum(data[col] for col in dist_cols) / len(dist_cols)
data = data.drop(columns=dist_cols)

# Impute missing hospital-bed counts with the column mean
# (this is missing-value treatment, not outlier removal).
data['n_hos_beds'] = data['n_hos_beds'].fillna(data['n_hos_beds'].mean())

# pandas 2.0 and later treat the literal text "None" as a missing-value
# marker in read_csv. When that happens get_dummies never creates the
# "waterbody_None" column and dropping it below raises KeyError. Restoring
# the label first keeps the encoding identical across pandas versions:
# rows that were NaN and rows labelled "None" both end up with zeros in
# every remaining waterbody indicator once "waterbody_None" is dropped.
data['waterbody'] = data['waterbody'].fillna('None')

# One-hot encode the categorical columns, then drop one indicator from the
# airport group and one from the waterbody group so each dropped level is
# represented implicitly by zeros in the remaining indicators.
data = pd.get_dummies(data)
data = data.drop(columns=["airport_NO", "waterbody_None"])

In [4]:
# independent variable: the double brackets select a one-column DataFrame
# (2-D) rather than a Series
x = data[['price']]

# dependent variable: the 'Sold' column as a Series
y = data['Sold']

# Preview the predictor. This sits last because a notebook cell renders only
# the value of its final expression; in the middle of the cell the result
# was silently discarded.
x.head()

# Linear Discriminant Analysis with SINGLE predictor

In [7]:
# Build the linear discriminant analysis classifier and train it on (x, y)
# in one step; fit() returns the fitted estimator, so it can be bound directly.
clf_lda = LinearDiscriminantAnalysis().fit(x, y)

# Predicted class labels for the same observations the model was fitted on.
y_pred_lda = clf_lda.predict(x)


array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0,
       1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

#  Linear Discriminant Analysis with MULTIPLE predictors

# Confusion Matrix and evaluating performance

In [6]:
# Cross-tabulate the actual classes against the LDA predictions
# (rows: true labels, columns: predicted labels).
confusion_matrix(y_true=y, y_pred=y_pred_lda)

array([[210,  66],
       [166,  64]], dtype=int64)