### Importing Relevant Modules

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor, RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from xgboost import XGBRegressor, XGBClassifier
from sklearn.metrics import mean_absolute_error, accuracy_score

### Creating Essential Data Structures

In [None]:
#Importing data
RegressionData = pd.read_csv('melb_housing.csv')
ClassificationData = pd.read_csv('credit_score.csv')

#Printing the available features
print(f"The available features for House Price Prediction are:\n {', '.join(RegressionData.columns)}\n")
print(f"The available features for Credit Score Prediction are:\n {', '.join(ClassificationData.columns)}\n")

#Defining target column
RegPred = RegressionData.Price
Classification = ClassificationData['Credit Score']

#Defining features from which the model will learn by dropping the unwanted features
X_Reg = RegressionData.drop(['Price', 'Method'], axis=1)
X_Class = ClassificationData.drop(['Credit Score'], axis=1)

#One Hot Encoding for categorical data
# NOTE(review): every text column is expanded, including Address, which is
# presumably near-unique per row; the recorded output shows 14074 regression
# columns. Consider dropping or grouping high-cardinality columns.
# NOTE(review): with the default dummy_na=False, a missing category becomes an
# all-zero row rather than NaN, so the dropna below should only remove rows
# with missing numeric values -- confirm that this is intended.
X_Reg = pd.get_dummies(X_Reg)
X_Class = pd.get_dummies(X_Class)

#Number Encode the Credit Score
# .map is used instead of .replace: replacing values in an object column relies
# on pandas downcasting the result to integers, which recent pandas versions
# deprecate (FutureWarning). .map produces a numeric column directly and turns
# any label missing from the mapping into NaN, which is checked below.
Classification = Classification.map({'Low': 0, 'Average': 1, 'High': 2})


#Dropping feature rows with NaN values, then keeping only the matching target rows
# .loc makes it explicit that the surviving index labels (not positions) are used.
X_Reg = X_Reg.dropna(axis=0)
RegPred = RegPred.loc[X_Reg.index]

X_Class = X_Class.dropna(axis=0)
Classification = Classification.loc[X_Class.index]

# Fail early with a clear message instead of handing the classifiers a target
# that still contains missing or unrecognised labels.
if Classification.isna().any():
    raise ValueError("'Credit Score' contains missing values or labels other than 'Low', 'Average', 'High'")
Classification = Classification.astype('int64')

print(f"Shape of Regression Data: {X_Reg.shape}\n")
print(f"Shape of Classification Data: {X_Class.shape}\n")

#Splitting data into training and validation data
train_X_Reg, val_X_Reg, train_y_Reg, val_y_Reg = train_test_split(X_Reg, RegPred, random_state=1)
train_X_Class, val_X_Class, train_y_Class, val_y_Class = train_test_split(X_Class, Classification, random_state=1)

The available features for House Price Prediction are:
 Suburb, Address, Rooms, Type, Price, Method, SellerG, Date, Distance, Postcode, Bedroom2, Bathroom, Car, Landsize, BuildingArea, YearBuilt, CouncilArea, Lattitude, Longtitude, Regionname, Propertycount

The available features for Credit Score Prediction are:
 Age, Gender, Income, Education, Marital Status, Number of Children, Home Ownership, Credit Score

Shape of Regression Data: (6830, 14074)

Shape of Classification Data: (164, 14)



### <u>Regression</u> - Training the models to predict Price

In [None]:
# Build the four price regressors first; every one uses the same fixed seed so
# that repeated runs of this cell produce the same fitted models.
seed = 42
RandomForestModel = RandomForestRegressor(random_state=seed)
AdaBoostModel = AdaBoostRegressor(random_state=seed)
GradientBoostingModel = GradientBoostingRegressor(random_state=seed)
XGBModel = XGBRegressor(random_state=seed)

# Fit each regressor on the training split of the housing data.
RandomForestModel.fit(X=train_X_Reg, y=train_y_Reg)
AdaBoostModel.fit(X=train_X_Reg, y=train_y_Reg)
GradientBoostingModel.fit(X=train_X_Reg, y=train_y_Reg)
XGBModel.fit(X=train_X_Reg, y=train_y_Reg)

### Random Forest Error (Mean Absolute Error):

In [None]:
# Predict prices for the validation rows and report the mean absolute error.
RandomForestPrediction = RandomForestModel.predict(val_X_Reg)
random_forest_mae = mean_absolute_error(y_true=val_y_Reg, y_pred=RandomForestPrediction)
print("Random Forest Error: ", random_forest_mae)

Random Forest Error:  168383.72243559724


### AdaBoost Error (Mean Absolute Error):

In [None]:
# Predict prices for the validation rows and report the mean absolute error.
AdaBoostPrediction = AdaBoostModel.predict(val_X_Reg)
adaboost_mae = mean_absolute_error(y_true=val_y_Reg, y_pred=AdaBoostPrediction)
print("AdaBoost Error: ", adaboost_mae)

AdaBoost Error:  527607.8045972564


### Gradient Boost Error (Mean Absolute Error):

In [None]:
# Predict prices for the validation rows and report the mean absolute error.
GradientBoostPrediction = GradientBoostingModel.predict(val_X_Reg)
gradient_boost_mae = mean_absolute_error(y_true=val_y_Reg, y_pred=GradientBoostPrediction)
print("Gradient Boost Error: ", gradient_boost_mae)

Gradient Boost Error:  186105.82731716233


### XGBoost Error (Mean Absolute Error):

In [None]:
# Predict prices for the validation rows and report the mean absolute error.
XGBoostPrediction = XGBModel.predict(val_X_Reg)
xgboost_mae = mean_absolute_error(y_true=val_y_Reg, y_pred=XGBoostPrediction)
print("XGBoost Error: ", xgboost_mae)

XGBoost Error:  170210.34643954918


### <u>Classification</u> - Training the models to predict Credit Score


In [None]:
# Build the four credit-score classifiers with a shared fixed seed.
# NOTE(review): these names rebind the regression models trained earlier, so
# the regression evaluation cells only give regression results when run before
# this cell.
seed = 42
RandomForestModel = RandomForestClassifier(random_state=seed)
AdaBoostModel = AdaBoostClassifier(random_state=seed)
GradientBoostingModel = GradientBoostingClassifier(random_state=seed)
XGBModel = XGBClassifier(random_state=seed)

# Fit each classifier on the training split of the credit-score data.
RandomForestModel.fit(X=train_X_Class, y=train_y_Class)
AdaBoostModel.fit(X=train_X_Class, y=train_y_Class)
GradientBoostingModel.fit(X=train_X_Class, y=train_y_Class)
XGBModel.fit(X=train_X_Class, y=train_y_Class)

### Random Forest Accuracy:

In [None]:
# Classify the validation rows and report accuracy as a percentage (2 decimals).
RandomForestPrediction = RandomForestModel.predict(val_X_Class)
random_forest_accuracy = accuracy_score(y_true=val_y_Class, y_pred=RandomForestPrediction)
print("Random Forest Accuracy: ", round(random_forest_accuracy*100, 2),"%")

Random Forest Accuracy:  97.56 %


### AdaBoost Accuracy:

In [None]:
# Classify the validation rows and report accuracy as a percentage (2 decimals).
AdaBoostPrediction = AdaBoostModel.predict(val_X_Class)
adaboost_accuracy = accuracy_score(y_true=val_y_Class, y_pred=AdaBoostPrediction)
print("AdaBoost Accuracy: ", round(adaboost_accuracy*100, 2),"%")

AdaBoost Accuracy:  100.0 %


### Gradient Boost Accuracy:

In [None]:
# Classify the validation rows and report accuracy as a percentage (2 decimals).
GradientBoostPrediction = GradientBoostingModel.predict(val_X_Class)
gradient_boost_accuracy = accuracy_score(y_true=val_y_Class, y_pred=GradientBoostPrediction)
print("Gradient Boost Accuracy: ", round(gradient_boost_accuracy*100, 2),"%")

Gradient Boost Accuracy:  97.56 %


### XGBoost Accuracy:

In [None]:
# Classify the validation rows and report accuracy as a percentage (2 decimals).
XGBoostPrediction = XGBModel.predict(val_X_Class)
xgboost_accuracy = accuracy_score(y_true=val_y_Class, y_pred=XGBoostPrediction)
print("XGBoost Accuracy: ", round(xgboost_accuracy*100, 2),"%")

XGBoost Accuracy:  100.0 %
