In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier

In [2]:
# Load the PCA-encoded dataset (path is relative to the notebook's working directory)
df_pca = pd.read_csv(filepath_or_buffer="Datasets/Encoded_data_with_PCA.csv")

In [3]:
# Classification for BoRace

# Build the modelling frame for BoRace class prediction from the PCA-encoded data.
# Rows with any missing value are dropped. The explicit .copy() makes df_BoRace an
# independent frame, so the column assignment below modifies it directly and
# avoids pandas' SettingWithCopyWarning.
df_BoRace = df_pca.dropna().copy()

# Binarize the target: (White) 5 => 0, (Non-White) 1,2,3,4 => 1.
# Column '12' is the BoRace column. Code 1 already equals the Non-White label,
# and any value outside 1-5 is passed through unchanged.
race_codes = df_BoRace['12']
df_BoRace['12'] = np.where(
    race_codes == 5.0,
    0.0,
    np.where(race_codes.isin([2.0, 3.0, 4.0]), 1.0, race_codes),
)

# Features are every column except the last; the target is the last column.
# y is kept 2-D with a single column because later cells call y_train.ravel().
# NOTE(review): this assumes column '12' is the last column of the CSV -- confirm.
X = df_BoRace.iloc[:, :-1].values
y = df_BoRace.iloc[:, -1:].values

# 70/30 train/test split; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)

In [7]:
# Fit an XGBoost classifier on the BoRace target, then predict on both splits.
# num_class is taken from the number of distinct labels in column '12'.
race_labels = df_BoRace['12'].unique()
xgb_settings = {
    'objective': 'multi:softmax',
    'num_class': len(race_labels),
    'random_state': 0,
}
BoRace_XGB_Model = XGBClassifier(**xgb_settings)

# y_train has a single column; ravel() flattens it to 1-D before fitting.
BoRace_XGB_Model.fit(X_train, y_train.ravel())

# Predictions for the training split first, then the held-out split.
y_pred_train = BoRace_XGB_Model.predict(X_train)
y_pred_test = BoRace_XGB_Model.predict(X_test)

In [8]:
# XGBoost: confusion matrix and per-class metrics on the training split
xgb_train_cm = confusion_matrix(y_train, y_pred_train)
xgb_train_report = classification_report(y_train, y_pred_train)
print(xgb_train_cm)
print(xgb_train_report)

[[30741   247]
 [ 2221   494]]
              precision    recall  f1-score   support

         0.0       0.93      0.99      0.96     30988
         1.0       0.67      0.18      0.29      2715

    accuracy                           0.93     33703
   macro avg       0.80      0.59      0.62     33703
weighted avg       0.91      0.93      0.91     33703



In [9]:
# XGBoost: confusion matrix and per-class metrics on the held-out test split
xgb_test_cm = confusion_matrix(y_test, y_pred_test)
xgb_test_report = classification_report(y_test, y_pred_test)
print(xgb_test_cm)
print(xgb_test_report)

[[13179   107]
 [  945   214]]
              precision    recall  f1-score   support

         0.0       0.93      0.99      0.96     13286
         1.0       0.67      0.18      0.29      1159

    accuracy                           0.93     14445
   macro avg       0.80      0.59      0.63     14445
weighted avg       0.91      0.93      0.91     14445



In [10]:
# Predicting the race, using a neural network (multi-layer perceptron)
# Two hidden layers (32 and 4 units) with ReLU, trained with Adam at a constant
# learning rate; the fixed random_state makes weight initialisation reproducible.
BoRace_NN_Model = MLPClassifier(
    hidden_layer_sizes=(32, 4),
    activation='relu',
    solver='adam',
    learning_rate='constant',
    alpha=0.00001,
    max_iter=20000,
    random_state=0,
)
# The output activation is not set by hand: MLPClassifier.fit() derives
# out_activation_ from the label type, overwriting any value assigned beforehand.
# y_train has a single column; ravel() flattens it to 1-D before fitting.
BoRace_NN_Model.fit(X_train, y_train.ravel())

# Predictions for the training split and the held-out split.
y_pred_train = BoRace_NN_Model.predict(X_train)
y_pred_test = BoRace_NN_Model.predict(X_test)

In [11]:
# Neural network: confusion matrix and per-class metrics on the training split
nn_train_cm = confusion_matrix(y_train, y_pred_train)
nn_train_report = classification_report(y_train, y_pred_train)
print(nn_train_cm)
print(nn_train_report)

[[30584   404]
 [ 2077   638]]
              precision    recall  f1-score   support

         0.0       0.94      0.99      0.96     30988
         1.0       0.61      0.23      0.34      2715

    accuracy                           0.93     33703
   macro avg       0.77      0.61      0.65     33703
weighted avg       0.91      0.93      0.91     33703



In [12]:
# Neural network: confusion matrix and per-class metrics on the held-out test split
nn_test_cm = confusion_matrix(y_test, y_pred_test)
nn_test_report = classification_report(y_test, y_pred_test)
print(nn_test_cm)
print(nn_test_report)

[[13104   182]
 [  891   268]]
              precision    recall  f1-score   support

         0.0       0.94      0.99      0.96     13286
         1.0       0.60      0.23      0.33      1159

    accuracy                           0.93     14445
   macro avg       0.77      0.61      0.65     14445
weighted avg       0.91      0.93      0.91     14445

