# Natural Language Processing

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tabulate import tabulate

file = [
          "train",
          "mbti",
          "mbti_2000",
          "Restaurant_Reviews"
       ]

# Change x & y When Changing Dataset
fileName = f"Data/{file[1]}.csv"
print("Opened File : ",fileName)
dataset = pd.read_csv(fileName)

likedColumn = 0
DataColumnName = "posts"

Opened File :  Data/mbti.csv


## Cleaning the texts

In [2]:
import re  # Tool for matching patterns in text
import nltk  # Natural Language Toolkit (NLTK)

# Download necessary NLTK resources (Run If not Downloaded)
# nltk.download('stopwords')
# nltk.download('words')  # English words corpus

from nltk.corpus import stopwords

from nltk.stem.porter import PorterStemmer

# Storing Clean Review
corpus = []

# Assuming 'dataset' is your DataFrame containing reviews
N = len(dataset)
english_words = set(nltk.corpus.words.words())  # Set of English words

for i in range(N):
    #print("\nMainData : ", dataset[DataColumnName][i])

    # Replacing Non-Letter Characters with Space in the 'Review' column of i'th Row
    review = re.sub('[^a-zA-Z]', ' ', dataset[DataColumnName][i])
    #print("\nReview 1 : ", review)

    # Convert to lowercase
    review = review.lower()
    #print("\nReview 2 : ", review)

    # Split into words
    review = review.split()
    #print("\nReview 3 : ", review)

    # Stemming and remove stopwords not in English words corpus
    ps = PorterStemmer()
    review = [ps.stem(word) for word in review if word in english_words]
    #print("\nReview 4 : ", review)

    # Join words back into a sentence
    review = ' '.join(review)
    #print("\nReview 5 : ", review)

    corpus.append(review)


## Creating the Bag of Words model

In [3]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer()

# xSM is sparse Matrix Representation
xSM = cv.fit_transform(corpus)
#print("Sparse Matrix :\n",xSM)

# Making Document-Term Matrix Representation
xDTM = xSM.toarray()
#print("\nDocument-Term Matrix Matrix : \n", xDTM)

print("Total Column : ",len(xDTM[0]))

Total Column :  20648


## Printing All Unique Word

In [4]:
feature_names = cv.get_feature_names_out()
for i in range(0,len(feature_names)):
  print(feature_names[i])

aa
aal
aardvark
aba
aback
abandon
abas
abash
abat
abattoir
abb
abbasi
abbess
abbey
abbrevi
abdomen
abdomin
abduct
abe
aberr
abet
abhor
abhorr
abid
abiding
abigail
abil
abiogenesi
abject
abjur
abl
ablaz
abneg
abnorm
aboard
abod
abolish
abolit
abomin
aborigin
abort
abound
about
abov
abracadabra
abras
abraxa
abridg
abroad
abrupt
abruptli
absciss
absenc
absent
absente
absentmindedli
absinth
absolut
absolutist
absolv
absorb
absorbed
absorpt
abstain
abstin
abstract
abstracted
abstractli
absurd
absurdli
abu
abund
abundantli
abus
abut
abysm
abyss
academ
academi
academist
acatalept
acced
acceler
accent
accentu
accept
access
accessori
accid
accident
acclam
acclim
accolad
accommod
accompani
accompanist
accomplic
accomplish
accord
accordingli
accordion
accost
account
accredit
accru
accumul
accur
accuraci
accurs
accus
accusatori
accusingli
accustom
ace
acedia
acerb
acet
acetyl
acetylcholin
acetylen
ach
achi
achiev
achromat
acid
acidosi
acker
acknowledg
acl
acm
acn
aconit
acorn
acoust
acquaint
acqua

## Splitting the dataset into the Training set and Test set

In [5]:
# Saving Liked Value in y
yNLP = dataset.iloc[0:N,[likedColumn]].values
print("Y : ",yNLP)

from sklearn.model_selection import train_test_split
xTrain, xTest, yTrain, yTest = train_test_split(xDTM, yNLP, test_size = 0.2)


Y :  [['INFJ']
 ['ENTP']
 ['INTP']
 ...
 ['INTP']
 ['INFP']
 ['INFP']]


## Training the Naive Bayes model on the Training set

In [6]:
# Naive Base Is Good For Natural Language Processing
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(xTrain, yTrain)

yPred = classifier.predict(xTest).ravel()

  y = column_or_1d(y, warn=True)


## Making the Confusion Matrix

In [7]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(yTest, yPred)
print("Confusion Matrix : \n",cm)
AS = accuracy_score(yTest, yPred)
print("Acuracy Score : ",AS*100,"%")

Confusion Matrix : 
 [[  0   5   0   1   0   0   0   0  10  18   7   1   0   1   0   0]
 [  0   9   0   6   0   0   0   1  29  34  19  28   0   0   0   0]
 [  0   1   0   1   0   0   0   0  11   9  12  10   0   0   0   0]
 [  0   4   0   9   0   0   0   0  34  38  22  37   0   0   0   0]
 [  0   1   0   0   0   0   0   0   4   0   2   1   0   0   0   0]
 [  0   1   1   0   0   0   0   0   2   0   2   0   0   0   0   0]
 [  0   1   0   0   0   0   0   0   1   1   1   0   0   0   0   0]
 [  0   2   0   1   0   0   0   0   2   5   2   3   0   0   0   0]
 [  0  12   0  17   0   0   0   0  99  83  43  45   0   0   0   1]
 [  0  14   0  15   0   0   0   0  94 109  49  61   0   1   0   0]
 [  0  13   0  13   0   0   0   0  39  57  47  49   0   0   0   0]
 [  0  11   2  13   0   0   0   0  51  68  44  72   0   1   0   0]
 [  0   3   0   3   0   0   0   0  11  12   5   4   0   0   0   1]
 [  0   2   0   4   0   0   0   0  14  16  12  14   0   0   0   0]
 [  0   3   0   2   0   0   0   0   9   5

### Concatenating xDTM and y and Forming a CSV File

In [8]:
concatenated = np.hstack((xDTM, yNLP.reshape(-1,1)))

# Convert the concatenated array to a DataFrame
# df = pd.DataFrame(concatenated)

# Save the DataFrame to a CSV file
# df.to_csv('Output/reaction.csv', index=False, header=False)


## Classification Model Selection

In [9]:
# Change x & y When Changing Dataset
dataset = concatenated
x = dataset[:, :-1]
y = dataset[:, -1]

# print("\nX :")
# print(tabulate(x,tablefmt="grid"))
# print("\nY : ",y)

# Array Initialization
dataArray = []

### Missing Data Handling

In [10]:
# Missing Data Hadling For Any Dataset
# from sklearn.impute import SimpleImputer
# imputer = SimpleImputer(missing_values=np.nan, strategy='median')
# for col_idx in range(x.shape[-1]):
#   if np.issubdtype(type(x[:,col_idx][0]), np.number):
#     imputer.fit(x[:, [col_idx]])
#     x[:, [col_idx]] = imputer.transform(x[:, [col_idx]])  


# print("\nX : After Handling Missing Data ")
# print(tabulate(x,tablefmt="grid")) 

### Encoding Categorical Data

In [11]:
# Encode categorical data X

# One Hot Encoding of X
"""
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

for col_idx in range(x.shape[1]):
  if np.issubdtype(type(x[:,col_idx][0]), np.number):
    pass
  else:
    ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [col_idx])], remainder='passthrough')
    x = np.array(ct.fit_transform(x))
"""
# Label Encoding of X
from sklearn.preprocessing import LabelEncoder
xLE = LabelEncoder()

for col_idx in range(x.shape[1]):
  if np.issubdtype(type(x[:,col_idx][0]), np.number):
    pass
  else:
    x[:,col_idx]=xLE.fit_transform(x[:,col_idx])

# print("\nX : After Encoding")
# print(tabulate(x,tablefmt="grid"))

# Encode categorical data Y
from sklearn.preprocessing import LabelEncoder
yLE = LabelEncoder()
if np.issubdtype(type(y[0]), np.number):
  pass
else:
  y=yLE.fit_transform(y)

# print("\nY After Encoding : ",y)


### Training Dataset

In [12]:
from sklearn.model_selection import train_test_split
xTrain, xTest, yTrain, yTest = train_test_split(x, y, test_size = 0.2)

### Feature Scaling

In [13]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

# STD stands for Standardized Value
xTrainSTD = sc.fit_transform(xTrain)
xTestSTD = sc.fit_transform(xTest)

### Logistic Regression(Classification) Model

In [14]:
from sklearn.linear_model import LogisticRegression
LRClassifier = LogisticRegression(random_state = 0)
LRClassifier.fit(xTrainSTD,yTrain)

yLR_Pred = LRClassifier.predict(xTestSTD)

from sklearn.metrics import confusion_matrix, accuracy_score
LR_CM = confusion_matrix(yTest, yLR_Pred)
print("Confusion Matrix : \n",LR_CM)

LR_AS = accuracy_score(yTest, yLR_Pred)
print("\nAccuracy Score Of Logistic Regression Model : ",LR_AS*100,"%")

dataArray.append(("Logistic Regression Model ", LR_AS*100))

Confusion Matrix : 
 [[  1   7   1   1   0   0   0   0  13  10   2   1   0   0   0   1]
 [  3  25   2   9   0   0   0   1  25  37  10  12   2   6   1   0]
 [  1   3   4   3   0   0   0   1   6   2  11   9   0   0   1   2]
 [  3  19   0  28   0   0   0   0  16  10  21  47   1   3   2   4]
 [  0   1   0   0   0   0   0   0   2   2   0   2   0   0   1   0]
 [  0   2   0   2   0   0   0   0   1   2   2   1   0   0   0   1]
 [  0   1   0   0   0   0   1   0   0   2   4   3   0   0   0   0]
 [  1   1   1   1   0   0   0   0   4   1   2   2   0   1   0   2]
 [  8  14   6  13   0   0   0   0 110  69  28  21   4   5   2   4]
 [  3  39   1   3   0   0   0   1  69 192  29  25   4   7   5   6]
 [  0  11   1  14   0   2   0   1  32  10  72  46   0   2   4   6]
 [  1  13   4  17   1   0   0   1  17  39  49  93   1   1   3   6]
 [  2   3   0   1   0   0   0   0   7   7   4   2   2   1   0   0]
 [  1   8   1   1   0   0   0   0  10  27   5   8   0   3   0   2]
 [  0   4   1   5   0   0   0   0   6   5

### K-Nearest Neighbour Classification Model

In [15]:
from sklearn.neighbors import KNeighborsClassifier
KNNClassifier = KNeighborsClassifier(n_neighbors = 15, metric = 'minkowski', p = 2)
KNNClassifier.fit(xTrainSTD,yTrain)

yKNN_Pred = KNNClassifier.predict(xTestSTD)

from sklearn.metrics import confusion_matrix, accuracy_score
KNN_CM = confusion_matrix(yTest, yKNN_Pred)
print("Confusion Matrix : \n",KNN_CM)

KNN_AS = accuracy_score(yTest, yKNN_Pred)

print("\nAccuracy Score Of K-Nearest Neighbour Model : ",KNN_AS*100,"%")

dataArray.append(("K-Nearest Neighbour Model", KNN_AS*100))


Confusion Matrix : 
 [[  0   9   0   0   0   0   0   0   0   0   7  21   0   0   0   0]
 [  0  49   0   0   0   0   0   0   1   0  24  59   0   0   0   0]
 [  0  19   0   0   0   0   0   0   0   0   5  19   0   0   0   0]
 [  0  44   0   0   0   0   0   0   1   1  23  85   0   0   0   0]
 [  0   3   0   0   0   0   0   0   1   0   1   3   0   0   0   0]
 [  0   6   0   0   0   0   0   0   0   0   1   4   0   0   0   0]
 [  0   6   0   0   0   0   0   0   0   0   1   4   0   0   0   0]
 [  0   4   0   0   0   0   0   0   0   0   1  11   0   0   0   0]
 [  0 101   0   0   0   0   0   0   3   0  41 139   0   0   0   0]
 [  0 124   0   0   0   0   0   0   5   1  53 201   0   0   0   0]
 [  0  62   0   0   0   0   0   0   1   1  31 106   0   0   0   0]
 [  0  63   0   0   0   0   0   0   0   1  37 145   0   0   0   0]
 [  0  13   0   0   0   0   0   0   1   0   3  12   0   0   0   0]
 [  0  19   0   0   0   0   0   0   0   1   9  37   0   0   0   0]
 [  0  15   0   0   0   0   0   0   0   1

### Support Vector Classification Model

In [16]:
# https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html

# from sklearn.svm import SVC
# SVClassifier = SVC(kernel = 'linear', random_state = 0)
# SVClassifier.fit(xTrainSTD,yTrain)

# ySVC_Pred = SVClassifier.predict(xTestSTD)

# from sklearn.metrics import confusion_matrix, accuracy_score
# SVC_CM = confusion_matrix(yTest, ySVC_Pred)
# print("Confusion Matrix : \n",SVC_CM)

# SVC_AS = accuracy_score(yTest, ySVC_Pred)
# print("\nAccuracy Score Of SVC Model : ",SVC_AS*100,"%")

# dataArray.append(("Support Vecotor Classification Model", SVC_AS*100))

### Kernel_SVM Model

In [17]:
# from sklearn.svm import SVC
# KSVClassifier = SVC(kernel = 'rbf', random_state = 0)
# KSVClassifier.fit(xTrainSTD,yTrain)

# yKSVC_Pred = KSVClassifier.predict(xTestSTD)

# from sklearn.metrics import confusion_matrix, accuracy_score
# KSVC_CM = confusion_matrix(yTest, yKSVC_Pred)
# print("Confusion Matrix : \n",KSVC_CM)

# KSVC_AS = accuracy_score(yTest, yKSVC_Pred)
# print("\nAccuracy Score Of Kernel SVC Model : ", KSVC_AS*100,"%")

# dataArray.append(("kernel SVC Model", KSVC_AS*100))

### Naive Bayes Classification Model

In [18]:
from sklearn.naive_bayes import GaussianNB
NBClassifier = GaussianNB()
NBClassifier.fit(xTrainSTD,yTrain)

yNBC_Pred = NBClassifier.predict(xTestSTD)

from sklearn.metrics import confusion_matrix, accuracy_score
NBC_CM = confusion_matrix(yTest, yNBC_Pred)

print("Confusion Matrix : \n",NBC_CM)

NBC_AS = accuracy_score(yTest, yNBC_Pred)
print("\nAccuracy Score Of Naive Bayes Classification Model : ",NBC_AS*100,"%")

dataArray.append(("Naive Bayes Classification Model", NBC_AS*100))

Confusion Matrix : 
 [[  0   0   0   0   0   0   0   0  10  22   0   5   0   0   0   0]
 [  0   3   0   1   0   0   0   0  30  59  14  26   0   0   0   0]
 [  0   0   0   2   0   0   0   0  12  13   5  11   0   0   0   0]
 [  0   2   0   6   0   0   0   0  29  78  15  24   0   0   0   0]
 [  0   0   0   1   0   0   0   0   3   2   2   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   5   5   0   1   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   5   5   1   0   0   0   0]
 [  0   2   0   0   0   0   0   0   3   7   3   1   0   0   0   0]
 [  1   3   0   9   0   0   0   0  69 130  27  45   0   0   0   0]
 [  1   5   0   9   0   0   0   0  98 184  24  62   0   0   0   1]
 [  0   2   1   5   0   0   0   0  51  81  22  38   0   0   0   1]
 [  1   3   1   2   0   0   0   0  54 103  26  55   0   1   0   0]
 [  0   0   0   1   0   0   0   0   3  11   6   8   0   0   0   0]
 [  0   2   1   2   0   0   0   0  13  31   4  13   0   0   0   0]
 [  0   1   0   1   0   0   0   0   4  26

### Decision Tree Classification Model

In [19]:
# https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html#sklearn.tree.DecisionTreeClassifier
from sklearn.tree import DecisionTreeClassifier
DTClassifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
DTClassifier.fit(xTrainSTD,yTrain)

yDTC_Pred = DTClassifier.predict(xTestSTD)

from sklearn.metrics import confusion_matrix, accuracy_score
DTC_CM = confusion_matrix(yTest, yDTC_Pred)
print("Confusion Matrix : \n",DTC_CM)

DTC_AS = accuracy_score(yTest, yDTC_Pred)
print("\nAccuracy Score Of Decision Tree Classification Model : ",DTC_AS*100,"%")

dataArray.append(("Decision Tree Classification Model",DTC_AS*100))

Confusion Matrix : 
 [[ 1  3  1  2  2  1  0  0 16  5  4  2  0  0  0  0]
 [ 3 18  2  6  0  0  1  0 19 36 16 21  5  3  1  2]
 [ 1  0  2  3  0  0  0  1  6  8 11  8  1  0  2  0]
 [ 3 14  1 19  1  0  1  5 22 23 20 28  7  2  5  3]
 [ 0  0  0  1  0  0  0  0  2  2  1  0  1  1  0  0]
 [ 1  1  1  1  0  1  0  1  0  2  0  1  0  1  0  1]
 [ 0  1  0  1  0  0  0  0  0  2  4  2  0  1  0  0]
 [ 0  0  0  1  0  0  1  1  2  4  4  1  0  1  0  1]
 [ 9 16 12 27  1  1  0  0 56 54 43 32  7  9  9  8]
 [ 4 30  5 25  2  2  1  6 89 97 43 36 13 11 11  9]
 [ 1 16  9 13  0  2  0  2 34 34 30 40  5  1  8  6]
 [ 5 14  7 18  0  2  1  5 30 45 38 52  2  7  8 12]
 [ 1  2  0  2  1  0  1  0  4  8  3  1  0  0  5  1]
 [ 0  4  3  7  0  1  0  0 13 16  4 10  3  1  1  3]
 [ 1  4  3  5  0  0  0  2  8  6  7  5  0  1  1  2]
 [ 2  3  2  3  0  2  1  3  9 10  8 14  3  1  3  3]]

Accuracy Score Of Decision Tree Classification Model :  16.253602305475503 %


### Random Forest Classification Model

In [20]:
# https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html
from sklearn.ensemble import RandomForestClassifier
RFClassifier = RandomForestClassifier(n_estimators = 100, criterion = 'entropy', random_state = 0)
RFClassifier.fit(xTrainSTD,yTrain)

yRFC_Pred = RFClassifier.predict(xTestSTD)

from sklearn.metrics import confusion_matrix, accuracy_score
RFC_CM = confusion_matrix(yTest, yRFC_Pred)
print("Confusion Matrix : \n",RFC_CM)

RFC_AS = accuracy_score(yTest, yRFC_Pred)
print("\nAccuracy Score Of Random Forest Classification Model : ",RFC_AS*100,"%")


dataArray.append(("Random Forest Classification Model", RFC_AS*100))

Confusion Matrix : 
 [[  0   0   0   0   0   0   0   0  11  24   1   1   0   0   0   0]
 [  0   0   0   0   0   0   0   0  35  88   0  10   0   0   0   0]
 [  0   0   0   0   0   0   0   0   9  19   4  11   0   0   0   0]
 [  0   0   0   0   0   0   0   0  31  76  10  37   0   0   0   0]
 [  0   0   0   0   0   0   0   0   5   2   0   1   0   0   0   0]
 [  0   0   0   0   0   0   0   0   2   7   0   2   0   0   0   0]
 [  0   0   0   0   0   0   0   0   2   5   2   2   0   0   0   0]
 [  0   0   0   0   0   0   0   0   5   8   1   2   0   0   0   0]
 [  0   0   0   0   0   0   0   0  82 177   8  17   0   0   0   0]
 [  0   1   0   0   0   0   0   0  66 279   6  32   0   0   0   0]
 [  0   1   0   0   0   0   0   0  51  94  16  39   0   0   0   0]
 [  0   0   0   0   0   0   0   0  45 118  21  62   0   0   0   0]
 [  0   0   0   0   0   0   0   0   8  18   1   2   0   0   0   0]
 [  0   0   0   0   0   0   0   0  12  45   4   5   0   0   0   0]
 [  0   0   0   0   0   0   0   0   7  28

### eXtreme Gradient Boosting Classifier

In [21]:
# Must Label Encoding Y for XGBC

def is_probably_label_encoded(y):
    unique_values = np.unique(y)
    return (unique_values.min() == 0) and (unique_values.max() == len(unique_values) - 1)

# Check if y is probably label encoded
is_label_encoded = is_probably_label_encoded(y)

if not is_label_encoded : 
  from sklearn.preprocessing import LabelEncoder
  yLE = LabelEncoder()
  yXGBC = yLE.fit_transform(y)
  # Split Data With Same rs Value
  from sklearn.model_selection import train_test_split
  xTrain, xTest, yTrain, yTest = train_test_split(x, yXGBC, test_size = 0.2)
  print("Label Encoded Y")

# Applying XGBoost
from xgboost import XGBClassifier
XGBC = XGBClassifier()
XGBC.fit(xTrain, yTrain)

from sklearn.metrics import confusion_matrix, accuracy_score
yXGBC_Pred = XGBC.predict(xTest)
XGBC_CM = confusion_matrix(yTest, yXGBC_Pred)
print("Confusion Matrix : \n",XGBC_CM)

XGBC_AS = accuracy_score(yTest, yXGBC_Pred)
print("\nAccuracy Score Of XGBoost Classification Model : ",XGBC_AS*100,"%")

dataArray.append(("XGBoost Classification Model", XGBC_AS*100))


Confusion Matrix : 
 [[  0   2   0   0   0   0   0   0  22   9   2   2   0   0   0   0]
 [  0  11   0   6   0   0   0   0  29  70   6  10   1   0   0   0]
 [  1   0   0   2   0   0   0   0  10   5  19   6   0   0   0   0]
 [  0   4   0  25   0   0   0   0  19  28  26  52   0   0   0   0]
 [  0   0   0   0   0   0   0   0   4   3   0   1   0   0   0   0]
 [  0   2   0   1   0   0   0   0   3   0   1   4   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   2   5   4   0   0   0   0]
 [  0   1   0   0   0   0   0   0   5   5   2   3   0   0   0   0]
 [  0   4   1   5   0   0   0   0 127  99  19  29   0   0   0   0]
 [  0  12   0   6   0   0   0   0  73 238  23  30   1   0   0   1]
 [  0   0   0   1   0   0   0   0  39  31  70  59   0   0   0   1]
 [  0   0   0   9   0   0   0   0  30  59  36 110   0   0   0   2]
 [  0   0   0   2   0   0   0   0  10  10   5   2   0   0   0   0]
 [  0   0   0   1   0   0   0   0  16  31   8  10   0   0   0   0]
 [  0   1   0   2   0   0   0   0  12   7

## Comparative Display

In [22]:
# Sorting the list by the R^2 Score
dataArray.sort(key=lambda pair: pair[1], reverse=True)

# Display the updated and sorted list
print("Updated list of pairs in sorted order:")
print(tabulate(dataArray, headers=["Classification Models", "Prediction Accuracy(%)"], tablefmt="grid"))

print("\nBest  Model : ",dataArray[0][0])
print("\nWorst Model : ",dataArray[-1][0])

Updated list of pairs in sorted order:
+------------------------------------+--------------------------+
| Classification Models              |   Prediction Accuracy(%) |
| XGBoost Classification Model       |                  33.9481 |
+------------------------------------+--------------------------+
| Logistic Regression Model          |                  31.4121 |
+------------------------------------+--------------------------+
| Random Forest Classification Model |                  25.3026 |
+------------------------------------+--------------------------+
| Naive Bayes Classification Model   |                  19.5965 |
+------------------------------------+--------------------------+
| Decision Tree Classification Model |                  16.2536 |
+------------------------------------+--------------------------+
| K-Nearest Neighbour Model          |                  13.1988 |
+------------------------------------+--------------------------+

Best  Model :  XGBoost Classificatio

#                Happy Ending