In [5]:


import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

data1 = pd.read_csv("customer_booking_history.csv")
data1 = data1.fillna(method="bfill")
le = LabelEncoder()
for c in data1.columns:
    if data1[c].dtype == "object":
        data1[c] = le.fit_transform(data1[c])
X1 = data1.drop("repeat_customer", axis=1)
y1 = data1["repeat_customer"]
sc = StandardScaler()
X1 = sc.fit_transform(X1)
xtrain, xtest, ytrain, ytest = train_test_split(X1, y1, test_size=0.2)

models1 = [LogisticRegression(), RandomForestClassifier(), GradientBoostingClassifier(), GaussianNB(), SVC(probability=True)]

for m in models1:
    m.fit(xtrain, ytrain)
    ypred = m.predict(xtest)
    print(m.__class__.__name__)
    print("acc:", accuracy_score(ytest, ypred))
    print("prec:", precision_score(ytest, ypred))
    print("rec:", recall_score(ytest, ypred))
    print("f1:", f1_score(ytest, ypred))
    print()

data2 = pd.read_csv("employee_assignment_data.csv")
data2 = data2.fillna(method="bfill")
for c in data2.columns:
    if data2[c].dtype == "object":
        data2[c] = le.fit_transform(data2[c])
X2 = data2.drop("can_be_assigned", axis=1)
y2 = data2["can_be_assigned"]
X2 = sc.fit_transform(X2)
x2train, x2test, y2train, y2test = train_test_split(X2, y2, test_size=0.2)

models2 = [LogisticRegression(), RandomForestClassifier(), GradientBoostingClassifier(), GaussianNB(), SVC(probability=True)]

for m in models2:
    m.fit(x2train, y2train)
    y2pred = m.predict(x2test)
    print(m.__class__.__name__)
    print("acc:", accuracy_score(y2test, y2pred))
    print("prec:", precision_score(y2test, y2pred))
    print("rec:", recall_score(y2test, y2pred))
    print("f1:", f1_score(y2test, y2pred))
    print()

data3 = pd.read_csv("hyphomz_indian_cleaning_data.csv")
data3 = data3.fillna(method="bfill")
for c in data3.columns:
    if data3[c].dtype == "object":
        data3[c] = le.fit_transform(data3[c])
X3 = data3.drop("customer_returned", axis=1)
y3 = data3["customer_returned"]
X3 = sc.fit_transform(X3)
x3train, x3test, y3train, y3test = train_test_split(X3, y3, test_size=0.2)

models3 = [LogisticRegression(), RandomForestClassifier(), GradientBoostingClassifier(), GaussianNB(), SVC(probability=True)]

for m in models3:
    m.fit(x3train, y3train)
    y3pred = m.predict(x3test)
    print(m.__class__.__name__)
    print("acc:", accuracy_score(y3test, y3pred))
    print("prec:", precision_score(y3test, y3pred))
    print("rec:", recall_score(y3test, y3pred))
    print("f1:", f1_score(y3test, y3pred))
    print()


  data1 = data1.fillna(method="bfill")


LogisticRegression
acc: 0.575
prec: 0.4528301886792453
rec: 0.3
f1: 0.3609022556390977

RandomForestClassifier
acc: 0.68
prec: 0.5975609756097561
rec: 0.6125
f1: 0.6049382716049383

GradientBoostingClassifier
acc: 0.64
prec: 0.5425531914893617
rec: 0.6375
f1: 0.5862068965517241

GaussianNB
acc: 0.605
prec: 0.5106382978723404
rec: 0.3
f1: 0.3779527559055118

SVC
acc: 0.605
prec: 0.5106382978723404
rec: 0.3
f1: 0.3779527559055118

LogisticRegression
acc: 0.99
prec: 0.8
rec: 1.0
f1: 0.8888888888888888



  data2 = data2.fillna(method="bfill")


RandomForestClassifier
acc: 1.0
prec: 1.0
rec: 1.0
f1: 1.0

GradientBoostingClassifier
acc: 1.0
prec: 1.0
rec: 1.0
f1: 1.0

GaussianNB
acc: 0.76
prec: 0.14285714285714285
rec: 1.0
f1: 0.25

SVC
acc: 0.98
prec: 1.0
rec: 0.5
f1: 0.6666666666666666

LogisticRegression
acc: 0.78
prec: 0.5576923076923077
rec: 0.58
f1: 0.5686274509803921



  data3 = data3.fillna(method="bfill")


RandomForestClassifier
acc: 0.81
prec: 0.6
rec: 0.72
f1: 0.6545454545454545

GradientBoostingClassifier
acc: 0.8
prec: 0.5892857142857143
rec: 0.66
f1: 0.6226415094339622

GaussianNB
acc: 0.74
prec: 0.4852941176470588
rec: 0.66
f1: 0.559322033898305

SVC
acc: 0.765
prec: 0.5283018867924528
rec: 0.56
f1: 0.5436893203883495

