In [81]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import matplotlib.ticker as ticker 
from sklearn.cluster import KMeans
from datetime import datetime
from pytz import timezone
import seaborn as sn
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay, f1_score
from sklearn.tree import export_graphviz
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import VotingClassifier

In [82]:
# Load the labelled training set and encode the target as integers.
# Series.map leaves NaN for any label that is not a key of the mapping.
label_codes = {'Jedi': 1, 'Sith': 0}
data = pd.read_csv("/home/machouba/Documents/CC_42/Train_knight.csv")
data['knight'] = data['knight'].map(label_codes)

# Target vector first, then every remaining column as the feature matrix.
y = data['knight']
X = data.drop(columns='knight')

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [83]:
# k-nearest-neighbours classifier (k=4) trained on the training split.
# fit() hands back the estimator itself, so construction and fitting are chained.
knn = KNeighborsClassifier(n_neighbors=4).fit(X_train, y_train)

In [84]:
# Gaussian naive Bayes classifier with default settings, trained on the
# training split; fit() returns the fitted estimator.
GNB = GaussianNB().fit(X_train, y_train)

In [85]:
# Random forest classifier trained on the training split.
# The forest is built with randomness (bootstrap sampling of rows and random
# feature subsets at each split), so without a seed every run yields a
# different model and different scores. Pinning random_state makes the
# reported metrics reproducible under Restart & Run All; 42 matches the seed
# already used for train_test_split.
rf = RandomForestClassifier(random_state=42)
rf = rf.fit(X_train, y_train)

In [86]:
# Predict the held-out split with each individually fitted model
# (same call order as before: KNN, then GNB, then RF).
knn_pred, GNB_pred, rf_pred = (model.predict(X_test) for model in (knn, GNB, rf))

# One report line per model: accuracy, precision, recall and F1 as percentages.
for label, predicted in (("KNN", knn_pred), ("GNB", GNB_pred), ("RF", rf_pred)):
    print(
        f"{label} Accuracy:", accuracy_score(y_test, predicted) * 100,
        f"{label} Precision:", precision_score(y_test, predicted) * 100,
        f"{label} Recall:", recall_score(y_test, predicted) * 100,
        f"{label} F1-score:", f1_score(y_test, predicted) * 100,
    )

KNN Accuracy: 95.0 KNN Precision: 96.7741935483871 KNN Recall: 90.9090909090909 KNN F1-score: 93.75
GNB Accuracy: 95.0 GNB Precision: 93.93939393939394 GNB Recall: 93.93939393939394 GNB F1-score: 93.93939393939394
RF Accuracy: 100.0 RF Precision: 100.0 RF Recall: 100.0 RF F1-score: 100.0


In [87]:
# Hard-voting (majority vote) ensemble over the three base models.
# Note: VotingClassifier.fit trains clones of the listed estimators, so the
# already-fitted knn / GNB / rf objects are used only as templates here.
ensemble_members = [('KNN', knn), ('GNB', GNB), ('RF', rf)]
vot = VotingClassifier(estimators=ensemble_members, voting='hard')
vot.fit(X_train, y_train)

# Evaluate the ensemble on the held-out split, reporting percentages.
vot_pred = vot.predict(X_test)
vot_accuracy, vot_precision, vot_recall, vot_f1 = (
    metric(y_test, vot_pred) * 100
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)
print(
    "VOTING Accuracy:", vot_accuracy,
    "VOTING Precision:", vot_precision,
    "VOTING Recall:", vot_recall,
    "VOTING F1-score:", vot_f1,
)

VOTING Accuracy: 98.75 VOTING Precision: 100.0 VOTING Recall: 96.96969696969697 VOTING F1-score: 98.46153846153847


In [None]:
# Predict the unlabelled test set with the voting ensemble and export the
# predictions as one class name per line (no index column, no header row).
data_test = pd.read_csv("/home/machouba/Documents/CC_42/Test_knight.csv")

predicted_codes = vot.predict(data_test)

# Single-column frame of integer codes, then decoded back to class names
# (inverse of the encoding applied to the training labels).
result = pd.DataFrame({'knight': predicted_codes})
code_to_name = {0: 'Sith', 1: 'Jedi'}
result['knight'] = result['knight'].map(code_to_name)

result.to_csv('Voting.txt', header=False, index=False)