In [87]:
import sqlalchemy
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import matplotlib.ticker as ticker 
from sklearn.cluster import KMeans
from datetime import datetime
from pytz import timezone
import seaborn as sn
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay
from sklearn.tree import export_graphviz
import graphviz
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import VotingClassifier

In [88]:
data = pd.read_csv('/app/knight/Train_knight.csv')

data['knight'] = data['knight'].map({'Jedi': 1, 'Sith': 0})

trainAbilities = data[data.columns[:-1]]
trainKnight = data[data.columns[-1]]

abilitiesTrain, abilitiesTest, knightTrain, knightTest = train_test_split(trainAbilities, trainKnight, test_size=0.2)

In [89]:
neigh = KNeighborsClassifier(n_neighbors=4)
neigh.fit(trainAbilities, trainKnight)

In [90]:
rf = RandomForestClassifier()
rf.fit(abilitiesTrain, knightTrain)

In [91]:
model = GaussianNB()
model.fit(abilitiesTrain, knightTrain)

In [92]:
neigtPred = neigh.predict(abilitiesTest)
rfPred = rf.predict(abilitiesTest)
modelPred = model.predict(abilitiesTest)

In [93]:
accuracy = accuracy_score(knightTest, neigtPred)
precision = precision_score(knightTest, neigtPred)
recall = precision_score(knightTest, neigtPred)

f_score = (2 * precision * recall) / (precision + recall)

print(f'precision: {round(precision * 100)}%')
print(f'recall: {round(recall * 100)}%')
print(f'accuracy: {round(accuracy * 100)}%')
print(f'f1-score: {round(f_score * 100)}%')

precision: 97%
recall: 97%
accuracy: 96%
f1-score: 97%


In [94]:
accuracy = accuracy_score(knightTest, rfPred)
precision = precision_score(knightTest, rfPred)
recall = precision_score(knightTest, rfPred)

f_score = (2 * precision * recall) / (precision + recall)

print(f'precision: {round(precision * 100)}%')
print(f'recall: {round(recall * 100)}%')
print(f'accuracy: {round(accuracy * 100)}%')
print(f'f1-score: {round(f_score * 100)}%')

precision: 94%
recall: 94%
accuracy: 96%
f1-score: 94%


In [95]:
accuracy = accuracy_score(knightTest, modelPred)
precision = precision_score(knightTest, modelPred)
recall = precision_score(knightTest, modelPred)

f_score = (2 * precision * recall) / (precision + recall)

print(f'precision: {round(precision * 100)}%')
print(f'recall: {round(recall * 100)}%')
print(f'accuracy: {round(accuracy * 100)}%')
print(f'f1-score: {round(f_score * 100)}%')

precision: 94%
recall: 94%
accuracy: 96%
f1-score: 94%


In [96]:
estimator = []
estimator.append(('RandomForest', RandomForestClassifier() ))
estimator.append(('KNN', KNeighborsClassifier(n_neighbors=4) ))
estimator.append(('Gauss', GaussianNB() ))

VC_hard = VotingClassifier(estimators = estimator, voting ='hard')

VC_hard.fit(abilitiesTrain, knightTrain)
knightpred = VC_hard.predict(abilitiesTest)

In [97]:
accuracy = accuracy_score(knightTest, knightpred)
precision = precision_score(knightTest, knightpred)
recall = precision_score(knightTest, knightpred)

f_score = (2 * precision * recall) / (precision + recall)

print(f'precision: {round(precision * 100)}%')
print(f'recall: {round(recall * 100)}%')
print(f'accuracy: {round(accuracy * 100)}%')
print(f'f1-score: {round(f_score * 100)}%')

precision: 94%
recall: 94%
accuracy: 96%
f1-score: 94%


In [98]:
dataTest = pd.read_csv('/app/knight/Test_knight.csv')

In [99]:
result = VC_hard.predict(dataTest)

result = pd.DataFrame(result)
result[0] = result[0].map({0: 'Sith', 1: 'Jedi'})
   
result.to_csv('Voting.txt', index=False)