In [8]:
#K-nærmeste-nabo

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score

data = pd.read_csv("party_data(csv).csv", sep=";")

#print(data.columns)

data_encoded = pd.get_dummies(data)
data_encoded = data_encoded.astype(int)
x = data_encoded.drop(["ok guest_ok","ok guest_not ok"], axis=1)
y = data_encoded["ok guest_ok"]

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state=7)
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(x_train, y_train)

y_test_pred = knn.predict(x_test)
y_train_pred = knn.predict(x_train)
test_accuracy = accuracy_score(y_test, y_test_pred)
train_accuracy = accuracy_score(y_train, y_train_pred)
precision = precision_score(y_test, y_test_pred, pos_label=1)
conf_matrix = confusion_matrix(y_test, y_test_pred)
print('Forvekslingsmatrise:')
print(conf_matrix)
print(f"Trenings nøyaktighet for k=5: {train_accuracy:.2f}")
print(f"Test nøyaktighet for k=5: {test_accuracy:.2f}")
print(f"Presisjon for ok gjester: {precision:.2f}")
#print(data_encoded.head())


Forvekslingsmatrise:
[[49 30]
 [22 99]]
Trenings nøyaktighet for k=5: 0.81
Test nøyaktighet for k=5: 0.74
Presisjon for ok gjester: 0.77


In [3]:
#Logistisk regresjon

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score

data = pd.read_csv("party_data(csv).csv", sep=";") #leser inn datasettet ved hjelp av pandas read. Bruker sep=";" da skille i csv-filen er ";" istedenfor ","
data_encoded = pd.get_dummies(data, drop_first=True) #Bruker pandas.get_dummies() for å konvertere kategoriske kolonner til flere kolonner med 0 og 1.
x = data_encoded.drop("ok guest_ok", axis=1)
y = data_encoded["ok guest_ok"]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=7)
log_reg = LogisticRegression(penalty="l2")
log_reg.fit(x_train, y_train)

y_pred = log_reg.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
print(conf_matrix)
print(f"Nøyaktighet med regularisering {accuracy:.2f}")

log_reg_no_penalty = LogisticRegression(penalty=None)
log_reg_no_penalty.fit(x_train, y_train)
y_pred_no_penalty = log_reg_no_penalty.predict(x_test)
accuracy_no_penalty = accuracy_score(y_test, y_pred_no_penalty)

train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)

precision = precision_score(y_test, y_test_pred)

print(f"Nøyaktigheten uten regularisering: {accuracy_no_penalty:.2f}")
print(f"Nøyaktighet trening {train_accuracy:.2f}")
print(f"Nøyaktighet test {test_accuracy:.2f}")
print(f"Presisjon testsett {precision:.2f}")
#print(data_encoded.head())

[[ 52  27]
 [ 14 107]]
Nøyaktighet med regularisering 0.80
Nøyaktigheten uten regularisering: 0.79
Nøyaktighet trening 0.81
Nøyaktighet test 0.74
Presisjon testsett 0.77


In [4]:
#Avgjerdstre
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

data = pd.read_csv("party_data(csv).csv", sep=";")
data["ok guest"] = data["ok guest"].map({"ok": 1, "not ok": 0})
#print(data.columns)
data_encoded = pd.get_dummies(data, columns=["gender", "age", "study", "activity", "music", "is dancer"], drop_first=False)

x = data_encoded.drop(columns=["ok guest"])
y = data_encoded["ok guest"]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=7)

tree_gini = DecisionTreeClassifier(criterion="gini", random_state=7)
tree_gini.fit(x_train, y_train)
y_train_pred_gini = tree_gini.predict(x_train)
y_test_pred_gini = tree_gini.predict(x_test)
print("Gini accuracy (Train):", accuracy_score(y_train, y_train_pred_gini))
print("Gini accuracy (Test)", accuracy_score(y_test, y_test_pred_gini))
print("Gini confusion matrix (test):\n", confusion_matrix(y_test, y_test_pred_gini))


tree_entropy= DecisionTreeClassifier(criterion="entropy", random_state=7)
tree_entropy.fit(x_train, y_train)
y_train_pred_entropy = tree_entropy.predict(x_train)
y_test_pred_entropy = tree_entropy.predict(x_test)

precision = precision_score(y_test, y_test_pred_entropy, pos_label = 1) 

print("Entropy Accuracy (Train):", accuracy_score(y_train, y_train_pred_entropy))
print("Entropy Accuracy (Test):", accuracy_score(y_test, y_test_pred_entropy))
print("Entropy Confusion Matrix (Test):\n", confusion_matrix(y_test, y_test_pred_entropy))
print(f"Presisjon for ok gjester: {precision:.2f}")



Gini accuracy (Train): 0.9325
Gini accuracy (Test) 0.68
Gini confusion matrix (test):
 [[53 26]
 [38 83]]
Entropy Accuracy (Train): 0.9325
Entropy Accuracy (Test): 0.66
Entropy Confusion Matrix (Test):
 [[51 28]
 [40 81]]
Presisjon for ok gjester: 0.74
