In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the workbook into a DataFrame. The path is relative, so the file must be
# in the notebook's working directory. Later cells read columns such as
# Position, Goals, Min and xG from this frame.
data = pd.read_excel('futbolVeriNB.xlsx')

In [None]:
# Keep rows whose Position is neither 'GK' nor 'DF' and whose Goals value is
# present. Rows with a missing Position are kept, exactly as with the `!=` tests
# (NaN is not in the excluded list).
# .copy() makes the result an independent frame: the next cells add and
# overwrite columns on `data`, which would otherwise go through pandas'
# SettingWithCopy path because a boolean .loc selection is flagged as a slice.
data = data.loc[
    ~data['Position'].isin(['GK', 'DF']) & data['Goals'].notna()
].copy()

In [None]:
# Binary target: 1 when the row has more than zero goals, otherwise 0.
# A vectorised comparison gives the same 0/1 int64 column as a row-wise lambda.
data.loc[:, 'isScored?'] = data['Goals'].gt(0).astype('int64')

In [None]:
# Convert the Min column to integers: render every value as text, drop the
# commas (presumably thousands separators - confirm against the sheet), then
# parse the remaining digits.
data["Min"] = (
    data["Min"]
    .astype(str)
    .str.replace(",", "", regex=False)  # ',' is a literal, no regex needed
    .astype(int)
)

In [None]:
# Feature matrix: the categorical Position column comes first (the encoding
# cell relies on that), followed by the numeric predictors.
X = data[[
    "Position",
    "Min",
    "xG",
    "Shots",
    "ShotsOnTarget",
    "PrgR",
    "PrgCarryingDist",
    "Touches_Att_Pen",
    "CarriesIntoPenaltyArea",
    "GCA_Shot",
]]
# Target kept as a one-column DataFrame; it is flattened to 1-D after the split.
y = data[["isScored?"]]

In [None]:
# Show the rows labelled as "did not score" (isScored? equal to False / 0).
data.loc[data['isScored?'].eq(False)]

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode Position and pass every other column through unchanged.
# - The column is selected by name, so re-running this cell after X has become
#   a plain array fails loudly instead of silently encoding a numeric column.
# - sparse_threshold=0 forces a dense result. Without it the transformer may
#   return a SciPy sparse matrix, which np.array() would wrap in a useless 0-d
#   object array.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), ['Position'])],
    remainder='passthrough',
    sparse_threshold=0,
)
X = np.asarray(ct.fit_transform(X))

In [None]:
from sklearn.model_selection import train_test_split

# Hold out a quarter of the rows for evaluation; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the per-column mean and standard deviation from the training rows only,
# then apply that same scaling to both splits so the test rows do not influence
# the statistics.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Flatten the single-column targets into 1-D label vectors.
# np.asarray accepts both the DataFrame produced by the split and an array that
# has already been flattened, so this cell can be re-run safely.
y_train = np.asarray(y_train).reshape(-1)
y_test = np.asarray(y_test).reshape(-1)

In [None]:
import numpy as np

class GaussianNB:
    """Minimal Gaussian Naive Bayes classifier built on NumPy.

    Each feature is modelled, per class, as an independent normal distribution.
    ``fit`` estimates per-class means, variances and priors; ``predict``
    returns, for every row, the class with the highest joint log-probability.

    Parameters
    ----------
    var_smoothing : float, default=1e-9
        Constant added to every per-class feature variance so that a feature
        which is constant within a class does not produce a zero variance
        (and therefore a division by zero / log of zero).

    Attributes set by ``fit``
    -------------------------
    classes_ : ndarray of shape (n_classes,)
        Sorted unique labels seen in ``y``.
    theta_ : ndarray of shape (n_classes, n_features)
        Per-class feature means.
    var_ : ndarray of shape (n_classes, n_features)
        Per-class feature variances, including ``var_smoothing``.
    class_prior_ : ndarray of shape (n_classes,)
        Fraction of training rows belonging to each class.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate the per-class Gaussian parameters.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Numeric training data.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Class labels; a column vector is flattened.

        Returns
        -------
        self : GaussianNB
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or its row count differs from ``y``.
        """
        X = np.asarray(X, dtype=float)
        # A (n, 1) label column would otherwise be used as a 2-D boolean mask
        # and fail (or mis-select) when indexing X.
        y = np.asarray(y).reshape(-1)

        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-dimensional (n_samples, n_features), got ndim={X.ndim}"
            )
        if X.shape[0] == 0:
            raise ValueError("Cannot fit on an empty data set")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X has {X.shape[0]} rows but y has {y.shape[0]} labels"
            )

        self.classes_ = np.unique(y)
        n_features = X.shape[1]
        n_classes = len(self.classes_)

        self.theta_ = np.zeros((n_classes, n_features))
        self.var_ = np.zeros((n_classes, n_features))
        self.class_prior_ = np.zeros(n_classes)

        for i, c in enumerate(self.classes_):
            X_c = X[y == c]
            self.theta_[i] = np.mean(X_c, axis=0)
            self.var_[i] = np.var(X_c, axis=0) + self.var_smoothing
            self.class_prior_[i] = X_c.shape[0] / X.shape[0]

        return self

    def predict(self, X):
        """Return the most probable class label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Rows to classify; must have the feature count seen during ``fit``.

        Returns
        -------
        ndarray of shape (n_samples,)
            Predicted labels drawn from ``classes_``.

        Raises
        ------
        AttributeError
            If called before ``fit``.
        ValueError
            If ``X`` is not 2-D or has a different number of features.
        """
        if not hasattr(self, "theta_"):
            raise AttributeError(
                "This GaussianNB instance is not fitted yet; call fit() first."
            )

        X = np.asarray(X, dtype=float)
        n_features = self.theta_.shape[1]
        # Checked explicitly because a (n, 1) input would broadcast against the
        # stored means without raising and yield meaningless scores.
        if X.ndim != 2 or X.shape[1] != n_features:
            raise ValueError(
                f"X must have shape (n_samples, {n_features}), got {X.shape}"
            )

        joint = self._joint_log_likelihood(X)
        return self.classes_[np.argmax(joint, axis=1)]

    def _joint_log_likelihood(self, X):
        """Return log P(class) + log P(x | class) as (n_samples, n_classes)."""
        scores = np.empty((X.shape[0], len(self.classes_)))
        for i in range(len(self.classes_)):
            log_prior = np.log(self.class_prior_[i])
            # Normalisation term of the Gaussian density, summed over features.
            log_norm = -0.5 * np.sum(np.log(2 * np.pi * self.var_[i]))
            # Variance-scaled squared distance of each row to the class mean.
            quad = -0.5 * np.sum(((X - self.theta_[i]) ** 2) / self.var_[i], axis=1)
            scores[:, i] = log_prior + (log_norm + quad)
        return scores

In [None]:
import time

classifier = GaussianNB()

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short intervals. time.time() is a wall clock that can be too coarse for a
# sub-millisecond fit and can jump if the system clock is adjusted.
startTime = time.perf_counter()

classifier.fit(X_train, y_train)

endTime = time.perf_counter()

# Printed message is Turkish for "Elapsed time: ... seconds".
elapsedTime = endTime - startTime
print(f"Gecen sure: {elapsedTime:.6f} saniye")

In [None]:
import time

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short intervals; time.time() can be too coarse for a fast predict call.
startTime = time.perf_counter()

y_pred = classifier.predict(X_test)

endTime = time.perf_counter()

# Printed message is Turkish for "Elapsed time: ... seconds".
elapsedTime = endTime - startTime
print(f"Gecen sure: {elapsedTime:.6f} saniye")

# Side-by-side view: column 0 is the prediction, column 1 the true label.
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

In [None]:
import seaborn as sns
from sklearn.metrics import confusion_matrix, accuracy_score

# Score the held-out predictions.
accuracy = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
print(f"Accuracy: {accuracy}")

# Draw the matrix as an annotated heatmap on an explicit Axes.
# The tick labels are Turkish for "did not score" / "scored" and are used for
# both axes, in that order.
class_names = ['Gol atamadi', 'Gol atti']
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    cbar=False,
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

# Read the four cells of the confusion matrix. The plot labels rows as the true
# class and columns as the predicted class; index 1 is the positive class.
TN, FP = cm[0, 0], cm[0, 1]
FN, TP = cm[1, 0], cm[1, 1]

# Each ratio falls back to 0 when its denominator is zero, so no division by
# zero occurs.
precision = 0 if (TP + FP) == 0 else TP / (TP + FP)
recall = 0 if (TP + FN) == 0 else TP / (TP + FN)
f1 = 0 if (precision + recall) == 0 else 2 * (precision * recall) / (precision + recall)
specificity = 0 if (TN + FP) == 0 else TN / (TN + FP)
fpr = 0 if (FP + TN) == 0 else FP / (FP + TN)

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1}")
print(f"Specificity: {specificity}")
print(f"False Positive Rate: {fpr}")