In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn import tree
from sklearn import metrics
import matplotlib.pyplot as plt
import data_preprocess
import warnings

In [None]:
warnings.filterwarnings("ignore")

In [None]:
# Read the raw per-gender match tables (paths are relative to the notebook's
# working directory).
male_matches, female_matches = (
    pd.read_csv(csv_path)
    for csv_path in ("data/all_male_matches.csv", "data/all_female_matches.csv")
)

# Pass each raw table, together with its gender label, through the project's
# merge step. What the merge adds is defined in data_preprocess, not here.
male_df, female_df = (
    data_preprocess.merge_dataset_total(raw_matches, gender_label)
    for raw_matches, gender_label in ((male_matches, "male"), (female_matches, "female"))
)

## Neural Network

### Female

In [None]:
# Seed Python, NumPy and TensorFlow in one call so that weight initialisation
# and batch shuffling are repeatable under Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Feature matrix and target for the female matches; the split into X / y is
# done by the data_preprocess helper.
X, y = data_preprocess.get_features_target(female_df)
np_y = y.to_numpy()
# Two-column one-hot targets to pair with the 2-unit softmax output below.
# Assumes the labels are integer class ids in {0, 1} -- TODO confirm.
np_y_one_hot = tf.one_hot(np_y, 2).numpy()

# Hold out 15% of the rows as a test split; random_state pins the partition.
X_train, X_test, y_train, y_test = train_test_split(X, np_y_one_hot, test_size=0.15, random_state=42)


# One hidden layer of 8 ReLU units feeding a 2-way softmax. The input width is
# taken from the training matrix so it follows the number of feature columns.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(8, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(2, activation='softmax')
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Early stopping on Keras' default monitored quantity (val_loss): give up after
# 2 epochs without improvement and ask Keras to keep the best weights seen.
callbacks = [tf.keras.callbacks.EarlyStopping(patience=2, restore_best_weights=True)]

# validation_split carves 15% off the training rows to feed the early-stopping
# monitor; the remaining rows are used for the weight updates.
history = model.fit(X_train, y_train, epochs=10, callbacks=callbacks, validation_split=0.15)
accuracy = history.history['accuracy']
loss = history.history['loss']
# 1-based epoch axis; may be shorter than 10 when early stopping triggers.
epochs = range(1, len(accuracy) + 1)

# Score the held-out test split, which was otherwise created but never used,
# so the cell reports generalisation performance and not only training accuracy.
# evaluate() returns [loss, accuracy] for the metrics given to compile().
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print("Test accuracy:", test_accuracy)

In [None]:
# Draw the two training curves (accuracy, then loss), each on its own figure.
# Every tuple holds: values, line format, legend label / title, y-axis label.
for curve_values, line_format, caption, y_axis_label in (
    (accuracy, 'b', 'Training Accuracy', 'Accuracy'),
    (loss, 'r', 'Training Loss', 'Loss'),
):
    fig, ax = plt.subplots()
    ax.plot(epochs, curve_values, line_format, label=caption)
    ax.set_title(caption)
    ax.set_xlabel('Epochs')
    ax.set_ylabel(y_axis_label)
    ax.legend()
    plt.show()

### Male

In [None]:
# Re-seed Python, NumPy and TensorFlow so this model's weight initialisation
# and batch shuffling are repeatable regardless of what ran before this cell.
tf.keras.utils.set_random_seed(42)

# Feature matrix and target for the male matches; the split into X / y is
# done by the data_preprocess helper.
X, y = data_preprocess.get_features_target(male_df)
np_y = y.to_numpy()
# Two-column one-hot targets to pair with the 2-unit softmax output below.
# Assumes the labels are integer class ids in {0, 1} -- TODO confirm.
np_y_one_hot = tf.one_hot(np_y, 2).numpy()

# Hold out 15% of the rows as a test split; random_state pins the partition.
X_train, X_test, y_train, y_test = train_test_split(X, np_y_one_hot, test_size=0.15, random_state=42)


# One hidden layer of 8 ReLU units feeding a 2-way softmax. The input width is
# taken from the training matrix so it follows the number of feature columns.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(8, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(2, activation='softmax')
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Early stopping on Keras' default monitored quantity (val_loss): give up after
# 2 epochs without improvement and ask Keras to keep the best weights seen.
callbacks = [tf.keras.callbacks.EarlyStopping(patience=2, restore_best_weights=True)]

# validation_split carves 15% off the training rows to feed the early-stopping
# monitor; the remaining rows are used for the weight updates.
history = model.fit(X_train, y_train, epochs=10, callbacks=callbacks, validation_split=0.15)
accuracy = history.history['accuracy']
loss = history.history['loss']
# 1-based epoch axis; may be shorter than 10 when early stopping triggers.
epochs = range(1, len(accuracy) + 1)

# Score the held-out test split, which was otherwise created but never used,
# so the cell reports generalisation performance and not only training accuracy.
# evaluate() returns [loss, accuracy] for the metrics given to compile().
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print("Test accuracy:", test_accuracy)


In [None]:
# Draw the two training curves (accuracy, then loss), each on its own figure.
# Every tuple holds: values, line format, legend label / title, y-axis label.
for curve_values, line_format, caption, y_axis_label in (
    (accuracy, 'b', 'Training Accuracy', 'Accuracy'),
    (loss, 'r', 'Training Loss', 'Loss'),
):
    fig, ax = plt.subplots()
    ax.plot(epochs, curve_values, line_format, label=caption)
    ax.set_title(caption)
    ax.set_xlabel('Epochs')
    ax.set_ylabel(y_axis_label)
    ax.legend()
    plt.show()

## Decision tree

### Female

In [None]:
# Rebuild the feature matrix / target pair for the female matches so the
# decision-tree cells below do not depend on leftovers from the network section.
female_features_target = data_preprocess.get_features_target(female_df)
X, y = female_features_target

In [None]:
# Raw labels are used directly here (no one-hot encoding).
np_y = y.to_numpy()
# Hold out 15% of the rows as a test split; random_state pins the partition.
X_train, X_test, y_train, y_test = train_test_split(X, np_y, test_size=0.15, random_state=42)

# random_state pins the estimator's internal randomness (feature permutation
# and tie-breaking between equally good splits), so the fitted tree, its
# accuracy and its feature importances are the same on every run.
clf = tree.DecisionTreeClassifier(random_state=42)
clf = clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Fraction of held-out rows whose predicted label matches the true label.
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

In [None]:
# Importance score per input feature, as reported by the fitted tree.
importances = clf.feature_importances_
bar_positions = range(len(importances))

# One horizontal bar per feature, labelled with the column names of X
# (X's column order is the order the tree was fitted on).
fig, ax = plt.subplots()
ax.set_title('Feature importances')
ax.set_xlabel('Importance')
ax.set_ylabel('Features')
ax.barh(bar_positions, importances)
ax.set_yticks(bar_positions)
ax.set_yticklabels(X.columns)
plt.show()

### Male

In [None]:
# Rebuild the feature matrix / target pair for the male matches so the
# decision-tree cells below do not reuse the female data loaded earlier.
male_features_target = data_preprocess.get_features_target(male_df)
X, y = male_features_target

In [None]:
# Raw labels are used directly here (no one-hot encoding).
np_y = y.to_numpy()
# Hold out 15% of the rows as a test split; random_state pins the partition.
X_train, X_test, y_train, y_test = train_test_split(X, np_y, test_size=0.15, random_state=42)

# random_state pins the estimator's internal randomness (feature permutation
# and tie-breaking between equally good splits), so the fitted tree, its
# accuracy and its feature importances are the same on every run.
clf = tree.DecisionTreeClassifier(random_state=42)
clf = clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Fraction of held-out rows whose predicted label matches the true label.
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

In [None]:
# Importance score per input feature, as reported by the fitted tree.
importances = clf.feature_importances_
bar_positions = range(len(importances))

# One horizontal bar per feature, labelled with the column names of X
# (X's column order is the order the tree was fitted on).
fig, ax = plt.subplots()
ax.set_title('Feature importances')
ax.set_xlabel('Importance')
ax.set_ylabel('Features')
ax.barh(bar_positions, importances)
ax.set_yticks(bar_positions)
ax.set_yticklabels(X.columns)
plt.show()