In [None]:
import requests
import pandas as pd
# Download the raw dataset file from OpenML.
url = "https://www.openml.org/data/download/22102255/dataset"
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
r = requests.get(url, allow_redirects=True, timeout=60)
# Fail loudly on HTTP errors instead of silently saving an error page as the dataset.
r.raise_for_status()

In [None]:
# Persist the downloaded payload byte-for-byte; later cells re-open it as text.
with open("data.txt", "wb") as raw_file:
    raw_file.write(r.content)

In [None]:
# Read the whole file once and split it into lines.
with open("data.txt", "r") as arff_file:
    raw_lines = arff_file.read().split("\n")

# Keep only data rows: drop "@..." declaration lines, "%" comment lines and blank lines.
data = [
    row
    for row in raw_lines
    if row != "" and not row.startswith(("@", "%"))
]

In [None]:
# Collect the attribute (column) names from the "@ATTRIBUTE <name> <type>" header lines.
columns = []

with open("data.txt", "r") as f:
    for line in f.read().split("\n"):
        # ARFF declaration keywords are case-insensitive, so "@attribute" must match too.
        if line.lower().startswith("@attribute"):
            # split() without a separator tolerates tabs and repeated spaces
            # between the keyword and the attribute name.
            columns.append(line.split()[1])
In [None]:
# Build a CSV file: one header line with the column names, then the data rows.
header_row = ",".join(columns)
body_rows = "\n".join(data)

with open("df.csv", "w") as csv_file:
    csv_file.write(header_row + "\n" + body_rows)

In [None]:
# Load the generated CSV; its first line is the header written from `columns`.
df = pd.read_csv("df.csv")
# Set the column labels explicitly from the parsed attribute names.
df.columns = columns

In [None]:
# Encode the round winner as an integer target column `t_win` using pandas
# category codes.
# NOTE(review): presumably two classes mapping to 0 and 1 — confirm which label gets which code.
winner_as_category = df["round_winner"].astype("category")
df["t_win"] = winner_as_category.cat.codes

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Correlation matrix over the numeric/boolean columns only.
# Non-numeric columns are excluded explicitly: recent pandas versions raise
# ValueError when corr() meets string data, and swallowing that error would
# leave `correlations` undefined (or stale from an earlier run).
numeric_df = df[columns + ["t_win"]].select_dtypes(include=["number", "bool"])
correlations = numeric_df.corr()

# Show the 25 columns most strongly correlated (in absolute value) with the target.
print(correlations["t_win"].abs().sort_values(ascending=False).iloc[:25])

In [None]:
# Keep every column (including the target itself) whose absolute correlation
# with `t_win` exceeds 0.15. Columns absent from the correlation matrix are skipped.
candidate_columns = columns + ["t_win"]
selected_columns = [
    col
    for col in candidate_columns
    if col in correlations.columns and abs(correlations[col]["t_win"]) > 0.15
]

df_selected = df[selected_columns]

In [None]:
# Heatmap of the correlations between the selected columns,
# with rows ordered by their correlation with the target.
fig, ax = plt.subplots(figsize=(18, 12))
sns.heatmap(
    df_selected.corr().sort_values(by="t_win"),
    annot=True,
    cmap="YlGnBu",
    ax=ax,  # draw on the axes created above instead of relying on the implicit current axes
)
ax.set_title("Correlation matrix of selected features (rows sorted by correlation with t_win)");

In [None]:
# Plot a histogram for each selected column; the trailing ";" hides the returned axes repr.
df_selected.hist(figsize=(18, 12));

In [None]:
from sklearn.model_selection import train_test_split

# Separate the feature columns from the target column.
X = df_selected.drop(["t_win"], axis=1)
y = df_selected["t_win"]

# Hold out 20% of the rows as a test set.
# A fixed random_state makes the split (and every score below) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

### Using KNeighborsClassifier 

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier 

# Standardize the features. The scaler is fitted on the training split only and
# then applied unchanged to the test split, so no test statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Baseline k-nearest-neighbours classifier with default hyperparameters.
knn = KNeighborsClassifier()
knn.fit(X_train_scaled, y_train);

In [None]:
# Accuracy of the current `knn` model on the held-out (scaled) test split.
knn.score(X_test_scaled, y_test)

In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Hyperparameter space: odd neighbour counts 5..15 and both weighting schemes.
param_grid = {
    "n_neighbors": list(range(5, 17, 2)),
    "weights": ["uniform", "distance"],
}

knn = KNeighborsClassifier(n_jobs=4)

# Try 3 randomly sampled configurations with 3-fold cross-validation.
# random_state fixes which configurations are sampled, so the search is reproducible.
clf = RandomizedSearchCV(
    knn,
    param_grid,
    n_jobs=4,
    n_iter=3,
    verbose=2,
    cv=3,
    random_state=42,
)
clf.fit(X_train_scaled, y_train);

In [None]:
# Take the best estimator found by the randomized search and
# score it on the held-out (scaled) test split.
knn = clf.best_estimator_
knn.score(X_test_scaled, y_test)

### Using RandomForestClassifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with default hyperparameters, trained on the same scaled features
# as the other models. random_state makes the fitted forest reproducible.
forest = RandomForestClassifier(random_state=42)
forest.fit(X_train_scaled, y_train);

In [None]:
# Accuracy of the random forest on the held-out (scaled) test split.
forest.score(X_test_scaled, y_test)

### Using Neural Network with Tensorflow

In [None]:
from tensorflow import keras

# Derive the input width from the data instead of hard-coding it: the number of
# selected features depends on the correlation threshold used for feature selection.
n_features = X_train_scaled.shape[1]

# Fully connected binary classifier: three ReLU hidden layers and one sigmoid output unit.
model = keras.models.Sequential()
model.add(keras.layers.Input(shape=(n_features,)))
model.add(keras.layers.Dense(200, activation="relu"))
model.add(keras.layers.Dense(100, activation="relu"))
model.add(keras.layers.Dense(100, activation="relu"))
model.add(keras.layers.Dense(1, activation="sigmoid"))

In [None]:
# Configure training: binary cross-entropy loss, Adam optimizer, and accuracy reported as a metric.
model.compile(loss = "binary_crossentropy", optimizer = "adam", metrics = ["accuracy"])

In [None]:
# Stop when the monitored validation metric has not improved for 5 epochs, and roll
# the model back to its best weights rather than keeping the last (worse) epochs.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)

# Carve a validation set (15%) out of the training data; the test set stays untouched.
# random_state keeps the train/validation partition reproducible.
X_train_scaled_train, X_valid, y_train_train, y_valid = train_test_split(
    X_train_scaled,
    y_train,
    test_size=0.15,
    random_state=42,
)

# Keep the History object so the learning curves can be inspected afterwards.
history = model.fit(
    X_train_scaled_train,
    y_train_train,
    epochs=30,
    callbacks=[early_stopping_cb],
    validation_data=(X_valid, y_valid),
)

In [None]:
# Evaluate the network on the held-out (scaled) test split; reports the loss and the accuracy metric.
model.evaluate(X_test_scaled, y_test)