In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from time import time
import pickle

LOGISTIC REGRESSION

In [3]:
def sigmoid(z):
    """Numerically stable logistic sigmoid, ``1 / (1 + exp(-z))``.

    Only ``exp(-|z|)`` is evaluated, so the exponential's argument is never
    positive and cannot overflow for any floating-point dtype. ``np.where``
    always evaluates both of its branches in full, so a formulation that
    computes ``exp(z)`` and ``exp(-z)`` separately still exponentiates large
    positive values in the branch that is later discarded (which overflows
    for float32 input even after clipping to +/-500).

    Args:
        z: Scalar or array-like of logits.

    Returns:
        ``np.ndarray`` shaped like ``z`` with values in ``[0, 1]``.
    """
    z = np.clip(z, -500, 500)  # kept so saturated outputs match earlier behaviour
    decay = np.exp(-np.abs(z))  # always within [0, 1]
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- both expressed via `decay`.
    return np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))

def cross_entropy(y, y_pred):
    """Mean binary cross-entropy between targets ``y`` and probabilities ``y_pred``.

    Predictions are clipped to ``[1e-15, 1 - 1e-15]`` before taking logs so
    that exact 0 or 1 probabilities do not produce ``log(0)``.

    Args:
        y: Array of 0/1 targets.
        y_pred: Array of predicted probabilities, same shape as ``y``.

    Returns:
        The averaged negative log-likelihood as a scalar.
    """
    eps = 1e-15
    clipped = np.clip(y_pred, eps, 1 - eps)
    positive_term = y * np.log(clipped)
    negative_term = (1 - y) * np.log(1 - clipped)
    return -np.mean(positive_term + negative_term)

def compute_gradients(X, y, y_pred):
    """Gradient of the mean cross-entropy loss with respect to the weights.

    Args:
        X: Feature matrix with one row per sample.
        y: Target values, one per sample.
        y_pred: Predicted probabilities, one per sample.

    Returns:
        ``X.T @ (y_pred - y)`` divided by the number of samples.
    """
    n_samples = len(y)
    residual = y_pred - y
    return np.dot(X.T, residual) / n_samples

def initialize_weights(size):
    """Draw ``size`` random initial weights from a zero-mean normal distribution.

    The standard deviation is ``sqrt(2 / (size + 1))``. Samples come from
    NumPy's global random state (``np.random.randn``).

    Args:
        size: Number of weights to generate.

    Returns:
        1-D ``np.ndarray`` of length ``size``.
    """
    scale = np.sqrt(2 / (size + 1))
    draws = np.random.randn(size)
    return draws * scale

class LogisticRegression:
    """Binary logistic regression fitted with full-batch gradient descent.

    The model is ``sigmoid(X @ weights)``; no intercept term is learned.

    Attributes:
        lr: Gradient-descent step size.
        epochs: Number of weight updates performed by ``fit``.
        weights: Learned coefficient vector; ``None`` until ``fit`` is called.
        losses: Cross-entropy recorded before each update of the last ``fit``.
    """

    def __init__(self, lr=0.01, epochs=1000):
        self.lr = lr
        self.epochs = epochs
        self.weights = None
        self.losses = []

    def fit(self, X_train, y_train):
        """Learn the weights from ``X_train`` / ``y_train``.

        Inputs are converted to float NumPy arrays once, up front. Labels are
        flattened to 1-D so that an ``(n, 1)`` column cannot silently
        broadcast against the ``(n,)`` predictions into an ``(n, n)`` matrix.

        Args:
            X_train: 2-D array-like, one row per sample.
            y_train: Array-like of 0/1 targets, one per sample.

        Returns:
            ``self``, to allow call chaining.

        Raises:
            ValueError: If ``X_train`` is not 2-D or the sample counts differ.
        """
        features = np.asarray(X_train, dtype=float)
        targets = np.asarray(y_train, dtype=float).ravel()
        if features.ndim != 2:
            raise ValueError("X_train must be 2-D (n_samples, n_features)")
        if features.shape[0] != targets.shape[0]:
            raise ValueError("X_train and y_train must contain the same number of samples")

        self.weights = initialize_weights(features.shape[1])
        self.losses = []

        for _ in range(self.epochs):
            y_pred = sigmoid(np.dot(features, self.weights))
            # The loss is logged before the update, so losses[i] belongs to
            # the weights that were in place at the start of epoch i.
            self.losses.append(cross_entropy(targets, y_pred))
            self.weights -= self.lr * compute_gradients(features, targets, y_pred)
        return self

    def predict_probabilities(self, X):
        """Return the predicted probability of the positive class for each row of ``X``.

        Raises:
            AttributeError: If the model has not been fitted yet.
        """
        weights = getattr(self, "weights", None)
        if weights is None:
            raise AttributeError("LogisticRegression has no weights yet; call fit() first")
        return sigmoid(np.dot(np.asarray(X, dtype=float), weights))

    def predict(self, X, threshold=0.5):
        """Return a boolean array: ``True`` where the probability is ``>= threshold``."""
        return self.predict_probabilities(X) >= threshold

KNN

In [4]:
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two equally shaped vectors."""
    delta = x1 - x2
    return np.sqrt(np.sum(np.square(delta)))
class KNN:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote.

    Class labels must be non-negative integers because votes are tallied with
    ``np.bincount``.
    """

    def __init__(self, k=5):
        # Number of neighbours that take part in each vote.
        self.k = k

    def fit(self, X_train, y_train):
        """Memorise the training set.

        Both inputs are stored as NumPy arrays. This matters for the labels:
        indexing a pandas Series with an integer is a label-based lookup, so
        the positional neighbour indices computed at prediction time would
        otherwise fetch the wrong rows (or raise ``KeyError``) whenever the
        Series index is not ``0..n-1``, e.g. after a shuffled train/test split.

        Args:
            X_train: 2-D array-like, one row per training sample.
            y_train: Array-like of non-negative integer labels, one per row.

        Returns:
            ``self``, to allow call chaining.

        Raises:
            ValueError: If ``X_train`` is not 2-D or the sample counts differ.
        """
        features = np.asarray(X_train, dtype=float)
        labels = np.asarray(y_train).ravel()
        if features.ndim != 2:
            raise ValueError("X_train must be 2-D (n_samples, n_features)")
        if features.shape[0] != labels.shape[0]:
            raise ValueError("X_train and y_train must contain the same number of samples")
        self.X_train = features
        self.y_train = labels
        return self

    def _neighbor_labels(self, x):
        """Return the labels of the (at most) ``k`` training samples nearest to one sample."""
        # Coerce again so instances whose attributes hold pandas objects
        # (assigned directly or restored from an older pickle) still get
        # positional lookups.
        features = np.asarray(self.X_train, dtype=float)
        labels = np.asarray(self.y_train).ravel()
        # Squared distances rank neighbours exactly like true distances, and a
        # single vectorised pass replaces one Python call per training row.
        squared_distances = np.sum((features - x) ** 2, axis=1)
        nearest = np.argsort(squared_distances, kind="stable")[:self.k]
        return labels[nearest]

    @staticmethod
    def _as_query(x):
        """Convert a query to a float array and reject anything but 1-D or 2-D input."""
        query = np.asarray(x, dtype=float)
        if query.ndim not in (1, 2):
            raise ValueError("x must be a single sample (1-D) or a batch of samples (2-D)")
        return query

    def predict(self, x):
        """Predict the majority label among the nearest neighbours.

        Args:
            x: One sample (1-D) or a batch of samples (2-D, one per row).

        Returns:
            A single label for 1-D input, or an array of labels for 2-D input.
        """
        query = self._as_query(x)
        if query.ndim == 2:
            return np.array([self.predict(row) for row in query])
        return np.bincount(self._neighbor_labels(query)).argmax()

    def predict_probabilities(self, x):
        """Return the fraction of nearest neighbours that voted for each class.

        Args:
            x: One sample (1-D) or a batch of samples (2-D, one per row).

        Returns:
            For 1-D input, an array of length ``max(y_train) + 1`` whose entry
            ``c`` is the share of neighbours labelled ``c``; for 2-D input, one
            such row per sample.
        """
        query = self._as_query(x)
        if query.ndim == 2:
            return np.array([self.predict_probabilities(row) for row in query])
        votes = self._neighbor_labels(query)
        n_classes = int(np.max(self.y_train)) + 1
        # Divide by the number of neighbours actually found so the result
        # still sums to 1 when k exceeds the training-set size.
        return np.bincount(votes, minlength=n_classes) / len(votes)

ÎNCĂRCARE SETURI DE DATE DE ANTRENARE

In [5]:
# Load the rounds table and drop the per-player `team_name` / `name` columns
# for players 1-10; everything except `round_winner` is used as a feature.
df = pd.read_csv('../data/cleaned_rounds_data.csv')
df = df.drop(columns=[f'player_{i}_{suffix}' for i in range(1, 11) for suffix in ['team_name', 'name']])
X = df.drop(['round_winner'], axis=1)
y = df['round_winner']

# Split BEFORE scaling: the scaler's mean/variance must come from the training
# rows only, otherwise statistics of the held-out rows leak into training.
# The split itself (seed, stratification, row membership) is unchanged.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X = scaler.transform(X)  # X remains the scaled full feature matrix for any later use
ds_name = "rounds_data"

In [6]:
# Persist the fitted scaler next to the models so the same feature scaling
# can be re-applied when the models are loaded later.
with open('scaler.pkl', mode='wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

In [7]:
def train_and_save_logistic_regression(X_train, y_train):
    """Fit this notebook's ``LogisticRegression`` (lr=0.3, 2000 epochs) and pickle it.

    The fitted model is written to ``logistic_regression_model.pkl`` in the
    current working directory and a confirmation line is printed.
    """
    classifier = LogisticRegression(lr=0.3, epochs=2000)
    classifier.fit(X_train, y_train)
    with open('logistic_regression_model.pkl', mode='wb') as model_file:
        pickle.dump(classifier, model_file)
    print("Logistic Regression model saved.")

In [8]:
def train_and_save_neural_network(X_train, y_train, X_test, y_test):
    """Train a dense binary classifier and save it to ``neural_network_model.h5``.

    Architecture: three ReLU layers (128, 64, 32 units), each with L2 weight
    regularisation (0.001) and followed by 30% dropout, then a single sigmoid
    output unit. Optimised with Adam (learning rate 0.0005) on binary
    cross-entropy, batch size 64, for at most 1000 epochs.

    Training stops once ``val_loss`` has not improved for 10 epochs, and the
    weights from the best epoch are restored before saving. Without
    ``restore_best_weights`` the saved model would carry the weights of the
    last epoch run, i.e. ``patience`` epochs past the best one.

    NOTE(review): ``X_test`` / ``y_test`` drive early stopping, so they act as
    a validation set; scores later reported on the same data will be
    optimistic. Consider carving a validation split out of the training data.

    Args:
        X_train: Training feature matrix.
        y_train: Training targets (0/1).
        X_test: Feature matrix used as validation data.
        y_test: Targets used as validation data.
    """
    model = Sequential()
    model.add(Input(shape=(X_train.shape[1],)))
    for units in (128, 64, 32):
        model.add(Dense(units, activation='relu', kernel_regularizer=l2(0.001)))
        model.add(Dropout(0.3))
    model.add(Dense(1, activation='sigmoid'))

    optimizer = Adam(learning_rate=0.0005)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    es = EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=10,
                       restore_best_weights=True)
    model.fit(X_train, y_train, batch_size=64, validation_data=(X_test, y_test),
              epochs=1000, callbacks=[es], verbose=0)
    model.save('neural_network_model.h5')
    print("Neural Network model saved.")

In [9]:
def train_and_save_xgboost(X_train, y_train):
    """Fit an ``XGBClassifier`` with fixed hyper-parameters and pickle it.

    The fitted model is written to ``xgboost_model.pkl`` in the current
    working directory and a confirmation line is printed.
    """
    hyperparams = dict(max_depth=9, learning_rate=0.05, gamma=0.2, reg_lambda=0.2, n_estimators=146)
    booster = XGBClassifier(**hyperparams)
    booster.fit(X_train, y_train)
    with open('xgboost_model.pkl', mode='wb') as model_file:
        pickle.dump(booster, model_file)
    print("XGBoost model saved.")

In [10]:
def train_and_save_adaboost(X_train, y_train):
    """Fit an ``AdaBoostClassifier`` over depth-9 decision trees and pickle it.

    Uses the SAMME algorithm with 22 estimators and learning rate 0.1. The
    fitted model is written to ``adaboost_model.pkl`` in the current working
    directory and a confirmation line is printed.
    """
    base_tree = DecisionTreeClassifier(max_depth=9)
    ensemble = AdaBoostClassifier(
        algorithm="SAMME",
        estimator=base_tree,
        learning_rate=0.1,
        n_estimators=22,
    )
    ensemble.fit(X_train, y_train)
    with open('adaboost_model.pkl', mode='wb') as model_file:
        pickle.dump(ensemble, model_file)
    print("AdaBoost model saved.")

In [11]:
def train_and_save_knn(X_train, y_train):
    """Fit this notebook's ``KNN`` classifier with ``k=45`` and pickle it.

    The fitted model is written to ``knn_model.pkl`` in the current working
    directory and a confirmation line is printed.
    """
    neighbours_model = KNN(k=45)
    neighbours_model.fit(X_train, y_train)
    with open('knn_model.pkl', mode='wb') as model_file:
        pickle.dump(neighbours_model, model_file)
    print("KNN model saved.")

In [12]:
def train_and_save_random_forest(X_train, y_train, random_state=42):
    """Fit a ``RandomForestClassifier`` with fixed hyper-parameters and pickle it.

    The forest uses 150 trees, the ``log_loss`` split criterion, a maximum
    depth of 40 and ``min_samples_split=3``, and trains on all available
    cores. The fitted model is written to ``random_forest_model.pkl`` in the
    current working directory and a confirmation line is printed.

    Args:
        X_train: Training feature matrix.
        y_train: Training targets.
        random_state: Seed for the forest's bootstrap and feature sampling.
            Defaults to 42 (the seed already used for the train/test split)
            so re-running the notebook reproduces the same saved model; pass
            ``None`` for an unseeded forest.
    """
    model = RandomForestClassifier(
        criterion="log_loss",
        n_estimators=150,
        max_depth=40,
        min_samples_split=3,
        n_jobs=-1,
        random_state=random_state,
    )
    model.fit(X_train, y_train)
    with open('random_forest_model.pkl', 'wb') as f:
        pickle.dump(model, f)
    print("Random Forest model saved.")

In [13]:
# Train every model on the shared split and persist each one to disk, in the
# same order as before. The neural network is the only trainer that also
# takes the held-out split (used as its validation data).
train_and_save_logistic_regression(X_train, y_train)
train_and_save_neural_network(X_train, y_train, X_test, y_test)
for train_and_save in (
    train_and_save_xgboost,
    train_and_save_adaboost,
    train_and_save_knn,
    train_and_save_random_forest,
):
    train_and_save(X_train, y_train)

Logistic Regression model saved.




Neural Network model saved.
XGBoost model saved.
AdaBoost model saved.
KNN model saved.
Random Forest model saved.
