In [None]:
import pandas as pd
import numpy as np 

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, log_loss

from tqdm import tqdm

# Notebook display settings: show every column of wide DataFrames
# (None removes the column limit) ...
pd.set_option('display.max_columns',None)
# ... but cap the number of rows rendered at 50.
pd.set_option('display.max_rows',50)

In [None]:
# Root folder of the data set, defined once so it can be changed in a single
# place. NOTE(review): this is an absolute, machine-specific path; point it at
# your own copy of the data before running the notebook.
DATA_DIR = '/Users/aurelientarroux/Desktop/Projet_prog/Projet_1/Data'

# Team statistics, one file per side of the match.
df_home = pd.read_csv(f'{DATA_DIR}/Train_Data/train_home_team_statistics_df.csv')
df_away = pd.read_csv(f'{DATA_DIR}/Train_Data/train_away_team_statistics_df.csv')

# Prefix every column with its side so the two tables can be merged without
# name clashes (ID becomes home_ID / away_ID).
df_home.columns = 'home_' + df_home.columns
df_away.columns = 'away_' + df_away.columns

# Match outcomes, keyed by ID.
df_result = pd.read_csv(f'{DATA_DIR}/Y_train_1rknArQ.csv')

# One row per match: home statistics + away statistics + outcome columns.
base_match = pd.merge(df_home, df_away, left_on='home_ID', right_on='away_ID', how='inner')
base = pd.merge(base_match, df_result, left_on='home_ID', right_on='ID', how='inner')

# Drop the duplicated keys and the non-numeric identifier columns.
base = base.drop(
    columns=['home_ID', 'home_LEAGUE', 'home_TEAM_NAME',
             'away_ID', 'away_LEAGUE', 'away_TEAM_NAME'],
)

def resultat(row):
    """Return the outcome label of one match.

    Parameters
    ----------
    row : mapping-like
        Must support ``row['HOME_WINS']``; ``row['AWAY_WINS']`` is only read
        when the home side did not win.

    Returns
    -------
    str
        'HOME' if ``HOME_WINS`` equals 1, otherwise 'AWAY' if ``AWAY_WINS``
        equals 1, otherwise 'DRAW'.
    """
    # The home flag takes priority and short-circuits the away lookup.
    if row['HOME_WINS'] == 1:
        return 'HOME'
    return 'AWAY' if row['AWAY_WINS'] == 1 else 'DRAW'

# Collapse the outcome columns into a single label. np.select keeps the first
# matching condition, so HOME_WINS has priority over AWAY_WINS and every other
# row is labelled 'DRAW' (the same rule as `resultat`, but vectorized).
base['Resultat'] = np.select(
    [base['HOME_WINS'] == 1, base['AWAY_WINS'] == 1],
    ['HOME', 'AWAY'],
    default='DRAW',
)

# Remove the now-redundant outcome columns, then every row with a missing value.
base = base.drop(columns=['HOME_WINS', 'AWAY_WINS', 'DRAW']).dropna()

X = base.drop(columns=['Resultat', 'ID'])
y = base['Resultat']

# One-hot encode the target as 0/1 integers. get_dummies returns the columns in
# sorted label order, so after renaming: 0 = AWAY, 1 = DRAW, 2 = HOME.
# (`astype(int)` replaces the deprecated `DataFrame.applymap`.)
encoded_y = pd.get_dummies(y).astype(int)
encoded_y.columns = [0, 1, 2]

# Split BEFORE scaling: the scaler must only see the training rows, otherwise
# the test set's mean/variance leak into the features the model is trained on.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, encoded_y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the training statistics; kept under its
# original name in case another cell relies on it.
X_standardized = scaler.transform(X)

# Give y_train a fresh 0..n-1 index so it lines up row by row with X_train.
y_train = y_train.reset_index(drop=True)


In [None]:
def initialisation(X, n_classes=3):
    """Draw random initial parameters for a softmax (multinomial logistic) model.

    Parameters
    ----------
    X : array-like with a ``shape`` attribute
        Feature matrix; only ``X.shape[1]`` (the number of features) is used.
    n_classes : int, default 3
        Number of output classes. The default matches the three match
        outcomes used in this notebook.

    Returns
    -------
    W : numpy.ndarray of shape (n_features, n_classes)
        Weights drawn from a standard normal distribution.
    b : pandas.Series of length n_classes
        Biases drawn from a standard normal distribution.

    Notes
    -----
    Uses NumPy's global random generator; call ``np.random.seed`` beforehand
    for reproducible values. W is drawn before b.
    """
    n_features = X.shape[1]
    W = np.random.randn(n_features, n_classes)
    b = pd.Series(np.random.randn(n_classes))
    return W, b


def model(X, W, b):
    """Compute softmax class probabilities for every row of X.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix (NumPy array or DataFrame), any number of rows.
    W : array-like of shape (n_features, n_classes)
        Weight matrix.
    b : array-like of length n_classes
        Bias vector (a pandas Series or a NumPy array).

    Returns
    -------
    pandas.DataFrame of shape (n_samples, n_classes)
        Row-wise softmax of ``X @ W + b``; every row sums to 1. The index is
        X's index when X is a DataFrame, otherwise a default range index.
    """
    # Broadcasting adds the bias to each row, so no row count is hard-coded.
    Z = np.asarray(X, dtype=float) @ np.asarray(W, dtype=float) + np.asarray(b, dtype=float)

    # Softmax is unchanged by subtracting a per-row constant; removing the row
    # maximum keeps exp() from overflowing on large scores.
    Z = Z - Z.max(axis=1, keepdims=True)
    exp_Z = np.exp(Z)

    # Normalise every row by its own total, computed once from the
    # un-normalised values.
    A = exp_Z / exp_Z.sum(axis=1, keepdims=True)

    index = X.index if isinstance(X, pd.DataFrame) else None
    return pd.DataFrame(A, index=index)

def gradients(A, X, y):
    """Gradients of the mean cross-entropy loss for softmax regression.

    Parameters
    ----------
    A : array-like of shape (n_samples, n_classes)
        Predicted class probabilities.
    X : array-like of shape (n_samples, n_features)
        Feature matrix the probabilities were computed from.
    y : array-like of shape (n_samples, n_classes)
        One-hot encoded targets, in the same row order as A and X.

    Returns
    -------
    dW : numpy.ndarray of shape (n_features, n_classes)
        Gradient with respect to the weights.
    db : pandas.Series of length n_classes
        Gradient with respect to the biases.

    Raises
    ------
    ZeroDivisionError
        If ``y`` is empty.
    """
    # Average over the samples actually passed in, not a notebook global.
    scale = 1 / len(y)

    # Compare on raw arrays so rows are matched by position; subtracting two
    # DataFrames would align on index labels and yield NaN for any mismatch.
    error = np.asarray(A, dtype=float) - np.asarray(y, dtype=float)

    dW = scale * np.dot(np.asarray(X).T, error)
    db = pd.Series(scale * error.sum(axis=0))
    return (dW, db)

def update(dW, db, W, b, learning_rate):
    """Apply one gradient-descent step to the weights and the biases.

    Each parameter is moved against its gradient:
    ``param - learning_rate * grad``.

    Returns
    -------
    tuple
        ``(W, b)`` after the step; the inputs are not modified in place.
    """
    return tuple(
        param - learning_rate * grad
        for param, grad in ((W, dW), (b, db))
    )

def predict(X, W, b):
    """Flag every class probability that reaches the 0.5 threshold.

    Runs `model` on the inputs and compares each probability with 0.5.

    Returns
    -------
    Element-wise boolean result of ``probabilities >= 0.5``, with the same
    shape as the output of `model`.

    NOTE(review): with more than two classes a row can contain no True value
    at all (no probability reaches 0.5); an argmax is needed to obtain a
    single predicted class per row.
    """
    threshold = 0.5
    probabilities = model(X, W, b)
    return probabilities >= threshold

In [None]:
def artificial_neuron(X_train, y_train, learning_rate = 0.1, n_iter = 100):
    """Train the softmax model with batch gradient descent.

    Parameters
    ----------
    X_train : feature matrix passed to `initialisation`, `model` and `gradients`.
    y_train : one-hot targets passed to `gradients` and `log_loss`.
    learning_rate : step size used by `update`.
    n_iter : number of gradient-descent iterations.

    Returns
    -------
    tuple
        The parameters ``(W, b)`` after the last iteration.

    Side effects
    ------------
    Shows a tqdm progress bar and prints the log-loss once per iteration. The
    printed loss is computed from the probabilities obtained *before* that
    iteration's parameter update.
    """
    W, b = initialisation(X_train)

    for _ in tqdm(range(n_iter)):
        # Forward pass with the current parameters.
        probabilities = model(X_train, W, b)
        # Backward pass and parameter step in one go.
        W, b = update(*gradients(probabilities, X_train, y_train), W, b, learning_rate)
        print(log_loss(y_train, probabilities))

    return W, b

In [None]:
# The initial weights are drawn from NumPy's global random generator; seed it
# so that a "Restart & Run All" reproduces the same training run and scores.
np.random.seed(42)

# Train the softmax model: 1000 gradient-descent iterations, step size 0.1.
W, b = artificial_neuron(X_train, y_train, learning_rate = 0.1, n_iter=1000)

In [None]:
# Raw class scores for the test set. Despite the name these are un-normalised
# scores, not probabilities; softmax is monotonic, so the argmax is the same.
# Broadcasting adds the bias to every row, whatever the size of the test set.
y_proba = np.asarray(X_test.dot(W)) + np.asarray(b)

# Predicted class = index of the highest score in each row.
predicted_class = y_proba.argmax(axis=1)
y_pred = predicted_class.tolist()

# One-hot encode the predictions with a fixed number of columns. Building the
# matrix from an identity matrix keeps one column per class even when a class
# is never predicted (get_dummies would silently drop that column).
n_classes = y_proba.shape[1]
encoded_y_pred = pd.DataFrame(
    np.eye(n_classes, dtype=int)[predicted_class],
    columns=range(n_classes),
)

# Share of test matches whose predicted outcome equals the true outcome
# (arguments in scikit-learn's conventional order: y_true, y_pred).
accuracy_score(y_test, encoded_y_pred)