In [27]:
import numpy as np
from sklearn.linear_model import LogisticRegression

import mlflow
import mlflow.sklearn

if __name__ == "__main__":
    # Toy dataset: six samples, one feature each (reshaped to a column vector).
    X = np.array([-2, -1, 0, 1, 2, 1]).reshape(-1, 1)
    y = np.array([0, 0, 1, 1, 1, 0])

    lr = LogisticRegression()
    lr.fit(X, y)
    # Scored on the same data the model was fit on (there is no held-out set here).
    score = lr.score(X, y)
    print("Score: %s" % score)

    # Reuse a run that is already active, otherwise start one explicitly.
    # The try/finally guarantees the run is closed even when logging fails,
    # so a failed cell does not leave a dangling active run behind.
    run = mlflow.active_run() or mlflow.start_run()
    try:
        mlflow.log_metric("score", score)
        mlflow.sklearn.log_model(lr, "model")
        # `run_id` replaces the deprecated `run_uuid` attribute.
        print("Model saved in run %s" % run.info.run_id)
    finally:
        mlflow.end_run()

Score: 0.6666666666666666
Model saved in run a6f46f9116484e0197b06d795914d186


## Testing Random Forest Model

In [33]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [None]:
# Select the MLflow experiment used by the MLflow calls that follow
experiment_name = "RF 100 Games"
mlflow.set_experiment(experiment_name=experiment_name)

In [34]:
# Load Games_1_100.csv; the path is relative to the current working directory
df = pd.read_csv(filepath_or_buffer='Games_1_100.csv')

In [83]:
# Target is the 'home_win' column; features are every remaining column
# except the 'game_id' identifier.
y = df['home_win']
X = df.drop(columns=['game_id', 'home_win'])

In [84]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=10,
)

In [85]:
# Standardize the features: the scaler is fitted on the training split only
# and then applied unchanged to both splits.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler().fit(x_train)
x_train2 = sc.transform(x_train)
x_test2 = sc.transform(x_test)

In [86]:
# Fit a random forest on the scaled training split and report its
# confusion matrix and classification report on the scaled test split.
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

name = 'RandomForestClassifier'
method = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=10)

method.fit(x_train2, y_train)
predict = method.predict(x_test2)
target_names = ['loss', 'win']

print(confusion_matrix(y_test, predict))
print(classification_report(y_test, predict, target_names=target_names))

[[5 1]
 [5 9]]
              precision    recall  f1-score   support

        loss       0.50      0.83      0.62         6
         win       0.90      0.64      0.75        14

    accuracy                           0.70        20
   macro avg       0.70      0.74      0.69        20
weighted avg       0.78      0.70      0.71        20



## Trial 2 - Comparing accuracy metrics (unscaled features, cross-validation)

In [54]:
# Rebuild the feature matrix and target from the loaded frame:
# 'home_win' is the label, 'game_id' is dropped from the features.
X = df.drop(columns=['game_id', 'home_win'])
y = df['home_win']

In [55]:
from sklearn.model_selection import train_test_split

# 80/20 split with a fixed seed so the partition is reproducible
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=10,
)

In [None]:
# Random forest on the unscaled split, evaluated two ways:
#   1. on the hold-out test split (x_test / y_test), and
#   2. with repeated stratified k-fold cross-validation over the full X, y.
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from numpy import mean
from numpy import std

name = 'RandomForestClassifier'
method = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=10)

method.fit(x_train, y_train)
predict = method.predict(x_test)
target_names = ['loss', 'win']

# Per-sample absolute error on the hold-out split
errors = abs(predict - y_test)

# 10 folds repeated 3 times -> 30 scores per metric
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
n_scores = cross_val_score(method, X, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')
n2_scores = cross_val_score(method, X, y, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1, error_score='raise')

# The 'neg_mean_absolute_error' scorer returns the NEGATED error (higher is
# better), so flip the sign to report an actual, non-negative MAE.
print('MAE: %.3f (%.3f)' % (-mean(n2_scores), std(n2_scores)))
# Accuracy performance
print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))
# Hold-out mean absolute error; it has no unit because the targets are class labels.
# Uses the `mean` imported in this cell rather than relying on `np` from another cell.
print('Mean Absolute Error:', round(mean(errors), 2))
print(accuracy_score(y_test, predict))
print(confusion_matrix(y_test, predict))
print(classification_report(y_test, predict, target_names=target_names))

## Testing RF Model with MLFlow

In [98]:
# Train a random forest on Games_1_100.csv and record its hold-out metrics
# and the fitted model in the "RF 100 Games Test" MLflow experiment.
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

# Launch the experiment on mlflow
experiment_name = "RF 100 Games Test"
mlflow.set_experiment(experiment_name)

df = pd.read_csv('Games_1_100.csv')

# Defining X and y variables
X = df.drop(['game_id','home_win'], axis=1)
y = df['home_win']

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

# Scale the split created in THIS cell. Previously the model was fitted on
# x_train2 / x_test2 left over from an earlier cell, which silently goes stale
# (or is undefined) whenever the data or the split changes.
sc = StandardScaler()
x_train2 = sc.fit_transform(x_train)
x_test2 = sc.transform(x_test)

name = 'RandomForestClassifier'
method = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=10)
method.fit(x_train2, y_train)
predict = method.predict(x_test2)
target_names = ['loss', 'win']

# Creating metrics
metrics = {'Accuracy': accuracy_score(y_test, predict)}
cm = confusion_matrix(y_test, predict)
# Unpacking into four values requires a 2x2 matrix, i.e. exactly two classes
# must appear across y_test and predict.
t_n, f_p, f_n, t_p = cm.ravel()

# Reuse an already-active run or start a new one; the try/finally closes the
# run even if a logging call raises, so no dangling run is left behind.
run = mlflow.active_run() or mlflow.start_run()
try:
    # Log in mlflow (metrics)
    mlflow.log_metric("tn", t_n)
    mlflow.log_metric("fp", f_p)
    mlflow.log_metric("fn", f_n)
    mlflow.log_metric("tp", t_p)
    mlflow.log_metrics(metrics)
    # Logging model
    mlflow.sklearn.log_model(method, "RF-model")
finally:
    mlflow.end_run()

## Adding hyperparameters and parameter logging to the model

In [99]:
# Train a random forest with explicit hyperparameters on Games_1_100.csv and
# record the parameters, hold-out metrics and fitted model in the
# "RF 100 Games Test" MLflow experiment.
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

# Launch the experiment on mlflow
experiment_name = "RF 100 Games Test"
mlflow.set_experiment(experiment_name)

df = pd.read_csv('Games_1_100.csv')

# Defining X and y variables
X = df.drop(['game_id','home_win'], axis=1)
y = df['home_win']

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

# Scale the split created in THIS cell. Previously the model was fitted on
# x_train2 / x_test2 left over from an earlier cell, which silently goes stale
# (or is undefined) whenever the data or the split changes.
sc = StandardScaler()
x_train2 = sc.fit_transform(x_train)
x_test2 = sc.transform(x_test)

# Hyperparameters passed to the forest and logged to MLflow below
n_estimators = 10
max_depth = 6
max_features = 3

name = 'RandomForestClassifier'
method = RandomForestClassifier(
    n_estimators=n_estimators,
    max_depth=max_depth,
    max_features=max_features,
    random_state=10,
)
method.fit(x_train2, y_train)
predict = method.predict(x_test2)
target_names = ['loss', 'win']

# Creating metrics
metrics = {'Accuracy': accuracy_score(y_test, predict)}
cm = confusion_matrix(y_test, predict)
# Unpacking into four values requires a 2x2 matrix, i.e. exactly two classes
# must appear across y_test and predict.
t_n, f_p, f_n, t_p = cm.ravel()

# Reuse an already-active run or start a new one; the try/finally closes the
# run even if a logging call raises, so no dangling run is left behind.
run = mlflow.active_run() or mlflow.start_run()
try:
    # Log parameters
    mlflow.log_param("num_trees", n_estimators)
    mlflow.log_param("maxdepth", max_depth)
    mlflow.log_param("max_feat", max_features)

    # Log in mlflow (metrics)
    mlflow.log_metric("tn", t_n)
    mlflow.log_metric("fp", f_p)
    mlflow.log_metric("fn", f_n)
    mlflow.log_metric("tp", t_p)
    mlflow.log_metrics(metrics)

    # Logging model
    mlflow.sklearn.log_model(method, "RF-model")
finally:
    mlflow.end_run()