### Imports

In [None]:
# all the imports 
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

### Reading the data

In [None]:
# load the combined fight data into a DataFrame
# low_memory=False: let pandas infer column dtypes from the whole file
# instead of chunk by chunk
data = pd.read_csv(
    '../combined_data/combined_fight_data.csv',
    low_memory=False,
)
# preview the first rows
data.head()

### Changing the labels column to bool type

In [None]:
# Turn the winner label into a boolean: True when the Red fighter won,
# False for every other value (including missing ones).
# A vectorized comparison replaces the row-wise apply/lambda; it yields the
# same boolean column without a Python-level loop.
data['Winner'] = data['Winner'] == 'Red'
# keep the boolean target under its own name, 'R_Winner'
data['R_Winner'] = data['Winner']

### Dropping useless columns

In [None]:
# Remove the columns that are not used as model inputs ('Winner' is already
# available as 'R_Winner'), then replace every missing value with 0.
# NOTE(review): fillna(0) also fills missing values in the categorical columns
# that are one-hot encoded afterwards, so a missing category becomes its own
# "0" level - confirm this is intended.
data = (
    data
    .drop(columns=[
        'R_fighter',
        'B_fighter',
        'Referee',
        'date',
        'city',
        'country',
        'Winner',
        'end_how',
    ])
    .fillna(0)
)

### One-hot encoding the categorical data

In [None]:
# one-hot encode the categorical columns:
# weight_class, B_Stance, R_Stance and end_method
data = pd.get_dummies(
    data,
    columns=[
        'weight_class',
        'B_Stance',
        'R_Stance',
        'end_method',
    ],
)
# (rows, columns) after encoding
data.shape

### Getting features

In [None]:
# every column except the target 'R_Winner' is used as a feature
features = data.loc[:, ~data.columns.isin(['R_Winner'])]
features

### Getting labels

In [None]:
# the target column: whether the Red fighter won
labels = data['R_Winner']
labels

### Running logistic regression

In [None]:
def run_logistic_regression(features, labels, params, seeds=range(1, 6), test_size=0.20):
    """Train and evaluate a logistic regression over several random splits.

    For every seed the data is split into train and test portions, the
    features are standardized with statistics learned from the training
    portion only, a ``LogisticRegression(**params)`` is fitted, and the
    confusion matrix, per-class F1 scores and accuracy on the test portion
    are recorded. The metrics averaged over all seeds are printed.

    Args:
        features: feature table passed to ``train_test_split``.
        labels: target values aligned with ``features``.
        params: keyword arguments forwarded to ``LogisticRegression``.
        seeds: ``random_state`` values for the splits; one run per seed.
        test_size: fraction of the samples held out for testing per split.

    Returns:
        None. The averaged metrics are printed.

    Raises:
        ValueError: if ``seeds`` is empty.
    """
    seeds = list(seeds)
    if not seeds:
        raise ValueError('seeds must contain at least one random state')

    # Pin the class order so every split produces metrics of the same shape,
    # even when a class happens to be missing from one test fold; otherwise
    # the per-seed results could not be stacked and averaged.
    classes = np.unique(labels)

    confusion_matrices = []
    f_scores = []
    accuracies = []

    for seed in seeds:
        X_train, X_test, y_train, y_test = train_test_split(
            features,
            labels,
            random_state=seed,
            test_size=test_size,
        )

        # fit the scaler on the training data only, so no information from
        # the test portion leaks into the scaling statistics
        scaler = StandardScaler().fit(X_train)
        X_train_scaled = scaler.transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # fit on the training data and predict the held-out data
        logistic = LogisticRegression(**params)
        logistic.fit(X_train_scaled, y_train)
        predictions = logistic.predict(X_test_scaled)

        # performance metrics for this split
        confusion_matrices.append(
            confusion_matrix(y_test, predictions, labels=classes))
        # average=None keeps one F1 value per class
        f_scores.append(
            f1_score(y_test, predictions, labels=classes, average=None))
        accuracies.append(accuracy_score(y_test, predictions))

    # the confusion matrix is averaged element-wise over the seeds; the
    # f-score is averaged over both seeds and classes
    print('Logistic confusion matrix:\n', np.mean(np.array(confusion_matrices), axis=0))
    print('Logistic f-score:', np.mean(np.array(f_scores)))
    print('Logistic accuracy score:', np.mean(np.array(accuracies)), '\n')

In [None]:
# settings forwarded unchanged to LogisticRegression
params = dict(solver='sag', random_state=42, max_iter=100)

# NOTE(review): warnings are silenced globally in the imports cell, so a
# convergence warning from the solver would not be shown - confirm that
# max_iter=100 is enough for 'sag' on this data.
run_logistic_regression(features=features, labels=labels, params=params)