In [1]:
import pandas as pd
import numpy as np

In [2]:
jp = pd.read_csv('jakepaul.csv', parse_dates=True)
ba = pd.read_csv('benaskren.csv', parse_dates=True)

In [3]:
data = jp.merge(ba)

In [4]:
data

Unnamed: 0,Time,jake paul: (United States),ben askren: (United States)
0,2021-01-31T22:44:00-05:00,78,53
1,2021-01-31T22:52:00-05:00,72,42
2,2021-01-31T23:00:00-05:00,60,49
3,2021-01-31T23:08:00-05:00,56,46
4,2021-01-31T23:16:00-05:00,59,49
...,...,...,...
175,2021-02-01T22:04:00-05:00,54,40
176,2021-02-01T22:12:00-05:00,56,47
177,2021-02-01T22:20:00-05:00,53,50
178,2021-02-01T22:28:00-05:00,52,47


0 means jake paul is worse than ben askren

1 means jake paul is better than ben askren

In [5]:
target = []

for x in range(len(data.values)):
  if data.iat[x,1] <= data.iat[x,2]:
    target.append(0)
  else:
    target.append(1)

targetdf = pd.DataFrame(target)

In [6]:
targetdf = targetdf.astype(float)

In [7]:
data['target'] = targetdf

In [8]:
data

Unnamed: 0,Time,jake paul: (United States),ben askren: (United States),target
0,2021-01-31T22:44:00-05:00,78,53,1.0
1,2021-01-31T22:52:00-05:00,72,42,1.0
2,2021-01-31T23:00:00-05:00,60,49,1.0
3,2021-01-31T23:08:00-05:00,56,46,1.0
4,2021-01-31T23:16:00-05:00,59,49,1.0
...,...,...,...,...
175,2021-02-01T22:04:00-05:00,54,40,1.0
176,2021-02-01T22:12:00-05:00,56,47,1.0
177,2021-02-01T22:20:00-05:00,53,50,1.0
178,2021-02-01T22:28:00-05:00,52,47,1.0


In [9]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelBinarizer

In [10]:
array = data.values
X = array[:,1:3]
y = array[:,3]

y = y.astype(float)

X_train, X_validation, Y_train, Y_validation = train_test_split(X, y, test_size=0.20, random_state=1)

In [11]:
models = []
models.append(('LR', LogisticRegression(solver='liblinear', multi_class='ovr')))
models.append(('SVM', SVC(gamma='auto')))
models.append(('RF', RandomForestClassifier()))

In [12]:
results = []
names = []

for name, model in models:
	kfold = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)
	cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring='accuracy')
	results.append(cv_results)
	names.append(name)
	print('%s: %f (%f)' % (name, cv_results.mean(), cv_results.std()))

LR: 1.000000 (0.000000)
SVM: 0.742857 (0.045476)
RF: 0.909524 (0.070695)


In [13]:
model = LogisticRegression()
model.fit(X_train, Y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [14]:
predictions = model.predict(X_validation)

In [15]:
predictions

array([0., 1., 1., 1., 1., 1., 0., 1., 0., 1., 1., 0., 1., 1., 1., 0., 0.,
       1., 0., 1., 0., 1., 1., 1., 0., 1., 1., 0., 1., 1., 0., 0., 0., 0.,
       1., 1.])

In [16]:
def ultimatum():
  ones = 0
  zeroes = 0

  for x in predictions:
    if x == 0:
      zeroes +=1
    else:
      ones +=1
  if ones/zeroes < 1:
    print('Ben Askren will win the fight')
  else:
    print('Jake Paul will win the fight')

In [17]:
ultimatum()

Jake Paul will win the fight


In [18]:
#FOR EMERGENCY USE ONLY
predictions = np.where(predictions == 1, 0, 1)

In [19]:
ultimatum()

Ben Askren will win the fight
