<a href="https://colab.research.google.com/github/Corosso/ValoAI/blob/main/Project_ValoAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score
from sklearn import svm

# Get the data 
acs = pd.read_csv("teams.csv")
map_win_rate = pd.read_csv("map_win_rate.csv")
previous_matches = pd.read_csv("previous_matches.csv")


In [2]:
# Split the ACS values into separate columns
acs_s = acs['acs'].str.split(',', expand=True).astype(float)
acs_s.columns = ['acs_1', 'acs_2', 'acs_3', 'acs_4', 'acs_5']

# Concatenate the team name with the ACS values
acs_merged = pd.concat([acs['team'], acs_s], axis=1)

# Remove the number in parentheses from the map name and encode maps
map_win_rate['map'] = map_win_rate['map'].str.replace(r'\s+\(.+\)$', '', regex=True)
encoder_maps = LabelEncoder()
map_win_rate['map'] = encoder_maps.fit_transform(map_win_rate['map'])

# Merge ACS and map win rate
acs_merged = pd.merge(acs_merged, map_win_rate, on="team")
acs_merged.drop(acs_merged.filter(regex="Unname"),axis=1, inplace=True)

In [3]:
#Convert team names to numeric values using label encoder
encoder_teams = LabelEncoder()
acs_merged['team'] = encoder_teams.fit_transform(acs_merged['team'])

In [4]:
# Convert team_A and team_B names to numeric values using label encoder
previous_matches['team_A'] = encoder_teams.transform(previous_matches['team_A'])
previous_matches['team_B'] = encoder_teams.transform(previous_matches['team_B'])

In [5]:
# Merge data
acs_teams1=pd.merge(previous_matches, acs_merged, left_on="team_A", right_on="team")
acs_teams2=pd.merge(previous_matches, acs_merged, left_on="team_B", right_on="team")
data=pd.merge(acs_teams1,acs_teams2, on="team_A")

In [6]:
# Drop redundant data
data=data.drop("winner_y", axis=1)
data=data.drop("team_B_y", axis=1)
data=data.drop("map_y", axis=1)
data=data.drop("team_y", axis=1)
data=data.drop("team_x", axis=1)

In [7]:
# Rename columns
data.rename(columns={"winner_x": "winner", "team_B_x": "team_B", 'map_x': 'map', 'win_rate_x':'win_rate_A','win_rate_y':'win_rate_B','acs_1_x': 'acs_1_A',
                     'acs_2_x': 'acs_2_A','acs_3_x': 'acs_3_A','acs_4_x': 'acs_4_A','acs_5_x': 'acs_5_A','acs_1_y': 'acs_1_B',
                     'acs_2_y': 'acs_2_B','acs_3_y': 'acs_3_B','acs_4_y': 'acs_4_B','acs_5_y': 'acs_5_B'}, inplace=True)

In [8]:
# Replace ("-") for NaN
data['win_rate_A'] = data['win_rate_A'].replace('-', float('nan'))
data['win_rate_B'] = data['win_rate_B'].replace('-', float('nan'))
# Replace NaN for zeros
data = data.fillna(0)
# Convert win rate to float
data['win_rate_A'] = data['win_rate_A'].str.rstrip('%').astype(float) / 100
data['win_rate_B'] = data['win_rate_B'].str.rstrip('%').astype(float) / 100

In [9]:
# Replace NaN for zeros
data = data.fillna(0)

In [10]:
# Select the input variables (ACS and Map Win Rate)
cols = ["team_A","team_B",'acs_1_A', 'acs_2_A', 'acs_3_A', 'acs_4_A', 'acs_5_A','acs_1_B', 'acs_2_B', 'acs_3_B', 'acs_4_B', 'acs_5_B', 'map', 'win_rate_A','win_rate_B']
X = data[cols]

# Select the output variable (winner)
y = data["winner"]

# Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.7, random_state=1024 )

# Create a logistic regression model
model = LogisticRegression(max_iter=10000)
model2 = svm.SVC(kernel="rbf",C=0.01, gamma="auto")

# Train the model with the training data
model.fit(X_train, y_train)
model2.fit(X_train, y_train)

y_predict=model.predict(X_test)
y_predict2=model2.predict(X_test)

# Evaluate model accuracy with test data
precision = f1_score(y_test, y_predict)
precision2 = f1_score(y_test, y_predict2)

print("Model precission: ", precision)
print("Model 2 precission: ", precision2)

Model precission:  0.816917728852839
Model 2 precission:  0.7387701700828608


In [11]:
# Teams table with its label or tag
teams = pd.read_csv("teams.csv")
teams.drop('acs',inplace=True, axis=1)
teams_encoded = encoder_teams.transform(teams['team'])
teams_encoded = pd.DataFrame(teams_encoded, columns=['Tag'])
teams_table=pd.merge(teams, teams_encoded,left_index=True, right_index=True)
teams_table

Unnamed: 0,team,Tag
0,Team Heretics,17
1,Karmine Corp,10
2,FNATIC,4
3,BBL Esports,1
4,KOI,8
5,Team Vitality,19
6,Team Liquid,18
7,FUT Esports,6
8,Natus Vincere,15
9,Giants Gaming,7


#Make predictions on selected teams

In [12]:
# Which teams are going to play
tA=9
tB=5

In [13]:
#Set the team A on a dataframe

if ((X.loc[X['team_A'] == tA]).empty):
  t1 = X.loc[X['team_B'] == tA]
  t1.drop(['team_A','acs_1_A','acs_2_A','acs_3_A','acs_4_A','acs_5_A','win_rate_A'],inplace=True, axis=1)
  t1.drop_duplicates('map', inplace=True)
  t1 = t1.reset_index(drop=True)
else:
  t1 = X.loc[X['team_A'] == tA]
  t1.drop(['team_B','acs_1_B','acs_2_B','acs_3_B','acs_4_B','acs_5_B','win_rate_B'],inplace=True, axis=1)
  t1.drop_duplicates('map', inplace=True)
  t1 = t1.reset_index(drop=True)  

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  t1.drop(['team_B','acs_1_B','acs_2_B','acs_3_B','acs_4_B','acs_5_B','win_rate_B'],inplace=True, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  t1.drop_duplicates('map', inplace=True)


In [14]:
#Set the team B on a dataframe
if ((X.loc[X['team_B'] == tB]).empty):
  t2 = X.loc[X['team_A'] == tB]
  t2.drop(['team_B','acs_1_B','acs_2_B','acs_3_B','acs_4_B','acs_5_B','win_rate_B'],inplace=True, axis=1)
  t2.drop_duplicates('map', inplace=True)
  t2 = t2.reset_index(drop=True)
else:
  t2 = X.loc[X['team_B'] == tB]
  t2.drop(['team_A','acs_1_A','acs_2_A','acs_3_A','acs_4_A','acs_5_A','win_rate_A'],inplace=True, axis=1)
  t2.drop_duplicates('map', inplace=True)
  t2 = t2.reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  t2.drop(['team_A','acs_1_A','acs_2_A','acs_3_A','acs_4_A','acs_5_A','win_rate_A'],inplace=True, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  t2.drop_duplicates('map', inplace=True)


In [15]:
# Merge the data of both teams
m=pd.merge(t1, t2, on='map', how='outer')

In [16]:
# Rename Columns
m.rename(columns={"team_A_x": "team_A",'team_A_y':'team_B', 'win_rate_A_x':'win_rate_A','win_rate_A_y':'win_rate_B','acs_1_A_x': 'acs_1_A',
                     'acs_2_A_x': 'acs_2_A','acs_3_A_x': 'acs_3_A','acs_4_A_x': 'acs_4_A','acs_5_A_x': 'acs_5_A','acs_1_A_y': 'acs_1_B',
                     'acs_2_A_y': 'acs_2_B','acs_3_A_y': 'acs_3_B','acs_4_A_y': 'acs_4_B','acs_5_A_y': 'acs_5_B'}, inplace=True)

In [17]:
# Get the probabilities
X_teams = m[cols]
probs = model.predict_proba(X_teams)

In [18]:
#Get the probs on a Data frame
probs=pd.DataFrame(probs, columns=[f"Probs Team_A: {tA}", f"Probs Team_B: {tB}"])

In [19]:
# Get the map tag for each probs for a map
probs_map=pd.DataFrame(probs.astype(float), map_win_rate['map'])

In [20]:
# Get a map table with its tag
maps = encoder_maps.inverse_transform(map_win_rate['map'])
maps = pd.DataFrame(maps,columns=["map"])
maps_encoded = encoder_maps.transform(maps)
maps_encoded = pd.DataFrame(maps_encoded,columns=["tag"])
maps_table=pd.merge(maps, maps_encoded,left_index=True, right_index=True)


  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


In [21]:
#Merge the map probs and the name of the map with the tag
probs_map=pd.merge(maps_table,probs_map, left_on="tag", right_on="map", how="left")
probs_map.drop_duplicates('Probs Team_A: 9',inplace=True)
probs_map.reset_index(drop=True)
probs_map

Unnamed: 0,map,tag,Probs Team_A: 9,Probs Team_B: 5
0,Bind,1,0.893087,0.106913
20,Haven,4,0.883891,0.116109
40,Split,8,0.915512,0.084488
60,Ascent,0,0.894468,0.105532
80,Icebox,5,0.890054,0.109946
100,Breeze,2,0.865612,0.134388
120,Fracture,3,0.906459,0.093541
140,Pearl,7,0.912362,0.087638
160,Lotus,6,0.909653,0.090347


In [22]:
# Get the average prob of winning
probs.mean()

Probs Team_A: 9    0.896789
Probs Team_B: 5    0.103211
dtype: float64