# Valorant Victory Prediction

## Data Pre-processing

In [57]:
# Import libraries
import numpy as np
import pandas as pd
#For plotting
import matplotlib.pyplot as plt
import seaborn as sns
# For clustering
from sklearn.cluster import KMeans
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MultiLabelBinarizer

## Pre-process match stats

### Dataset overview

The match dataset contains the map name, team ids and names, score, wins and losses during attack and defense, players, and team-composition

In [58]:
s_results = pd.read_csv('./Data files/val-series.csv')
m_results = pd.read_csv('./Data files/val-matches.csv')

In [59]:
m_results['winner'] = np.where(m_results['Team 1 Score'] > m_results['Team 2 Score'], 1, 0)
m_results = m_results.drop([
    'Match Id',
    'Series Id',
    'Event Name',
    'Win Condition',
    'Date',
    'Total Count',
    'Team 1 Id',
    'Team 2 Id',
    'Team 1 Player Ids',
    'Team 2 Player Ids',
    'Team 1 Score',
    'Team 2 Score',
    'Team 1 Score At Half',
    'Team 2 Score At Half', 
    "Team 1 Pistol Wins",
    "Team 1 Attacking Pistol Wins",
    "Team 1 Defending Pistol Wins",
    "Team 1 Attacking Wins",
    "Team 1 Attacking Losses",
    "Team 1 Defending Wins",
    "Team 1 Defending Losses",
    "Team 2 Pistol Wins",
    "Team 2 Attacking Pistol Wins",
    "Team 2 Defending Pistol Wins",
    "Team 2 Attacking Wins",
    "Team 2 Attacking Losses",
    "Team 2 Defending Wins",
    "Team 2 Defending Losses"
    ], axis=1)



In [60]:
  
def player_id_list(results, team_number):
    list_of_player_ids = []

    for index, row in results.iterrows():
        player_ids = row['Team ' + team_number + ' Player Ids']
        player_ids = player_ids[1:-1]
        player_ids = player_ids.split(',')
        list_of_player_ids.append(player_ids)
    return list_of_player_ids

def agents_to_list(results, team_number):
    list_of_agent_lists = []
    for index, row in results.iterrows():
        agents = row['Team ' + team_number + ' Agents'].split(',')
        for x in range(len(agents)):
            agents[x] = agents[x] + '_' + team_number
        list_of_agent_lists.append(agents)
    return list_of_agent_lists



In [61]:
cat_encoder = OneHotEncoder()# instantiate a class
# code the feature called default which takes yes, no value
results_encoded = pd.get_dummies(m_results, columns=[
    'Team 1 Name', 'Team 2 Name', 'Map Name'
])

In [62]:

#m_results['Team 1 Player Ids'] = player_id_list(m_results, '1')
#m_results['Team 2 Player Ids'] = player_id_list(m_results, '2')
results_encoded['Team 1 Agents'] = agents_to_list(m_results,'1')
results_encoded['Team 2 Agents'] = agents_to_list(m_results,'2')
results_encoded.head(5)

Unnamed: 0,Attacking First Team Number,Team 1 Agents,Team 2 Agents,winner,Team 1 Name_100 Thieves,Team 1 Name_9z Team,Team 1 Name_AWS,Team 1 Name_Acend,Team 1 Name_Alter Ego,Team 1 Name_BBL Esports,...,Team 2 Name_ZETA DIVISION,Team 2 Name_looking for a name,Map Name_Ascent,Map Name_Bind,Map Name_Breeze,Map Name_Fracture,Map Name_Haven,Map Name_Icebox,Map Name_Pearl,Map Name_Split
0,2,"[Breach_1, Raze_1, Omen_1, Chamber_1, Fade_1]","[Killjoy_2, Phoenix_2, Omen_2, Chamber_2, Fade_2]",1,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
1,1,"[Cypher_1, Sova_1, Viper_1, Jett_1, KAY/O_1]","[Sova_2, Viper_2, Jett_2, KAY/O_2, Chamber_2]",1,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
2,2,"[Raze_1, Viper_1, Brimstone_1, Skye_1, Fade_1]","[Raze_2, Viper_2, Brimstone_2, Skye_2, Chamber_2]",0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
3,1,"[Sova_1, Killjoy_1, Omen_1, Jett_1, KAY/O_1]","[Sova_2, Killjoy_2, Omen_2, Jett_2, KAY/O_2]",1,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
4,2,"[Killjoy_1, Phoenix_1, Omen_1, Chamber_1, Fade_1]","[Breach_2, Killjoy_2, Omen_2, Jett_2, Fade_2]",1,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0


In [63]:
mlb = MultiLabelBinarizer()
results_encoded = results_encoded.join(pd.DataFrame(mlb.fit_transform(results_encoded.pop('Team 1 Agents')),
                          columns=mlb.classes_,
                          index=m_results.index))
results_encoded = results_encoded.join(pd.DataFrame(mlb.fit_transform(results_encoded.pop('Team 2 Agents')),
                          columns=mlb.classes_,
                          index=m_results.index))

In [64]:
# Number of records having team 1 as the victor
team1 = results_encoded[results_encoded.winner==1].shape[0]
# Number of records having team 2 as the victor
team2 = results_encoded[results_encoded.winner==0].shape[0]

print(f'There is {team1} examples with team 1 winning, which is {round(team1/results_encoded.shape[0],2)*100}%')
print(f'There is {team2} examples with team 2 winning, which is {round(team2/results_encoded.shape[0],2)*100}%')

There is 847 examples with team 1 winning, which is 53.0%
There is 741 examples with team 2 winning, which is 47.0%


### Pre-process player stats

In [65]:
acs = pd.read_csv('./Data files/acs.csv')
agent_wr = pd.read_csv('./Data files/agent-win-rates.csv')
agent_picks = pd.read_csv('./Data files/agent-picks.csv')
aim = pd.read_csv('./Data files/aim.csv')
assists = pd.read_csv('./Data files/assists.csv')
clutches = pd.read_csv('./Data files/clutches.csv')
damage = pd.read_csv('./Data files/damage.csv')
defusals = pd.read_csv('./Data files/defusals.csv')
headshots = pd.read_csv('./Data files/headshots.csv')
kd = pd.read_csv('./Data files/kd.csv')
multikill = pd.read_csv('./Data files/multi-kills.csv')
plants = pd.read_csv('./Data files/plants.csv')
spend = pd.read_csv('./Data files/spend.csv')
weapon_picks = pd.read_csv('./Data files/weapon-picks.csv')


### Pre-process team stats

In [66]:
team_attack_speed = pd.read_csv('./Data files/team-attack-speed.csv')
team_comp = pd.read_csv('./Data files/team-comp.csv')
team_map_performance = pd.read_csv('./Data files/team-map-performance.csv')
team_post_plants = pd.read_csv('./Data files/team-post-plants.csv')
team_round_performance = pd.read_csv('./Data files/team-round-performance.csv')
team_win_conditions = pd.read_csv('./Data files/team-win-conditions.csv')
team_xvy_performance = pd.read_csv('./Data files/team-xvy-performance.csv')

## Prepare features for testing: previous match data ONLY

In [67]:
Y = results_encoded['winner']
X = results_encoded.drop("winner", axis=1)

In [68]:
#Import necessary library
from sklearn import preprocessing
#Normalize the data
normalized_X= preprocessing.normalize(X)
#Check normalization
print(normalized_X)

[[0.48507125 0.         0.         ... 0.         0.         0.        ]
 [0.26726124 0.         0.         ... 0.26726124 0.26726124 0.        ]
 [0.48507125 0.         0.         ... 0.         0.24253563 0.        ]
 ...
 [0.26726124 0.         0.         ... 0.26726124 0.26726124 0.        ]
 [0.26726124 0.         0.         ... 0.         0.26726124 0.        ]
 [0.26726124 0.         0.         ... 0.         0.         0.        ]]


In [69]:
from sklearn.model_selection import train_test_split
#Split testing into 20%
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1904703)

In [70]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix

#Create an instance of RandomForestClassifier
rf=RandomForestClassifier(random_state=1904703, n_estimators=100)

#Fit model with training data
rf.fit(X_train, y_train)

RandomForestClassifier(random_state=1904703)

In [71]:
# Prediction
rf_predictions = rf.predict(X_test)
#Results
accuracy = accuracy_score(y_test, rf_predictions)
print(f'The overall accuracy of RF is {np.round(accuracy*100,2)}%')

The overall accuracy of RF is 50.63%
