*This notebook is part of the course materials for CS 345: Machine Learning Foundations and Practice at Colorado State University.
Original versions were created by Asa Ben-Hur and updated by Ross Beveridge.
The content is available [on GitHub](https://github.com/asabenhur/CS345).*

*The text is released under the [CC BY-SA license](https://creativecommons.org/licenses/by-sa/4.0/), and code is released under the [MIT license](https://opensource.org/licenses/MIT).*

<a href="https://colab.research.google.com/github//asabenhur/CS345/blob/master/fall24/notebooks/module01_06_perceptron.ipynb">
  <img align="left" src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# Predicting Valorant Match Outcomes Using Performance Metrics

By Hallie Gurr and Rose Ordway

In [65]:
import sqlite3
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
import numpy as np

#Helper functions
def removeNanRows(X):
    """Return the rows of ``X`` that contain no missing values.

    ``X`` is wrapped in a DataFrame, every row holding at least one
    missing entry (NaN/None) is discarded, and the remaining rows are
    handed back as a NumPy array.
    """
    return pd.DataFrame(X).dropna().values

In [66]:
# Database connection: open the SQLite file that holds the Valorant match data.
# The path is relative, so the notebook has to be run from the directory that
# contains the data/ folder.
# NOTE(review): sqlite3.connect creates an empty database when the file does not
# exist, so an empty table listing below most likely means the path is wrong.
db_path = "data/valorant.sqlite"
conn = sqlite3.connect(db_path)

# List the table names recorded in sqlite_master to confirm what the database contains.
tables = pd.read_sql_query(
    "SELECT name FROM sqlite_master WHERE type='table';", conn
)
print(tables)

              name
0          Matches
1            Games
2      Game_Rounds
3  Game_Scoreboard


In [67]:
#Get Scoreboard Data
import pandas as pd
# Lowercase agent name -> role. Built from one roster per role so that adding
# an agent means touching exactly one tuple.
ROLE_MAP = {
    agent: role
    for role, roster in (
        ("Duelist", ("jett", "raze", "reyna", "neon", "yoru", "phoenix")),
        ("Controller", ("brimstone", "omen", "astra", "viper", "harbor", "clove")),
        ("Initiator", ("sova", "skye", "breach", "kayo", "fade", "gekko")),
        ("Sentinel", ("sage", "killjoy", "cypher", "chamber", "deadlock")),
    )
    for agent in roster
}

def generateTeamCompList(scoreboard):
    """Count how many agents of each role each team fielded in each game.

    Every scoreboard row is tagged with a role by looking its ``Agent`` up in
    ``ROLE_MAP``; rows whose agent has no entry there are left out. The
    surviving rows are counted per (GameID, TeamAbbreviation, Role) and
    pivoted so each role becomes a column.

    Returns a DataFrame indexed by (GameID, TeamAbbreviation) with the columns
    Duelist, Controller, Initiator and Sentinel, using 0 where a team has no
    agent of that role. The input frame is not modified.
    """
    role_order = ["Duelist", "Controller", "Initiator", "Sentinel"]

    # assign() builds a new frame, so the caller's scoreboard stays untouched.
    tagged = scoreboard.assign(Role=scoreboard["Agent"].map(ROLE_MAP))
    known_roles = tagged.dropna(subset=["Role"])

    counts = known_roles.groupby(["GameID", "TeamAbbreviation", "Role"]).size()
    wide = counts.unstack(fill_value=0)

    # Force a fixed column set/order even when a role never appears in the data.
    return wide.reindex(columns=role_order, fill_value=0)

def getScoreboard(conn, limit=None):
    """Load the per-player scoreboard rows that have every needed column filled in.

    Parameters
    ----------
    conn
        Open database connection accepted by ``pd.read_sql_query`` that uses
        ``?`` placeholders (the notebook passes a ``sqlite3`` connection).
    limit : int or None
        Maximum number of rows to return. ``None`` (the default) returns
        every matching row.

    Returns
    -------
    pandas.DataFrame
        Columns GameID, TeamAbbreviation, PlayerID, Agent, Kills, Deaths,
        Assists, HS_Percent and Econ, restricted to rows where none of those
        columns is NULL.

    Raises
    ------
    ValueError, TypeError
        If ``limit`` is neither ``None`` nor convertible to an integer.
    """
    headers = ["GameID", "TeamAbbreviation", "PlayerID", "Agent", "Kills", "Deaths",
               "Assists", "HS_Percent", "Econ"]

    selections = ", ".join(headers)
    conditions = " AND ".join(f"{h} IS NOT NULL" for h in headers)

    # SQLite treats a negative LIMIT as "no upper bound". Coercing to int up
    # front rejects anything that is not a plain row count.
    row_limit = -1 if limit is None else int(limit)

    # The column names are fixed above; the caller-supplied limit is bound as a
    # parameter instead of being pasted into the SQL text.
    query = f"""
    SELECT {selections}
    FROM Game_Scoreboard
    WHERE {conditions}
    LIMIT ?;
    """
    return pd.read_sql_query(query, conn, params=(row_limit,))

def MLScoreBoardData(conn, limit=None):
    """Build the feature matrix for the models from the scoreboard table.

    Loads the scoreboard through ``getScoreboard`` (``limit`` is forwarded
    unchanged), attaches each team's per-role agent counts to every player
    row, and keeps nine columns in this order: Kills, Deaths, Assists,
    HS_Percent, Econ, Duelist, Controller, Initiator, Sentinel.

    Returns a NumPy array with one row per player entry; rows containing any
    missing value are removed by ``removeNanRows``.
    """
    player_stat_cols = ["Kills", "Deaths", "Assists", "HS_Percent", "Econ"]
    team_role_cols = ["Duelist", "Controller", "Initiator", "Sentinel"]

    raw_rows = getScoreboard(conn, limit=limit)
    rows_with_comp = assignTeamCompToScoreboard(raw_rows, generateTeamCompList(raw_rows))

    feature_matrix = rows_with_comp[player_stat_cols + team_role_cols].to_numpy()
    return removeNanRows(feature_matrix)

def assignTeamCompToScoreboard(scoreboard, df_teamcomp):
    """Attach team-composition columns to every scoreboard row.

    ``df_teamcomp`` is expected to carry GameID and TeamAbbreviation in its
    index; the index is turned back into ordinary columns and used as the
    join key. The join is a left join, so every scoreboard row is kept and
    rows without a matching team composition get NaN in the added columns.
    """
    join_keys = ["GameID", "TeamAbbreviation"]
    return scoreboard.merge(df_teamcomp.reset_index(), how="left", on=join_keys)

# None loads every row; set an integer to cap the query while experimenting.
dataLimit = None
mlscoreboard = MLScoreBoardData(conn, limit=dataLimit)
print(mlscoreboard.shape)

# Re-attach column labels to the bare array, in the same order that
# MLScoreBoardData selects its feature columns, so it can be inspected.
df = pd.DataFrame(
    mlscoreboard,
    columns=[
        "Kills", "Deaths", "Assists", "HS_Percent", "Econ",
        "Duelist", "Controller", "Initiator", "Sentinel",
    ],
)
print(df)



(147702, 9)
        Kills  Deaths  Assists  HS_Percent  Econ  Duelist  Controller  \
0        24.0    10.0      3.0        0.31  74.0      1.0         1.0   
1        16.0    10.0      7.0        0.16  67.0      1.0         1.0   
2        17.0     9.0      8.0        0.27  58.0      1.0         1.0   
3        17.0    12.0      2.0        0.19  48.0      1.0         1.0   
4         5.0    13.0      3.0        0.22  21.0      1.0         1.0   
...       ...     ...      ...         ...   ...      ...         ...   
147697   13.0    12.0      1.0        0.18  61.0      2.0         1.0   
147698    4.0    13.0      4.0        0.33  32.0      2.0         1.0   
147699    4.0    15.0      0.0        0.13  21.0      2.0         1.0   
147700    3.0    14.0      4.0        0.19  29.0      2.0         1.0   
147701    3.0    14.0      2.0        0.12  18.0      2.0         1.0   

        Initiator  Sentinel  
0             2.0       1.0  
1             2.0       1.0  
2             2.0    

In [None]:
# Some teams end up with fewer than 5 counted players (due to missing data);
# this reports what share of scoreboard entries is missing from the role counts.
# (This may already be fixed upstream, but the check is kept as a sanity test.)
def getMissingPlayerStats(comp, scoreboard=None):
    """Print how many scoreboard entries are not reflected in the role counts.

    Parameters
    ----------
    comp : pandas.DataFrame
        Per-team role counts, one column per role (as produced by
        ``generateTeamCompList``). It is not modified; a ``RoleCount`` column
        left behind by earlier versions of this function is ignored.
    scoreboard : pandas.DataFrame or None
        Scoreboard whose row count is the reference total. When ``None`` it is
        loaded with ``getScoreboard(conn, limit=dataLimit)`` using the
        notebook-level ``conn`` and ``dataLimit``.

    Returns
    -------
    None
        The statistics are printed, not returned.
    """
    if scoreboard is None:
        scoreboard = getScoreboard(conn, limit=dataLimit)

    # Work on a view without any stale total so repeated calls on the same
    # frame give the same answer and the caller's frame is never altered.
    role_counts = comp.drop(columns="RoleCount", errors="ignore")
    with_role = role_counts.sum(axis=1).sum()

    total = scoreboard.shape[0]
    missing = total - with_role
    # An empty scoreboard has nothing missing; avoid dividing by zero.
    pct_missing = missing / total * 100 if total else 0.0

    print(f"Number of entries: {total}")
    print(f"Number of entries with role: {with_role}")
    print(f"Number of missing entries: {missing}")
    print(f"Percent missing: {pct_missing:.2f}%")

# Run the check on the role counts derived from the scoreboard loaded with the current dataLimit.
getMissingPlayerStats(generateTeamCompList(getScoreboard(conn, limit=dataLimit)))


Number of entries: 147702
Number of entries with role: 147702
Number of missing entries: 0
Percent missing: 0.00%


# 6% noise is normal in most large ML models. Rows containing null values were removed, which caused this issue. Some team names were not imputed correctly, leading to fewer agents in a team composition than normal.