# Counter Strike Global Offensive Match Result Prediction
- **Leonardo Valerio Morales 771030**
- **Luis Felipe Dobner Henriques 771036**

This notebook executes data pre-processing and predictive analysis of Counter Strike Global Offensive Matches.

# Environment Variables
This step loads everything needed for Neo4j and Apache Kudu to work.

In [1]:
# Environment variables
import random
import os
from os import listdir

import pyspark
from pyspark.context import SparkContext
from pyspark.sql import SparkSession, SQLContext
from pyspark.sql.types import *
from pyspark.sql.window import Window
import pyspark.sql.functions as F

from ipywidgets import interact, widgets

KUDU_MASTER = 'kudu-master-1:7051'

In [3]:
# Environment variables: arguments handed to the PySpark launcher, i.e. the
# Kudu and Neo4j connector packages plus the extra repository to resolve them.
os.environ['PYSPARK_SUBMIT_ARGS'] = ' '.join([
    '--packages',
    'org.apache.kudu:kudu-spark3_2.12:1.13.0.7.1.5.17-1,org.neo4j:neo4j-connector-apache-spark_2.12:5.0.1_for_spark_3',
    '--repositories',
    'https://repository.cloudera.com/artifactory/cloudera-repos/',
    'pyspark-shell',
])

In [4]:
# Environment variables: start (or reuse) the Spark session and silence Spark
# logging for the rest of the notebook.
# NOTE(review): 'spark.packages' does not look like a standard Spark key
# ('spark.jars.packages' is the documented one) -- presumably the connectors
# are really resolved through PYSPARK_SUBMIT_ARGS; confirm before relying on it.
spark = (
    SparkSession.builder
    .config(
        'spark.packages',
        'org.apache.kudu:kudu-spark3_2.12:1.13.0.7.1.5.17-1,org.neo4j:neo4j-connector-apache-spark_2.12:5.0.1_for_spark_3',
    )
    .getOrCreate()
)
sc = SparkContext.getOrCreate()
# 'OFF' also hides Spark error logs, not just INFO/WARN noise.
sc.setLogLevel('OFF')

In [5]:
def store_in_kudu(df, table):
    """Append ``df`` to the Kudu table ``impala::default.<table>``.

    Args:
        df: Spark DataFrame to persist.
        table: Table name without the ``impala::default.`` prefix.
    """
    kudu_options = {
        'kudu.master': KUDU_MASTER,
        'kudu.table': f'impala::default.{table}',
    }
    writer = df.write.options(**kudu_options).mode('append').format('kudu')
    writer.save()

In [6]:
def read_from_kudu(table):
    """Load the Kudu table ``impala::default.<table>`` as a Spark DataFrame.

    Args:
        table: Table name without the ``impala::default.`` prefix.

    Returns:
        The DataFrame produced by the Kudu data source for that table.
    """
    kudu_options = {
        'kudu.master': KUDU_MASTER,
        'kudu.table': f'impala::default.{table}',
    }
    reader = spark.read.options(**kudu_options).format('kudu')
    return reader.load()

# Data Loading
This step creates the tables and schemas in both databases and loads the relevant data from the dataset into Neo4j and Apache Kudu.

In [7]:
!unzip -n {'./archive.zip'}

Archive:  ./archive.zip
  inflating: economy.csv             
  inflating: picks.csv               
  inflating: players.csv             
  inflating: results.csv             


In [8]:
# Data extraction: results.csv, read with a header row. No schema is supplied
# or inferred, so every column arrives as a string (see the printed schema).
df1 = spark.read.csv("./results.csv", header="true")
df1.printSchema()


root
 |-- date: string (nullable = true)
 |-- team_1: string (nullable = true)
 |-- team_2: string (nullable = true)
 |-- _map: string (nullable = true)
 |-- result_1: string (nullable = true)
 |-- result_2: string (nullable = true)
 |-- map_winner: string (nullable = true)
 |-- starting_ct: string (nullable = true)
 |-- ct_1: string (nullable = true)
 |-- t_2: string (nullable = true)
 |-- t_1: string (nullable = true)
 |-- ct_2: string (nullable = true)
 |-- event_id: string (nullable = true)
 |-- match_id: string (nullable = true)
 |-- rank_1: string (nullable = true)
 |-- rank_2: string (nullable = true)
 |-- map_wins_1: string (nullable = true)
 |-- map_wins_2: string (nullable = true)
 |-- match_winner: string (nullable = true)



In [9]:
# Data extraction: economy.csv, read with a header row and all-string columns.
df2 = spark.read.csv("./economy.csv", header="true")
display(df2)

DataFrame[date: string, match_id: string, event_id: string, team_1: string, team_2: string, best_of: string, _map: string, t1_start: string, t2_start: string, 1_t1: string, 2_t1: string, 3_t1: string, 4_t1: string, 5_t1: string, 6_t1: string, 7_t1: string, 8_t1: string, 9_t1: string, 10_t1: string, 11_t1: string, 12_t1: string, 13_t1: string, 14_t1: string, 15_t1: string, 16_t1: string, 17_t1: string, 18_t1: string, 19_t1: string, 20_t1: string, 21_t1: string, 22_t1: string, 23_t1: string, 24_t1: string, 25_t1: string, 26_t1: string, 27_t1: string, 28_t1: string, 29_t1: string, 30_t1: string, 1_t2: string, 2_t2: string, 3_t2: string, 4_t2: string, 5_t2: string, 6_t2: string, 7_t2: string, 8_t2: string, 9_t2: string, 10_t2: string, 11_t2: string, 12_t2: string, 13_t2: string, 14_t2: string, 15_t2: string, 16_t2: string, 17_t2: string, 18_t2: string, 19_t2: string, 20_t2: string, 21_t2: string, 22_t2: string, 23_t2: string, 24_t2: string, 25_t2: string, 26_t2: string, 27_t2: string, 28_t

In [10]:
# Keep only the result columns needed downstream (ids, teams, winners, CT side).
df_limited = df1.select(
    'match_id',
    '_map',
    'team_1',
    'team_2',
    'map_winner',
    'starting_ct',
    'match_winner',
)
display(df_limited)

DataFrame[match_id: string, _map: string, team_1: string, team_2: string, map_winner: string, starting_ct: string, match_winner: string]

In [11]:
# From the economy data only the join keys, the teams and `best_of` are needed.
df2_limited = df2.select(
    'match_id',
    '_map',
    'team_1',
    'team_2',
    'best_of',
)
display(df2_limited)

DataFrame[match_id: string, _map: string, team_1: string, team_2: string, best_of: string]

In [12]:
# Pre-processing for the Kudu `jogos` (maps played) table: replace the
# winner / starting-CT slot indicators with team names and derive the team
# that starts on the TR side.
df_collect = df_limited.collect()
returnval = []
# Iterate over the collected rows directly; the previous index loop called
# `df_limited.count()`, which ran a second Spark job just for the loop bound.
for selected_row in df_collect:
    team_1 = selected_row['team_1']
    team_2 = selected_row['team_2']

    # `map_winner` and `starting_ct` are used as the suffix of the team column
    # to read (team_1 / team_2).
    map_winner = selected_row[f"team_{selected_row['map_winner']}"]
    start_ct = selected_row[f"team_{selected_row['starting_ct']}"]

    # Whichever team does not start on CT is recorded as the TR starter.
    tr = team_1 if start_ct == team_2 else team_2

    returnval.append([
        selected_row['match_id'],
        selected_row['_map'],
        team_1,
        team_2,
        map_winner,
        start_ct,
        tr,
    ])

schema = ['partida','mapa', 'equipe1','equipe2','vitorioso','ct','tr']
df_mapas = spark.createDataFrame(returnval, schema)
display(df_mapas)

DataFrame[partida: string, mapa: string, equipe1: string, equipe2: string, vitorioso: string, ct: string, tr: string]

In [13]:
# Persist the per-map rows; store_in_kudu writes in append mode, so running
# this cell again sends the same rows to Kudu a second time.
store_in_kudu(df_mapas, table='jogos')

In [91]:
# Read the stored maps back from Kudu and expose them to Spark SQL as `jogos`.
table = read_from_kudu(table='jogos')
table.createOrReplaceTempView('jogos')
display(table)

DataFrame[partida: string, mapa: string, equipe1: string, equipe2: string, vitorioso: string, ct: string, tr: string]

In [15]:
# Attach `best_of` from the economy data to each result row; a row is matched
# on both the match id and the map name.
condition = [
    df_limited['match_id'] == df2_limited['match_id'],
    df_limited['_map'] == df2_limited['_map'],
]
df_join = (
    df_limited
    .join(df2_limited, on=condition, how="inner")
    .select(
        df_limited['match_id'],
        df_limited['_map'],
        df_limited['team_1'],
        df_limited['team_2'],
        df_limited['match_winner'],
        df2_limited['best_of'],
    )
)
display(df_join)

DataFrame[match_id: string, _map: string, team_1: string, team_2: string, match_winner: string, best_of: string]

In [17]:
# Rename the columns to the names used by the Kudu `proc` table.
# withColumnRenamed is a no-op when the source column is absent, so this cell
# can be re-run safely.
df_join = (
    df_join
    .withColumnRenamed('match_id', 'partida')
    .withColumnRenamed('_map', 'mapa')
    .withColumnRenamed('team_1', 'equipe1')
    .withColumnRenamed('team_2', 'equipe2')
    .withColumnRenamed('match_winner', 'vitorioso')
    .withColumnRenamed('best_of', 'md')
)
display(df_join)

DataFrame[partida: string, mapa: string, equipe1: string, equipe2: string, vitorioso: string, md: string]

In [18]:
# Build the list of every team: all distinct `equipe1` values plus the teams
# that only ever appear as `equipe2`.
df_missing_teams = (
    df_join.select('equipe2').distinct()
    .subtract(df_join.select('equipe1').distinct())
)
df_teams = (
    df_join.select('equipe1').distinct()
    .union(df_missing_teams)
    .withColumnRenamed('equipe1', 'teams')
)
# Sanity check: both outputs must be empty, i.e. no team from either column is
# missing from `df_teams`.
for team_column in ('equipe2', 'equipe1'):
    df_join.select(team_column).distinct().exceptAll(df_teams).show()

+-------+
|equipe2|
+-------+
+-------+

+-------+
|equipe1|
+-------+
+-------+



In [19]:
# Persist the joined match rows; store_in_kudu writes in append mode.
store_in_kudu(df_join, table='proc')

In [20]:
# Read the stored matches back from Kudu and expose them to Spark SQL as `proc`.
teste = read_from_kudu(table='proc')
teste.createOrReplaceTempView('proc')
display(teste)

DataFrame[partida: string, mapa: string, equipe1: string, equipe2: string, vitorioso: string, md: string]

In [21]:
# Per-team match statistics destined for the Kudu `equipes` table.
#
# Everything is computed in a single Spark aggregation over the `proc` view.
# The previous version looped over the teams on the driver and issued nine
# string-built SQL queries per team, which was slow and broke on team names
# containing a double quote.
#
# Output columns (all counts of DISTINCT `partida` ids):
#   jogos    - matches the team took part in
#   vitorias - matches the team won
#   derrotas - jogos - vitorias
#   md<N>    - matches won with md = N   (N in 1, 2, 3, 5)
#   jmd<N>   - matches played with md = N
#
# NOTE: teams are taken from `proc` itself rather than from `df_teams`; the two
# agree as long as `proc` only holds the rows written from `df_join`.

def _count_team_matches(condition=None):
    """Count distinct `partida` ids, optionally only where `condition` holds."""
    match_id = F.col('partida')
    if condition is None:
        return F.countDistinct(match_id)
    # `when` without `otherwise` yields NULL for rows that do not match, and
    # countDistinct ignores NULLs.
    return F.countDistinct(F.when(condition, match_id))


proc_matches = spark.table('proc')

# One row per (team, proc row) seen from that team's side. `venceu` is true
# when `vitorioso` names the slot the team occupies ("1" = equipe1, "2" = equipe2).
team_matches = (
    proc_matches.select(
        F.col('equipe1').alias('equipe'),
        'partida',
        'md',
        (F.col('vitorioso') == '1').alias('venceu'),
    )
    .unionByName(
        proc_matches.select(
            F.col('equipe2').alias('equipe'),
            'partida',
            'md',
            (F.col('vitorioso') == '2').alias('venceu'),
        )
    )
    .where(F.col('equipe').isNotNull())
)

best_of_formats = ('1', '2', '3', '5')
won = F.col('venceu')

df_equipes = (
    team_matches
    .groupBy('equipe')
    .agg(
        _count_team_matches().alias('jogos'),
        _count_team_matches(won).alias('vitorias'),
        *[
            _count_team_matches(won & (F.col('md') == best_of)).alias(f'md{best_of}')
            for best_of in best_of_formats
        ],
        *[
            _count_team_matches(F.col('md') == best_of).alias(f'jmd{best_of}')
            for best_of in best_of_formats
        ],
    )
    .withColumn('derrotas', F.col('jogos') - F.col('vitorias'))
    # Same column order as the original schema list.
    .select('equipe', 'jogos', 'vitorias', 'derrotas',
            'md1', 'md2', 'md3', 'md5',
            'jmd1', 'jmd2', 'jmd3', 'jmd5')
)
display(df_equipes)

DataFrame[equipe: string, jogos: bigint, vitorias: bigint, derrotas: bigint, md1: bigint, md2: bigint, md3: bigint, md5: bigint, jmd1: bigint, jmd2: bigint, jmd3: bigint, jmd5: bigint]

In [22]:
# Cast every counter column to decimal(8,5), the type the `equipes` schema
# read back from Kudu reports; `equipe` stays a string.
# NOTE(review): decimal(8,5) keeps only three integer digits, so a count of
# 1000 or more would not fit -- confirm no team reaches that.
df_equipes = df_equipes.select(
    'equipe',
    *[
        F.col(counter).cast(DecimalType(8, 5)).alias(counter)
        for counter in (
            'jogos', 'vitorias', 'derrotas',
            'md1', 'md2', 'md3', 'md5',
            'jmd1', 'jmd2', 'jmd3', 'jmd5',
        )
    ],
)
df_equipes.printSchema()

root
 |-- equipe: string (nullable = true)
 |-- jogos: decimal(8,5) (nullable = true)
 |-- vitorias: decimal(8,5) (nullable = true)
 |-- derrotas: decimal(8,5) (nullable = true)
 |-- md1: decimal(8,5) (nullable = true)
 |-- md2: decimal(8,5) (nullable = true)
 |-- md3: decimal(8,5) (nullable = true)
 |-- md5: decimal(8,5) (nullable = true)
 |-- jmd1: decimal(8,5) (nullable = true)
 |-- jmd2: decimal(8,5) (nullable = true)
 |-- jmd3: decimal(8,5) (nullable = true)
 |-- jmd5: decimal(8,5) (nullable = true)



In [97]:
# Persist the per-team statistics; store_in_kudu writes in append mode.
store_in_kudu(df_equipes, table='equipes')

In [97]:
# Read the team statistics back from Kudu and expose them to Spark SQL as
# `equipes` (queried by the general*WR helpers).
equipes = read_from_kudu(table='equipes')
equipes.createOrReplaceTempView('equipes')

In [26]:
# Show the schema Kudu reports for the stored `equipes` table.
equipes.printSchema()

root
 |-- equipe: string (nullable = false)
 |-- jogos: decimal(8,5) (nullable = true)
 |-- vitorias: decimal(8,5) (nullable = true)
 |-- derrotas: decimal(8,5) (nullable = true)
 |-- md1: decimal(8,5) (nullable = true)
 |-- md2: decimal(8,5) (nullable = true)
 |-- md3: decimal(8,5) (nullable = true)
 |-- md5: decimal(8,5) (nullable = true)
 |-- jmd1: decimal(8,5) (nullable = true)
 |-- jmd2: decimal(8,5) (nullable = true)
 |-- jmd3: decimal(8,5) (nullable = true)
 |-- jmd5: decimal(8,5) (nullable = true)



# Neo4j Data Pre Processing
This step uses the data loaded into Apache Kudu to pre-process team-versus-team win rates, and inserts that data into the Neo4j database for later use during result prediction.

In [29]:
# Creating all the nodes: one :Team node per distinct team in `equipes`.
# The map-preference properties exist in the schema but are written as NULL.
team_node_schema = StructType([
    StructField("name", StringType(), False),
    StructField("mostPicked", StringType(), True),
    StructField("mostBanned", StringType(), True),
    StructField("mostWon", StringType(), True),
    StructField("mostLost", StringType(), True),
])

list_team_name = spark.sql("SELECT DISTINCT equipe FROM equipes")

# Build the node frame inside Spark instead of collecting the names to the
# driver and wrapping them in `Row` objects. This avoids a driver round trip
# and the dependency on `Row`, which no explicit import in this notebook provides.
df = list_team_name.select(
    F.col("equipe").alias("name"),
    *[
        F.lit(None).cast(field.dataType).alias(field.name)
        for field in team_node_schema.fields[1:]
    ],
)

# NOTE(review): with mode "Append" the connector presumably creates new nodes
# on every run, so re-running this cell would duplicate the :Team nodes --
# confirm, and clear the graph first if so.
(
    df.write.format("org.neo4j.spark.DataSource")
    .option("url", "bolt://neo4j:7687")
    .option("authentication.type", "none")
    .option("labels", ":Team")
    .mode("Append")
    .save()
)

In [None]:
# Populating relationships: one directed (:Team)-[:played]->(:Team) edge for
# every ordered pair of distinct teams that met at least once in `proc`.
#
# Edge properties, all from the source team's point of view and all based on
# counts of DISTINCT `partida` ids:
#   winrate - matches won / matches played against the target team
#   bo<N>   - same ratio restricted to md = N (N in 5, 3, 2, 1); 0.0 when the
#             pair never played that format
#
# Fixes over the previous nested driver-side loop:
#   * the md = "1" count query was missing a closing parenthesis (SQL parse error);
#   * the best-of-1 rate was assigned to `txmd5`, so `bo1` was always 0 and
#     `bo5` was overwritten;
#   * about ten Spark SQL queries and one Neo4j write per team pair are
#     replaced by a single aggregation and a single write;
#   * rates are always doubles (previously int 0 or float depending on the row).

relationship_type = 'played'


def _count_pair_matches(condition=None):
    """Count distinct `partida` ids, optionally only where `condition` holds."""
    match_id = F.col('partida')
    if condition is None:
        return F.countDistinct(match_id)
    # `when` without `otherwise` is NULL for non-matching rows; countDistinct
    # skips NULLs.
    return F.countDistinct(F.when(condition, match_id))


def _pair_win_rate(wins, games):
    """Return wins / games as a double column, or 0.0 when no games exist."""
    return F.when(games > 0, wins / games).otherwise(F.lit(0.0))


proc_matches = spark.table('proc')

# Every proc row seen from both directions, so each ordered pair gets its own
# group. `src_won` is true when `vitorioso` names the slot the source team is in.
pair_matches = (
    proc_matches.select(
        F.col('equipe1').alias('src'),
        F.col('equipe2').alias('dst'),
        'partida',
        'md',
        (F.col('vitorioso') == '1').alias('src_won'),
    )
    .unionByName(
        proc_matches.select(
            F.col('equipe2').alias('src'),
            F.col('equipe1').alias('dst'),
            'partida',
            'md',
            (F.col('vitorioso') == '2').alias('src_won'),
        )
    )
    # A team never gets an edge to itself (the original loop skipped it too).
    .where(F.col('src') != F.col('dst'))
)

src_won = F.col('src_won')
relationship_df = (
    pair_matches
    .groupBy('src', 'dst')
    .agg(
        _pair_win_rate(_count_pair_matches(src_won), _count_pair_matches()).alias('winrate'),
        *[
            _pair_win_rate(
                _count_pair_matches(src_won & (F.col('md') == best_of)),
                _count_pair_matches(F.col('md') == best_of),
            ).alias(f'bo{best_of}')
            for best_of in ('5', '3', '2', '1')
        ],
    )
    # Write relationships from a single partition so concurrent writers do not
    # contend for locks on the same Team nodes.
    .coalesce(1)
)

(
    relationship_df.write
    .format('org.neo4j.spark.DataSource')
    .option('url', 'bolt://neo4j:7687')
    .option("authentication.type", "none")
    .option('relationship.save.strategy', 'keys')
    .option("relationship.source.node.keys", "src:name")
    .option('relationship.source.labels', ':Team')
    .option("relationship.target.node.keys", "dst:name")
    .option('relationship.target.labels', ':Team')
    .option('relationship', relationship_type)
    .option('relationship.properties', "winrate:winrate,bo5:bo5,bo3:bo3,bo2:bo2,bo1:bo1")
    .mode('append')
    .save()
)



# Result Prediction
This is the main step used in result prediction: it loads data from Neo4j while simultaneously processing the raw, generic data stored in Apache Kudu. Results from both databases are then fed into an algorithm that predicts the winner of the match.

In [123]:
def _jogos_win_rate(team, scope):
    """Share of `jogos` rows matching `scope` in which `team` is `vitorioso`.

    Args:
        team: Team name compared against the `vitorioso` column.
        scope: Boolean Column selecting the rows that count as "games played".

    Returns:
        wins / games as a float, or 0.0 when no row matches `scope`.
    """
    # One aggregation yields both counts (previously two separate SQL queries).
    totals = (
        spark.table('jogos')
        .where(scope)
        .agg(
            F.count(F.lit(1)).alias('games'),
            F.count(F.when(F.col('vitorioso') == team, 1)).alias('wins'),
        )
        .first()
    )
    if not totals['games']:
        return 0.0
    return totals['wins'] / totals['games']


def _side_column(side):
    """Map a side label ('TR' / 'CT') to its `jogos` column, else None."""
    if side == 'TR':
        return 'tr'
    if side == 'CT':
        return 'ct'
    return None


def generalMapWR(team, map):
    """Win rate of `team` on `map`, over all `jogos` rows it took part in.

    Conditions are built with the DataFrame API rather than string-formatted
    SQL, so names containing quotes no longer break the query.

    Returns:
        Float in [0, 1]; 0.0 when the team has no row on that map.
    """
    took_part = (F.col('equipe1') == team) | (F.col('equipe2') == team)
    return _jogos_win_rate(team, (F.col('mapa') == map) & took_part)


def generalMapSideWR(team, map, side):
    """Win rate of `team` on `map` when it is the `tr` / `ct` team of the row.

    Args:
        side: 'TR' or 'CT'; any other value returns 0.0.

    Returns:
        Float in [0, 1]; 0.0 when there is no matching row.
    """
    side_column = _side_column(side)
    if side_column is None:
        return 0.0
    return _jogos_win_rate(team, (F.col('mapa') == map) & (F.col(side_column) == team))


def generalSideWR(team, side):
    """Win rate of `team` across all maps when it is the `tr` / `ct` team.

    Args:
        side: 'TR' or 'CT'; any other value returns 0.0.

    Returns:
        Float in [0, 1]; 0.0 when there is no matching row.
    """
    side_column = _side_column(side)
    if side_column is None:
        return 0.0
    return _jogos_win_rate(team, F.col(side_column) == team)

def _equipes_win_rate(team, wins_column, games_column):
    """Return wins / games for `team` from the `equipes` view.

    Args:
        team: Value matched against the `equipe` column.
        wins_column: Name of the column holding the number of wins.
        games_column: Name of the column holding the number of games.

    Returns:
        The ratio as a float; 0.0 when the team is not in `equipes` or has no
        games in that category. (Indexing `collect()[0]` used to raise
        IndexError for an unknown team.)
    """
    record = (
        spark.table('equipes')
        .where(F.col('equipe') == team)
        .select(wins_column, games_column)
        .first()
    )
    if record is None:
        return 0.0
    wins, games = record[0], record[1]
    # `not games` covers both a NULL and a zero count.
    if wins is None or not games:
        return 0.0
    return float(wins / games)


def generalTeamWR(team):
    """Overall match win rate of `team` (`vitorias` / `jogos`)."""
    return _equipes_win_rate(team, 'vitorias', 'jogos')


def generalBO1WR(team):
    """Win rate of `team` in best-of-1 matches (`md1` / `jmd1`)."""
    return _equipes_win_rate(team, 'md1', 'jmd1')


def generalBO2WR(team):
    """Win rate of `team` in best-of-2 matches (`md2` / `jmd2`)."""
    return _equipes_win_rate(team, 'md2', 'jmd2')


def generalBO3WR(team):
    """Win rate of `team` in best-of-3 matches (`md3` / `jmd3`)."""
    return _equipes_win_rate(team, 'md3', 'jmd3')


def generalBO5WR(team):
    """Win rate of `team` in best-of-5 matches (`md5` / `jmd5`)."""
    return _equipes_win_rate(team, 'md5', 'jmd5')

def _cypher_string(value):
    """Escape `value` for use inside a single-quoted Cypher string literal."""
    return str(value).replace('\\', '\\\\').replace("'", "\\'")


def _played_property(team1, team2, prop):
    """Read one property of the (team1)-[:played]->(team2) relationship.

    Args:
        team1: Name of the source :Team node.
        team2: Name of the target :Team node.
        prop: Relationship property to return (e.g. 'winrate', 'bo3').

    Returns:
        The property as a float, or 0.0 when no such relationship exists or
        the property is NULL.
    """
    # Team names are escaped so an apostrophe cannot break the Cypher literal.
    query = (
        "MATCH (n1:Team)-[r:played]->(n2:Team) "
        f"WHERE n1.name = '{_cypher_string(team1)}' "
        f"AND n2.name = '{_cypher_string(team2)}' "
        f"RETURN r.{prop}"
    )
    df = (
        spark.read.format("org.neo4j.spark.DataSource")
        .option("url", "bolt://neo4j:7687")
        .option("authentication.type", "none")
        .option("query", query)
        .load()
    )
    # One fetch instead of isEmpty() followed by collect(), which ran the
    # read twice.
    rows = df.take(1)
    if not rows or rows[0][0] is None:
        return 0.0
    return float(rows[0][0])


def specificTeamToTeamWR(team1, team2):
    """Head-to-head win rate of `team1` against `team2` (`winrate` property)."""
    return _played_property(team1, team2, 'winrate')


def specificTeamToTeamBO1WR(team1, team2):
    """Head-to-head best-of-1 win rate of `team1` against `team2`."""
    return _played_property(team1, team2, 'bo1')


def specificTeamToTeamBO2WR(team1, team2):
    """Head-to-head best-of-2 win rate of `team1` against `team2`."""
    return _played_property(team1, team2, 'bo2')


def specificTeamToTeamBO3WR(team1, team2):
    """Head-to-head best-of-3 win rate of `team1` against `team2`."""
    return _played_property(team1, team2, 'bo3')


def specificTeamToTeamBO5WR(team1, team2):
    """Head-to-head best-of-5 win rate of `team1` against `team2`."""
    return _played_property(team1, team2, 'bo5')

def predict(team1 = None, side1 = None, team2 = None, map = None, bo=None):
    """Score `team1`'s chances as a weighted sum of its win rates.

    Each optional argument enables the win-rate components that depend on it;
    a component whose inputs are missing contributes 0.

    Args:
        team1: Team being scored (required).
        side1: Side label passed to the side helpers ('TR' or 'CT').
        team2: Opponent, used for the head-to-head components.
        map: Map name, used for the map components.
        bo: Match format, one of "md1", "md2", "md3", "md5"; any other value
            leaves the format components at 0.

    Returns:
        The weighted score, or the message string below when `team1` is None.
    """
    if team1 is None:
        return "Pelo menos um time deve ser fornecido"

    has_side = side1 is not None
    has_map = map is not None
    has_bo = bo is not None
    has_opponent = team2 is not None

    wr = generalTeamWR(team1)
    side_wr = generalSideWR(team1, side1) if has_side else 0
    map_wr = generalMapWR(team1, map) if has_map else 0

    bo_wr = 0
    if has_bo:
        if bo == "md1":
            bo_wr = generalBO1WR(team1)
        elif bo == "md2":
            bo_wr = generalBO2WR(team1)
        elif bo == "md3":
            bo_wr = generalBO3WR(team1)
        elif bo == "md5":
            bo_wr = generalBO5WR(team1)

    map_side_wr = generalMapSideWR(team1, map, side1) if has_side and has_map else 0
    team_to_team_wr = specificTeamToTeamWR(team1, team2) if has_opponent else 0

    team_to_team_bo_wr = 0
    if has_opponent and has_bo:
        if bo == "md1":
            team_to_team_bo_wr = specificTeamToTeamBO1WR(team1, team2)
        elif bo == "md2":
            team_to_team_bo_wr = specificTeamToTeamBO2WR(team1, team2)
        elif bo == "md3":
            team_to_team_bo_wr = specificTeamToTeamBO3WR(team1, team2)
        elif bo == "md5":
            team_to_team_bo_wr = specificTeamToTeamBO5WR(team1, team2)

    # No helper computes this component, so its 0.15 weight always contributes 0.
    team_to_team_side_wr = 0

    return (
        0.05 * wr
        + 0.05 * bo_wr
        + 0.05 * side_wr
        + 0.1 * team_to_team_wr
        + 0.1 * team_to_team_bo_wr
        + 0.15 * team_to_team_side_wr
        + 0.25 * map_wr
        + 0.25 * map_side_wr
    )

In [90]:
# Spot-check the per-team win-rate helpers on a few known teams.
for win_rate_fn, team_name in (
    (generalBO1WR, "RED Canids"),
    (generalBO2WR, "RED Canids"),
    (generalBO3WR, "MAD Lions"),
    (generalTeamWR, "Complexity"),
):
    print(win_rate_fn(team_name))

1.0
0.5
0.375
0.6


In [102]:
# Spot-check the head-to-head helpers backed by Neo4j (the Evidence / 2K
# best-of-5 lookup is issued twice, as before).
for head_to_head_fn, source_team, target_team in (
    (specificTeamToTeamBO2WR, "Keyd", "RED Canids"),
    (specificTeamToTeamBO3WR, "G2", "Liquid"),
    (specificTeamToTeamBO5WR, "Evidence", "2K"),
    (specificTeamToTeamBO5WR, "Evidence", "2K"),
    (specificTeamToTeamWR, "Natus Vincere", "Vitality"),
):
    print(head_to_head_fn(source_team, target_team))

1.0
1.0
1.0
1.0
0.5


In [115]:
# Spot-check the map win rate, overall and per starting side.
print(generalMapWR("RED Canids", "Vertigo"))
for starting_side in ("TR", "CT"):
    print(generalMapSideWR("RED Canids", "Vertigo", starting_side))

0.75
1.0
0.6666666666666666


In [127]:
# Score the same match-up from both teams' points of view, then once more
# without a map.
for scenario in (
    dict(team1="fnatic", side1="TR", team2="RED Canids", map="Vertigo", bo="md3"),
    dict(team1="RED Canids", side1="TR", team2="fnatic", map="Vertigo", bo="md3"),
    dict(team1="RED Canids", side1="TR", team2="fnatic", bo="md3"),
):
    print(predict(**scenario))

0.19514285714285712
0.5019607843137255
0.0644607843137255


# Bonus Queries
This section is used to run other queries not linked to result prediction, such as objective queries or player statistics.

In [None]:
# low priority TODO 
# Define functions to execute objective queries in Neo4j
# Define functions to execute generic statistics queries in Apache Kudu
# Define interface to call above functions