In [37]:
# Importing required libraries
import numpy as np
import pandas as pd
import requests
import time
from auth_key import get_auth_key

In [39]:
# Load the API key once via the project-local helper; the request helpers below
# send it unchanged as the value of the "Authorization" header
auth_key = get_auth_key()

In [41]:
# Takes player nickname and auth_key as arguments and returns player id and elo as columns of a dataframe
def get_player_data(nickname, auth_key):
    """Look up a player by nickname and return their id and CS2 elo.

    Parameters
    ----------
    nickname : value sent as the ``nickname`` query parameter.
    auth_key : value sent as the ``Authorization`` header.

    Returns
    -------
    pandas.DataFrame
        One row with columns ``player_id`` and ``faceit_elo`` on success.
        An empty DataFrame when the request does not return 200 or when the
        payload lacks ``player_id`` / ``games -> cs2 -> faceit_elo``; a short
        message is printed in each of those cases.
    """
    response = requests.get(
        "https://open.faceit.com/data/v4/players",
        headers={"Authorization": auth_key},
        params={"nickname": nickname},
    )
    status = response.status_code

    # unknown nickname
    if status == 404:
        print(f"Error 404: Resource not found. Requested URL: {response.url}")
        return pd.DataFrame()

    # any other non-success status
    if status != 200:
        print(f"Error {response.status_code}: {response.text}")
        return pd.DataFrame()

    payload = response.json()

    # a missing key means the player has no cs2 entry in the payload
    try:
        record = {
            "player_id": payload['player_id'],
            "faceit_elo": payload["games"]["cs2"]["faceit_elo"],
        }
    except KeyError:
        print("match_id for CSGO")
        return pd.DataFrame()

    return pd.DataFrame([record])
    
    
    
    

In [43]:
# returns player stats for the last 100 games played (if 100 games exist)
def get_player_stats(player_id, auth_key):
    """Fetch per-match CS2 stats for a player (at most 100 matches per request).

    Parameters
    ----------
    player_id : player id, used in the URL path and also sent as a query
        parameter (kept as in the original request).
    auth_key : value sent as the ``Authorization`` header.

    Returns
    -------
    pandas.DataFrame
        The ``items`` list of the response flattened with ``_`` as separator
        (e.g. ``stats_Map``). An empty DataFrame when the request does not
        return 200 or the payload carries no ``items``; ``player_stats_calc``
        already treats an empty frame as "no stats available".
    """
    # parameters needed for API request
    parameters = {
        "player_id": player_id,
        "limit": 100
    }

    # URL used for API request
    game_id = "cs2"
    url_stats = f"https://open.faceit.com/data/v4/players/{player_id}/games/{game_id}/stats"

    headers = {
        "Authorization": auth_key
    }

    response = requests.get(url_stats,
                            headers = headers,
                            params = parameters)

    # same error reporting style as the other request helpers in this notebook;
    # previously an error payload crashed with KeyError on data['items']
    if response.status_code != 200:
        print(f"Error {response.status_code}: {response.text}")
        return pd.DataFrame()

    data = response.json()
    items = data.get('items') if isinstance(data, dict) else None
    if not items:
        return pd.DataFrame()

    # Flattens nested json into dataframe
    return pd.json_normalize(items, sep = '_')

In [45]:
# Takes player stats dataframe and match map and returns individual player stats for that map
def player_stats_calc(df, map):
    """Summarise one player's results on a single map.

    Parameters
    ----------
    df : DataFrame of per-match stats with columns ``stats_Map``,
        ``stats_Result`` and ``stats_K/R Ratio`` (the last two may hold
        numeric strings; they are converted with ``pd.to_numeric``).
    map : map name to filter ``stats_Map`` on. The name shadows the builtin
        ``map``; it is kept because it is part of the function's signature.

    Returns
    -------
    pandas.DataFrame
        One row (index 0) with ``Win Percentage`` and ``Average K/R Ratio``.
        An empty DataFrame when ``df`` lacks the required columns or contains
        no match on ``map``.
    """
    required_columns = ("stats_Map", "stats_Result", "stats_K/R Ratio")
    available_columns = getattr(df, "columns", ())

    # explicit column check instead of a bare `except:` around the filter
    if any(column not in available_columns for column in required_columns):
        return pd.DataFrame()

    map_games = df.loc[df["stats_Map"] == map]

    # No games on this map. The original only noticed this through a
    # ZeroDivisionError, which numeric columns never raise (they yield NaN),
    # so the check is made explicit; the printed message is kept unchanged.
    if map_games.empty:
        print("ZeroDivisonError")
        return pd.DataFrame()

    # convert into new Series rather than assigning back into a filtered slice
    results = pd.to_numeric(map_games["stats_Result"])
    kr_ratios = pd.to_numeric(map_games["stats_K/R Ratio"])

    # Calculating win percentage
    map_win_percentage = (results.sum() / len(map_games)) * 100

    # Calculating average K/R ratio
    average_KR_ratio = kr_ratios.mean()

    # Combining stats into a single-row data frame
    return pd.DataFrame({"Win Percentage": map_win_percentage,
                         "Average K/R Ratio": average_KR_ratio}, index = [0])

In [47]:
# Pulls player ID from leaderboard
def get_ranking_player_ids(offset, auth_key, region = "NA", game_id = "cs2", limit = 100):
    """Pull one page of player ids from the ranking leaderboard.

    Parameters
    ----------
    offset : starting position of the page, sent as the ``offset`` parameter.
    auth_key : value sent as the ``Authorization`` header.
    region : region segment of the URL (default ``"NA"``, as before).
    game_id : game segment of the URL (default ``"cs2"``, as before).
    limit : page size sent as the ``limit`` parameter (default 100, as before).

    Returns
    -------
    pandas.Series
        The ``player_id`` column of the flattened ``items`` list. An empty
        Series named ``player_id`` when the request fails or returns no
        players; previously those paths returned ``None`` (or raised), which
        broke callers that immediately call ``.to_excel`` on the result.
    """
    # requests parameters
    parameters = {
        "offset": offset,
        "limit": limit
    }
    url_leaderboard = f"https://open.faceit.com/data/v4/rankings/games/{game_id}/regions/{region}"
    headers = {
        "Authorization": auth_key
    }

    # requesting one page of players' data from the leaderboard
    response = requests.get(url_leaderboard,
                            headers = headers,
                            params = parameters)

    no_players = pd.Series(name = "player_id", dtype = "object")

    # Conditional to catch API request errors
    if response.status_code == 404:
        print("Error 404: Resource not found.")
        print(f"Requested URL: {response.url}")
        return no_players

    if response.status_code != 200:
        print(f"Error {response.status_code}: {response.text}")
        return no_players

    data = response.json()
    items = data.get('items') if isinstance(data, dict) else None
    if not items:
        return no_players

    # Flattening nested json into dataframe
    data_frame_rankings = pd.json_normalize(items, sep = '_')
    if 'player_id' not in data_frame_rankings.columns:
        return no_players

    # Extracting player_id column
    return data_frame_rankings['player_id']


In [49]:
# Uses player ID to pull 3 match_ids from match history
def match_data_selection(player_id, auth_key):
    """Return up to 3 match ids from a player's match history.

    Parameters
    ----------
    player_id : player id used in the history URL.
    auth_key : value sent as the ``Authorization`` header.

    Returns
    -------
    pandas.Series
        The ``match_id`` column of the flattened ``items`` list. An empty
        Series named ``match_id`` when the request fails or returns no
        matches, so that ``Series.apply`` over many players always receives a
        Series (previously those paths returned ``None``).
    """
    # parameters for API request
    parameters = {
        "limit": 3
    }
    # built locally: the original read an undefined global `headers`, which
    # raised NameError unless a stale variable was left in the kernel
    headers = {
        "Authorization": auth_key
    }
    url_match_history = f"https://open.faceit.com/data/v4/players/{player_id}/history"

    # requesting match history from API for player
    response = requests.get(url_match_history,
                            headers = headers,
                            params = parameters)

    no_matches = pd.Series(name = "match_id", dtype = "object")

    # Conditional to check for request errors
    if response.status_code != 200:
        print(f"Error {response.status_code}: {response.text}")
        return no_matches

    data = response.json()
    items = data.get('items') if isinstance(data, dict) else None
    if not items:
        return no_matches

    # Flattening JSON to dataframe
    df_match_id = pd.json_normalize(items, sep = '_')
    if 'match_id' not in df_match_id.columns:
        return no_matches

    # returning match_id column
    return df_match_id['match_id']
    
    

In [51]:
# Uses match_ID to calculate dataframe of match stats for each team
def _team_average_stats(players, match_map, auth_key, count):
    """Average elo, map win percentage and K/R ratio over one team's players.

    `players` is the list of player records of one team (each with `nickname`
    and `player_id`). Returns a dict with keys `avg_win_percentage`, `avg_KR`
    and `avg_elo`, or None (after printing `count`) as soon as one player's
    elo or map stats cannot be obtained.
    """
    team_df = pd.DataFrame(players)

    # elo lookup is done per nickname, one request per player
    id_elo_frames = []
    for nickname in team_df.nickname:
        id_elo = get_player_data(nickname, auth_key)
        if id_elo.empty:
            # used to track missing/invalid data
            print(count)
            return None
        id_elo_frames.append(id_elo)

    # individual stats (K/R and win percentage) on the match's map
    player_stat_frames = []
    for player_id in team_df.player_id:
        stats = player_stats_calc(get_player_stats(player_id, auth_key), match_map)
        if stats.empty:
            print(count)
            print("Unable to compute stats, may be error with map used")
            return None
        player_stat_frames.append(stats)

    team_elo_df = pd.concat(id_elo_frames)
    team_stats_df = pd.concat(player_stat_frames)
    return {
        "avg_win_percentage": team_stats_df["Win Percentage"].mean(),
        "avg_KR": team_stats_df["Average K/R Ratio"].mean(),
        "avg_elo": team_elo_df["faceit_elo"].mean(),
    }


def calculate_match_stats(match_id, auth_key, count):
    """Build one row of pre-match team averages for a finished match.

    Parameters
    ----------
    match_id : match id used in the match-stats URL.
    auth_key : value sent as the ``Authorization`` header.
    count : caller's loop counter; printed when a match has to be skipped so
        the skipped iteration can be identified in the output.

    Returns
    -------
    pandas.DataFrame
        One row with columns ``win`` ("team a" / "team b"), ``map``,
        ``Team_A_avg_win_percentage``, ``Team_A_avg_KR``, ``Team_A_avg_elo``,
        ``Team_B_avg_win_percentage``, ``Team_B_avg_KR``, ``Team_B_avg_elo``
        and ``Match ID``. An empty DataFrame when the match payload is
        unusable or any player's elo / map stats are unavailable.
    """
    columns = ["win", "map", "Team_A_avg_win_percentage", "Team_A_avg_KR", "Team_A_avg_elo",
               "Team_B_avg_win_percentage", "Team_B_avg_KR", "Team_B_avg_elo", "Match ID"]

    # parameters for API request
    headers = {
        "Authorization": auth_key
    }
    url_match_id = f"https://open.faceit.com/data/v4/matches/{match_id}/stats"

    # API request for match statistics
    response = requests.get(url_match_id,
                            headers = headers)

    # a body that is not valid JSON is treated like a match without data
    try:
        match_data = response.json()
    except ValueError:
        return pd.DataFrame()

    # any missing piece of the expected structure means the match is skipped
    try:
        rounds = match_data['rounds']
        rounds_flat = pd.json_normalize(rounds, sep = '_')
        teams_df = pd.json_normalize(rounds,
                                     record_path = 'teams',
                                     meta = ['match_id'],
                                     sep = '_')

        # first team row is reported as "team a", second as "team b"
        winner = "team a" if teams_df.loc[0, 'team_stats_Team Win'] == "1" else "team b"
        match_map = rounds_flat.loc[0, 'round_stats_Map']
        match_label = teams_df.loc[0, 'match_id']
        team_a_players = teams_df.loc[0, 'players']
        team_b_players = teams_df.loc[1, 'players']
    except (KeyError, TypeError):
        return pd.DataFrame()

    team_a = _team_average_stats(team_a_players, match_map, auth_key, count)
    if team_a is None:
        return pd.DataFrame()

    # Team B goes through the same lookup as Team A. The original loop never
    # called get_player_data for Team B and reused Team A's last result.
    team_b = _team_average_stats(team_b_players, match_map, auth_key, count)
    if team_b is None:
        return pd.DataFrame()

    row = {
        "win": winner,
        "map": match_map,
        "Team_A_avg_win_percentage": team_a["avg_win_percentage"],
        "Team_A_avg_KR": team_a["avg_KR"],
        "Team_A_avg_elo": team_a["avg_elo"],
        "Team_B_avg_win_percentage": team_b["avg_win_percentage"],
        "Team_B_avg_KR": team_b["avg_KR"],
        "Team_B_avg_elo": team_b["avg_elo"],
        "Match ID": match_label,
    }

    # Returns row of match data for match_id given
    return pd.DataFrame([row], columns = columns)

In [118]:
# Creates excel file to store player_ids
# mode = 'w' starts a fresh workbook under this name; an empty DataFrame is
# written through the writer so the file exists before anything is appended
excel_filename_player_id = 'player_ids_new.xlsx'
with pd.ExcelWriter(excel_filename_player_id, engine = 'openpyxl', mode = 'w') as writer:
    pd.DataFrame().to_excel(writer, index = False)

In [131]:
# pulls 100 player ids from ranking leaderboard each iteration
# offset parameter determines starting point of pull, helps when API call times out
offset = 49700
while offset < 50000:
    player_df = get_ranking_player_ids(offset, auth_key)

    # a failed request yields no ids: stop here instead of crashing on .to_excel,
    # leaving `offset` at the page that still has to be pulled
    if player_df is None or player_df.empty:
        print(f"No player ids returned at offset {offset}; restart the pull from this offset")
        break

    # appends below the last used row of the workbook created in the previous cell
    # (the original referenced `excel_filename`, which is not defined in this notebook)
    with pd.ExcelWriter(excel_filename_player_id, engine = 'openpyxl', mode = 'a', if_sheet_exists = 'overlay') as writer:
        # header stays off: the original `header = offset == 0` was evaluated
        # after the increment, so it was never True
        player_df.to_excel(writer, index = False, header = False, startrow = writer.sheets['Sheet1'].max_row)

    offset += 100
    


In [95]:
# Reading in player ID dataframe
# NOTE(review): this reads "player_ids.xlsx" while the workbook-creation cell
# above names its file 'player_ids_new.xlsx' — confirm which file is intended
player_ids_df = pd.read_excel("player_ids.xlsx")
# the single column is named "player_ids"; later cells select it by that name
player_ids_df.columns = ["player_ids"]
player_ids_df

Unnamed: 0,player_ids
0,884d38aa-b92e-420a-ab15-0d600087c366
1,e8f354c3-337e-4d64-b2a8-c681262c8ed8
2,93849291-b93b-47a4-8351-b05d8d7026a5
3,71ea9955-8400-4553-87a5-4c325923d21f
4,e1c8511d-52b8-4c21-a781-1157d201a9f6
...,...
49995,1c230639-c8a4-4848-8e8b-5cc226d080ed
49996,15658775-7875-48b6-8d82-69764be488b5
49997,155b18ef-fd97-4121-a691-444180a6a5d0
49998,0a7370b4-dfe2-4e71-84f7-41c868180520


In [230]:
# taking random sample from player_id df
# random_state pins the draw so a Restart & Run All selects the same players
# (and therefore the same matches) every time; change the seed for a new sample
player_ids_sample_df = player_ids_df.sample(n = 100, random_state = 42)
player_ids_sample_df

Unnamed: 0,player_ids
11709,d887bdad-0aed-48d9-9b6c-15f56e8a61ec
32111,0a1d3574-0e40-4836-a26a-1ef4f85fc9e1
10668,13004c45-a23c-4d1c-b714-5ef3391e0367
31249,135007ec-80bb-468e-82d7-4902c6b8e948
30673,fdc094f8-d20a-471c-9654-a37cc247960a
...,...
9368,b9624f14-b818-4e77-ba58-2a4176da4639
15780,c470d0d4-ecda-41df-9ec0-83115a6363c8
5234,79447dc3-8db6-490f-a650-9e5e8b48c773
26730,3c6e7053-90f3-42ef-a3fd-a29dd0c56405


In [175]:
# creating excel sheet for match_ids
# mode = 'w' starts a fresh workbook under this name; the empty DataFrame is
# written so the file exists before match ids are appended to it later
excel_filename_match_id = 'match_ids.xlsx'
with pd.ExcelWriter(excel_filename_match_id, engine = 'openpyxl', mode = 'w') as writer:
     pd.DataFrame().to_excel(writer, index = False)

In [236]:
# Applies match_data_selection to every sampled player id, then reshapes the
# per-player results into a single "match_id" column with missing entries dropped
match_ids_expanded = (
    player_ids_sample_df["player_ids"]
    .apply(lambda x: match_data_selection(x, auth_key))
    .melt(value_name = "match_id")
    .dropna()[["match_id"]]
)
match_ids_expanded

Unnamed: 0,match_id
0,1-fe4702c6-f6c6-48ca-a81b-0aedc8f31444
1,1-051c5a18-6a99-4e5e-bef7-ed1143474b33
2,1-8f29e3ec-f49d-4d05-8eac-98993126c405
3,1-a40b6b42-9441-4681-9206-a2085e92f800
4,1-c33343e1-5b8f-4cca-995b-20685ec33746
...,...
2995,1-da5d765d-5ff7-4ef2-b746-2e95463b015a
2996,1-5acfd781-934e-4155-900f-c5318babd62a
2997,1-8f51de72-a949-4a55-b782-5f2b359a5e10
2998,1-ba8a9159-2d8b-45b6-b56e-f071a5e5189c


In [238]:
# Appending match_id data to match_id excel sheet
with pd.ExcelWriter(excel_filename_match_id, engine = 'openpyxl', mode = 'a', if_sheet_exists = 'overlay') as writer:
    # header = False replaces `header = offset == 0`: `offset` is a leftover from
    # the leaderboard pull loop (non-zero once that loop has run) and is undefined
    # if that cell was skipped, so this cell no longer depends on it
    match_ids_expanded.to_excel(writer, index = False, header = False, startrow = writer.sheets['Sheet1'].max_row)

In [91]:
# Used for tracking missing/invalid data while calculating match stats
count = 0
# Runs the match-stats calculation for one entry of match_ids_expanded.
# Needs match_ids_expanded to exist in the kernel already: the recorded output
# of this cell is a NameError because it was executed before that was defined.
calculate_match_stats(match_ids_expanded["match_id"][1428], 
                      auth_key, 
                      count)

NameError: name 'match_ids_expanded' is not defined

In [950]:
# Creating excel sheet to store data for win prediction
# mode = 'w' starts a fresh workbook under this name; the empty DataFrame is
# written so the file exists before the collection loop appends rows to it
excel_filename_data_win  = "data_win_prediction.xlsx"
with pd.ExcelWriter(excel_filename_data_win, engine = 'openpyxl', mode = 'w') as writer:
    pd.DataFrame().to_excel(writer, index = False)

In [1017]:
# Loops over match_id column and returns match data row, then appends to excel file
# the slice start (2568) is the position to resume from after an interrupted run
count = 0
for match_id in match_ids_expanded["match_id"][2568:]:
    count += 1
    data_df = calculate_match_stats(match_id, auth_key, count)

    # Appends to data_win_prediction excel sheet; skipped matches come back as
    # an empty frame and have nothing to write
    if not data_df.empty:
        with pd.ExcelWriter(excel_filename_data_win, engine = 'openpyxl', mode = 'a', if_sheet_exists = 'overlay') as writer:
            # header = False replaces `header = offset == 0`, which depended on a
            # leftover variable from the leaderboard pull loop
            data_df.to_excel(writer, index = False, header = False, startrow = writer.sheets['Sheet1'].max_row)

    # delays iterations for API limits (was dedented out of the loop, so it
    # only ran once after the last match)
    time.sleep(3)

match_id for CSGO
1
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=adamha54
3
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=JCWCS
4
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=1shrek
5
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=Karnani02
9
match_id for CSGO
14
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=oreosssss
16
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=prblmchld
18
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=vlre-
20
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=dak3-_-
21
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=yobrt
22
Error 404