In [5]:
# Importing required libraries
import numpy as np
import pandas as pd
import requests
import time
from auth_key import get_auth_key

In [7]:
auth_key = get_auth_key()

In [9]:
# Takes player nickname and auth_key as arguements and returns player id and elo as columns of dataframe
def get_player_data(nickname, auth_key):

    # parameters for requests func
    parameters = {
        "nickname": nickname 
    }
    
    # faceit url for player id data
    url_player_id = "https://open.faceit.com/data/v4/players"

    # API key
    headers = {
    "Authorization": auth_key
    }

    # requesting player info from API
    response = requests.get(url_player_id, 
                            headers = headers, 
                            params = parameters)


    # conditional to catch request errors
    if response.status_code == 404:
        print(f"Error 404: Resource not found. Requested URL: {response.url}")
        
        # Return empty dataframe
        stats = pd.DataFrame()
        return stats
        
    elif response.status_code == 200:
        player_data = response.json()

        # try/except statement to catch KeyErrors 
        try:
            player_data_dict = {"player_id": player_data['player_id'],
                            "faceit_elo": player_data["games"]["cs2"]["faceit_elo"]}
            player_data_df = pd.DataFrame([player_data_dict])
            
        except KeyError:
            # KeyErrors generally occur due to CSGO match ID
            print("match_id for CSGO")
            
             # Returns empty dataframe
            stats = pd.DataFrame()
            return stats
            
            
        
        return player_data_df

    # for uncommon request errors
    else:
        print(f"Error {response.status_code}: {response.text}")
        stats = pd.DataFrame()
        return stats
    
    
    
    

In [11]:
# returns player stats for the last 100 games played (if 100 games exist)
def get_player_stats(player_id, auth_key):
    
    
    # parameters needed for API request
    parameters = {
        "player_id": player_id,
        "limit": 100
    }

    # URL used for API request
    game_id = "cs2"
    url_stats = f"https://open.faceit.com/data/v4/players/{player_id}/games/{game_id}/stats"
    
    headers = {
        "Authorization": auth_key
    }


    # requests player data for last 100 games
    response = requests.get(url_stats, 
                            headers = headers, 
                            params = parameters)

    # Flattens nested json into dataframe
    data = response.json()
    stats_df = pd.json_normalize(data['items'], sep = '_')

    # returns player stats dataframe
    return stats_df

In [13]:
# Takes player stats dataframe and match map and returns individual player stats for that map
def player_stats_calc(df, map):
    
    # testing if map exists
    try:
        test_df = df[df.stats_Map == map]
    # If map does not exist return empty dataframe
    except:
        stats = pd.DataFrame()
        return stats

    
    # Converting Result and K/R ratio columns to numeric
    test_df.loc[:, ['stats_Result']] = pd.to_numeric(test_df['stats_Result'])
    test_df.loc[:, ['stats_K/R Ratio']] = pd.to_numeric(test_df['stats_K/R Ratio'])
        

    try:
        # Calculating win percentage
        map_win_percentage = (test_df['stats_Result'].sum() / len(test_df)) * 100
        
    except ZeroDivisionError:
        map_win_percentage = 0
        print("ZeroDivisonError")


        
        
    try:
        # Calculating average K/R ratio
        average_KR_ratio = test_df['stats_K/R Ratio'].mean()
    
    except ZeroDivisionError:    
        stats = pd.DataFrame()
        print("ZeroDivisonError")
        return stats
        
    
    # Combining stats and renaming axis for data frame
    stats = pd.DataFrame({"Win Percentage": map_win_percentage,
                         "Average K/R Ratio": average_KR_ratio}, index = [0])
    # returning stats data frame
    return stats

In [15]:
# Pulls player ID from leaderboard
def get_ranking_player_ids(offset, auth_key):

    # requests parameters
    parameters = {
        "offset": offset,
        "limit": 100
    }
    region = "NA"
    game_id = "cs2"
    url_leaderboard = f"https://open.faceit.com/data/v4/rankings/games/{game_id}/regions/{region}"
    headers = {
        "Authorization": auth_key
    }
    
    
    # requesting API for 100 players' data from leaderboard
    response = requests.get(url_leaderboard, 
                            headers = headers, 
                            params = parameters)

    # Conditional to catch API request errors
    if response.status_code == 404:
        print("Error 404: Resource not found.")
        print(f"Requested URL: {response.url}")

    # Flattening nested json into dataframe
    elif response.status_code == 200:
        data = response.json()
        data_frame_rankings = pd.json_normalize(data['items'], 
                                                sep = '_')
        
    # Extracting player_id column
        return data_frame_rankings['player_id']
        
    else:
        print(f"Error {response.status_code}: {response.text}")


In [17]:
# Uses player ID to pull 3 match_id from match histroy
def match_data_selection(player_id, auth_key):

    # parameters for API request
    parameters = {
        "limit": 3
    }

    # API key
    headers = {
    "Authorization": auth_key
    }
    
    url_match_history = f"https://open.faceit.com/data/v4/players/{player_id}/history"

    # requesting match history from API for player
    response = requests.get(url_match_history, 
                            headers = headers, 
                            params = parameters)

    # Conditional to check for request errors
    if response.status_code == 200:

        # Flattening JSON to dataframe
        data = response.json()
        df_match_id = pd.json_normalize(data['items'], 
                                        sep = '_')

        # returning match_id column
        return df_match_id['match_id']
        
    else:
        print(f"Error {response.status_code}: {response.text}")
    
    

In [19]:
# Uses match_ID to calculate dataframe of match stats for each team
def calculate_match_stats(match_id, auth_key, count):

    # parameters for API request
    headers = {
        "Authorization": auth_key
        }
    url_match_id = f"https://open.faceit.com/data/v4/matches/{match_id}/stats"

    # API request for match statsitics
    response = requests.get(url_match_id, 
                            headers = headers)
        

    # Flattens nested json
    match_data = response.json()

    # attempts to flatten nested JSON
    try:
        rounds_flat = pd.json_normalize(match_data['rounds'], 
                                        sep = '_')
    # returns empty dataframe if rounds does not exist 
    except KeyError:
        stats = pd.DataFrame()
        return stats
    
    # Flatten teams, nested in match_data
    teams_flat = pd.json_normalize(
        match_data['rounds'],
        record_path = 'teams',
        meta = ['match_id'],
        sep = '_'
    )
    
    # Flatten players data
    players_flat = pd.json_normalize(
        match_data['rounds'],
        record_path = ['teams', 'players'],
        meta = ['match_id', ['teams', 'team_id']],
        sep = '_'
    )

    # flatten teams data
    teams_df = pd.json_normalize(match_data['rounds'], 
                                 record_path = 'teams', 
                                 meta = ['match_id'], 
                                 sep = '_')
    
    # creating columns for match data frame
    columns = ["win", "map", "Team_A_avg_win_percentage", "Team_A_avg_KR", "Team_A_avg_elo",
                  "Team_B_avg_win_percentage", "Team_B_avg_KR", "Team_B_avg_elo", "Match ID"]

    # creating match data frame
    
    data_df = pd.DataFrame(columns = columns)
    data_df.columns

    # checking if match data exists, or returns empty dataframe
    try:
        if teams_df.loc[0, 'team_stats_Team Win'] == "1":
            data_df.loc[0, "win"] = "team a"
        else:
            data_df.loc[0, "win"] = "team b"
        data_df.loc[0, "map"] = rounds_flat.round_stats_Map[0]
    
        data_df.loc[0, "Match ID"] = teams_df.match_id[0]
        
    except KeyError:
        stats = pd.DataFrame()
        return stats
    

    
    
    # Team A Stats
    team_a_df = pd.DataFrame(teams_df.players[0])
    
    team_a_nicknames = team_a_df.nickname
    team_a_id_elo_list = []

    # loops over nicknames column and appends list of player ids for team A
    for nickname in team_a_nicknames:
        id_elo = get_player_data(nickname, auth_key)
        
        # returns empty dataframe if player elo DNE
        if id_elo.empty:
            print(count)
            return id_elo
        else:
            team_a_id_elo_list.append(id_elo)

    # creates column of player IDs from list
    team_a_id_elo_df = pd.concat(team_a_id_elo_list)

    # adds mean of team elo to match data df
    data_df.loc[0 , "Team_A_avg_elo"] = team_a_id_elo_df["faceit_elo"].mean()
    
    # Uses player ID to calculate individual player stats (K/R and win percentage)
    team_a_stats_list = []
    for id in team_a_df.player_id:
        # uses get_player_stats to calculate individual stats
        stats_df = get_player_stats(id, auth_key)
        stats = player_stats_calc(stats_df, data_df["map"][0])
        # if returned df is empty, return empty match dataframe
        if stats.empty:
            # used to track missing/invalid data
            print(count)
            print("Unable to compute stats, may be error with map used")
            return stats

        # append individual stats to list
        else: 
            team_a_stats_list.append(stats)

    # creates win percentage and K/R ratio columns for team A 
    team_a_stats_df = pd.concat(team_a_stats_list)
    data_df.loc[0 , "Team_A_avg_win_percentage"] = team_a_stats_df["Win Percentage"].mean()
    data_df.loc[0 , "Team_A_avg_KR"] = team_a_stats_df["Average K/R Ratio"].mean()
        
    
    
    
    
    # Team B stats, repeats same process used for team A stats
    
    team_b_df = pd.DataFrame(teams_df.players[1])
    team_b_nicknames = team_b_df.nickname
    team_b_id_elo_list = []
    
    for nickname in team_b_nicknames:
        
        if id_elo.empty:
            print(count)
            return id_elo
            
        else:
            team_b_id_elo_list.append(id_elo)
    
    team_b_id_elo_df = pd.concat(team_b_id_elo_list)
    team_b_id_elo_df
    data_df.loc[0 , "Team_B_avg_elo"] = team_b_id_elo_df["faceit_elo"].mean()
    
    
    team_b_stats_list = []
    for id in team_b_df.player_id:
        
        stats_df = get_player_stats(id, auth_key)
        stats = player_stats_calc(stats_df, data_df["map"][0])
        
        if stats.empty:
            print(count)
            print("Unable to compute stats, may be error with map used")
            return stats
            
        else: 
            team_b_stats_list.append(stats)
            
    team_b_stats_df = pd.concat(team_b_stats_list)
    data_df.loc[0 , "Team_B_avg_win_percentage"] = team_b_stats_df["Win Percentage"].mean()
    data_df.loc[0 , "Team_B_avg_KR"] = team_b_stats_df["Average K/R Ratio"].mean()

    # Returns row of match data for match_id given
    return data_df

In [118]:
# Creates excel file to store player_ids
excel_filename_player_id = 'player_ids_new.xlsx'
with pd.ExcelWriter(excel_filename_player_id, engine = 'openpyxl', mode = 'w') as writer:
    pd.DataFrame().to_excel(writer, index = False)

In [131]:
# pulls 100 player ids from ranking leaderboard each iteration
# offset parameter determines starting point of pull, helps when API call times out
offset = 49700
while offset < 50000:
    player_df = get_ranking_player_ids(offset, auth_key)
    offset += 100
    
    with pd.ExcelWriter(excel_filename, engine = 'openpyxl', mode = 'a', if_sheet_exists = 'overlay') as writer:
        player_df.to_excel(writer, index = False, header = offset == 0, startrow = writer.sheets['Sheet1'].max_row)
    


In [21]:
# Reading in player ID dataframe
player_ids_df = pd.read_excel("player_ids.xlsx")
player_ids_df.columns = ["player_ids"]
player_ids_df

Unnamed: 0,player_ids
0,884d38aa-b92e-420a-ab15-0d600087c366
1,e8f354c3-337e-4d64-b2a8-c681262c8ed8
2,93849291-b93b-47a4-8351-b05d8d7026a5
3,71ea9955-8400-4553-87a5-4c325923d21f
4,e1c8511d-52b8-4c21-a781-1157d201a9f6
...,...
49995,1c230639-c8a4-4848-8e8b-5cc226d080ed
49996,15658775-7875-48b6-8d82-69764be488b5
49997,155b18ef-fd97-4121-a691-444180a6a5d0
49998,0a7370b4-dfe2-4e71-84f7-41c868180520


In [43]:
# taking random sample from player_id df
player_ids_sample_df = player_ids_df.sample(n = 1000, random_state = 43)
player_ids_sample_df

Unnamed: 0,player_ids
7396,774536f2-3080-4408-8301-aab58592b27f
29820,313797a2-0fa3-4801-853a-72e8774f6bf8
1987,ea2263ca-dd79-4074-8830-31edbf43e5cc
20095,2580cd2b-0960-480f-8356-9f5253fd02bb
32435,b2028d5c-e9d5-4ddd-88f4-1415af694a7e
...,...
2002,3acfa633-5425-46a7-b682-cb05c4a80a0a
28348,0c0511ed-05a7-4c6f-99b5-e3bba68ddec4
13460,9632e015-1061-4b04-8d3f-c722d6fc97ce
15176,b9a3d7c3-974a-490e-8707-a279b0133d5a


In [53]:
# Applys match_data_selection over player_id sampled dataframe
match_ids_expanded = player_ids_sample_df["player_ids"].apply(lambda x: match_data_selection(x, auth_key))

# creates dataframe out of match_ids
match_ids_expanded = match_ids_expanded.melt(value_name = "match_id").dropna()[["match_id"]]
match_ids_expanded

Unnamed: 0,match_id
0,1-0d630199-5350-4b4e-a589-7bba075a4d6d
1,1-927ddcba-b0ab-470d-9a0a-58961304ae47
2,1-d5c2d88a-2d34-42a9-b9d1-b0181cb341b7
3,1-fc6886cd-fad1-4b80-97f2-e7fdae252706
4,1-9403e94d-d656-477a-b7c7-13ff4e159046
...,...
2995,1-7652b032-5804-4242-a9c7-1ee16ef61f4c
2996,7e6fe12c-2de9-4e16-bed2-01e1bc0ddf3a
2997,1-957d5f55-6c39-45ea-84b7-7564be2ae710
2998,1-79fd975d-3770-47fa-ae25-9ba55aa20591


In [54]:
# creating excel sheet for match_ids
excel_filename_match_id = 'match_ids_new.xlsx'
with pd.ExcelWriter(excel_filename_match_id, engine = 'openpyxl', mode = 'w') as writer:
     pd.DataFrame().to_excel(writer, index = False)

In [59]:
# Appending match_id data to match_id excel sheet
with pd.ExcelWriter(excel_filename_match_id, engine = 'openpyxl', mode = 'a', if_sheet_exists = 'overlay') as writer:
        match_ids_expanded.to_excel(writer, index = False, header = offset == 0, startrow = writer.sheets['Sheet1'].max_row)

In [61]:
# Used for tracking missing/invalid data while calulating match stats
count = 0
calculate_match_stats(match_ids_expanded["match_id"][0], 
                      auth_key, 
                      count)

Unnamed: 0,win,map,Team_A_avg_win_percentage,Team_A_avg_KR,Team_A_avg_elo,Team_B_avg_win_percentage,Team_B_avg_KR,Team_B_avg_elo,Match ID
0,team b,de_inferno,40.507003,0.715324,1881.4,47.111111,0.679311,1596.0,1-0d630199-5350-4b4e-a589-7bba075a4d6d


In [63]:
# Creating excel sheet to store data for win prediction
excel_filename_data_win  = "data_win_prediction_new.xlsx"
with pd.ExcelWriter(excel_filename_data_win, engine = 'openpyxl', mode = 'w') as writer:
    pd.DataFrame().to_excel(writer, index = False)

In [78]:
# Loops over match_id column and returns match data row, then appends to excel file
count = 0
for match_id in match_ids_expanded["match_id"][0:]:
    count += 1
    data_df = calculate_match_stats(match_id, auth_key, count)

    # Appends to data_win_predction excel sheet
    with pd.ExcelWriter(excel_filename_data_win, engine = 'openpyxl', mode = 'a', if_sheet_exists = 'overlay') as writer:
        data_df.to_excel(writer, index = False, header = False, startrow = writer.sheets['Sheet1'].max_row)

# delays iterations for API limits
time.sleep(3)

match_id for CSGO
5
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=Sins%C3%A9ar
6
ZeroDivisonError
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=mikep-
11
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=messiuh
12
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=1monster-AI
13
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=faithystun
18
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=WIldWIngg
19
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=ThMoonCC18
20
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=_Jev
21
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=cSynnica

In [23]:
# taking random sample from player_id df for additional data collection
player_ids_sample_df = player_ids_df.sample(n = 1000, random_state = 44)
player_ids_sample_df

Unnamed: 0,player_ids
49457,f2014bee-d0eb-4eb7-a28d-ef59cc040b8d
20706,d7f4899d-20e6-489a-8c36-e3aa16200bd4
46059,d46eb788-9e58-48db-9caa-ccb3917f7182
38076,4950ba52-5ff2-4268-9695-3ef87607fac9
8394,f15d0438-fcb4-45d8-9680-e7170e0c6cec
...,...
44677,03250070-6e00-4c70-a4df-0145bc5ada8b
27045,396716cd-c557-4a9b-a07d-9a9cd2cde1c8
7401,56fa051e-e7a9-4536-9318-be06913e254c
4909,2bc39001-16fc-4c84-80b4-e40c5b2e7519


In [25]:
# Applys match_data_selection over player_id sampled dataframe
match_ids_expanded = player_ids_sample_df["player_ids"].apply(lambda x: match_data_selection(x, auth_key))

# creates dataframe out of match_ids
match_ids_expanded = match_ids_expanded.melt(value_name = "match_id").dropna()[["match_id"]]
match_ids_expanded

Unnamed: 0,match_id
0,1-b7867490-f764-456e-b05c-0172094a75df
1,1-9a1dc7db-9035-4b50-851d-f4a25ce72db2
2,1-9433dec6-0ec3-4a2f-8ceb-c24e925eec4e
3,1-29b163a3-be56-4bbb-9502-a2ff3a709c5a
4,1-7b4fa63b-f814-4765-9fe4-c4c76975c66d
...,...
2995,1-c16f5319-5b10-4b9c-8cc2-182366cf546d
2996,1-4bf74da4-1688-4673-afcf-becd21c0dc44
2997,1-beb29182-930b-467b-b41d-96c4146f709b
2998,1-c32fddbb-a170-42cb-bad1-ddd1e88227c3


In [27]:
# Creating excel sheet to store data for win prediction
excel_filename_data_win  = "data_win_prediction_3.xlsx"
with pd.ExcelWriter(excel_filename_data_win, engine = 'openpyxl', mode = 'w') as writer:
    pd.DataFrame().to_excel(writer, index = False)

In [38]:
# Loops over match_id column and returns match data row, then appends to excel file
count = 0
for match_id in match_ids_expanded["match_id"][2443:]:
    count += 1
    data_df = calculate_match_stats(match_id, auth_key, count)

    # Appends to data_win_predction excel sheet
    with pd.ExcelWriter(excel_filename_data_win, engine = 'openpyxl', mode = 'a', if_sheet_exists = 'overlay') as writer:
        data_df.to_excel(writer, index = False, header = False, startrow = writer.sheets['Sheet1'].max_row)

# delays iterations for API limits
time.sleep(3)

Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=ZyKersz
5
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=lalalal1
7
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=C4N0WSK1
8
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=sQiky_
9
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=noSLEEP_-
12
match_id for CSGO
17
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=ELTotii
18
ZeroDivisonError
ZeroDivisonError
ZeroDivisonError
ZeroDivisonError
ZeroDivisonError
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=M1LDCURE-
27
match_id for CSGO
28
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=coldushaa
32
Error 404: Resource no

In [40]:
# taking random sample from player_id df for additional data collection
player_ids_sample_df = player_ids_df.sample(n = 1000, random_state = 45)
player_ids_sample_df

Unnamed: 0,player_ids
42462,486057f7-f7a5-45a9-ba77-89ac817510e9
11769,f7104f8f-55db-42b4-8d91-f35ede38c35e
32449,8a5f654d-8cd1-4bd2-af92-b0291fcdff2a
18688,460f0b47-1252-4041-9ef2-d7c02ef2ce22
20342,0c858914-1f19-473d-89cc-67ce9fc03edd
...,...
18422,4906867f-a3fe-460f-99d6-126047072d97
8660,ec7a4a4f-813e-4b72-acaa-d8c51465dc12
172,04801c5c-d737-430a-94d3-769fffb26d02
43452,1cbca6a0-de7d-463a-8f89-ae26457b7153


In [43]:
# Applys match_data_selection over player_id sampled dataframe
match_ids_expanded = player_ids_sample_df["player_ids"].apply(lambda x: match_data_selection(x, auth_key))

# creates dataframe out of match_ids
match_ids_expanded = match_ids_expanded.melt(value_name = "match_id").dropna()[["match_id"]]
match_ids_expanded

Unnamed: 0,match_id
0,1-3996e84d-6a28-4234-a65c-6b6ce3e802a8
1,1-81fdd7a5-feda-446e-b707-e32ea75b792a
2,1-f93179cc-2830-4f76-bb9c-484368b07eb2
3,1-9cb71be2-5e7e-4362-80d1-939cafaf2c8b
4,1-bcc22eb1-976d-4ffc-a38c-0ff4b2f6b56f
...,...
2995,1-962eb897-7925-4abd-bfef-560e29b4bdee
2996,1-a5bf32d8-2afc-4c38-868e-400aea636054
2997,1-21e12a63-4041-4218-9fca-332723be6d8b
2998,1-981e62f2-cf33-427f-a605-013344ac55a7


In [44]:
# Creating excel sheet to store data for win prediction
excel_filename_data_win  = "data_win_prediction_4.xlsx"
with pd.ExcelWriter(excel_filename_data_win, engine = 'openpyxl', mode = 'w') as writer:
    pd.DataFrame().to_excel(writer, index = False)

In [60]:
# Loops over match_id column and returns match data row, then appends to excel file
count = 0
for match_id in match_ids_expanded["match_id"][2929:]:
    count += 1
    data_df = calculate_match_stats(match_id, auth_key, count)

    # Appends to data_win_predction excel sheet
    with pd.ExcelWriter(excel_filename_data_win, engine = 'openpyxl', mode = 'a', if_sheet_exists = 'overlay') as writer:
        data_df.to_excel(writer, index = False, header = False, startrow = writer.sheets['Sheet1'].max_row)

# delays iterations for API limits
time.sleep(3)

Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=Vajayjajy
2
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=Harpu
4
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=fizzyII
5
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=DENIS1KE
6
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=pmatxjka
9
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=TupRamen
10
ZeroDivisonError
ZeroDivisonError
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=b1gboss-
12
ZeroDivisonError
ZeroDivisonError
ZeroDivisonError
ZeroDivisonError
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=JuanLuis5562
14
Error 404: Resource not found. Requested URL: 

In [64]:
# taking random sample from player_id df for additional data collection
player_ids_sample_df = player_ids_df.sample(n = 1000, random_state = 48)
player_ids_sample_df

Unnamed: 0,player_ids
30644,79922da5-1ae0-4c1b-abf4-6e76e061e541
49743,40aabc93-5cef-4100-8315-24e64c800582
34124,7b4013c7-a7cf-4c87-a703-ed9e338117fc
26177,aaee463a-59f8-40ce-8a49-f79951ae2bc7
1429,8ab5abee-97f6-40e2-9494-489ac739d547
...,...
7851,79e0e67d-dc0c-4fea-b0b9-759802497230
37433,1f8f63cc-0a74-42a1-84ab-807e989dab0c
39887,971dba8e-f55d-453d-b42e-3acb398e9a3a
43000,dacf6a8c-6906-4369-861a-5874a2d17618


In [66]:
# Applys match_data_selection over player_id sampled dataframe
match_ids_expanded = player_ids_sample_df["player_ids"].apply(lambda x: match_data_selection(x, auth_key))

# creates dataframe out of match_ids
match_ids_expanded = match_ids_expanded.melt(value_name = "match_id").dropna()[["match_id"]]
match_ids_expanded

Unnamed: 0,match_id
0,1-e60841fa-8284-42bd-99da-e119eb65692c
1,1-c6cdc470-446c-44a6-bd46-84b16dc0a173
2,1-b086dd58-db96-4ad0-aece-e07d377278a4
3,1-ee6c1b42-d2dc-4b57-8b7b-b09ed4678eaf
4,1-ed4a750f-bdb5-4579-b8de-fb1b14183679
...,...
2995,1-c2807675-d08d-4384-8edc-16b07d1f8e4c
2996,1-857155e1-ee8d-4f8d-be54-5bfabbe830b9
2997,1-3d4bc523-e97f-4449-8a3b-4e5dba723583
2998,1-ab94a008-658e-4212-a705-8bc250664647


In [68]:
# Creating excel sheet to store data for win prediction
excel_filename_data_win  = "data_win_prediction_5.xlsx"
with pd.ExcelWriter(excel_filename_data_win, engine = 'openpyxl', mode = 'w') as writer:
    pd.DataFrame().to_excel(writer, index = False)

In [88]:
# Loops over match_id column and returns match data row, then appends to excel file
count = 0
for match_id in match_ids_expanded["match_id"][2623:]:
    count += 1
    data_df = calculate_match_stats(match_id, auth_key, count)

    # Appends to data_win_predction excel sheet
    with pd.ExcelWriter(excel_filename_data_win, engine = 'openpyxl', mode = 'a', if_sheet_exists = 'overlay') as writer:
        data_df.to_excel(writer, index = False, header = False, startrow = writer.sheets['Sheet1'].max_row)

# delays iterations for API limits
time.sleep(3)

Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=sidoCS
1
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=pofred
3
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=XOUL-
4
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=BootyCrumb
9
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=Lorenzoo666
10
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=kennyxn
11
ZeroDivisonError
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=jaxluo
15
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=GostICE
17
Error 404: Resource not found. Requested URL: https://open.faceit.com/data/v4/players?nickname=LVL10dad
20
Error 404: Resource not found.

In [90]:
# taking random sample from player_id df for additional data collection
player_ids_sample_df = player_ids_df.sample(n = 1000, random_state = 49)
player_ids_sample_df

Unnamed: 0,player_ids
46034,0bced42e-6d7e-4543-ab76-a2c523e336c6
23960,72733731-65cd-416a-be93-b0e6566285b6
36140,bb010faa-6d46-4c62-99b9-f0de8d9e1467
25252,25f3f504-106b-4f1a-8a04-61fd5d19a11a
40132,c390eaa9-5ed3-46c7-b9af-ff0b92e012cb
...,...
40179,46be239e-8113-4902-88b2-7606d0722e8e
43683,fdae0060-d136-45cc-b2e3-5138999c86f2
22491,dfeeac55-e30c-4150-acfe-e0eb256644b5
33994,7280f4b4-d706-42c4-9a15-8552f7c46071


In [92]:
# Applys match_data_selection over player_id sampled dataframe
match_ids_expanded = player_ids_sample_df["player_ids"].apply(lambda x: match_data_selection(x, auth_key))

# creates dataframe out of match_ids
match_ids_expanded = match_ids_expanded.melt(value_name = "match_id").dropna()[["match_id"]]
match_ids_expanded

Unnamed: 0,match_id
0,1-ececcce7-0859-4b9f-ba5a-f3f5bb343feb
1,1-bd054ae1-6f7a-4584-8499-8b626ef788bd
2,1-d305e1b3-3a46-496d-acf8-3a71a7eb6a71
3,1-b6ce285a-0bf6-4afd-a502-a80eede4e9e2
4,1-9ca3cc35-219d-432b-bcfa-b50a9e9414b9
...,...
2995,1-6e534fa8-4e52-4e48-a544-f117d444f7f2
2996,1-b9753b75-f2a3-4a67-9746-f747171fcd49
2997,1-fa1fccc1-2dfd-4220-87c5-c753983ae8cc
2998,1-aeddca25-b354-492d-8ba9-73540bf6087c


In [94]:
# Creating excel sheet to store data for win prediction
excel_filename_data_win  = "data_win_prediction_6.xlsx"
with pd.ExcelWriter(excel_filename_data_win, engine = 'openpyxl', mode = 'w') as writer:
    pd.DataFrame().to_excel(writer, index = False)

In [None]:
# Loops over match_id column and returns match data row, then appends to excel file
count = 0
for match_id in match_ids_expanded["match_id"][2832:]:
    count += 1
    data_df = calculate_match_stats(match_id, auth_key, count)

    # Appends to data_win_predction excel sheet
    with pd.ExcelWriter(excel_filename_data_win, engine = 'openpyxl', mode = 'a', if_sheet_exists = 'overlay') as writer:
        data_df.to_excel(writer, index = False, header = False, startrow = writer.sheets['Sheet1'].max_row)

# delays iterations for API limits
time.sleep(3)