In [1]:
import pandas as pd
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup as soup
import datetime
import json
import requests
import sys
import time

In [2]:
leaderboard_df = pd.read_csv('T100Leaderboard.csv')
leaderboard_df = leaderboard_df[["clubId", "platform"]]
leaderboard_df.head(3)
leaderboard_df = leaderboard_df[0:10]
leaderboard_df

Unnamed: 0,clubId,platform
0,3084976,common-gen4
1,13791,common-gen4
2,16046,common-gen4
3,13541,common-gen4
4,2613056,common-gen4
5,3830521,common-gen4
6,3302026,common-gen4
7,2256236,common-gen4
8,17186,common-gen4
9,662486,common-gen4


In [3]:
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

In [4]:
match_overview_dictionary_list = []
player_build_stat_list = []
player_stat_list = []
club_cosmetic_list = []

In [5]:
vproattr_key_names = [
    "vproattr_1", "vproattr_2", "vproattr_3", "vproattr_4", "vproattr_5",
    "vproattr_6", "vproattr_7", "vproattr_8", "vproattr_9", "vproattr_10",
    "vproattr_11", "vproattr_12", "vproattr_13", "vproattr_14", "vproattr_15",
    "vproattr_16", "vproattr_17", "vproattr_18", "vproattr_19", "vproattr_20",
    "vproattr_21", "vproattr_22", "vproattr_23", "vproattr_24", "vproattr_25",
    "vproattr_26", "vproattr_27", "vproattr_28", "vproattr_29", "vproattr_30",
    "vproattr_31", "vproattr_32", "vproattr_33", "vproattr_34"
]


In [6]:
def match_overview_data_assign(json_dict):
    result = []

    for game in json_dict:
        match_dict = {
            'matchId': game['matchId'],
            'timestamp': datetime.datetime.utcfromtimestamp(game['timestamp']).strftime('%Y-%m-%d %H:%M:%S UTC'),
        }

        both_club_data = game['clubs']

        # Initialize team index
        team_index = 1

        # Iterate through team data directly
        for team_prefix, data in both_club_data.items():
            prefix = f'team_{team_index}'  # Create a dynamic prefix
            team_index += 1  # Increment team index for the next iteration

            for key, value in data.items():
                if key == 'details':
                    match_dict[f'{prefix}_{key}_name'] = value['name']
                    match_dict[f'{prefix}_{key}_clubId'] = str(value['clubId'])
                    match_dict[f'{prefix}_{key}_regionId'] = str(value['regionId'])
                    match_dict[f'{prefix}_{key}_teamId'] = str(value['teamId'])
                    match_dict[f'{prefix}_{key}_stadName'] = value['customKit']['stadName']
#                     match_dict[f'{prefix}_{key}_isCustomTeam'] = str(value['customKit']['isCustomTeam'])
                else:
                    match_dict[f'{prefix}_{key}'] = str(value)

        result.append(match_dict)

    return result

In [7]:
def extract_cosmetic_data(data_list):
    club_data_dict = {}  # Dictionary to store data for each club

    for match in data_list:
        keys = list(match["clubs"].keys())

        for key in keys:
            club_data = {
                'clubId': int(key),
                'timestamp': datetime.datetime.fromtimestamp(int(match['timestamp'])),
                'name': match['clubs'][key]['details']['name'],
                'regionId': int(match['clubs'][key]['details']['regionId']),
                'teamId': int(match['clubs'][key]['details']['teamId']),
                **match['clubs'][key]['details']['customKit'],  # Include all customKit values
            }

            # Convert as many values as possible to integers
            for k, v in club_data.items():
                if k not in ["name", "stadName"]:
                    try:
                        club_data[k] = int(v)
                    except (ValueError, TypeError):
                        pass

            # Check if the clubId already exists in club_data_dict and if the new data has a newer timestamp
            if key in club_data_dict:
                existing_timestamp = club_data_dict[key]['timestamp']
                if club_data['timestamp'] > existing_timestamp:
                    club_data_dict[key] = club_data
            else:
                club_data_dict[key] = club_data

    # Convert the values in club_data_dict to a list
    custom_kit_data_list = list(club_data_dict.values())

    return custom_kit_data_list

In [8]:
def player_data_collect(data_list):
    aggregate_data_list = []

    for match in data_list:
        keys = list(match["clubs"].keys())
        team_1_data = {
            'matchId': int(match["matchId"]),
            'teamId': int(keys[0]),
        }
        team_2_data = {
            'matchId': int(match["matchId"]),
            'teamId': int(keys[1]),
        }

        # Create lists to store player data for each team
        team_1_players_data = []
        team_2_players_data = []

        # Generate list of Team 1 players
        team_1_players = list(match["players"][keys[0]].keys())

        # Generate list of Team 2 players
        team_2_players = list(match["players"][keys[1]].keys())

        # Add key-value pairs from match["players"][keys[0]] to team_1_data
        for player_id, player_data in match["players"][keys[0]].items():
            player_data_dict = {
                'matchId': int(match["matchId"]),
                'teamId': int(keys[0]),
                "playerId": int(player_id),
            }
            for key, value in player_data.items():
                # Convert to integer if value can be converted and key is not "playername"
                if value.isdigit() and key != "playername":
                    player_data_dict[key] = int(value)
                else:
                    player_data_dict[key] = value
            team_1_players_data.append(player_data_dict)

        # Add key-value pairs from match["players"][keys[1]] to team_2_data
        for player_id, player_data in match["players"][keys[1]].items():
            player_data_dict = {
                'matchId': int(match["matchId"]),
                'teamId': int(keys[1]),
                "playerId": int(player_id),
            }
            for key, value in player_data.items():
                # Convert to integer if value can be converted and key is not "playername"
                if value.isdigit() and key != "playername":
                    player_data_dict[key] = int(value)
                else:
                    player_data_dict[key] = value
            team_2_players_data.append(player_data_dict)

        aggregate_data_list.extend(team_1_players_data)
        aggregate_data_list.extend(team_2_players_data)

    return aggregate_data_list

In [9]:
def player_build_attributes(data_list, vproattr_key_names):
    vproattr_data_list = []

    for match in data_list:
        keys = list(match["clubs"].keys())

        # Team IDs
        team1_id = int(keys[0])
        team2_id = int(keys[1])

        # Iterate through players in Team 1
        for player_id, player_data in match["players"][keys[0]].items():
            game_id = int(match["matchId"])
            player_id = int(player_id)
            vproattr_values = player_data.get("vproattr", "").split("|")
            vproattr_values = [int(value) for value in vproattr_values if value.isdigit()]

            # Create a dictionary with keys for each vproattr value
            vproattr_data = {
                "gameId": game_id,
                "playerId": player_id,
                "teamId": team1_id,  # Add the team ID here
            }

            # Add vproattr values with user-defined key names
            for i, key_name in enumerate(vproattr_key_names):
                if i < len(vproattr_values):
                    vproattr_data[key_name] = vproattr_values[i]

            vproattr_data_list.append(vproattr_data)

        # Iterate through players in Team 2
        for player_id, player_data in match["players"][keys[1]].items():
            game_id = int(match["matchId"])
            player_id = int(player_id)
            vproattr_values = player_data.get("vproattr", "").split("|")
            vproattr_values = [int(value) for value in vproattr_values if value.isdigit()]

            # Create a dictionary with keys for each vproattr value
            vproattr_data = {
                "gameId": game_id,
                "playerId": player_id,
                "teamId": team2_id,  # Add the team ID here
            }

            # Add vproattr values with user-defined key names
            for i, key_name in enumerate(vproattr_key_names):
                if i < len(vproattr_values):
                    vproattr_data[key_name] = vproattr_values[i]

            vproattr_data_list.append(vproattr_data)

    return vproattr_data_list

In [10]:
# Initialize variables to track progress
total_rows = len(leaderboard_df)
successful_scrapes = 0
failed_scrapes = 0
failed_rows = []

# Iterate through the rows of leaderboard_df
for index, row in leaderboard_df.iterrows():
    # Construct the URL based on clubId and platform
    club_id = row['clubId']
    platform = row['platform']
    url = f"https://proclubs.ea.com/api/fc/clubs/matches?matchType=leagueMatch&platform={platform}&clubIds={club_id}"

    try:
        browser.visit(url)
        site = soup(browser.html, "html.parser")

        # Find the <pre> tag containing the JSON data
        pre_tag = site.find('pre')

        if pre_tag:
            # Extract the JSON data from the <pre> tag
            json_data = pre_tag.text

            # Convert JSON to a Python dictionary
            json_dict = json.loads(json_data)

            # Extract the broad match data and extend the list
            match_overview_dictionary_list.extend(match_overview_data_assign(json_dict))
            player_build_stat_list.extend(player_build_attributes(json_dict, vproattr_key_names))
            player_stat_list.extend(player_data_collect(json_dict))
            club_cosmetic_list.extend(extract_cosmetic_data(json_dict))
            
            # Increment the successful scrape count
            successful_scrapes += 1

        # Add a one-second pause
        time.sleep(1)

    except Exception as e:
        failed_scrapes += 1
        failed_rows.append(index)
        print(f"\rFailed to scrape row {index}: {str(e)}", end="")
        sys.stdout.flush()

    # Print progress with carriage return to overwrite previous line
    progress_msg = f"Scraping progress: {successful_scrapes} out of {total_rows} complete, {failed_scrapes} failed"
    print(f"\r{progress_msg}", end="")
    sys.stdout.flush()

# Now, match_overview_dictionary contains match data from all the URLs in leaderboard_df
print()  # Print a newline to separate the progress from subsequent output


Scraping progress: 10 out of 10 complete, 0 failed


In [11]:
browser.quit()

In [13]:
club_cosmetic_list

[{'clubId': 26401,
  'timestamp': datetime.datetime(2023, 11, 24, 1, 15, 10),
  'name': 'H O  2',
  'regionId': 5064001,
  'teamId': 1,
  'stadName': 'Mercedes-Benz Stadium',
  'kitId': 8195,
  'seasonalTeamId': 131397,
  'seasonalKitId': 1076404224,
  'selectedKitType': 0,
  'customKitId': 7703,
  'customAwayKitId': 7627,
  'customThirdKitId': 7627,
  'customKeeperKitId': 5029,
  'kitColor1': 592397,
  'kitColor2': 14935011,
  'kitColor3': 592397,
  'kitColor4': 14935011,
  'kitAColor1': 4920087,
  'kitAColor2': 14277081,
  'kitAColor3': 4920087,
  'kitAColor4': 15921906,
  'kitThrdColor1': 1482375,
  'kitThrdColor2': 15921906,
  'kitThrdColor3': 1482375,
  'kitThrdColor4': 15921906,
  'dCustomKit': 0,
  'crestColor': 16777215,
  'crestAssetId': 99060205},
 {'clubId': 3084976,
  'timestamp': datetime.datetime(2023, 11, 24, 1, 15, 10),
  'name': 'Old Champions',
  'regionId': 5064001,
  'teamId': 18,
  'stadName': 'Municipal de Ipurua',
  'kitId': 147457,
  'seasonalTeamId': 131397,
  

In [14]:
json_dict


[{'matchId': '24932216390150',
  'timestamp': 1700737526,
  'timeAgo': {'number': 1, 'unit': 'days'},
  'clubs': {'662486': {'date': '1700737524',
    'gameNumber': '0',
    'goals': '4',
    'goalsAgainst': '2',
    'losses': '0',
    'matchType': '1',
    'result': '1',
    'score': '4',
    'season_id': '0',
    'TEAM': '65',
    'ties': '0',
    'winnerByDnf': '0',
    'wins': '1',
    'details': {'name': 'S  P  U  R  S',
     'clubId': 662486,
     'regionId': 5064001,
     'teamId': 65,
     'customKit': {'stadName': 'Estadio San Mamés',
      'kitId': '532483',
      'seasonalTeamId': '131397',
      'seasonalKitId': '1076404224',
      'selectedKitType': '0',
      'customKitId': '7509',
      'customAwayKitId': '7509',
      'customThirdKitId': '7511',
      'customKeeperKitId': '5005',
      'kitColor1': '15921906',
      'kitColor2': '592397',
      'kitColor3': '592397',
      'kitColor4': '592397',
      'kitAColor1': '592397',
      'kitAColor2': '15921906',
      'kitACo

In [15]:
match_overview_dictionary_list
match_overview_dictionary_df = pd.DataFrame(match_overview_dictionary_list)
match_overview_dictionary_df

Unnamed: 0,matchId,timestamp,team_1_date,team_1_gameNumber,team_1_goals,team_1_goalsAgainst,team_1_losses,team_1_matchType,team_1_result,team_1_score,...,team_2_season_id,team_2_TEAM,team_2_ties,team_2_winnerByDnf,team_2_wins,team_2_details_name,team_2_details_clubId,team_2_details_regionId,team_2_details_teamId,team_2_details_stadName
0,25182250910116,2023-11-24 06:15:10 UTC,1700806508,0,0,2,1,1,2,0,...,0,18,0,0,1,Old Champions,3084976,5064001,18,Municipal de Ipurua
1,25156461070308,2023-11-24 05:42:39 UTC,1700804558,0,2,5,1,1,2,2,...,0,18,0,0,1,Old Champions,3084976,5064001,18,Municipal de Ipurua
2,24860304270416,2023-11-23 02:51:42 UTC,1700707900,0,2,6,1,1,2,2,...,0,1335,0,0,1,Old Champions,3084976,5064001,18,Municipal de Ipurua
3,24812847840410,2023-11-23 02:31:48 UTC,1700706706,0,3,1,0,1,1,3,...,0,112139,0,0,0,Coldplay,4071716,5064001,112139,Tier 1 Stadium
4,24824602830453,2023-11-23 02:13:00 UTC,1700705578,0,3,3,0,1,4,3,...,0,1335,1,0,0,Old Champions,3084976,5064001,18,Municipal de Ipurua
5,25200092700465,2023-11-24 08:50:23 UTC,1700815822,0,3,0,0,1,16385,3,...,0,11,0,0,0,T h u n d e r,2018971,4281683,11,Heinz von Heiden-Arena
6,25196195260448,2023-11-24 08:47:13 UTC,1700815632,0,3,0,0,1,16385,3,...,0,11,0,0,0,T h u n d e r,2018971,4281683,11,Heinz von Heiden-Arena
7,25160614240017,2023-11-24 08:44:08 UTC,1700815447,0,3,0,0,1,16385,3,...,0,11,0,0,0,T h u n d e r,2018971,4281683,11,Heinz von Heiden-Arena
8,25162567920196,2023-11-24 08:40:58 UTC,1700815257,0,3,0,0,1,16385,3,...,0,11,0,0,0,T h u n d e r,2018971,4281683,11,Heinz von Heiden-Arena
9,25198115370336,2023-11-24 08:37:32 UTC,1700815052,0,3,0,0,1,16385,3,...,0,11,0,0,0,T h u n d e r,2018971,4281683,11,Heinz von Heiden-Arena


In [16]:
json_dict

[{'matchId': '24932216390150',
  'timestamp': 1700737526,
  'timeAgo': {'number': 1, 'unit': 'days'},
  'clubs': {'662486': {'date': '1700737524',
    'gameNumber': '0',
    'goals': '4',
    'goalsAgainst': '2',
    'losses': '0',
    'matchType': '1',
    'result': '1',
    'score': '4',
    'season_id': '0',
    'TEAM': '65',
    'ties': '0',
    'winnerByDnf': '0',
    'wins': '1',
    'details': {'name': 'S  P  U  R  S',
     'clubId': 662486,
     'regionId': 5064001,
     'teamId': 65,
     'customKit': {'stadName': 'Estadio San Mamés',
      'kitId': '532483',
      'seasonalTeamId': '131397',
      'seasonalKitId': '1076404224',
      'selectedKitType': '0',
      'customKitId': '7509',
      'customAwayKitId': '7509',
      'customThirdKitId': '7511',
      'customKeeperKitId': '5005',
      'kitColor1': '15921906',
      'kitColor2': '592397',
      'kitColor3': '592397',
      'kitColor4': '592397',
      'kitAColor1': '592397',
      'kitAColor2': '15921906',
      'kitACo

In [17]:
duck = player_build_attributes(json_dict, vproattr_key_names)

In [18]:
player_stat_list

[{'matchId': 25182250910116,
  'teamId': 26401,
  'playerId': 897712440,
  'assists': 0,
  'cleansheetsany': 0,
  'cleansheetsdef': 0,
  'cleansheetsgk': 0,
  'goals': 0,
  'goalsconceded': 2,
  'losses': 1,
  'mom': 0,
  'namespace': 1,
  'passattempts': 21,
  'passesmade': 17,
  'pos': 'forward',
  'rating': '6.30',
  'realtimegame': 957,
  'realtimeidle': 14,
  'redcards': 0,
  'saves': 0,
  'SCORE': 0,
  'shots': 7,
  'tackleattempts': 10,
  'tacklesmade': 0,
  'vproattr': '078|092|080|091|074|087|088|091|072|089|060|098|075|093|072|089|098|084|084|073|075|055|086|089|054|047|091|076|084|010|010|010|010|010|',
  'vprohackreason': 8,
  'wins': 0,
  'playername': 'F9lrc'},
 {'matchId': 25182250910116,
  'teamId': 26401,
  'playerId': 1816663702,
  'assists': 0,
  'cleansheetsany': 0,
  'cleansheetsdef': 0,
  'cleansheetsgk': 0,
  'goals': 0,
  'goalsconceded': 2,
  'losses': 1,
  'mom': 0,
  'namespace': 1,
  'passattempts': 7,
  'passesmade': 5,
  'pos': 'forward',
  'rating': '6.70

In [19]:
player_stat_list_df = pd.DataFrame(player_stat_list)

In [20]:
player_stat_list_df

Unnamed: 0,matchId,teamId,playerId,assists,cleansheetsany,cleansheetsdef,cleansheetsgk,goals,goalsconceded,losses,...,redcards,saves,SCORE,shots,tackleattempts,tacklesmade,vproattr,vprohackreason,wins,playername
0,25182250910116,26401,897712440,0,0,0,0,0,2,1,...,0,0,0,7,10,0,078|092|080|091|074|087|088|091|072|089|060|09...,8,0,F9lrc
1,25182250910116,26401,1816663702,0,0,0,0,0,2,1,...,0,0,0,3,8,3,098|095|099|095|070|085|060|093|062|085|064|09...,8,0,R-7hp
2,25182250910116,26401,1916753895,0,0,0,0,0,2,1,...,0,0,0,0,15,2,094|091|090|089|070|083|065|088|071|081|095|08...,8,0,lCqk
3,25182250910116,26401,1969806916,0,0,0,0,0,2,1,...,0,0,0,0,11,3,094|094|095|095|071|081|073|085|071|083|079|09...,8,0,DKToR_i
4,25182250910116,26401,1004757345736,0,0,0,0,0,2,1,...,0,0,0,0,3,0,094|094|075|078|077|087|077|092|086|081|095|07...,8,0,SZXO_7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
534,24932154470015,4152706,193083681,0,0,0,0,1,4,1,...,0,0,1,3,40,1,094|091|082|094|071|076|074|085|066|088|060|09...,8,0,ronaldo990
535,24932154470015,4152706,1873723519,1,0,0,0,0,4,1,...,0,0,1,0,7,4,098|095|099|087|067|077|059|085|062|085|064|09...,8,0,co_1j_2
536,24932154470015,4152706,1003778529393,0,0,0,0,0,0,1,...,0,0,1,0,0,0,,0,0,OhhRuthless--
537,24932154470015,4152706,1005482916657,0,0,0,0,0,0,1,...,0,0,1,0,0,0,,0,0,mujahid_m99


In [21]:
player_stat_list

[{'matchId': 25182250910116,
  'teamId': 26401,
  'playerId': 897712440,
  'assists': 0,
  'cleansheetsany': 0,
  'cleansheetsdef': 0,
  'cleansheetsgk': 0,
  'goals': 0,
  'goalsconceded': 2,
  'losses': 1,
  'mom': 0,
  'namespace': 1,
  'passattempts': 21,
  'passesmade': 17,
  'pos': 'forward',
  'rating': '6.30',
  'realtimegame': 957,
  'realtimeidle': 14,
  'redcards': 0,
  'saves': 0,
  'SCORE': 0,
  'shots': 7,
  'tackleattempts': 10,
  'tacklesmade': 0,
  'vproattr': '078|092|080|091|074|087|088|091|072|089|060|098|075|093|072|089|098|084|084|073|075|055|086|089|054|047|091|076|084|010|010|010|010|010|',
  'vprohackreason': 8,
  'wins': 0,
  'playername': 'F9lrc'},
 {'matchId': 25182250910116,
  'teamId': 26401,
  'playerId': 1816663702,
  'assists': 0,
  'cleansheetsany': 0,
  'cleansheetsdef': 0,
  'cleansheetsgk': 0,
  'goals': 0,
  'goalsconceded': 2,
  'losses': 1,
  'mom': 0,
  'namespace': 1,
  'passattempts': 7,
  'passesmade': 5,
  'pos': 'forward',
  'rating': '6.70

In [22]:
DUCKS = player_data_collect(json_dict)

In [23]:
player_stat_list

[{'matchId': 25182250910116,
  'teamId': 26401,
  'playerId': 897712440,
  'assists': 0,
  'cleansheetsany': 0,
  'cleansheetsdef': 0,
  'cleansheetsgk': 0,
  'goals': 0,
  'goalsconceded': 2,
  'losses': 1,
  'mom': 0,
  'namespace': 1,
  'passattempts': 21,
  'passesmade': 17,
  'pos': 'forward',
  'rating': '6.30',
  'realtimegame': 957,
  'realtimeidle': 14,
  'redcards': 0,
  'saves': 0,
  'SCORE': 0,
  'shots': 7,
  'tackleattempts': 10,
  'tacklesmade': 0,
  'vproattr': '078|092|080|091|074|087|088|091|072|089|060|098|075|093|072|089|098|084|084|073|075|055|086|089|054|047|091|076|084|010|010|010|010|010|',
  'vprohackreason': 8,
  'wins': 0,
  'playername': 'F9lrc'},
 {'matchId': 25182250910116,
  'teamId': 26401,
  'playerId': 1816663702,
  'assists': 0,
  'cleansheetsany': 0,
  'cleansheetsdef': 0,
  'cleansheetsgk': 0,
  'goals': 0,
  'goalsconceded': 2,
  'losses': 1,
  'mom': 0,
  'namespace': 1,
  'passattempts': 7,
  'passesmade': 5,
  'pos': 'forward',
  'rating': '6.70

In [24]:
player_stat_list

[{'matchId': 25182250910116,
  'teamId': 26401,
  'playerId': 897712440,
  'assists': 0,
  'cleansheetsany': 0,
  'cleansheetsdef': 0,
  'cleansheetsgk': 0,
  'goals': 0,
  'goalsconceded': 2,
  'losses': 1,
  'mom': 0,
  'namespace': 1,
  'passattempts': 21,
  'passesmade': 17,
  'pos': 'forward',
  'rating': '6.30',
  'realtimegame': 957,
  'realtimeidle': 14,
  'redcards': 0,
  'saves': 0,
  'SCORE': 0,
  'shots': 7,
  'tackleattempts': 10,
  'tacklesmade': 0,
  'vproattr': '078|092|080|091|074|087|088|091|072|089|060|098|075|093|072|089|098|084|084|073|075|055|086|089|054|047|091|076|084|010|010|010|010|010|',
  'vprohackreason': 8,
  'wins': 0,
  'playername': 'F9lrc'},
 {'matchId': 25182250910116,
  'teamId': 26401,
  'playerId': 1816663702,
  'assists': 0,
  'cleansheetsany': 0,
  'cleansheetsdef': 0,
  'cleansheetsgk': 0,
  'goals': 0,
  'goalsconceded': 2,
  'losses': 1,
  'mom': 0,
  'namespace': 1,
  'passattempts': 7,
  'passesmade': 5,
  'pos': 'forward',
  'rating': '6.70

In [25]:
duckz = extract_cosmetic_data(json_dict)

In [26]:
duckz 

[{'clubId': 662486,
  'timestamp': datetime.datetime(2023, 11, 23, 6, 5, 26),
  'name': 'S  P  U  R  S',
  'regionId': 5064001,
  'teamId': 65,
  'stadName': 'Estadio San Mamés',
  'kitId': 532483,
  'seasonalTeamId': 131397,
  'seasonalKitId': 1076404224,
  'selectedKitType': 0,
  'customKitId': 7509,
  'customAwayKitId': 7509,
  'customThirdKitId': 7511,
  'customKeeperKitId': 5005,
  'kitColor1': 15921906,
  'kitColor2': 592397,
  'kitColor3': 592397,
  'kitColor4': 592397,
  'kitAColor1': 592397,
  'kitAColor2': 15921906,
  'kitAColor3': 15921906,
  'kitAColor4': 15921906,
  'kitThrdColor1': 12706617,
  'kitThrdColor2': 33627,
  'kitThrdColor3': 396864,
  'kitThrdColor4': 2164288,
  'dCustomKit': 0,
  'crestColor': -1,
  'crestAssetId': 99160625},
 {'clubId': 2900086,
  'timestamp': datetime.datetime(2023, 11, 23, 6, 5, 26),
  'name': 'Tyaar',
  'regionId': 5064001,
  'teamId': 241,
  'stadName': 'Estadio José Zorrilla',
  'kitId': 1974272,
  'seasonalTeamId': 0,
  'seasonalKitId':

In [52]:
player_data_collect_df = pd.DataFrame(player_stat_list)

In [53]:
player_data_collect_df

Unnamed: 0,matchId,teamId,playerId,assists,cleansheetsany,cleansheetsdef,cleansheetsgk,goals,goalsconceded,losses,...,redcards,saves,SCORE,shots,tackleattempts,tacklesmade,vproattr,vprohackreason,wins,playername
0,25182250910116,26401,897712440,0,0,0,0,0,2,1,...,0,0,0,7,10,0,078|092|080|091|074|087|088|091|072|089|060|09...,8,0,F9lrc
1,25182250910116,26401,1816663702,0,0,0,0,0,2,1,...,0,0,0,3,8,3,098|095|099|095|070|085|060|093|062|085|064|09...,8,0,R-7hp
2,25182250910116,26401,1916753895,0,0,0,0,0,2,1,...,0,0,0,0,15,2,094|091|090|089|070|083|065|088|071|081|095|08...,8,0,lCqk
3,25182250910116,26401,1969806916,0,0,0,0,0,2,1,...,0,0,0,0,11,3,094|094|095|095|071|081|073|085|071|083|079|09...,8,0,DKToR_i
4,25182250910116,26401,1004757345736,0,0,0,0,0,2,1,...,0,0,0,0,3,0,094|094|075|078|077|087|077|092|086|081|095|07...,8,0,SZXO_7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
534,24932154470015,4152706,193083681,0,0,0,0,1,4,1,...,0,0,1,3,40,1,094|091|082|094|071|076|074|085|066|088|060|09...,8,0,ronaldo990
535,24932154470015,4152706,1873723519,1,0,0,0,0,4,1,...,0,0,1,0,7,4,098|095|099|087|067|077|059|085|062|085|064|09...,8,0,co_1j_2
536,24932154470015,4152706,1003778529393,0,0,0,0,0,0,1,...,0,0,1,0,0,0,,0,0,OhhRuthless--
537,24932154470015,4152706,1005482916657,0,0,0,0,0,0,1,...,0,0,1,0,0,0,,0,0,mujahid_m99


In [54]:
player_data_collect_df = player_data_collect_df[['matchId', 'teamId','playerId','vproattr']]

In [55]:
player_data_collect_df

Unnamed: 0,matchId,teamId,playerId,vproattr
0,25182250910116,26401,897712440,078|092|080|091|074|087|088|091|072|089|060|09...
1,25182250910116,26401,1816663702,098|095|099|095|070|085|060|093|062|085|064|09...
2,25182250910116,26401,1916753895,094|091|090|089|070|083|065|088|071|081|095|08...
3,25182250910116,26401,1969806916,094|094|095|095|071|081|073|085|071|083|079|09...
4,25182250910116,26401,1004757345736,094|094|075|078|077|087|077|092|086|081|095|07...
...,...,...,...,...
534,24932154470015,4152706,193083681,094|091|082|094|071|076|074|085|066|088|060|09...
535,24932154470015,4152706,1873723519,098|095|099|087|067|077|059|085|062|085|064|09...
536,24932154470015,4152706,1003778529393,
537,24932154470015,4152706,1005482916657,


In [56]:
vproattr_key_names = [
    "vproattr_1", "vproattr_2", "vproattr_3", "vproattr_4", "vproattr_5",
    "vproattr_6", "vproattr_7", "vproattr_8", "vproattr_9", "vproattr_10",
    "vproattr_11", "vproattr_12", "vproattr_13", "vproattr_14", "vproattr_15",
    "vproattr_16", "vproattr_17", "vproattr_18", "vproattr_19", "vproattr_20",
    "vproattr_21", "vproattr_22", "vproattr_23", "vproattr_24", "vproattr_25",
    "vproattr_26", "vproattr_27", "vproattr_28", "vproattr_29", "vproattr_30",
    "vproattr_31", "vproattr_32", "vproattr_33", "vproattr_34"
]

In [57]:
vproattr_key_names = [
    "vproattr_1", "vproattr_2", "vproattr_3", "vproattr_4", "vproattr_5",
    "vproattr_6", "vproattr_7", "vproattr_8", "vproattr_9", "vproattr_10",
    "vproattr_11", "vproattr_12", "vproattr_13", "vproattr_14", "vproattr_15",
    "vproattr_16", "vproattr_17", "vproattr_18", "vproattr_19", "vproattr_20",
    "vproattr_21", "vproattr_22", "vproattr_23", "vproattr_24", "vproattr_25",
    "vproattr_26", "vproattr_27", "vproattr_28", "vproattr_29", "vproattr_30",
    "vproattr_31", "vproattr_32", "vproattr_33", "vproattr_34", "vproattr_35"
]

# Split the values in the 'vproattr' column and create new columns
player_data_collect_df[vproattr_key_names] = player_data_collect_df['vproattr'].str.split('|', expand=True)

# Drop the original 'vproattr' column if needed
player_data_collect_df.drop('vproattr', axis=1, inplace=True)

# Display the resulting DataFrame

player_data_collect_df = pd.DataFrame(player_data_collect_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


In [58]:
player_data_collect_df

Unnamed: 0,matchId,teamId,playerId,vproattr_1,vproattr_2,vproattr_3,vproattr_4,vproattr_5,vproattr_6,vproattr_7,...,vproattr_26,vproattr_27,vproattr_28,vproattr_29,vproattr_30,vproattr_31,vproattr_32,vproattr_33,vproattr_34,vproattr_35
0,25182250910116,26401,897712440,078,092,080,091,074,087,088,...,047,091,076,084,010,010,010,010,010,
1,25182250910116,26401,1816663702,098,095,099,095,070,085,060,...,056,080,083,075,010,010,010,010,010,
2,25182250910116,26401,1916753895,094,091,090,089,070,083,065,...,091,064,078,070,010,010,010,010,010,
3,25182250910116,26401,1969806916,094,094,095,095,071,081,073,...,075,073,079,075,010,010,010,010,010,
4,25182250910116,26401,1004757345736,094,094,075,078,077,087,077,...,093,065,077,072,010,010,010,010,010,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
534,24932154470015,4152706,193083681,094,091,082,094,071,076,074,...,052,088,078,083,010,010,010,010,010,
535,24932154470015,4152706,1873723519,098,095,099,087,067,077,059,...,056,074,090,075,010,010,010,010,010,
536,24932154470015,4152706,1003778529393,,,,,,,,...,,,,,,,,,,
537,24932154470015,4152706,1005482916657,,,,,,,,...,,,,,,,,,,
