In [None]:
import asyncio
import logging
import os
import random
import re
from concurrent.futures import ThreadPoolExecutor

import cloudscraper
import pandas as pd
from aiohttp import ClientError, ClientSession
from bs4 import BeautifulSoup
from random_user_agent.params import SoftwareName, OperatingSystem
from random_user_agent.user_agent import UserAgent

In [None]:

async def handle_response_error(response):
    """Log a failed HTTP response and back off when the server throttles or refuses us.

    Args:
        response: Any object exposing an integer ``status`` attribute (an
            aiohttp response in this notebook).

    Returns:
        None. For status 429 and 403 the coroutine sleeps for a random delay
        of 5-7 seconds before returning, so the caller can retry afterwards.
        Every other status is only logged and returns immediately.
    """
    # Human-readable reason per status that triggers a back-off.
    backoff_reasons = {
        429: "Quá nhiều yêu cầu",
        403: "Truy cập bị từ chối",
    }
    reason = backoff_reasons.get(response.status)
    if reason is None:
        logging.error("Yêu cầu thất bại, mã lỗi: %s", response.status)
        return

    # Draw the delay first so the log line reports the wait that really happens.
    delay = random.uniform(5, 7)
    logging.error(
        "Yêu cầu thất bại, mã lỗi %s (%s), đang thử lại sau %.1f giây...",
        response.status, reason, delay,
    )
    await asyncio.sleep(delay)

async def fetch_all_players_data(session, player_links):
    """Crawl the detail page of every player link concurrently.

    Args:
        session: Shared HTTP session, passed unchanged to
            ``crawl_player_details``.
        player_links: Iterable of anchor elements. Each must support
            ``link['href']`` and yield a site-relative path.

    Returns:
        A list with one entry per accepted link, in input order: whatever
        ``crawl_player_details`` returned, or ``None`` when that crawl raised
        a network or parsing error. Links whose URL lacks ``/player/`` or
        contains ``/random`` are skipped.
    """
    semaphore = asyncio.Semaphore(6)  # at most 6 detail requests in flight

    async def fetch_with_semaphore(url):
        async with semaphore:
            try:
                return await crawl_player_details(session, url)
            except (ClientError, asyncio.TimeoutError,
                    AttributeError, KeyError, TypeError, ValueError) as e:
                # One bad page must not abort the whole gather(): log it and
                # let the caller drop the None entry.
                logging.error("Error for URL %s: %r", url, e)
                return None

    urls = [f"https://sofifa.com{player_link['href']}" for player_link in player_links]
    tasks = [
        fetch_with_semaphore(url)
        for url in urls
        if "/player/" in url and "/random" not in url
    ]
    return await asyncio.gather(*tasks)

async def crawl_all_players_data(year, file_path='D:/Jupyter/DATA/test4.csv', page_size=61, max_retries=5):
    """Walk the paginated player listing and crawl every player found on it.

    Each successfully crawled player is appended to ``file_path`` as one CSV
    row as soon as its page has been processed, so an interrupted run keeps
    what it has already collected.

    Args:
        year: Value placed in the ``r=`` query parameter of the listing URL.
        file_path: CSV file the rows are appended to. A header line is written
            only when the file does not exist yet.
        page_size: Step added to the ``offset`` query parameter per page.
            NOTE(review): 61 is kept as the default to preserve the existing
            behaviour; if the site lists 60 players per page this skips one
            player per page - confirm against the live site.
        max_retries: Number of consecutive failed requests tolerated for one
            listing page before the crawl stops.

    Returns:
        A DataFrame with one row per crawled player (empty when nothing was
        crawled).
    """
    base_url = f"https://sofifa.com/players?r={year}&set=true"
    user_agent_rotator = UserAgent(
        software_names=[SoftwareName.CHROME.value],
        operating_systems=[OperatingSystem.WINDOWS.value, OperatingSystem.LINUX.value],
        limit=1000,
    )
    collected_rows = []
    page = 1
    failed_attempts = 0

    async with ClientSession() as session:
        while True:
            page_url = f"{base_url}&offset={(page - 1) * page_size}"
            headers = {'User-Agent': user_agent_rotator.get_random_user_agent()}

            # The context manager releases the connection even when the body
            # is never read (non-200 responses).
            async with session.get(page_url, headers=headers) as response:
                page_html = await response.text() if response.status == 200 else None

            if page_html is None:
                failed_attempts += 1
                if failed_attempts > max_retries:
                    logging.error(
                        "Giving up on %s after %d failed attempts (last status %s)",
                        page_url, failed_attempts, response.status,
                    )
                    break
                # Logs the status and, for 429/403, waits before the retry.
                await handle_response_error(response)
                continue
            failed_attempts = 0

            soup = BeautifulSoup(page_html, 'html.parser')
            player_links = soup.find_all('a', href=lambda href: href and "/player/" in href)
            if not player_links:
                # A listing page without player links marks the end.
                break

            for player_data in await fetch_all_players_data(session, player_links):
                if player_data is None:
                    continue
                collected_rows.append(player_data)
                # Written row by row so each row keeps its own length.
                pd.DataFrame([player_data]).to_csv(
                    file_path,
                    mode='a',
                    header=not os.path.isfile(file_path),
                    index=False,
                )
            page += 1

    return pd.DataFrame(collected_rows)

In [None]:
def _parse_player_header(soup, player_url):
    """Extract the fixed leading fields of a player page as (label, value) tuples.

    Raises AttributeError/TypeError/KeyError when an expected node is missing
    and ValueError when the bio line does not match the expected pattern.
    """
    short_name = soup.find('h1', class_='ellipsis').text.strip()
    player_info = soup.find('div', class_='info')
    player_name = player_info.find('h1').text.strip()
    nationality = player_info.find('a', title=True)['title'] or "Không xác định"

    # Bio line holding position(s), age, birthday, height and weight.
    meta_info = player_info.find('div', class_='meta ellipsis').text.strip()
    match = re.match(r'(.+?) (\d+)y\.o\. \((\w+ \d+, \d+)\) (\d+)cm / (\S+) (\d+)kg', meta_info)
    if match is None:
        raise ValueError(f"unrecognised bio line: {meta_info!r}")
    # The fifth group (the token between "cm / " and the weight) is not used.
    position, age, birthday, height, _unused, weight = match.groups()

    ratings = soup.find('section', class_='card spacing')
    overall_rating = ratings.find('div', string='Overall rating').find_previous('span').text.strip()
    potential = ratings.find('div', string='Potential').find_previous('span').text.strip()
    value = ratings.find('div', string='Value').previous_sibling.strip()
    wage = ratings.find('div', string='Wage').previous_sibling.strip()

    return [
        ('Player URL', player_url),
        ('Short Name', short_name),
        ('Player Name', player_name),
        ('Nationality', nationality),
        ('Position', position),
        ('Age', age),
        ('Birthday', birthday),
        ('Height', height),
        ('Weight', weight),
        ('Overall Rating', overall_rating),
        ('Potential', potential),
        ('Value', value),
        ('Wage', wage),
    ]


def _collect_profile_items(soup):
    """Collect labelled ``<li>`` entries and team slugs from every ``div.card``."""
    items = []
    for card in soup.find_all('div', class_='card'):
        attribute_list = card.find('ul', class_='pl')
        if attribute_list:
            for li in attribute_list.find_all('li'):
                label_tag = li.find('label')
                if not label_tag:
                    continue
                label = label_tag.text.strip()
                span_tag = li.find('span')
                if span_tag:
                    # The value sits in the first <span>.
                    value = span_tag.text.strip()
                else:
                    # No <span>: the value is the <li> text minus its label.
                    value = li.text.strip().replace(label, '')
                    if li.find('svg'):
                        # Entries with an <svg> keep only their first number.
                        digits = re.search(r'\d+', value)
                        value = digits.group() if digits else ""
                items.append((label, value))

        heading = card.find('h5')
        link = heading.find('a') if heading else None
        if link:
            team = re.search(r'/team/\d+/(.*)/', link.get('href') or '')
            if team:
                # NOTE(review): appended as a bare string, unlike the
                # (label, value) tuples elsewhere; kept so the CSV layout
                # produced by earlier runs does not change.
                items.append(team.group(1))
    return items


def _collect_stat_items(soup):
    """Collect (name, value) pairs from ``<li>`` entries that carry a tooltip name and a ``bp3-tag`` value."""
    items = []
    for card in soup.find_all('div', class_='card'):
        attribute_list = card.find('ul', class_='pl')
        if not attribute_list:
            continue
        for li in attribute_list.find_all('li'):
            value_tag = li.find('span', class_='bp3-tag')
            name_tag = li.find('span', role='tooltip')
            # Entries missing either part are skipped.
            if value_tag and name_tag:
                items.append((name_tag.text.strip(), value_tag.text.strip()))
    return items


async def crawl_player_details(session, player_url):
    """Download one player page and flatten it into a list of fields.

    Args:
        session: HTTP session whose ``get(url)`` result can be used with
            ``async with`` and yields a response with ``status`` and ``text()``.
        player_url: Absolute URL of the player page.

    Returns:
        A list that starts with thirteen fixed (label, value) tuples
        ('Player URL' through 'Wage'), followed by the profile entries and then
        the stat entries found on the page. Team slugs appear as bare
        strings between the profile entries. Returns ``None`` when the
        response status is not 200 or the page lacks the expected layout.
    """
    # Random 1-3 s pause per player request to keep the request rate down.
    await asyncio.sleep(random.uniform(1, 3))

    # The context manager releases the connection on every path.
    async with session.get(player_url) as response:
        page_html = await response.text() if response.status == 200 else None

    if page_html is None:
        await handle_response_error(response)
        return None

    soup = BeautifulSoup(page_html, 'html.parser')
    try:
        player_data = _parse_player_header(soup, player_url)
    except (AttributeError, KeyError, TypeError, ValueError) as exc:
        # Unexpected layout: skip this player instead of aborting the crawl.
        logging.error("Could not parse player page %s: %r", player_url, exc)
        return None

    player_data.extend(_collect_profile_items(soup))
    player_data.extend(_collect_stat_items(soup))

    logging.debug("%s", player_data)
    return player_data

In [None]:
# Start the crawl; IPython cells allow a top-level `await`.
# 220069 is passed as `year` and ends up in the `r=` query parameter of the listing URL.
# Rows are appended to the function's default `file_path` CSV while the crawl runs.
await crawl_all_players_data(220069)

In [2]:
import pandas as pd
# Load the crawl result. header=None keeps the file's first line as data row 0
# and labels the columns with integers instead of names.
data_frame = pd.read_csv('D:/Jupyter/DATA/Final_Crawl.csv',header=None)

  data_frame = pd.read_csv('D:/Jupyter/DATA/Final_Crawl.csv',header=None)


In [3]:
# Allow up to 100 columns in rendered frames so the wide crawl table is not elided.
pd.options.display.max_columns = 100
# Preview the first ten rows of the raw table.
data_frame.head(10)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64
0,PLAYER_URL,Short Name,Player Name,Nationality,Player_Position,Age,Birthday,Height(cm),Weight(kg),Overall Rating,Potential,Value,Wage,Preferred foot,Skill moves,Weak foot,International reputation,Work rate,Body type,Real face,Release clause,ID,Club_Position,Club_Kit number,Joined,Loaned,Contract valid until,Club_name,National_Position,National_Kit number,National_name,attacking_crossing,attacking_finishing,attacking_heading_accuracy,attacking_short_passing,attacking_volleys,skill_dribbling,skill_curve,skill_fk_accuracy,skill_long_passing,skill_ball_control,movement_acceleration,movement_sprint_speed,movement_agility,movement_reactions,movement_balance,power_shot_power,power_jumping,power_stamina,power_strength,power_long_shots,mentality_aggression,mentality_interceptions,mentality_positioning,mentality_vision,mentality_penalties,mentality_composure,defending_marking,defending_standing_tackle,defending_sliding_tackle,goalkeeping_diving,goalkeeping_handling,goalkeeping_kicking,goalkeeping_positioning,goalkeeping_reflexes
1,"('Player URL', 'https://sofifa.com/player/2326...","('Short Name', 'S. Singh')","('Player Name', 'Sarpreet Singh')","('Nationality', 'New Zealand')","('Position', 'CAM RM')","('Age', '22')","('Birthday', 'Feb 20, 1999')","('Height', '177')","('Weight', '70')","('Overall Rating', '69')","('Potential', '78')","('Value', '€3.1M')","('Wage', '€5K')","('Preferred foot', 'Left')","('Skill moves', '3')","('Weak foot', '3')","('International reputation', '1')","('Work rate', 'High/ Medium')","('Body type', 'Lean (170-185)')","('Real face', 'No')","('Release clause', '€5.4M')","('ID', '232669')","('Position', 'LAM')","('Kit number', '15')","('Joined', 'Jul 1, 2019')",,"('Contract valid until', '2024')",jahn-regensburg,"('Position', 'CAM')","('Kit number', '10')",new-zealand,"('Crossing', '67')","('Finishing', '64')","('Heading accuracy', '44')","('Short passing', '70')","('Volleys', '60')","('Dribbling', '71')","('Curve', '73')","('FK Accuracy', '64')","('Long passing', '69')","('Ball control', '70')","('Acceleration', '80')","('Sprint speed', '70')","('Agility', '78')","('Reactions', '66')","('Balance', '81')","('Shot power', '68')","('Jumping', '53')","('Stamina', '66')","('Strength', '51')","('Long shots', '67')","('Aggression', '63')","('Interceptions', '51')","('Positioning', '60')","('Vision', '72')","('Penalties', '64')","('Composure', '64')","('Defensive awareness', '51')","('Standing tackle', '47')","('Sliding tackle', '41')","('GK Diving', '14')","('GK Handling', '6')","('GK Kicking', '12')","('GK Positioning', '8')","('GK Reflexes', '8')"
2,"('Player URL', 'https://sofifa.com/player/2382...","('Short Name', 'D. Gazdag')","('Player Name', 'Dániel Gazdag')","('Nationality', 'Hungary')","('Position', 'CAM CM')","('Age', '25')","('Birthday', 'Mar 2, 1996')","('Height', '178')","('Weight', '76')","('Overall Rating', '69')","('Potential', '72')","('Value', '€1.8M')","('Wage', '€4K')","('Preferred foot', 'Right')","('Skill moves', '3')","('Weak foot', '3')","('International reputation', '1')","('Work rate', 'High/ Medium')","('Body type', 'Normal (170-185)')","('Real face', 'No')","('Release clause', '€2.8M')","('ID', '238277')","('Position', 'SUB')","('Kit number', '15')","('Joined', 'May 11, 2021')",,"('Contract valid until', '2022')",philadelphia-union,"('Position', 'CAM')","('Kit number', '6')",hungary,"('Crossing', '51')","('Finishing', '66')","('Heading accuracy', '47')","('Short passing', '70')","('Volleys', '59')","('Dribbling', '72')","('Curve', '61')","('FK Accuracy', '60')","('Long passing', '68')","('Ball control', '71')","('Acceleration', '73')","('Sprint speed', '78')","('Agility', '71')","('Reactions', '68')","('Balance', '67')","('Shot power', '64')","('Jumping', '59')","('Stamina', '65')","('Strength', '59')","('Long shots', '63')","('Aggression', '65')","('Interceptions', '60')","('Positioning', '64')","('Vision', '68')","('Penalties', '48')","('Composure', '60')","('Defensive awareness', '44')","('Standing tackle', '53')","('Sliding tackle', '49')","('GK Diving', '11')","('GK Handling', '7')","('GK Kicking', '5')","('GK Positioning', '11')","('GK Reflexes', '11')"
3,"('Player URL', 'https://sofifa.com/player/1865...","('Short Name', 'A. Ramsey')","('Player Name', 'Aaron James Ramsey')","('Nationality', 'Wales')","('Position', 'CM CAM LM')","('Age', '30')","('Birthday', 'Dec 26, 1990')","('Height', '183')","('Weight', '76')","('Overall Rating', '77')","('Potential', '77')","('Value', '€10M')","('Wage', '€65K')","('Preferred foot', 'Right')","('Skill moves', '3')","('Weak foot', '3')","('International reputation', '3')","('Work rate', 'Medium/ Medium')","('Body type', 'Lean (170-185)')","('Real face', 'Yes')",,"('ID', '186561')","('Position', 'SUB')","('Kit number', '16')",,"('Loaned from', 'Juventus')","('Contract valid until', 'May 31, 2022')",rangers,"('Position', 'CAM')","('Kit number', '20')",wales,"('Crossing', '70')","('Finishing', '65')","('Heading accuracy', '55')","('Short passing', '80')","('Volleys', '75')","('Dribbling', '79')","('Curve', '79')","('FK Accuracy', '70')","('Long passing', '78')","('Ball control', '82')","('Acceleration', '55')","('Sprint speed', '63')","('Agility', '68')","('Reactions', '73')","('Balance', '78')","('Shot power', '76')","('Jumping', '63')","('Stamina', '54')","('Strength', '65')","('Long shots', '75')","('Aggression', '65')","('Interceptions', '65')","('Positioning', '75')","('Vision', '79')","('Penalties', '75')","('Composure', '77')","('Defensive awareness', '66')","('Standing tackle', '67')","('Sliding tackle', '67')","('GK Diving', '6')","('GK Handling', '11')","('GK Kicking', '5')","('GK Reflexes', '8')",
4,"('Player URL', 'https://sofifa.com/player/2365...","('Short Name', 'J. Sýkora')","('Player Name', 'Jan Sýkora')","('Nationality', 'Czech Republic')","('Position', 'LM LB CAM')","('Age', '27')","('Birthday', 'Dec 29, 1993')","('Height', '171')","('Weight', '64')","('Overall Rating', '73')","('Potential', '73')","('Value', '€3.2M')","('Wage', '€950')","('Preferred foot', 'Left')","('Skill moves', '3')","('Weak foot', '3')","('International reputation', '1')","('Work rate', 'High/ High')","('Body type', 'Lean (170-185)')","('Real face', 'No')","('Release clause', '€7M')","('ID', '236547')","('Position', 'SUB')","('Kit number', '18')","('Joined', 'Aug 24, 2020')",,"('Contract valid until', '2024')",viktoria-plzen,"('Position', 'CAM')","('Kit number', '7')",czech-republic,"('Crossing', '74')","('Finishing', '63')","('Heading accuracy', '52')","('Short passing', '70')","('Volleys', '64')","('Dribbling', '75')","('Curve', '69')","('FK Accuracy', '70')","('Long passing', '64')","('Ball control', '72')","('Acceleration', '81')","('Sprint speed', '81')","('Agility', '84')","('Reactions', '67')","('Balance', '83')","('Shot power', '74')","('Jumping', '73')","('Stamina', '76')","('Strength', '57')","('Long shots', '72')","('Aggression', '84')","('Interceptions', '63')","('Positioning', '69')","('Vision', '68')","('Penalties', '65')","('Composure', '70')","('Defensive awareness', '59')","('Standing tackle', '67')","('Sliding tackle', '66')","('GK Diving', '6')","('GK Handling', '7')","('GK Kicking', '10')","('GK Positioning', '9')","('GK Reflexes', '13')"
5,"('Player URL', 'https://sofifa.com/player/2330...","('Short Name', 'M. Mount')","('Player Name', 'Mason Mount')","('Nationality', 'England')","('Position', 'CAM RW')","('Age', '22')","('Birthday', 'Jan 10, 1999')","('Height', '180')","('Weight', '74')","('Overall Rating', '83')","('Potential', '89')","('Value', '€58.5M')","('Wage', '€120K')","('Preferred foot', 'Right')","('Skill moves', '3')","('Weak foot', '4')","('International reputation', '3')","('Work rate', 'High/ High')","('Body type', 'Lean (170-185)')","('Real face', 'Yes')","('Release clause', '€119.9M')","('ID', '233064')","('Position', 'CAM')","('Kit number', '19')","('Joined', 'Jan 10, 2016')",,"('Contract valid until', '2024')",chelsea,"('Position', 'CAM')","('Kit number', '19')",england,"('Crossing', '83')","('Finishing', '80')","('Heading accuracy', '59')","('Short passing', '86')","('Volleys', '75')","('Dribbling', '82')","('Curve', '82')","('FK Accuracy', '83')","('Long passing', '83')","('Ball control', '85')","('Acceleration', '77')","('Sprint speed', '73')","('Agility', '80')","('Reactions', '84')","('Balance', '76')","('Shot power', '81')","('Jumping', '54')","('Stamina', '86')","('Strength', '58')","('Long shots', '82')","('Aggression', '68')","('Interceptions', '59')","('Positioning', '81')","('Vision', '83')","('Penalties', '69')","('Composure', '84')","('Defensive awareness', '55')","('Standing tackle', '57')","('Sliding tackle', '41')","('GK Diving', '11')","('GK Handling', '12')","('GK Kicking', '13')","('GK Positioning', '8')","('GK Reflexes', '12')"
6,"('Player URL', 'https://sofifa.com/player/2026...","('Short Name', 'D. Horgan')","('Player Name', 'Daryl Jeremiah Horgan')","('Nationality', 'Republic of Ireland')","('Position', 'CAM')","('Age', '28')","('Birthday', 'Aug 10, 1992')","('Height', '170')","('Weight', '70')","('Overall Rating', '66')","('Potential', '66')","('Value', '€850K')","('Wage', '€3K')","('Preferred foot', 'Right')","('Skill moves', '4')","('Weak foot', '4')","('International reputation', '1')","('Work rate', 'Medium/ Medium')","('Body type', 'Normal (170-)')","('Real face', 'No')","('Release clause', '€1.5M')","('ID', '202663')","('Position', 'SUB')","('Kit number', '19')","('Joined', 'Sep 2, 2020')",,"('Contract valid until', '2023')",wycombe-wanderers,"('Position', 'CAM')","('Kit number', '17')",republic-of-ireland,"('Crossing', '65')","('Finishing', '60')","('Heading accuracy', '51')","('Short passing', '64')","('Volleys', '54')","('Dribbling', '66')","('Curve', '69')","('FK Accuracy', '67')","('Long passing', '59')","('Ball control', '66')","('Acceleration', '83')","('Sprint speed', '82')","('Agility', '90')","('Reactions', '59')","('Balance', '90')","('Shot power', '64')","('Jumping', '81')","('Stamina', '75')","('Strength', '55')","('Long shots', '60')","('Aggression', '40')","('Interceptions', '16')","('Positioning', '63')","('Vision', '65')","('Penalties', '59')","('Composure', '62')","('Defensive awareness', '48')","('Standing tackle', '26')","('Sliding tackle', '20')","('GK Diving', '7')","('GK Handling', '13')","('GK Kicking', '15')","('GK Positioning', '12')","('GK Reflexes', '7')"
7,"('Player URL', 'https://sofifa.com/player/2530...","('Short Name', 'G. Raspadori')","('Player Name', 'Giacomo Raspadori')","('Nationality', 'Italy')","('Position', 'ST LW')","('Age', '21')","('Birthday', 'Feb 18, 2000')","('Height', '172')","('Weight', '69')","('Overall Rating', '76')","('Potential', '85')","('Value', '€17M')","('Wage', '€25K')","('Preferred foot', 'Right')","('Skill moves', '4')","('Weak foot', '5')","('International reputation', '2')","('Work rate', 'High/ Low')","('Body type', 'Normal (170-185)')","('Real face', 'No')","('Release clause', '€32.3M')","('ID', '253002')","('Position', 'SUB')","('Kit number', '22')","('Joined', 'May 26, 2019')",,"('Contract valid until', '2024')",sassuolo,"('Position', 'CAM')","('Kit number', '18')",italy,"('Crossing', '43')","('Finishing', '79')","('Heading accuracy', '69')","('Short passing', '77')","('Volleys', '68')","('Dribbling', '79')","('Curve', '62')","('FK Accuracy', '60')","('Long passing', '52')","('Ball control', '81')","('Acceleration', '85')","('Sprint speed', '77')","('Agility', '77')","('Reactions', '77')","('Balance', '85')","('Shot power', '72')","('Jumping', '60')","('Stamina', '67')","('Strength', '55')","('Long shots', '72')","('Aggression', '45')","('Interceptions', '18')","('Positioning', '78')","('Vision', '67')","('Penalties', '76')","('Composure', '75')","('Defensive awareness', '21')","('Standing tackle', '23')","('Sliding tackle', '21')","('GK Diving', '13')","('GK Handling', '14')","('GK Kicking', '7')","('GK Positioning', '7')","('GK Reflexes', '9')"
8,"('Player URL', 'https://sofifa.com/player/2002...","('Short Name', 'S. Berghuis')","('Player Name', 'Steven Berghuis')","('Nationality', 'Netherlands')","('Position', 'CAM RW')","('Age', '29')","('Birthday', 'Dec 19, 1991')","('Height', '182')","('Weight', '75')","('Overall Rating', '82')","('Potential', '82')","('Value', '€30M')","('Wage', '€26K')","('Preferred foot', 'Left')","('Skill moves', '4')","('Weak foot', '3')","('International reputation', '2')","('Work rate', 'High/ Medium')","('Body type', 'Lean (170-185)')","('Real face', 'Yes')","('Release clause', '€42M')","('ID', '200260')","('Position', 'RCM')","('Kit number', '23')","('Joined', 'Jul 19, 2021')",,"('Contract valid until', '2025')",ajax,"('Position', 'CAM')","('Kit number', '11')",netherlands,"('Crossing', '84')","('Finishing', '81')","('Heading accuracy', '48')","('Short passing', '83')","('Volleys', '76')","('Dribbling', '82')","('Curve', '86')","('FK Accuracy', '76')","('Long passing', '82')","('Ball control', '84')","('Acceleration', '83')","('Sprint speed', '78')","('Agility', '84')","('Reactions', '79')","('Balance', '75')","('Shot power', '82')","('Jumping', '56')","('Stamina', '79')","('Strength', '61')","('Long shots', '85')","('Aggression', '70')","('Interceptions', '37')","('Positioning', '80')","('Vision', '83')","('Penalties', '84')","('Composure', '80')","('Defensive awareness', '41')","('Standing tackle', '36')","('Sliding tackle', '30')","('GK Diving', '9')","('GK Handling', '11')","('GK Kicking', '7')","('GK Positioning', '7')","('GK Reflexes', '15')"
9,"('Player URL', 'https://sofifa.com/player/1895...","('Short Name', 'T. Müller')","('Player Name', 'Thomas Müller')","('Nationality', 'Germany')","('Position', 'CAM RM CF')","('Age', '31')","('Birthday', 'Sep 13, 1989')","('Height', '185')","('Weight', '76')","('Overall Rating', '87')","('Potential', '87')","('Value', '€66M')","('Wage', '€140K')","('Preferred foot', 'Right')","('Skill moves', '3')","('Weak foot', '4')","('International reputation', '4')","('Work rate', 'High/ High')","('Body type', 'Lean (185+)')","('Real face', 'Yes')","('Release clause', '€108.9M')","('ID', '189596')","('Position', 'CAM')","('Kit number', '25')","('Joined', 'Aug 10, 2008')",,"('Contract valid until', '2024')",fc-bayern-munchen,"('Position', 'CAM')","('Kit number', '13')",germany,"('Crossing', '85')","('Finishing', '88')","('Heading accuracy', '81')","('Short passing', '85')","('Volleys', '84')","('Dribbling', '77')","('Curve', '81')","('FK Accuracy', '59')","('Long passing', '79')","('Ball control', '84')","('Acceleration', '66')","('Sprint speed', '68')","('Agility', '72')","('Reactions', '94')","('Balance', '71')","('Shot power', '78')","('Jumping', '77')","('Stamina', '86')","('Strength', '66')","('Long shots', '82')","('Aggression', '62')","('Interceptions', '58')","('Positioning', '96')","('Vision', '86')","('Penalties', '70')","('Composure', '84')","('Defensive awareness', '47')","('Standing tackle', '57')","('Sliding tackle', '46')","('GK Diving', '6')","('GK Handling', '7')","('GK Kicking', '11')","('GK Positioning', '14')","('GK Reflexes', '14')"


In [4]:

import ast

# `data_frame` is the raw crawl table that an earlier cell read from CSV.

# Helper that drops the label of a "(label, value)" cell and keeps only the value.
def process_data(x):
    """Return the value part of a cell holding the text of a ``(label, value)`` pair.

    Args:
        x: A single cell. Expected to be a string such as ``"('Age', '22')"``,
            but any object is accepted.

    Returns:
        The second element when ``x`` is a string that parses to a 2-tuple.
        Anything else (plain text, NaN, numbers, other literals) is returned
        unchanged.
    """
    try:
        parsed = ast.literal_eval(x)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Not a Python literal (e.g. a bare team slug or NaN): leave as is.
        return x
    # Only unwrap real pairs; indexing other literals would mangle them.
    if isinstance(parsed, tuple) and len(parsed) == 2:
        return parsed[1]
    return x

# Apply the clean-up helper to every cell of the raw table.
# DataFrame.applymap is deprecated in newer pandas in favour of DataFrame.map;
# fall back to applymap on versions that do not have map yet.
_apply_elementwise = data_frame.map if hasattr(data_frame, "map") else data_frame.applymap
df = _apply_elementwise(process_data)

# Export the cleaned table (utf-8-sig prepends a BOM) and show it.
df.to_csv('output1.csv', index=False, encoding='utf-8-sig')
print(df)


  df = data_frame.applymap(process_data)


                                                      0               1   \
0                                             PLAYER_URL      Short Name   
1      https://sofifa.com/player/232669/sarpreet-sing...        S. Singh   
2      https://sofifa.com/player/238277/daniel-gazdag...       D. Gazdag   
3      https://sofifa.com/player/186561/aaron-ramsey/...       A. Ramsey   
4      https://sofifa.com/player/236547/jan-sykora/22...       J. Sýkora   
...                                                  ...             ...   
18838  https://sofifa.com/player/263248/ioannis-tsout...   I. Tsoutsouki   
18839  https://sofifa.com/player/140814/stavros-georg...     S. Georgiou   
18840  https://sofifa.com/player/263249/stylianos-vro...      S. Vrontis   
18841  https://sofifa.com/player/262563/marios-kokkin...  M. Kokkinoftas   
18842  https://sofifa.com/player/267767/stefanos-kitt...       S. Kittos   

                       2               3                4    5             6   \
0     

In [8]:
# Write the cleaned frame to test.csv; no index argument is given, so the row index is written as the first column.
df.to_csv('test.csv')