# Imports

In [2]:
import requests
import csv
import json
import pycountry
import os
import pandas as pd

cwd = os.getcwd()
country_data_folder = os.path.join(cwd, "country_data")

# create the country_data_folder if it doesn't exist
if not os.path.exists(country_data_folder):
    os.makedirs(country_data_folder)

# Get a list of all country ISO codes
country_codes = [country.alpha_2 for country in pycountry.countries]

for iso in country_codes:
    # send a GET request to the API endpoint with the specified country code
    c_code = iso
    
    # define the API endpoint URL
    url = 'https://api.chess.com/pub/country/' + c_code + '/players'
    
    user_response = requests.get(url)

    # check if the response was successful
    if user_response.status_code == 200:

        data_json = user_response.json()
        
        user_list=[]
        
        for v in data_json["players"]:
            user_list.append(v)
        
        players_data = {
             'Usernames' : user_list,
         }
        
        # create a pandas DataFrame
        df = pd.DataFrame(players_data)
    
        # create a CSV fil and write the header row
        filename = os.path.join(country_data_folder, 'players_username_' + iso + '.csv')
        
        #write the dataframe to the csv
        df.to_csv(filename, index=False, header=False)
        
        print('Data saved to players_username_' + iso + '.csv')
    else:
        print('Error retrieving data from API' + iso)
        

Data saved to players_username_AW.csv
Data saved to players_username_AF.csv
Data saved to players_username_AO.csv
Data saved to players_username_AI.csv
Data saved to players_username_AX.csv
Data saved to players_username_AL.csv
Data saved to players_username_AD.csv
Data saved to players_username_AE.csv
Data saved to players_username_AR.csv
Data saved to players_username_AM.csv
Data saved to players_username_AS.csv
Error retrieving data from APIAQ
Error retrieving data from APITF
Data saved to players_username_AG.csv
Data saved to players_username_AU.csv
Data saved to players_username_AT.csv
Data saved to players_username_AZ.csv
Data saved to players_username_BI.csv
Data saved to players_username_BE.csv
Data saved to players_username_BJ.csv
Error retrieving data from APIBQ
Data saved to players_username_BF.csv
Data saved to players_username_BD.csv
Data saved to players_username_BG.csv
Data saved to players_username_BH.csv
Data saved to players_username_BS.csv
Data saved to players_usern

Data saved to players_username_TH.csv
Data saved to players_username_TJ.csv
Error retrieving data from APITK
Data saved to players_username_TM.csv
Data saved to players_username_TL.csv
Data saved to players_username_TO.csv
Data saved to players_username_TT.csv
Data saved to players_username_TN.csv
Data saved to players_username_TR.csv
Data saved to players_username_TV.csv
Data saved to players_username_TW.csv
Data saved to players_username_TZ.csv
Data saved to players_username_UG.csv
Data saved to players_username_UA.csv
Error retrieving data from APIUM
Data saved to players_username_UY.csv
Data saved to players_username_US.csv
Data saved to players_username_UZ.csv
Data saved to players_username_VA.csv
Data saved to players_username_VC.csv
Data saved to players_username_VE.csv
Data saved to players_username_VG.csv
Data saved to players_username_VI.csv
Data saved to players_username_VN.csv
Data saved to players_username_VU.csv
Error retrieving data from APIWF
Data saved to players_usern

The error for certain countries, stem from players of that country not being on chess.com. So far, we have each file containing usernames of each player in each country. Now, I'm going to collect the names of the top 10 countries with most players.

In [3]:
import dask.dataframe as dd
import os

# Get a list of all CSV files in the directory
csv_files = [file for file in os.listdir(country_data_folder) if file.endswith('.csv')]

# Create a dictionary to store row counts
row_counts = {}

# Iterate over each CSV file and get the row count
for file in csv_files:
    # Construct the file path
    file_path = os.path.join(country_data_folder, file)

    # Read the CSV file into a Dask DataFrame
    df = dd.read_csv(file_path)

    # Count the number of rows in the DataFrame
    row_count = len(df)

    # Store the row count in the dictionary
    row_counts[file] = row_count

# Sort the row counts dictionary by value in descending order
sorted_files = sorted(row_counts.items(), key=lambda x: x[1], reverse=True)

# Get the top 10 files with the most rows
largest_files = sorted_files[:10]

# Print the result
print("Top 10 files with the most rows:")
for file, count in largest_files:
    print("File:", file)
    print("Number of rows:", count)
    print()


Top 10 files with the most rows:
File: players_username_AE.csv
Number of rows: 9999

File: players_username_AL.csv
Number of rows: 9999

File: players_username_AM.csv
Number of rows: 9999

File: players_username_AR.csv
Number of rows: 9999

File: players_username_AT.csv
Number of rows: 9999

File: players_username_AU.csv
Number of rows: 9999

File: players_username_AZ.csv
Number of rows: 9999

File: players_username_BD.csv
Number of rows: 9999

File: players_username_BE.csv
Number of rows: 9999

File: players_username_BG.csv
Number of rows: 9999



I have found that there is a limit to requests for the api, I have contacted the chess.com support team for further assistance.

In [13]:
import csv
import requests
# Define the API endpoint URL
url = 'https://api.chess.com/pub/player/'


# Loop through each country code
for iso in country_codes:
    # Open the CSV file containing the players for this country
    #print(country_data_folder + f'\players_username_{iso}.csv')
    if os.path.exists(country_data_folder + f'\players_username_{iso}.csv'):
        with open(country_data_folder + f'\players_username_{iso}.csv', 'r') as file:
            reader = csv.reader(file)
            next(reader)  # Skip the header row
        
            # Open a new CSV file to write the player data to
            with open(f'player_data_{iso}.csv', 'w', newline='') as output_file:
                writer = csv.writer(output_file)
            
                # Write the header row
                writer.writerow(['username', 'player_id', 'status', 'country', 'joined', 'is_streamer', 'fide'])
            
                # Loop through each player in the input CSV file
                for row in reader:
                    username = row[0]
                
                    # Send a GET request to the API endpoint with the specified username
                    response = requests.get(url + username)
                
                    # Check if the response was successful
                    if response.status_code == 200:
                        # Parse the JSON response
                        data = response.json()
                    
                        # Extract the fields we're interested in
                        player_id = data['player_id']
                        status = data['status']
                        country = data['country']
                        joined = data['joined']
                        is_streamer = data['is_streamer']
                        fide = data.get('fide', '')
                        
                    
                        # Write the data to the output CSV file
                        writer.writerow([username, player_id, status, country, joined, is_streamer, fide])
                        print(f'Written successfully')
                    else:
                        print(f'Error retrieving data for {username} from API')

    

Written successfully


StopIteration: 

Data saved to players_username_AW.csv
Data saved to players_username_AF.csv
Data saved to players_username_AO.csv
Data saved to players_username_AI.csv
Data saved to players_username_AX.csv
Data saved to players_username_AL.csv
Data saved to players_username_AD.csv
Data saved to players_username_AE.csv
Data saved to players_username_AR.csv
Data saved to players_username_AM.csv
Data saved to players_username_AS.csv
Error retrieving data from APIAQ
Error retrieving data from APITF
Data saved to players_username_AG.csv
Data saved to players_username_AU.csv
Data saved to players_username_AT.csv
Data saved to players_username_AZ.csv
Data saved to players_username_BI.csv
Data saved to players_username_BE.csv
Data saved to players_username_BJ.csv
Error retrieving data from APIBQ
Data saved to players_username_BF.csv
Data saved to players_username_BD.csv
Data saved to players_username_BG.csv
Data saved to players_username_BH.csv
Data saved to players_username_BS.csv
Data saved to players_usern

Data saved to players_username_TH.csv
Data saved to players_username_TJ.csv
Error retrieving data from APITK
Data saved to players_username_TM.csv
Data saved to players_username_TL.csv
Data saved to players_username_TO.csv
Data saved to players_username_TT.csv
Data saved to players_username_TN.csv
Data saved to players_username_TR.csv
Data saved to players_username_TV.csv
Data saved to players_username_TW.csv
Data saved to players_username_TZ.csv
Data saved to players_username_UG.csv
Data saved to players_username_UA.csv
Error retrieving data from APIUM
Data saved to players_username_UY.csv
Data saved to players_username_US.csv
Data saved to players_username_UZ.csv
Data saved to players_username_VA.csv
Data saved to players_username_VC.csv
Data saved to players_username_VE.csv
Data saved to players_username_VG.csv
Data saved to players_username_VI.csv
Data saved to players_username_VN.csv
Data saved to players_username_VU.csv
Error retrieving data from APIWF
Data saved to players_usern

Data saved to players_username_FR.csv
