# API Script For sumo-api.com

In [37]:
import pandas as pd
import requests
import json

In [38]:
def feature_collect(feature, raw_data):
    '''
    Pull a single field out of every record in a list of dictionaries.

    feature: Dictionary key to look up in each record.  Examples include 'height' and 'totalWins'
    raw_data: List of dictionaries where the features are stored

    Returns a list with one entry per record, in the same order as raw_data.
    A record that does not contain the key contributes None.
    '''
    values = []
    for record in raw_data:
        values.append(record.get(feature))
    return values

#### Collect individual rikishi (sumo wrestler) data

In [49]:
# API location for individual rikishi data such as height and weight
url = 'https://www.sumo-api.com/api/rikishis'
limit = 1000
skip = 0
all_rikishi_data = []

# Page through the endpoint `limit` records at a time until a page comes back empty.
# NOTE(review): 'intai': True is sent with every request -- presumably it asks the API
# to include retired rikishi; confirm against the API documentation.
while True:
    try:
        # The timeout keeps a stalled connection from hanging the cell indefinitely
        response = requests.get(
            url,
            params = {'skip': skip, 'limit': limit, 'intai': True},
            timeout = 30,
        )
    except requests.Timeout:
        print(f'Request timed out at skip {skip}')
        break
    except requests.RequestException as e:
        print(f'An error occured: {e}')
        break

    if response.status_code != 200:
        print('Failed to connect to website', response.status_code)
        break

    data = response.json()
    rikishi_data = data.get('records', [])
    if not rikishi_data:
        # An empty (or missing) 'records' entry means every page has been read
        break

    all_rikishi_data.extend(rikishi_data)
    skip += limit
    print(f'Collected {len(rikishi_data)} records.  Total collected: {len(all_rikishi_data)}')

# Collect lists of relevant features from all_rikishi_data.
# Each list has one entry per record; records lacking a key contribute None.
# NOTE: `id` shadows the Python builtin of the same name; the name is kept because
# the cell that builds rikishi_df reads it.
id = feature_collect('id', all_rikishi_data)
sumodb_id = feature_collect('sumodbId', all_rikishi_data)
nsk_id = feature_collect('nskId', all_rikishi_data)
ring_name = feature_collect('shikonaEn', all_rikishi_data)
current_rank = feature_collect('currentRank', all_rikishi_data)
heya = feature_collect('heya', all_rikishi_data)
birthday = feature_collect('birthDate', all_rikishi_data)
height = feature_collect('height', all_rikishi_data)
weight = feature_collect('weight', all_rikishi_data)
debut = feature_collect('debut', all_rikishi_data)
retirement = feature_collect('intai', all_rikishi_data)

Collected 1000 records.  Total collected: 1000
Collected 1000 records.  Total collected: 2000
Collected 1000 records.  Total collected: 3000
Collected 1000 records.  Total collected: 4000
Collected 1000 records.  Total collected: 5000
Collected 1000 records.  Total collected: 6000
Collected 1000 records.  Total collected: 7000
Collected 1000 records.  Total collected: 8000
Collected 942 records.  Total collected: 8942


In [50]:
# Display the first raw record collected from the API to see which keys it carries
all_rikishi_data[0]

{'id': 7522,
 'sumodbId': 10438,
 'nskId': 0,
 'shikonaEn': 'Hakushuzan',
 'shikonaJp': '柏秀山',
 'heya': '-',
 'birthDate': '0001-01-01T00:00:00Z',
 'shusshin': '-',
 'debut': '195401',
 'intai': '1959-05-01T00:00:00Z',
 'updatedAt': '2024-07-26T01:52:44.911Z'}

In [51]:
# Assemble the per-feature lists into one dataframe, one column per feature.
# The two lists below are parallel: column_values[i] becomes the column named column_names[i].
column_names = [
    'id', 'sumodb_id', 'nsk_id', 'ring_name', 'current_rank', 'heya',
    'birthday', 'height', 'weight', 'debut', 'retirement',
]
column_values = [
    id, sumodb_id, nsk_id, ring_name, current_rank, heya,
    birthday, height, weight, debut, retirement,
]
rikishi_df = pd.DataFrame(dict(zip(column_names, column_values)))

In [52]:
# Display the assembled rikishi dataframe
rikishi_df

Unnamed: 0,id,sumodb_id,nsk_id,ring_name,current_rank,heya,birthday,height,weight,debut,retirement
0,7522,10438,0,Hakushuzan,,-,0001-01-01T00:00:00Z,,,195401,1959-05-01T00:00:00Z
1,2973,5772,0,Wakahokkai Gentaro,,Hanakago,1943-03-02T00:00:00Z,,,195907,1966-09-01T00:00:00Z
2,6428,6679,0,Kakuryuasahi,,Oshima,1971-02-24T00:00:00Z,178.5,111.0,198705,1988-11-01T00:00:00Z
3,3683,381,0,Katsumayama Shuichi,,Naruto,1978-04-05T00:00:00Z,177.0,113.5,199403,2005-05-01T00:00:00Z
4,6560,7299,0,Otsuru,,Takadagawa,1967-05-14T00:00:00Z,186.0,77.0,198303,1987-09-01T00:00:00Z
...,...,...,...,...,...,...,...,...,...,...,...
8937,8939,12891,4282,Musubiyama,Jonokuchi 16 East,Hanaregoma,2006-03-30T15:00:00Z,188.0,104.0,202407,
8938,8940,12886,4276,Kikuchi,Jonokuchi 16 West,Futagoyama,2009-02-02T15:00:00Z,175.0,105.0,202407,
8939,8941,12892,4283,Yamada,Jonokuchi 18 East,Nishonoseki,2004-06-01T15:00:00Z,173.0,75.0,202407,
8940,8942,12894,4285,Ishizaki,Makushita 60 East,Takasago,2000-08-08T15:00:00Z,172.0,117.0,202407,


In [53]:
# Persist rikishi_df as a csv file; the dataframe index is not written out
rikishi_df.to_csv(path_or_buf = '../data/rikishi_df.csv', index = False)

#### Use rikishi IDs to call the API and pull statistical data for each rikishi.

In [58]:
# Collect all of the rikishi IDs into a list because they are necessary to collect other features from the API.
id_list = rikishi_df['id'].tolist()

# JSON payloads from the stats endpoint: exactly one entry per ID in id_list, in the same order
stats_list = []

# Call the stats endpoint once per rikishi.  A single Session is shared by all calls
# so the connection to the host can be reused instead of reopened for every ID.
with requests.Session() as session:
    # The loop variable is deliberately not called `id`: that name holds the list of
    # IDs built earlier in the notebook and would otherwise be overwritten here.
    for rikishi_id in id_list:
        stats_url = f'https://www.sumo-api.com/api/rikishi/{rikishi_id}/stats'

        # Placeholder used when the call fails.  Appending it keeps stats_list lined up
        # position-by-position with id_list; feature_collect yields None for its fields.
        stats_data = {}
        try:
            stats_response = session.get(stats_url, timeout = 30)
            if stats_response.status_code == 200:
                stats_data = stats_response.json()
            else:
                # Report the status of this request, not of an earlier cell's response
                print('Failed to connect to website', stats_response.status_code)
        except requests.Timeout:
            print(f'Request time out for id {rikishi_id}')
        except requests.RequestException as e:
            print(f'An error occured: {e}')

        stats_list.append(stats_data)

In [61]:
# Extract the win / loss / match totals from each entry of stats_list
total_wins = feature_collect('totalWins', stats_list)
total_losses = feature_collect('totalLosses', stats_list)
total_matches = feature_collect('totalMatches', stats_list)

# Build the stats dataframe.  The rikishi ID is included so the table can be joined later;
# IDs are paired with the totals purely by position, so id_list and stats_list must line up.
stats_columns = {
    'id': id_list,
    'total_wins': total_wins,
    'total_losses': total_losses,
    'total_matches': total_matches,
}
stats_df = pd.DataFrame(stats_columns)

In [62]:
# Display the stats dataframe
stats_df

Unnamed: 0,id,total_wins,total_losses,total_matches
0,7522,27,36,63
1,2973,146,160,306
2,6428,26,23,49
3,3683,223,211,434
4,6560,78,73,151
...,...,...,...,...
8937,8939,4,3,7
8938,8940,2,5,7
8939,8941,2,5,7
8940,8942,0,0,0


In [63]:
# Persist stats_df as a csv file; the dataframe index is not written out
stats_df.to_csv(path_or_buf = '../data/stats_df.csv', index = False)

#### Merge the rikishi and stats dataframes into one final dataframe titled sumo_df.  Save sumo_df to a csv file.

In [64]:
# Join the rikishi attributes with their match statistics on the shared 'id' column
# (pandas' default inner join: only IDs present in both dataframes are kept)
sumo_df = rikishi_df.merge(stats_df, on = 'id')

In [65]:
# Display the first rows of the merged dataframe
sumo_df.head()

Unnamed: 0,id,sumodb_id,nsk_id,ring_name,current_rank,heya,birthday,height,weight,debut,retirement,total_wins,total_losses,total_matches
0,7522,10438,0,Hakushuzan,,-,0001-01-01T00:00:00Z,,,195401,1959-05-01T00:00:00Z,27,36,63
1,2973,5772,0,Wakahokkai Gentaro,,Hanakago,1943-03-02T00:00:00Z,,,195907,1966-09-01T00:00:00Z,146,160,306
2,6428,6679,0,Kakuryuasahi,,Oshima,1971-02-24T00:00:00Z,178.5,111.0,198705,1988-11-01T00:00:00Z,26,23,49
3,3683,381,0,Katsumayama Shuichi,,Naruto,1978-04-05T00:00:00Z,177.0,113.5,199403,2005-05-01T00:00:00Z,223,211,434
4,6560,7299,0,Otsuru,,Takadagawa,1967-05-14T00:00:00Z,186.0,77.0,198303,1987-09-01T00:00:00Z,78,73,151


In [66]:
# Persist sumo_df as a csv file; the dataframe index is not written out
sumo_df.to_csv(path_or_buf = '../data/sumo_df.csv', index = False)