In [None]:
import pandas as pd
import requests
import json

In [111]:
def feature_collect(feature, records=None):
    """Collect one field from every record in a list of dict-like records.

    Parameters
    ----------
    feature : str
        Key to look up in each record.
    records : iterable of dict, optional
        Records to read from. When omitted, the module-level ``rikishi_data``
        list is used, which must already exist in the kernel.

    Returns
    -------
    list
        One value per record, in the same order as ``records``. Records that
        lack ``feature`` contribute ``None``.
    """
    # Compare against None (not truthiness) so an explicitly passed empty
    # list is honoured instead of silently falling back to the global.
    if records is None:
        records = rikishi_data
    return [record.get(feature) for record in records]

In [113]:
# API location
url = 'https://www.sumo-api.com/api/rikishis'

# Ping the website. requests applies no timeout by default, so set one
# explicitly to stop the cell from hanging forever if the API never answers.
response = requests.get(url, timeout=30)
if response.status_code == 200:
    data = response.json()
    # Fall back to an empty list when the payload has no 'records' key.
    rikishi_data = data.get('records', [])

    # Collect lists of relevant features from rikishi_data.
    # The ID list is deliberately not called `id`, which would shadow the builtin.
    rikishi_id = feature_collect('id')
    ring_name = feature_collect('shikonaEn')
    current_rank = feature_collect('currentRank')
    heya = feature_collect('heya')
    birthday = feature_collect('birthDate')
    height = feature_collect('height')
    weight = feature_collect('weight')
    debut = feature_collect('debut')

    # Construct features into a dataframe (one row per record, one column per feature)
    rikishi_df = pd.DataFrame({
        'id': rikishi_id,
        'ring_name': ring_name,
        'current_rank': current_rank,
        'heya': heya,
        'birthday': birthday,
        'height': height,
        'weight': weight,
        'debut': debut
    })

else:
    # NOTE: rikishi_df is not created on this path, so later cells that
    # reference it will fail until this request succeeds.
    print('Failed to connect to website', response.status_code)

In [135]:
# rikishi_df.to_csv('./sumo/data/rikishi_df.csv', index = False)

#### Use Rikishi IDs to call the API and pull data from the statistics section.

In [140]:
# The rikishi IDs are needed to request further features from the API,
# so pull the 'id' column out of the dataframe as a plain Python list.
id_list = rikishi_df.loc[:, 'id'].tolist()

In [163]:
# Request the stats payload for every rikishi ID. stats_list receives exactly
# one entry per ID, in id_list order, so the two lists stay aligned.
stats_list = []

# `rikishi_id` rather than `id`, so the builtin id() is not shadowed.
for rikishi_id in id_list:
    stats_url = f'https://www.sumo-api.com/api/rikishi/{rikishi_id}/stats'
    # Explicit timeout: requests would otherwise wait indefinitely on a stalled connection.
    stats_response = requests.get(stats_url, timeout=30)
    if stats_response.status_code == 200:
        stats_data = stats_response.json()
        stats_list.append(stats_data)
    else:
        # Report the status of *this* request (not the earlier rikishis request)
        # together with the ID that failed.
        print('Failed to connect to website', stats_response.status_code, 'for rikishi id', rikishi_id)
        # Empty placeholder keeps stats_list the same length as id_list;
        # .get() lookups on it yield None for the missing statistics.
        stats_list.append({})

In [174]:
# Extract the win/loss/match totals from each stats dictionary in stats_list.
# A dictionary without the key contributes None.
total_wins, total_losses, total_matches = (
    [entry.get(api_key) for entry in stats_list]
    for api_key in ('totalWins', 'totalLosses', 'totalMatches')
)

# Assemble the totals into a dataframe, including the ID column so the
# result can be joined back onto the rikishi dataframe.
stats_columns = {
    'id': id_list,
    'total_wins': total_wins,
    'total_losses': total_losses,
    'total_matches': total_matches,
}
stats_df = pd.DataFrame(stats_columns)
            

In [178]:
# Join the profile and statistics frames on the shared 'id' column
# (default inner join: only IDs present in both frames are kept).
sumo_df = rikishi_df.merge(stats_df, on='id')

In [179]:
# Preview the first five rows of the merged dataframe.
sumo_df.head(n=5)

Unnamed: 0,id,ring_name,current_rank,heya,birthday,height,weight,debut,total_wins,total_losses,total_matches
0,218,Dairaido,Sandanme 27 East,Takadagawa,1980-04-17T00:00:00Z,177.0,151.0,199603,619,580,1199
1,299,Kayatoiwa,Sandanme 70 East,Minato,1991-07-13T00:00:00Z,176.0,103.5,200703,337,347,684
2,36,Myogiryu,Juryo 8 West,Sakaigawa,1990-02-27T15:00:00Z,188.0,154.0,200905,601,597,1198
3,275,Oshozan,Sandanme 48 West,Naruto,2000-05-11T00:00:00Z,168.0,123.0,201603,164,153,317
4,614,Daitengu,Jonidan 67 East,Takadagawa,2004-06-17T00:00:00Z,177.0,93.0,202301,28,35,63


In [180]:
# Write the merged dataframe to CSV without the row index.
sumo_df.to_csv(path_or_buf='./sumo/data/sumo_df.csv', index=False)