In [1]:
import json
import requests
import time
from tqdm import tqdm
import pandas as pd

In [2]:
# Base URL of the RAWG games endpoint; the paging query string is added at call time.
# example: requests.get('https://api.rawg.io/api/games?page=1&page_size=40').json()
rawg_api = 'https://api.rawg.io/api/games' # append ?page=<number>&page_size=<number>

In [3]:
def get_genres(result):
    '''
    Collect the genre names attached to a single game record.

    Input:
        result: one entry of the RAWG video game API 'results' list; its
            'genres' value is expected to be a list of dicts that each
            carry a 'name' key
    Returns:
        list of game genre names, in the order they appear in the record;
        an empty list when the record has no 'genres' key or its value is
        None or empty
    '''
    # `or ()` covers both a missing key and an explicit null, so one
    # incomplete record does not raise while a page is being processed.
    return [genre['name'] for genre in result.get('genres') or ()]

In [4]:
def list_games(json, min_ratings=3):
    '''
    Extract the fields of interest from one page of API results.

    Input:
        json: one parsed page of the RAWG video game API response; must
            contain a 'results' list of game records (the parameter name
            shadows the json module inside this function, which does not
            use that module)
        min_ratings: a game is kept only when its 'ratings_count' is
            strictly greater than this value (default 3)
    Returns:
        A list of dictionaries, one per kept game, each containing the
        game's id, name, rating, ratings breakdown, release date,
        metacritic score, ratings count, and genres
    '''
    # fields copied verbatim from the API record, in output order
    copied_fields = ('id', 'name', 'rating', 'ratings', 'released',
                     'metacritic', 'ratings_count')
    games = []
    for result in json['results']:
        # filter first so no work is spent on games that are dropped anyway;
        # a null count is treated as zero votes instead of raising TypeError
        if (result['ratings_count'] or 0) <= min_ratings:
            continue
        games_dict = {field: result[field] for field in copied_fields}
        games_dict['genres'] = get_genres(result)
        games.append(games_dict)
    return games

In [5]:
def to_json(game, filename):
    '''
    Append one record to a JSON-lines file.

    Input:
        game: dictionary containing the data to write to the file
        filename: name and path of the target file, given without the
            '.json' extension (it is added here)
    Returns:
        None; the record is added as a single line at the end of
        '<filename>.json'
    '''
    with open(f'{filename}.json', 'a') as handle:
        serialized = json.dumps(game)
        handle.write(serialized + '\n')

In [6]:
def call_api(api, pages, filename):
    '''
    Scrape a range of result pages and append the kept games to a file.

    Input:
        api: rawg api url
        pages: pair (first, stop) of page numbers to scrape; pages are
            requested with range(first, stop), so `stop` itself is excluded
        filename: the filename and path (without the '.json' extension,
            which to_json adds) where the games are saved
    Returns:
        list of the games that failed to save to json. The games' id, name,
        rating, ratings breakdown, release date, metacritic score, ratings
        count and genres are written to the json file as a side effect
    Raises:
        requests.RequestException when a page request times out, cannot
        connect, or comes back with an HTTP error status
    '''
    not_added = []

    # make an api call and extract what you need
    for page in tqdm(range(pages[0], pages[1])):
        response = requests.get(
            api,
            params={'page': page, 'page_size': 40},
            timeout=30,  # a stalled connection must not hang the whole run
        )
        # fail with a clear HTTP error instead of a KeyError further down
        response.raise_for_status()
        games = list_games(response.json())
        for game in games:
            # guard every game on its own: one failed write must not make
            # the remaining games of the page disappear without a trace
            try:
                to_json(game, filename)
            except Exception:  # keeps Ctrl-C / SystemExit working
                not_added.append(game)
        time.sleep(1)  # pause one second before requesting the next page
    return not_added

In [7]:
# Scrape pages 1 through 8299 (the second number is exclusive) into raw_data.json.
# NOTE: to_json opens the file in append mode, so re-running this cell adds
# the same games to raw_data.json a second time.
not_added = call_api(rawg_api, [1, 8300], 'raw_data')

100%|████████████████████████████████████████████████████████████████████████████| 8299/8299 [4:37:58<00:00,  2.01s/it]


In [9]:
# Load the scraped games; raw_data.json holds one JSON object per line.
df = pd.read_json('raw_data.json', lines=True)
df.head()

Unnamed: 0,id,name,rating,ratings,released,metacritic,ratings_count,genres
0,3498,Grand Theft Auto V,4.48,"[{'id': 5, 'title': 'exceptional', 'count': 16...",2013-09-17,96.0,2758,"[Action, Shooter]"
1,4200,Portal 2,4.61,"[{'id': 5, 'title': 'exceptional', 'count': 16...",2011-04-19,95.0,2405,"[Shooter, Puzzle]"
2,3328,The Witcher 3: Wild Hunt,4.68,"[{'id': 5, 'title': 'exceptional', 'count': 20...",2015-05-18,93.0,2552,[RPG]
3,5679,The Elder Scrolls V: Skyrim,4.39,"[{'id': 5, 'title': 'exceptional', 'count': 10...",2011-11-11,,2015,"[Action, RPG]"
4,12020,Left 4 Dead 2,4.08,"[{'id': 4, 'title': 'recommended', 'count': 76...",2009-11-17,89.0,1431,"[Action, Shooter]"


In [12]:
# Per-column flag for missing values, followed by dtypes and non-null counts.
print(f'{df.isna().any()}\n')
df.info()

id               False
name             False
rating           False
ratings          False
released          True
metacritic        True
ratings_count    False
genres           False
dtype: bool

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11298 entries, 0 to 11297
Data columns (total 8 columns):
id               11298 non-null int64
name             11298 non-null object
rating           11298 non-null float64
ratings          11298 non-null object
released         11088 non-null object
metacritic       2271 non-null float64
ratings_count    11298 non-null int64
genres           11298 non-null object
dtypes: float64(2), int64(2), object(4)
memory usage: 706.2+ KB
