In [1]:
import requests
import keys
import pandas as pd
from datetime import datetime
import numpy as np
import json

In [2]:
# base_url = 'https://id.twitch.tv/oauth2/token'
# params = {
#     'client_id': keys.API_CLIENT_ID,
#     'client_secret': keys.API_CLIENT_SECRET,
#     'grant_type': 'client_credentials'
# }
# response = requests.post(base_url, params=params)


In [3]:
# print(response.json())

In [4]:
def fetch():
    base_url = 'https://api.igdb.com/v4/games'
    headers = {
        'Client-ID': keys.API_CLIENT_ID,
        'Authorization': f'Bearer {keys.API_AUTHORIZATION_TOKEN}',
    }
    all_data = []
    offset = 0

    while True:
        # Define the query for the game
        data = f"""fields id, slug, summary, 
     genres.slug, keywords.slug, name,genres,keywords,similar_games,
    first_release_date,total_rating; 
    where rating > 70 & first_release_date>1111977600;
    limit 500; 
    offset {offset};"""

        try:
            # Make the POST request
            response = requests.post(base_url, headers=headers, data=data)

            # Check for non-200 status codes
            if response.status_code != 200:
                print(f"Error: {response.status_code}, {response.text}")
                return

            games = response.json()

            # Check if no data is returned
            if not games:
                print(f"No more games found at offset {offset}. Exiting.")
                break

            # Append the games to the list
            all_data.extend(games)
            print(f"Scraped {offset + len(games)} games")

            # Stop if fewer than 500 games are returned
            if len(games) < 500:
                break

            # Increment the offset
            offset += 500

        except requests.RequestException as e:
            print(f"Request failed: {e}")
            break

    return all_data

In [5]:
fetched=fetch()

Scraped 500 games
Scraped 1000 games
Scraped 1500 games
Scraped 2000 games
Scraped 2500 games
Scraped 3000 games
Scraped 3500 games
Scraped 4000 games
Scraped 4500 games
Scraped 5000 games
Scraped 5500 games
Scraped 6000 games
Scraped 6500 games
Scraped 7000 games
Scraped 7500 games
Scraped 8000 games
Scraped 8500 games
Scraped 8913 games


In [6]:
df=pd.DataFrame(fetched)

In [7]:
df.head()

Unnamed: 0,id,first_release_date,genres,keywords,name,similar_games,slug,summary,total_rating
0,6964,1173139200,"[{'id': 4, 'slug': 'fighting'}, {'id': 7, 'slu...","[{'id': 61, 'slug': 'martial-arts'}, {'id': 97...",Def Jam: Icon,"[1244, 1246, 2574, 3875, 4755, 7498, 8479, 109...",def-jam-icon,"Infusing hip-hop music, culture and lifestyle ...",73.324424
1,122646,1563840000,"[{'id': 13, 'slug': 'simulator'}]","[{'id': 129, 'slug': 'bloody'}, {'id': 287, 's...",People Playground,"[15536, 17131, 25311, 26574, 36553, 37419, 515...",people-playground,"Shoot, stab, burn, poison, tear, vaporise, or ...",70.168166
2,394,1178582400,"[{'id': 12, 'slug': 'role-playing-rpg'}, {'id'...","[{'id': 78, 'slug': 'anime'}, {'id': 151, 'slu...",Final Fantasy Tactics: The War of the Lions,"[428, 5550, 9608, 10388, 14394, 22387, 26841, ...",final-fantasy-tactics-the-war-of-the-lions,An updated version of the PlayStation game Fin...,89.002382
3,9284,1417132800,"[{'id': 8, 'slug': 'platform'}, {'id': 31, 'sl...","[{'id': 905, 'slug': 'difficult'}, {'id': 962,...",Wings of Vi,"[24426, 28070, 28168, 28309, 29032, 36198, 478...",wings-of-vi,Wings of Vi is a challenging 2D platformer in ...,70.151379
4,46722,1417392000,"[{'id': 12, 'slug': 'role-playing-rpg'}, {'id'...","[{'id': 5, 'slug': 'zombies'}, {'id': 122, 'sl...",Touhou Puppet Dance Performance,"[105447, 137783, 206935, 210552, 210591, 21061...",touhou-puppet-dance-performance,Touhou Puppet Dance Performance is a spiritual...,89.858497


In [8]:
def get_slug(items):
    if pd.isna(items).all() if isinstance(items, list) else pd.isna(items):
        return []
    
    return [
        item.get('slug', 'unknown')  
        for item in items
    ]



def get_time(unix):
    if pd.isna(unix):
        return None  
    return datetime.fromtimestamp(unix)


In [9]:
df['genres']=df['genres'].apply(get_slug)

In [10]:
df['keywords']=df['keywords'].apply(get_slug)

In [11]:
df['first_release_date']=df['first_release_date'].apply(get_time)

In [12]:
df["genres"] = df["genres"].apply(lambda x: " ".join(x) if isinstance(x, list) else str(x))
df["keywords"] = df["keywords"].apply(lambda x: " ".join(x) if isinstance(x, list) else str(x))

In [13]:
df.head()

Unnamed: 0,id,first_release_date,genres,keywords,name,similar_games,slug,summary,total_rating
0,6964,2007-03-06 05:30:00,fighting music,martial-arts hand-to-hand-combat achievements ...,Def Jam: Icon,"[1244, 1246, 2574, 3875, 4755, 7498, 8479, 109...",def-jam-icon,"Infusing hip-hop music, culture and lifestyle ...",73.324424
1,122646,2019-07-23 05:30:00,simulator,bloody physics 2d death gore pixel-art ragdoll...,People Playground,"[15536, 17131, 25311, 26574, 36553, 37419, 515...",people-playground,"Shoot, stab, burn, poison, tear, vaporise, or ...",70.168166
2,394,2007-05-08 05:30:00,role-playing-rpg simulator strategy turn-based...,anime medieval jrpg robots party-based-combat ...,Final Fantasy Tactics: The War of the Lions,"[428, 5550, 9608, 10388, 14394, 22387, 26841, ...",final-fantasy-tactics-the-war-of-the-lions,An updated version of the PlayStation game Fin...,89.002382
3,9284,2014-11-28 05:30:00,platform adventure indie,difficult female-protagonist action-adventure ...,Wings of Vi,"[24426, 28070, 28168, 28309, 29032, 36198, 478...",wings-of-vi,Wings of Vi is a challenging 2D platformer in ...,70.151379
4,46722,2014-12-01 05:30:00,role-playing-rpg adventure indie,zombies hell vampire magic 2d pokemon jrpg fro...,Touhou Puppet Dance Performance,"[105447, 137783, 206935, 210552, 210591, 21061...",touhou-puppet-dance-performance,Touhou Puppet Dance Performance is a spiritual...,89.858497


In [14]:
df.shape

(8913, 9)

In [15]:
df.isnull().sum()

id                      0
first_release_date      0
genres                  0
keywords                0
name                    0
similar_games         103
slug                    0
summary                25
total_rating            0
dtype: int64

In [16]:
df.dropna(inplace=True)

In [17]:
df.shape

(8789, 9)

In [18]:
df = df.drop_duplicates(subset=['name'])

(8031, 9)

In [21]:
df.to_csv('data.csv',index=False)