In [12]:
import requests
import keys
import pandas as pd
from datetime import datetime
import numpy as np

import re
import unicodedata
import string

In [2]:
# base_url = 'https://id.twitch.tv/oauth2/token'
# params = {
#     'client_id': keys.API_CLIENT_ID,
#     'client_secret': keys.API_CLIENT_SECRET,
#     'grant_type': 'client_credentials'
# }
# response = requests.post(base_url, params=params)


In [3]:
# print(response.json())

In [2]:
def fetch():
    base_url = 'https://api.igdb.com/v4/games'
    headers = {
        'Client-ID': keys.API_CLIENT_ID,
        'Authorization': f'Bearer {keys.API_AUTHORIZATION_TOKEN}',
    }
    all_data = []
    offset = 0

    while True:
        # Define the query for the game
        data = f"""fields id, slug, summary, 
     genres.slug, keywords.slug, name,genres,keywords,
    first_release_date,total_rating, cover.image_id; 
    where rating > 70 & first_release_date>1111977600;
    limit 500; 
    offset {offset};"""

        try:
            # Make the POST request
            response = requests.post(base_url, headers=headers, data=data)

            # Check for non-200 status codes
            if response.status_code != 200:
                print(f"Error: {response.status_code}, {response.text}")
                return

            games = response.json()

            # Check if no data is returned
            if not games:
                print(f"No more games found at offset {offset}. Exiting.")
                break

            # Append the games to the list
            all_data.extend(games)
            print(f"Scraped {offset + len(games)} games")

            # Stop if fewer than 500 games are returned
            if len(games) < 500:
                break

            # Increment the offset
            offset += 500

        except requests.RequestException as e:
            print(f"Request failed: {e}")
            break

    return all_data

In [20]:
fetched=fetch()

Scraped 500 games
Scraped 1000 games
Scraped 1500 games
Scraped 2000 games
Scraped 2500 games
Scraped 3000 games
Scraped 3500 games
Scraped 4000 games
Scraped 4500 games
Scraped 5000 games
Scraped 5500 games
Scraped 6000 games
Scraped 6500 games
Scraped 7000 games
Scraped 7500 games
Scraped 8000 games
Scraped 8500 games
Scraped 9000 games
Scraped 9020 games


In [21]:
df=pd.DataFrame(fetched)

In [22]:
df.head()

Unnamed: 0,id,cover,first_release_date,genres,keywords,name,slug,summary,total_rating
0,10184,"{'id': 82735, 'image_id': 'co1ru7'}",1411430400,"[{'id': 11, 'slug': 'real-time-strategy-rts'},...","[{'id': 77, 'slug': 'tower-defense'}, {'id': 5...",Defense Grid 2,defense-grid-2,"Defense Grid 2, DG2, is the highly-anticipated...",82.923298
1,12329,"{'id': 359313, 'image_id': 'co7p8x'}",1417046400,"[{'id': 31, 'slug': 'adventure'}, {'id': 34, '...","[{'id': 226, 'slug': 'magic'}, {'id': 27094, '...",Fate/Hollow Ataraxia,fate-slash-hollow-ataraxia,Fate/Hollow Ataraxia is a Japanese visual nove...,96.469402
2,9592,"{'id': 29732, 'image_id': 'o9uof8ogyhrmui43ql3i'}",1198800000,"[{'id': 12, 'slug': 'role-playing-rpg'}, {'id'...","[{'id': 5, 'slug': 'zombies'}]",Sonny,sonny,"Dear Human,\n\nZombies. You've shot them, stab...",73.342457
3,8734,"{'id': 132464, 'image_id': 'co2u7k'}",1272326400,"[{'id': 8, 'slug': 'platform'}, {'id': 32, 'sl...","[{'id': 599, 'slug': 'browser-based'}, {'id': ...",Super Mario Bros. Crossover,super-mario-bros-crossover,Super Mario Bros. Crossover is a fan-made cros...,89.941855
4,165304,"{'id': 169868, 'image_id': 'co3n2k'}",1275264000,"[{'id': 15, 'slug': 'strategy'}]",,Age of War 2,age-of-war-2,Battle your way through the ages in this insan...,74.290353


In [23]:
def get_slug(items):
    if pd.isna(items).all() if isinstance(items, list) else pd.isna(items):
        return []
    
    return [
        item.get('slug', 'unknown')  
        for item in items
    ]



def get_time(unix):
    if pd.isna(unix):
        return None  
    return datetime.fromtimestamp(unix)


In [24]:
df['genres']=df['genres'].apply(get_slug)

In [25]:
df['keywords']=df['keywords'].apply(get_slug)

In [26]:
df['first_release_date']=df['first_release_date'].apply(get_time)

In [27]:
df["genres"] = df["genres"].apply(lambda x: " ".join(x) if isinstance(x, list) else str(x))
df["keywords"] = df["keywords"].apply(lambda x: " ".join(x) if isinstance(x, list) else str(x))

In [28]:
df['cover'] = df['cover'].apply(
    lambda x: f"https://images.igdb.com/igdb/image/upload/t_cover_big/{x['image_id']}.jpg" 
    if isinstance(x, dict) and 'image_id' in x else None
)

In [29]:
def clean_text(text):
    if not isinstance(text, str):
        text = str(text) if text is not None else ""
    text = unicodedata.normalize('NFKC', text)

    text = ''.join(c for c in text if c in string.printable)

    text = re.sub(r'\s+', ' ', text).strip()
    return text

In [30]:
df['genres'] = df['genres'].apply(clean_text)
df['keywords'] = df['keywords'].apply(clean_text)
df['summary'] = df['summary'].apply(clean_text)

In [31]:
df.head()

Unnamed: 0,id,cover,first_release_date,genres,keywords,name,slug,summary,total_rating
0,10184,https://images.igdb.com/igdb/image/upload/t_co...,2014-09-23 05:30:00,real-time-strategy-rts simulator strategy indie,tower-defense robots steam achievements steam-...,Defense Grid 2,defense-grid-2,"Defense Grid 2, DG2, is the highly-anticipated...",82.923298
1,12329,https://images.igdb.com/igdb/image/upload/t_co...,2014-11-27 05:30:00,adventure visual-novel,magic nsfw-version-exists,Fate/Hollow Ataraxia,fate-slash-hollow-ataraxia,Fate/Hollow Ataraxia is a Japanese visual nove...,96.469402
2,9592,https://images.igdb.com/igdb/image/upload/t_co...,2007-12-28 05:30:00,role-playing-rpg strategy adventure indie,zombies,Sonny,sonny,"Dear Human, Zombies. You've shot them, stabbed...",73.342457
3,8734,https://images.igdb.com/igdb/image/upload/t_co...,2010-04-27 05:30:00,platform indie,browser-based unofficial zelda free-to-play ma...,Super Mario Bros. Crossover,super-mario-bros-crossover,Super Mario Bros. Crossover is a fan-made cros...,89.941855
4,165304,https://images.igdb.com/igdb/image/upload/t_co...,2010-05-31 05:30:00,strategy,,Age of War 2,age-of-war-2,Battle your way through the ages in this insan...,74.290353


In [32]:
df.shape

(9020, 9)

In [33]:
df.isnull().sum()

id                     0
cover                 41
first_release_date     0
genres                 0
keywords               0
name                   0
slug                   0
summary                0
total_rating           0
dtype: int64

In [34]:
df.dropna(inplace=True)

In [35]:
df.shape

(8979, 9)

In [36]:
df = df.drop_duplicates(subset=['name'])

In [40]:
cover_url =df['cover'].iloc[10]
cover_url

'https://images.igdb.com/igdb/image/upload/t_cover_big/co63nd.jpg'

In [41]:
from PIL import Image
import requests
from io import BytesIO
response = requests.get(cover_url)
image = Image.open(BytesIO(response.content))
image.show()

In [42]:
df.to_csv('data.csv',index=False)