In [11]:
import os
import time

import pandas as pd
import requests

# Twitch developer credentials used to obtain an IGDB access token.
# They are read from the environment so the secret is never stored in the
# notebook, its outputs, or version control. A missing variable raises
# KeyError naming the variable that must be set.
# NOTE(review): any secret that was previously committed in this cell should
# be treated as compromised and rotated.
CLIENT_ID = os.environ['TWITCH_CLIENT_ID']
CLIENT_SECRET = os.environ['TWITCH_CLIENT_SECRET']

# OAuth2 client-credentials request against the Twitch identity service.
auth_url = 'https://id.twitch.tv/oauth2/token'
auth_params = {
    'client_id': CLIENT_ID,
    'client_secret': CLIENT_SECRET,
    'grant_type': 'client_credentials'
}

# The timeout keeps a stalled connection from hanging the cell forever.
auth_response = requests.post(auth_url, data=auth_params, timeout=30)
# Fail here with the HTTP status (e.g. invalid client) instead of a
# confusing KeyError on 'access_token' below.
auth_response.raise_for_status()
auth_data = auth_response.json()
access_token = auth_data['access_token']

# Endpoint queried for game records.
url = 'https://api.igdb.com/v4/games'

# Every request carries the client id and the bearer token obtained above.
headers = {
    'Client-ID': CLIENT_ID,
    'Authorization': 'Bearer {}'.format(access_token),
}

# Query clauses sent in the request body: the columns to return, and a filter
# that keeps only games where each listed attribute is non-null.
fields = (
    "\n"
    "fields name, genres.name, rating, platforms.name, first_release_date, involved_companies.company.name;\n"
    "where rating != null & genres != null & platforms != null & first_release_date != null & involved_companies != null;\n"
)

# Crawl configuration.
MAX_GAMES = 10000      # stop once at least this many complete records are collected
REQUEST_TIMEOUT = 30   # seconds; keeps a stalled connection from hanging the cell

# Keys a record must contain to count as "complete".
required_keys = ('rating', 'genres', 'platforms', 'first_release_date', 'involved_companies')

all_games_data = []
limit = 100    # page size requested per call
offset = 0     # index of the first record of the next page
total_games = 0


while total_games < MAX_GAMES:
    # Append the paging clauses to the shared field/filter query.
    body = f'''
    {fields}
    limit {limit};
    offset {offset};
    '''

    response = requests.post(url, headers=headers, data=body, timeout=REQUEST_TIMEOUT)
    # Surface HTTP failures (expired token, rate limiting, malformed query)
    # immediately. Without this check an error payload is filtered out below
    # and the short-page test ends the crawl silently with partial data.
    response.raise_for_status()
    games_data = response.json()
    if not isinstance(games_data, list):
        raise ValueError(
            f"Unexpected response at offset {offset}: expected a list of games, got {games_data!r}"
        )

    # Defensive re-check of the same completeness condition the query's
    # `where` clause requests.
    complete_games = [
        game for game in games_data
        if isinstance(game, dict) and all(key in game for key in required_keys)
    ]


    all_games_data.extend(complete_games)
    total_games += len(complete_games)

    print(f"Retrieved {total_games} complete games so far...")

    # A page shorter than the requested size means there is nothing left to fetch.
    if len(games_data) < limit:
        break

    offset += limit
    # Pause between requests -- presumably to stay under the API rate limit.
    time.sleep(0.3)

# One row per complete game; nested fields are still lists of dicts here.
games_df = pd.DataFrame(all_games_data)

def extract_info(column, subfield):
    """Join one field from a list of record dicts into a comma-separated string.

    Args:
        column: The cell value to flatten; expected to be a list of dicts.
            Any non-list value (None, NaN, scalars) is treated as missing.
        subfield: The key to read from each dict in the list.

    Returns:
        The values found under ``subfield`` joined with ``', '``, or
        ``'Unknown'`` when ``column`` is not a list or yields no values.
    """
    if not isinstance(column, list):
        return 'Unknown'

    # Skip entries that are not dicts or that lack the requested key instead
    # of raising, so a single malformed record cannot abort the whole apply().
    values = [
        str(item[subfield])
        for item in column
        if isinstance(item, dict) and item.get(subfield) is not None
    ]

    # An empty result uses the same sentinel as a missing column rather than
    # an empty string.
    return ', '.join(values) if values else 'Unknown'

def extract_companies(companies):
    """Join the company names of a list of involved-company records.

    Args:
        companies: The cell value to flatten; expected to be a list of dicts,
            each holding a nested ``'company'`` dict with a ``'name'`` key.
            Any non-list value (None, NaN, scalars) is treated as missing.

    Returns:
        The company names joined with ``', '``, or ``'Unknown'`` when
        ``companies`` is not a list or no name could be read.
    """
    if not isinstance(companies, list):
        return 'Unknown'

    names = []
    for entry in companies:
        # Tolerate entries without a nested company dict (or without a name)
        # rather than raising KeyError/TypeError part-way through apply().
        company = entry.get('company') if isinstance(entry, dict) else None
        name = company.get('name') if isinstance(company, dict) else None
        if name is not None:
            names.append(str(name))

    # An empty result uses the same sentinel as a missing value.
    return ', '.join(names) if names else 'Unknown'

# Flatten the nested genre / platform / company records into comma-separated
# text, convert the epoch-seconds release stamp to a datetime (values that
# cannot be converted become NaT), then discard the two raw source columns.
games_df = games_df.assign(
    genres=games_df['genres'].apply(extract_info, args=('name',)),
    platforms=games_df['platforms'].apply(extract_info, args=('name',)),
    companies=games_df['involved_companies'].apply(extract_companies),
    release_date=pd.to_datetime(
        games_df['first_release_date'], unit='s', errors='coerce'
    ),
).drop(columns=['involved_companies', 'first_release_date'])

# Persist the flattened table without the row index.
games_df.to_csv('filtered_igdb_games_data.csv', index=False)

print(f"Total complete games retrieved: {total_games}")
print("Filtered Dataset:")
print(games_df)


Retrieved 100 complete games so far...
Retrieved 200 complete games so far...
Retrieved 300 complete games so far...
Retrieved 400 complete games so far...
Retrieved 500 complete games so far...
Retrieved 600 complete games so far...
Retrieved 700 complete games so far...
Retrieved 800 complete games so far...
Retrieved 900 complete games so far...
Retrieved 1000 complete games so far...
Retrieved 1100 complete games so far...
Retrieved 1200 complete games so far...
Retrieved 1300 complete games so far...
Retrieved 1400 complete games so far...
Retrieved 1500 complete games so far...
Retrieved 1600 complete games so far...
Retrieved 1700 complete games so far...
Retrieved 1800 complete games so far...
Retrieved 1900 complete games so far...
Retrieved 2000 complete games so far...
Retrieved 2100 complete games so far...
Retrieved 2200 complete games so far...
Retrieved 2300 complete games so far...
Retrieved 2400 complete games so far...
Retrieved 2500 complete games so far...
Retrieved

In [12]:
# Rank the games from highest to lowest rating; games_df is rebound to the
# sorted frame.
games_df = games_df.sort_values('rating', ascending=False)

# Write the ranked table to its own CSV (row index omitted) for later analysis.
games_df.to_csv('filtered_igdb_games_data_sorted.csv', index=False)

print("Games sorted by rating (high to low):")
print(games_df)


Games sorted by rating (high to low):
          id                                             genres  \
8527   15188                                Simulator, Strategy   
5252  264773                  Hack and slash/Beat 'em up, Indie   
6194   46147                                 Role-playing (RPG)   
5632    5010                                   Simulator, Sport   
68     88973            Platform, Role-playing (RPG), Adventure   
...      ...                                                ...   
1800   87776                          Role-playing (RPG), Indie   
5491   13457  Role-playing (RPG), Strategy, Turn-based strat...   
9374    9077                                             Arcade   
3650  142217                                   Adventure, Indie   
1857  144040                      Role-playing (RPG), Adventure   

                                                   name  \
8527                                 Imperium Galactica   
5252                                   