In [1]:
import asyncio
import rawg
import pandas as pd
import boto3
import os


In [2]:
# Initialize S3 client
s3 = boto3.client('s3',
    aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'),
    aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'),
    region_name=os.getenv('AWS_REGION')
)

In [4]:
# Create bucket
bucket_name = 'rawg-pyspark'

try:
    s3.create_bucket(
        Bucket=bucket_name,
        CreateBucketConfiguration={
            'LocationConstraint': os.getenv('AWS_REGION')
        }
    )
    print(f"Successfully created bucket: {bucket_name}")
    
    # Set up bucket folders/prefixes
    folders = [
        'raw/games/',      # Raw data from RAWG API
        'processed/games/' # Transformed data
    ]
    
    for folder in folders:
        s3.put_object(Bucket=bucket_name, Key=folder)
        print(f"Created folder: {folder}")
        
except Exception as e:
    print(f"Error: {e}")

Successfully created bucket: rawg-pyspark
Created folder: raw/games/
Created folder: processed/games/


In [5]:
async def fetch_games_with_rawg(start_date, end_date):
    games_list = []
    
    async with rawg.ApiClient(
        rawg.Configuration(
            api_key={'key': os.getenv('RAWG_API_KEY')}
        )
    ) as api_client:
        api = rawg.GamesApi(api_client)
        
        try:
            games = await api.games_list(
                dates=f"{start_date},{end_date}",
                ordering='-released',
                page_size=40,
                page=1
            )
            
            for game in games.results:
                platforms = [p['platform']['name'] for p in game.to_dict()['platforms']] if game.platforms else []
                
                game_data = {
                    'id': game.id,
                    'name': game.name,
                    'released': game.released,
                    'rating': float(game.rating) if game.rating else None,
                    'ratings_count': int(game.ratings_count) if game.ratings_count else None,
                    'metacritic': int(game.metacritic) if game.metacritic else None,
                    'playtime': int(game.playtime) if game.playtime else None,
                    'platforms': ', '.join(platforms),
                    'slug': game.slug,
                    'background_image': game.background_image,
                    'rating_top': int(game.rating_top) if game.rating_top else None
                }
                games_list.append(game_data)
                
        except Exception as e:
            print(f"Error fetching data: {e}")
    
    return pd.DataFrame(games_list)

In [7]:
# Fetch data
df_games = await fetch_games_with_rawg('2024-08-01', '2024-08-31')
df_games.head()

Unnamed: 0,id,name,released,rating,ratings_count,metacritic,playtime,platforms,slug,background_image,rating_top
0,988108,World Without Reason,2024-08-31,,,,,"PC, macOS, Linux",world-without-reason,https://media.rawg.io/media/screenshots/2fa/2f...,
1,988238,Slutlike,2024-08-31,,,,,PC,slutlike,https://media.rawg.io/media/screenshots/ca9/ca...,
2,988228,Canfield Solitaire Classic Card Game,2024-08-31,,,,,PC,canfield-solitaire-classic-card-game,https://media.rawg.io/media/screenshots/7b8/7b...,
3,988227,Monkey Forward,2024-08-31,,,,,PC,monkey-forward,https://media.rawg.io/media/screenshots/5bd/5b...,
4,988225,PacaPomo,2024-08-31,,,,,PC,pacapomo,https://media.rawg.io/media/screenshots/242/24...,


In [8]:
# Save to CSV and upload to S3
csv_buffer = df_games.to_csv(index=False)
s3.put_object(Bucket='rawg-pyspark', Key='raw/games/games_2024_08.csv', Body=csv_buffer)

{'ResponseMetadata': {'RequestId': 'T3XBQES2C8QPPSDJ',
  'HostId': 'UGod8JgmDbLgt9AOhRU3VB80G3I1vBWaQ8qrovt7tYL3i3ZIUlARhB46L+OkdTYuVA5wIPhPLI8=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'UGod8JgmDbLgt9AOhRU3VB80G3I1vBWaQ8qrovt7tYL3i3ZIUlARhB46L+OkdTYuVA5wIPhPLI8=',
   'x-amz-request-id': 'T3XBQES2C8QPPSDJ',
   'date': 'Tue, 03 Dec 2024 06:44:31 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"7dc80c7f90f6d13889f4081b092f537b"',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"7dc80c7f90f6d13889f4081b092f537b"',
 'ServerSideEncryption': 'AES256'}