In [2]:
import asyncio
import rawg
import pandas as pd
import boto3
import os


In [3]:
# Initialize S3 client
s3 = boto3.client('s3',
    aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'),
    aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'),
    region_name=os.getenv('AWS_REGION')
)

In [4]:
# Create bucket
bucket_name = 'rawg-pyspark'

try:
    s3.create_bucket(
        Bucket=bucket_name,
        CreateBucketConfiguration={
            'LocationConstraint': os.getenv('AWS_REGION')
        }
    )
    print(f"Successfully created bucket: {bucket_name}")
    
    # Set up bucket folders/prefixes
    folders = [
        'raw/games/',      # Raw data from RAWG API
        'processed/games/' # Transformed data
    ]
    
    for folder in folders:
        s3.put_object(Bucket=bucket_name, Key=folder)
        print(f"Created folder: {folder}")
        
except Exception as e:
    print(f"Error: {e}")

Error: An error occurred (BucketAlreadyOwnedByYou) when calling the CreateBucket operation: Your previous request to create the named bucket succeeded and you already own it.


In [12]:
async def fetch_games_with_rawg(start_date, end_date):
    games_list = []
    
    async with rawg.ApiClient(
        rawg.Configuration(
            api_key={'key': os.getenv('RAWG_API_KEY')}
        )
    ) as api_client:
        api = rawg.GamesApi(api_client)
        
        try:
            games = await api.games_list(
                dates=f"{start_date},{end_date}",
                ordering='-ratings'
            )
            
            for game in games.results:
                platforms = [p['platform']['name'] for p in game.to_dict()['platforms']] if game.platforms else []
                
                game_data = {
                    'id': game.id,
                    'name': game.name,
                    'released': game.released,
                    'rating': float(game.rating) if game.rating else None,
                    'ratings_count': int(game.ratings_count) if game.ratings_count else None,
                    'metacritic': int(game.metacritic) if game.metacritic else None,
                    'playtime': int(game.playtime) if game.playtime else None,
                    'platforms': ', '.join(platforms),
                    'slug': game.slug,
                    'background_image': game.background_image,
                    'rating_top': int(game.rating_top) if game.rating_top else None
                }
                games_list.append(game_data)
                
        except Exception as e:
            print(f"Error fetching data: {e}")
    
    return pd.DataFrame(games_list)

In [18]:
# Fetch data
df_games = await fetch_games_with_rawg('2024-06-01', '2024-12-01')
df_games.head()

Unnamed: 0,id,name,released,rating,ratings_count,metacritic,playtime,platforms,slug,background_image,rating_top
0,58806,Satisfactory,2024-09-11,4.29,289,,12.0,PC,satisfactory,https://media.rawg.io/media/games/9b0/9b03581c...,5.0
1,303576,Vampire: The Masquerade - Bloodlines 2,2024-11-30,3.89,255,,329.0,"PC, PlayStation 5, Xbox One, PlayStation 4, Xb...",vampire-the-masquerade-bloodlines-2,https://media.rawg.io/media/games/fb5/fb5e0fdb...,5.0
2,58386,S.T.A.L.K.E.R. 2: Heart of Chornobyl,2024-11-20,3.81,197,,3.0,"PC, Xbox Series S/X",stalker-2,https://media.rawg.io/media/games/3e8/3e81585e...,5.0
3,616688,Core Keeper,2024-08-26,4.0,63,,4.0,"PC, Linux",core-keeper,https://media.rawg.io/media/games/8d9/8d980d35...,4.0
4,481913,Black Myth: Wukong,2024-08-20,4.37,71,,,"PC, PlayStation 5, Xbox Series S/X",black-myth-wu-kong,https://media.rawg.io/media/games/779/77988e89...,5.0


In [19]:
# Save to CSV and upload to S3
csv_buffer = df_games.to_csv(index=False)
s3.put_object(Bucket='rawg-pyspark', Key='raw/games/games_2024_06_12.csv', Body=csv_buffer)

{'ResponseMetadata': {'RequestId': '5G42QYKPVDXZZXSG',
  'HostId': 'aTyKGSAlcWsWxicisMLNaDQgWXTrxodvhSsBIs3Owq4WABRuwffodL5gx4MMBlt7rFEU/J3YSgg=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'aTyKGSAlcWsWxicisMLNaDQgWXTrxodvhSsBIs3Owq4WABRuwffodL5gx4MMBlt7rFEU/J3YSgg=',
   'x-amz-request-id': '5G42QYKPVDXZZXSG',
   'date': 'Tue, 03 Dec 2024 07:26:46 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"225da50f84883ba0272f286124f7fa7a"',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"225da50f84883ba0272f286124f7fa7a"',
 'ServerSideEncryption': 'AES256'}