# In [2]:
import os
import json
from datetime import datetime
import polars as pl
from helper_functions import save_data_to_json, load_json

# Format of the raw "release_date" strings, e.g. "9 Jul, 2013".
# NOTE: "%b" is matched against the current locale's abbreviated month names.
_RELEASE_DATE_FORMAT = "%d %b, %Y"


def _convert_date_to_timestamp(date_str):
    """Return the Unix timestamp for *date_str*, or ``None`` if it cannot be parsed.

    ``None`` is returned both when the string does not match
    ``_RELEASE_DATE_FORMAT`` (``ValueError``) and when the value is missing
    or not a string at all (``TypeError``), so one incomplete row does not
    abort the whole run.
    """
    try:
        # The parsed datetime is naive, so ``timestamp()`` interprets it in
        # the machine's local timezone.
        return int(datetime.strptime(date_str, _RELEASE_DATE_FORMAT).timestamp())
    except (TypeError, ValueError):
        return None


def _list_to_bool_columns(data, key):
    """Expand the list stored under *key* into ``{"<key>_<item>": True}`` flags.

    Item names are lower-cased and spaces become underscores. A missing key
    and an explicit ``None`` value are both treated as an empty list.
    """
    # ``data.get(key, [])`` would still return None for a key that is present
    # with a null value, hence the ``or []``.
    items = data.get(key) or []
    return {f"{key}_{item.replace(' ', '_').lower()}": True for item in items}


def _preprocess_row(data):
    """Build the flat, preprocessed record for one raw game-details row."""
    row = {
        "appid": data.get("appid"),
        "is_free": bool(data.get("is_free")),
        "price": data.get("price"),
        "release_date": _convert_date_to_timestamp(data.get("release_date")),
        "number_of_reviews": data.get("number_of_reviews"),
        "metacritic_score": data.get("metacritic_score"),
        "usk_rating": data.get("usk_rating"),
        "required_age": data.get("required_age"),
    }
    # Only flags for items actually present are added (always True), so
    # different rows may carry different sets of platform/genre keys.
    row.update(_list_to_bool_columns(data, "platforms"))
    row.update(_list_to_bool_columns(data, "genres"))
    return row


def preprocess_game_details(input_folder, output_folder):
    """Preprocess every ``.json`` file in *input_folder* into *output_folder*.

    Each input file is read with ``load_json``, every row is flattened by
    ``_preprocess_row``, and the resulting list of dicts is written with
    ``save_data_to_json`` to a file of the same name in *output_folder*.
    Entries whose name does not end in ``.json`` are ignored.

    Args:
        input_folder: Directory containing the raw game-details files.
        output_folder: Directory for the processed files; created if missing.
    """
    # Ensure the output directory exists
    os.makedirs(output_folder, exist_ok=True)

    for filename in os.listdir(input_folder):
        if not filename.endswith(".json"):
            continue

        # load_json must return an object exposing ``to_dicts()``
        # (presumably a polars DataFrame -- confirm in helper_functions).
        df = load_json(os.path.join(input_folder, filename))
        preprocessed_rows = [_preprocess_row(data) for data in df.to_dicts()]

        # Write processed data to a file with the same name in the output folder
        save_data_to_json(preprocessed_rows, os.path.join(output_folder, filename))

# Where the raw game details are read from and where the results are written
input_folder, output_folder = 'data/game_details', 'data/game_details_preprocessed'

# Run the preprocessing step with the directories configured above
preprocess_game_details(input_folder=input_folder, output_folder=output_folder)
