In [315]:
import json
from openai import OpenAI
from tiktoken import encoding_for_model
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)  # for exponential backoff
import pandas as pd

In [35]:
# Initiate client

client = OpenAI(
    api_key = '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'
)

In [60]:
# Load the input data
input_file_path = "data/output/input_data_for_gpt_genre_cleaning_85.json"
taxonomy_file_path = "data/output/sensor_tower_game_classification.json"
output_file_path = "data/output_gpt/game_validation_gpt_api_90_pct.json"
model_name = "gpt-4o"  # Read more about OpenAI models: platform.openai.com/docs/models

In [61]:
# Load the game data
with open(input_file_path, "r") as file:
    games_data = json.load(file)

In [29]:
# Standard taxonomy (assuming it's predefined or provided)
with open(taxonomy_file_path, "r") as file:
    standard_taxonomy = json.load(file)

# Supporting Functions

In [25]:
def generate_prompt(game_data):
    """
    Generate a prompt using the template, game data, and standard taxonomy.
    """
    template = f"""
    You are a mobile game expert. You will be given a dataset of game classification and relevant game information. Your task is to determine whether the classification is valid.

    **INSTRUCTIONS**
    - Standard taxonomy to classify games in terms of class, genre, subgenre is provided below. Please follow this standard in giving the game appropriate classification.
    - Game data is also provided below. Game is usually comprised of multiple apps (for different operating systems and markets)
    - Carefully analyze short description, description, subtitle, and promo text of each app of the game to determine which classification is most appropriate for the game.
    - If the apps’ short description, description, subtitle, and promo text are not available and not detailed enough, you should do additional research about the game and use your general game industry knowledge to classify it. These cases where additional research is required should be noted in the output.
    - Compare your own classification with the current classification of the game. If they are different then the current classification might probably have some problem. Flag these cases in the output as well.
    - Provide the rationale for your classification and why you think the current classification of the game is valid/invalid in maximum 5 sentences.

    **OUTPUT FORMAT** 
    JSON object with the following properties
    - game_id
    - game_name
    - current_game_class
    - current_game_genre
    - current_game_subgenre
    - is_current_genre_valid (True or False)
    - corrected_game_class
    - corrected_game_genre
    - corrected_game_subgenre
    - rationale
    - requiring_additional_research (True or False)

    All the JSON properties must always be present.

    **SUPPORTING DATA**
    STANDARD TAXONOMY:
    {json.dumps(standard_taxonomy, indent=4)}

    GAME DATA:
    {json.dumps(game_data, indent=4)}
    """
    return template.strip()

In [24]:
def count_total_tokens(data, model=model_name):
    """Counts the total number of tokens for all prompts."""
    total_tokens = 0
    for game in data:
        prompt = generate_prompt(game)
        total_tokens += count_tokens(prompt, model=model)
    return total_tokens

In [101]:
@retry(wait=wait_random_exponential(min=20, max=120), stop=stop_after_attempt(10))
def completion_with_backoff(**kwargs):
    return client.chat.completions.create(**kwargs)

In [66]:
def send_requests_live(data, model=model_name):
    """Sends live requests to the API for each game's data."""
    results = []
    for game in data:
        prompt = generate_prompt(game)
        
        response = completion_with_backoff(
            model=model,
            messages=[{"role": "system", "content": prompt}]
        )
        
        result = json_string_to_dict(
                response.choices[0].message.content.replace('```json','').replace('```','').strip()
            )
        
        results.append(result)
        
    return results

In [225]:
def json_string_to_dict(json_string):
    """
    Converts a JSON-like string into a dictionary.

    Args:
        json_string (str): The JSON-like string.

    Returns:
        dict: A dictionary representation of the JSON string.
    """
    try:
        # Parse the JSON string
        result_dict = json.loads(json_string)
        return result_dict
    except json.JSONDecodeError as e:
        print("Failed to parse JSON string. Error:", e)
        print("Error json_string:\n")
        print(json_string)
        return None

In [230]:
def json_string_to_dict_v2(json_string):
    """
    Converts a JSON-like string into a dictionary, handling potential formatting issues.

    Args:
        json_string (str): The JSON-like string.

    Returns:
        dict: A dictionary representation of the JSON string.
    """
    
    original_json_string = json_string
    
    try:
        # Replace single quotes with double quotes for valid JSON
        json_string = json_string.replace("'", '"')
        
        # Escape unescaped double quotes inside string values
        json_string = json_string.replace('": "', '\": \"').replace('", "', '\", \"')

        # Parse the JSON string
        result_dict = json.loads(json_string)
        return result_dict
    except json.JSONDecodeError as e:
        print("Failed to parse JSON string. Error:", e)
        print("Problematic JSON string:")
        print(original_json_string)
        return None


In [239]:
def list_without_none(l):
    return list(
        filter(
            lambda x: not x is None,
            l
        )
    )

In [233]:
def extract_game_data(responses):
    """
    Convert a list of OpenAI API response objects into a list of game dictionaries.
    
    Args:
        responses (list): List of ChatCompletion objects.

    Returns:
        list: List of dictionaries representing game data.
    """
    result = []

    for response in responses:

        # Extract the content of the first choice
        content = response.choices[0].message.content

        # Remove Markdown code block syntax if present
        clean_content = content.replace('```json','').replace('```','').replace('True','true').replace('False', 'false').strip()

        # Convert the cleaned JSON string into a Python dictionary
        game_data = json_string_to_dict(clean_content)

        # Append the parsed dictionary to the result list
        result.append(game_data)
            
    return result

In [267]:
def get_game_ids_list(list_of_dicts):
    return list(
        map(
            lambda x: x['game_id'],
            list_of_dicts
        )
    )

In [280]:
def create_game_token_count_table(game_data):
    return list(
        map(
            lambda x:
                {
                    'game_id': x['game_id'],
                    'game_name': x['game_name'],
                    'token_count': count_tokens(generate_prompt(x), model=model_name),
                },
            game_data
        )
    )

In [285]:
def create_app_count_table(game_data):
    return list(
        map(
            lambda x:
                {
                    'game_id': x['game_id'],
                    'game_name': x['game_name'],
                    'app_count': len(x['individual_app_info']),
                },
            game_data
        )
    )

In [296]:
def get_app_with_longest_description(individual_app_info_list):
    
    char_count_table = list(
        map(
            lambda x: {
                'app_id': x['app_id'],
                'description_len': len(x['description']) 
            },
            individual_app_info_list
        )
    )
    
    max_len = max(
        list(
            map(
                lambda x: x['description_len'],
                char_count_table
            )
        )
    )
    
    max_len_apps = list(
        filter(
            lambda x: x['description_len'] == max_len,
            char_count_table
        )
    )
    
    max_len_app_ids = list(
        map(
            lambda x: x['app_id'],
            max_len_apps
        )
    )
    
    return list(
        filter(
            lambda x: x['app_id'] in max_len_app_ids,
            individual_app_info_list
        )
    ) 

# Main

In [62]:
# Count the total number of tokens

total_tokens = count_total_tokens(games_data)

In [63]:
total_tokens

5623005

In [119]:
games_token_count = list(
    map(
        lambda x:
            {
                'game_id': x['game_id'],
                'game_name': x['game_name'],
                'token_count': count_tokens(generate_prompt(x), model=model_name),
            },
        games_data
    )
)

In [121]:
## Get games where token_count > 30000 for manual processing 

In [123]:
games_exceeding_limit = list(
    filter(
        lambda x: x['token_count'] >= 30000,
        games_token_count
    )
)

In [126]:
game_ids_exceeding_limit = list(
    map(
        lambda x: x['game_id'],
        games_exceeding_limit
    )
)

In [131]:
games_data_requiring_manual_processing = list(
    filter(
            lambda x: x['game_id'] in game_ids_exceeding_limit,
            games_data
        )
)

In [134]:
games_data_api_processing = list(
    filter(
        lambda x: not x['game_id'] in list(
            map(
                lambda x: x['game_id'],
                games_data_requiring_manual_processing
            )
        ),
        games_data
    )
)

In [68]:
# Call the API

Batch 1

In [69]:
results = []

In [73]:
for game in games_data:
    
    print('Asking ChatGPT about {} {}'.format(game['game_id'], game['game_name']))
    
    prompt = generate_prompt(game)
    
    print('Calling the API...')

    response = completion_with_backoff(
        model=model_name,
        messages=[{"role": "system", "content": prompt}]
    )
    
    print('Call successfuly! Saving the response...')

    result = json_string_to_dict(
            response.choices[0].message.content.replace('```json','').replace('```','').strip()
        )

    results.append(result)

Asking ChatGPT about 55c5013e02ac64f9c0001f1c Smurfs' Village
Calling the API...
Call successfuly! Saving the response...
Asking ChatGPT about 55d93e8f02ac645ad210a5fd 攻城掠地
Calling the API...
Call successfuly! Saving the response...
Asking ChatGPT about 5626a06702ac6437f1000047 率土之滨
Calling the API...
Call successfuly! Saving the response...
Asking ChatGPT about 6439381ea0f47f0e94b71f6f Giang Hồ: Bát Phái Phân Tranh
Calling the API...
Call successfuly! Saving the response...
Asking ChatGPT about 5d5609e88125b80b6f4a2657 기적의 검
Calling the API...
Call successfuly! Saving the response...
Asking ChatGPT about 5570f7715a3b416e6a000261 Ongame Sám cô - Xì tố Poker 7 lá
Calling the API...
Call successfuly! Saving the response...
Asking ChatGPT about 5b220c230e64c70b3e4d02d4 EA SPORTS FC Online M
Calling the API...
Call successfuly! Saving the response...
Asking ChatGPT about 6470444f124b5028b45d9743 Zombie.io: Potato Shooting
Calling the API...
Call successfuly! Saving the response...
Asking C

RetryError: RetryError[<Future at 0x7feab820b250 state=finished raised RateLimitError>]

In [254]:
results_without_none = list_without_none(results)

In [255]:
with open('data/output_gpt/genre_validation_gpt_output_json_batch_001.json', 'w', encoding='utf-8') as file:
    json.dump(results_without_none, file, ensure_ascii=False, indent=4)  # `ensure_ascii=False` keeps non-ASCII characters readable

Batch 2

In [91]:
processed_game_ids = list(
    map(
        lambda x: x['game_id'],
        list(
            filter(
                lambda x: not x is None,
                results
            )
        )
    )
)

In [97]:
unprocessed_games_data = list(
    filter(
        lambda x: not x['game_id'] in processed_game_ids,
        games_data
    )
)

In [102]:
results_2 = []

In [103]:
raw_results_2 = []

In [104]:
count = 1

for game in unprocessed_games_data:
    
    print('Asking ChatGPT about game number {} {} {}'.format(count, game['game_id'], game['game_name']))
    
    prompt = generate_prompt(game)
    
    print('Calling the API...')

    response = completion_with_backoff(
        model=model_name,
        messages=[{"role": "system", "content": prompt}]
    )
    
    print('Call successfuly! Saving the response...\n')
    
    raw_results_2.append(response)
    
    json_result = json_string_to_dict(
            response.choices[0].message.content.replace('```json','').replace('```','').strip()
        )

    results_2.append(json_result)
    
    count += 1

Asking ChatGPT about game number 1 55d3a1a802ac64350a000d6e Roblox
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 2 5378307a830f782dbe0000d2 MARVEL Puzzle Quest: Match RPG
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 3 55d3a19e02ac64350a00019b Emross War
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 4 63882256bfa0280535094a22 Revelation: New World
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 5 55d3a1a102ac64350a0006e6 卧龙吟豪华版-三国策略战争手游
Calling the API...


RetryError: RetryError[<Future at 0x7feab94dea90 state=finished raised RateLimitError>]

In [257]:
results_2_without_none = list_without_none(results_2)

In [258]:
with open('data/output_gpt/genre_validation_gpt_output_json_batch_002.json', 'w', encoding='utf-8') as file:
    json.dump(results_2_without_none, file, ensure_ascii=False, indent=4)  # `ensure_ascii=False` keeps non-ASCII characters readable

Batch 3

In [109]:
processed_game_ids = processed_game_ids + list(
    map(
        lambda x: x['game_id'],
        list(
            filter(
                lambda x: not x is None,
                results_2
            )
        )
    )
)

In [140]:
unprocessed_games_data_api_only = list(
    filter(
        lambda x: not x['game_id'] in processed_game_ids,
        games_data_api_processing
    )
)

In [142]:
results_3 = []
raw_results_3 = []

In [143]:
count = 1

for game in unprocessed_games_data_api_only:
    
    print('Asking ChatGPT about game number {} {} {}'.format(count, game['game_id'], game['game_name']))
    
    prompt = generate_prompt(game)
    
    print('Calling the API...')

    response = completion_with_backoff(
        model=model_name,
        messages=[{"role": "system", "content": prompt}]
    )
    
    print('Call successfuly! Saving the response...\n')
    
    raw_results_3.append(response)
    
    json_result = json_string_to_dict(
            response.choices[0].message.content.replace('```json','').replace('```','').strip()
        )

    results_3.append(json_result)
    
    count += 1

Asking ChatGPT about game number 1 55d3a1a802ac64350a000d4f 忘仙2
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 2 607d9df6dc2ede5c26edccb1 Ngôi Sao Lấp Lánh
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 3 576fc77df7b5cc0861000419 Empires & Puzzles: Dragon Dawn
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 4 6178da8bfb94934d110250cb Hồng Đồ Chi Hạ
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 5 6220bbe38b9e665095bc83e4 全民泡泡超人
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 6 5d9fe60d01281d2bde753c3d Immortal Taoists - Idle Manga
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 7 63a0fbe3ad4adb6b3ba39504 Tân OMG3Q VNG
Calling the API...
Call successfuly! Saving the response...

Failed to parse JSON string. Error

Call successfuly! Saving the response...

Asking ChatGPT about game number 56 5378434d830f782dbe0005c3 ZENONIA® 5
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 57 55c5240f02ac64f9c0002a31 War Robots Multiplayer Battles
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 58 63bdd6b32d63ee0ecbfc3636 Garena Cái Thế Tranh Hùng
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 59 55c40f43a0b4c526c9003a05 Three Kingdoms Global
Calling the API...
Call successfuly! Saving the response...

Failed to parse JSON string. Error: Expecting value: line 7 column 31 (char 246)
Asking ChatGPT about game number 60 55d3a1a802ac64350a000cb7 Frontline Commando
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 61 5661c5f102ac6436900000bc Last Empire - War Z: Strategy
Calling the API...
Call successfuly! Saving the response...

Asking

Call successfuly! Saving the response...

Asking ChatGPT about game number 110 55c52ff602ac64f9c0002dbe SimCity BuildIt
Calling the API...
Call successfuly! Saving the response...

Failed to parse JSON string. Error: Expecting value: line 7 column 31 (char 249)
Asking ChatGPT about game number 111 5f378fe871feeb730dbbc0fb Puzzles & Survival
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 112 55d20e3a02ac64ee73000033 Where's My Water? Free
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 113 5f09e992d871ea7bfa1d261b Travel Town - Merge Adventure
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 114 6364ea2f68948619f25880c9 RO仙境傳說：新世代的誕生
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 115 55c513a702ac64f9c0002599 Ace Fishing: Wild Catch
Calling the API...
Call successfuly! Saving the response...

Asking ChatGP

Call successfuly! Saving the response...

Asking ChatGPT about game number 164 55c5052f02ac64f9c00020b7 Fruit Ninja®
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 165 55de161402ac640423000042 Need for Speed™ Most Wanted
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 166 55d3a1a802ac64350a000c7b Wedding Dash
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 167 55c52bc102ac64f9c0002c6c Kill Shot
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 168 64870d70b3ae27253f16c069 Subway Surfers
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 169 568a601402ac64a409000104 Clash Royale
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 170 57b2857a071f46083500022e 리니지2 레볼루션
Calling the API...
Call successfuly! Saving the response

Call successfuly! Saving the response...

Failed to parse JSON string. Error: Expecting value: line 7 column 31 (char 245)
Asking ChatGPT about game number 218 62b18ff2991fd92dd394d299 秘境傳說：神木新世界
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 219 5aad42d99f479f0a74d40440 PUBG MOBILE
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 220 60f151ef6ffae745341f100f Doomsday: B.Duck
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 221 6362a8a3ad4adb78a8e5167b Bida 8 bi ZingPlay
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 222 551b7f69e3943916f200005b Pixel Gun 3D: Online Shooter
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 223 55c502cd02ac64f9c0001fe4 Minion Rush: Running Game
Calling the API...
Call successfuly! Saving the response...

Failed to parse JSON stri

Call successfuly! Saving the response...

Failed to parse JSON string. Error: Expecting value: line 7 column 31 (char 247)
Asking ChatGPT about game number 272 55d20e4902ac64ee73000308 Delicious Honeymoon Cruise HD
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 273 55c5028e02ac64f9c0001fbb Modern Combat 4: Zero Hour
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 274 55c50ebd02ac64f9c0002423 Slots™
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 275 55d3a1a802ac64350a000d37 Clash of Lords 2: Guild Castle
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 276 55d3a1a802ac64350a000cde Farm Heroes Saga
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 277 55d93f0802ac645ad2120ff1 乱斗西游2-十周年庆典
Calling the API...
Call successfuly! Saving the response...

Failed to parse

In [259]:
results_3_without_none = list_without_none(results_3)

In [260]:
with open('data/output_gpt/genre_validation_gpt_output_json_batch_003.json', 'w', encoding='utf-8') as file:
    json.dump(results_3_without_none, file, ensure_ascii=False, indent=4)  # `ensure_ascii=False` keeps non-ASCII characters readable

batch 4 (process error-json-parsing results in batch 3)

In [149]:
processed_game_ids = processed_game_ids + list(
    map(
        lambda x: x['game_id'],
        list(
            filter(
                lambda x: not x is None,
                results_3
            )
        )
    )
)

In [152]:
batch_3_unprocessed_games_ids = list(
    map(
        lambda x: x['game_id'],
        unprocessed_games_data_api_only
    )
)

In [155]:
batch_3_results_not_none = list(
    filter(
        lambda x: not x is None,
        results_3
    )
)

In [157]:
batch_3_results_not_none_ids = list(
    map(
        lambda x: x['game_id'],
        batch_3_results_not_none
    )
)

In [161]:
batch_3_results_none_ids = list(
    filter(
        lambda x: not x in batch_3_results_not_none_ids,
        batch_3_unprocessed_games_ids
    )
)

In [176]:
error_response_objects = []

for i in range(len(batch_3_unprocessed_games_ids)):
    if batch_3_unprocessed_games_ids[i] in batch_3_results_none_ids:
        error_response_objects.append(
            raw_results_3[i]
        )

In [234]:
# Example usage
# Assuming `response_objects` is the list of ChatCompletion objects provided
extracted_data = extract_game_data(error_response_objects)

Failed to parse JSON string. Error: Expecting value: line 1 column 1 (char 0)
Error json_string:

Given the provided game data and standard taxonomy, let's determine if the current classification of the game "Tiên Vương 4D" is valid.

1. **Game Data**:
   - **Game Description**: The game is introduced as an MMORPG (Massively Multiplayer Online Role-Playing Game) with features like martial arts mythology, fighting bosses, automatic battle systems, and leveling up with high-end equipment. The elements of the game suggest a persistent online world with multiplayer interactions.
   - **Current Classification**:
     - **Class**: Action & Strategy
     - **Genre**: RPG
     - **Subgenre**: MMORPG

2. **Standard Taxonomy Analysis**:
   - **Action & Strategy**: This class includes games like Action or RPG that typically require complex mechanics and significant player time investment.
   - **RPG**: The RPG genre focuses on character leveling and storytelling, which fit MMORPG characteristics.

In [241]:
results_3_point_5 = list_without_none(extracted_data) 

In [243]:
results_3_point_5.append(
    {
        "game_id": "65fd06dba6a66211427de1cd",
        "game_name": "Tiên Vương 4D",
        "current_game_class": "Action & Strategy",
        "current_game_genre": "RPG",
        "current_game_subgenre": "MMORPG",
        "is_current_genre_valid": True,
        "corrected_game_class": "Action & Strategy",
        "corrected_game_genre": "RPG",
        "corrected_game_subgenre": "MMORPG",
        "rationale": "The game is correctly classified as an MMORPG under the RPG genre of Action & Strategy class, as per the standard taxonomy. The game features and elements described align with MMORPG characteristics, supporting the entire current classification.",
        "requiring_additional_research": False
    }
)

In [266]:
results_1_to_3p5 = results_without_none + results_2_without_none + results_3_without_none + results_3_point_5

In [274]:
with open('data/output_gpt/genre_validation_gpt_output_json_batch_001_to_003p5.json', 'w', encoding='utf-8') as file:
    json.dump(results_1_to_3p5, file, ensure_ascii=False, indent=4)  # `ensure_ascii=False` keeps non-ASCII characters readable

In [268]:
game_ids_results_1_to_3p5 = get_game_ids_list(results_1_to_3p5)

batch 5 (process the rest)

In [277]:
games_data_batch_5 = list(
    filter(
        lambda x: not x['game_id'] in game_ids_results_1_to_3p5,
        games_data
    )
)

In [298]:
games_data_batch_5_longest_description_apps_only = list(
    map(
        lambda x: {
            'game_id': x['game_id'],
            'game_name': x['game_name'],
            'game_class': x['game_class'],
            'game_genre': x['game_genre'],
            'game_subgenre': x['game_subgenre'],
            'individual_app_info': get_app_with_longest_description(x['individual_app_info']),
        },
        games_data_batch_5
    )
)

In [301]:
games_data_batch_5_longest_description_apps_only_token_count_table = create_game_token_count_table(games_data_batch_5_longest_description_apps_only)

In [303]:
results_5 = []
raw_results_5 = []

In [304]:
count = 1

for game in games_data_batch_5_longest_description_apps_only:
    
    print('Asking ChatGPT about game number {} {} {}'.format(count, game['game_id'], game['game_name']))
    
    prompt = generate_prompt(game)
    
    print('Calling the API...')

    response = completion_with_backoff(
        model=model_name,
        messages=[{"role": "system", "content": prompt}]
    )
    
    print('Call successfuly! Saving the response...\n')
    
    raw_results_5.append(response)
    
    json_result = json_string_to_dict(
            response.choices[0].message.content.replace('```json','').replace('```','').replace('True','true').replace('False', 'false').strip()
        )

    results_5.append(json_result)
    
    count += 1

Asking ChatGPT about game number 1 55d3a1a102ac64350a0006e6 卧龙吟豪华版-三国策略战争手游
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 2 58a652db7100fb0af90015ba 崩坏3-星穹铁道联动
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 3 55d20e4002ac64ee7300011c 英雄战魂OL
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 4 55d9560502ac645ad227637e 时空猎人-枪械三觉
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 5 55d20e5f02ac64ee730007bd 天龙3D-功夫新少林
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 6 55d3a19e02ac64350a00014c 博雅德州撲克 texas poker Boyaa
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 7 55d3a1a902ac64350a00109b Clash of Throne
Calling the API...
Call successfuly! Saving the response...

Asking ChatGPT about game number 8 55d3a19e02ac64350a0002

In [307]:
## Concatenate all the results list

results_all = results_1_to_3p5 + results_5

In [311]:
with open('data/output_gpt/genre_validation_gpt_output.json', 'w', encoding='utf-8') as file:
    json.dump(results_all, file, ensure_ascii=False, indent=4)  # `ensure_ascii=False` keeps non-ASCII characters readable

In [313]:
## Export to csv for manual check 

In [316]:
df_results_all = pd.DataFrame(results_all)

In [318]:
df_results_all.to_csv('data/output_gpt/genre_validation_gpt_output.csv')

# Draft Zone

In [54]:
test_game = list(
    filter(
        lambda x: x['game_id'] == '642ea283c084ff1c6afcc825',
        games_data
    )
)

In [55]:
test_game

[{'game_id': '642ea283c084ff1c6afcc825',
  'game_name': 'Honkai Impact 3-VN',
  'game_class': 'Action & Strategy',
  'game_genre': 'Strategy',
  'game_subgenre': 'MOBA',
  'individual_app_info': [{'app_id': 6448864292,
    'name': 'Honkai Impact 3-VN',
    'short_description': None,
    'description': 'Honkai Impact 3 là game hành động mạo hiểm mới của HoYoverse.\nGóc nhìn 3D tinh xảo, hệ thống hành động nối chiêu vô hạn, cảm giác sảng khoái mạnh mẽ... Tạo nên trò chơi hành động của thời đại!\nChuỗi câu chuyện Honkai rung động lòng người, cốt truyện sâu sắc, đội ngũ lồng tiếng hào hoa, sẽ đưa bạn chìm đắm vào câu chuyện, tận hưởng cảm giác chưa từng có.\nNguy cơ trên Trái Đất tạm thời đã chấm dứt, cuộc mạo hiểm mới trên Sao Hỏa đã bắt đầu.\nGặp gỡ những Valkyrie với tính cách khác nhau, cùng khám phá bí mật của nền văn minh Sao Hỏa.\n\nHệ thống chỉ huy Hyperion đã sẵn sàng, đang xử lý yêu cầu đăng nhập... Kiểm tra hoàn tất!\nToàn thể nghe lệnh, mở khóa chốt phòng hộ, động cơ tải bắt đầ

In [58]:
test_result_lit = send_requests_live(
    data=test_game
)

In [59]:
test_result_lit

[{'game_id': '642ea283c084ff1c6afcc825',
  'game_name': 'Honkai Impact 3-VN',
  'current_game_class': 'Action & Strategy',
  'current_game_genre': 'Strategy',
  'current_game_subgenre': 'MOBA',
  'is_current_genre_valid': False,
  'corrected_game_class': 'Action & Strategy',
  'corrected_game_genre': 'RPG',
  'corrected_game_subgenre': 'Action RPG',
  'rationale': "Honkai Impact 3-VN is primarily an action role-playing game with an emphasis on real-time combat mechanics, character customization, and story-driven content. The short descriptions and detailed narratives focus on the characters, storyline, and combat system, typical attributes of an Action RPG rather than a MOBA. Thus, the game's classification as a 'Strategy' genre under 'MOBA' is inaccurate.",
  'requiring_additional_research': False}]