In [1]:
 #Libraries:

from IPython.display import display
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
def load_jsonl(path: str) -> list[dict]:
    """
    Read a JSON Lines file and return its records.

    Parameters:
        path: location of a file holding one JSON document per line.

    Returns:
        A list with one parsed object per non-blank line, in file order.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    records = []
    # Explicit UTF-8 so the result does not depend on the platform's default encoding.
    with open(path, 'r', encoding='utf-8') as file:
        for line in file:
            # Skip blank lines (e.g. a trailing newline) instead of failing in json.loads.
            if line.strip():
                records.append(json.loads(line))
    return records


# Getting the training data:
train_data = load_jsonl('train.jsonl')
print(f"Successfully loaded {len(train_data)} battles")

# Get the test data:
test_data = load_jsonl('test.jsonl')
print(f"Successfully loaded {len(test_data)} battles")

# Looking into the data:
first_battle = train_data[0]    #first entire row

# Shallow-copy the first battle and truncate the timeline for better display of data.
# Only the copy's 'battle_timeline' key is rebound, so first_battle itself is left untouched.
battle_for_display = first_battle.copy()
battle_for_display['battle_timeline'] = battle_for_display.get('battle_timeline', [])[:6] #first 6 turns

# json.dumps for cleaner printing
print(json.dumps(battle_for_display, indent=4))

Successfully loaded 10000 battles
Successfully loaded 5000 battles
{
    "player_won": true,
    "p1_team_details": [
        {
            "name": "starmie",
            "level": 100,
            "types": [
                "psychic",
                "water"
            ],
            "base_hp": 60,
            "base_atk": 75,
            "base_def": 85,
            "base_spa": 100,
            "base_spd": 100,
            "base_spe": 115
        },
        {
            "name": "exeggutor",
            "level": 100,
            "types": [
                "grass",
                "psychic"
            ],
            "base_hp": 95,
            "base_atk": 95,
            "base_def": 85,
            "base_spa": 125,
            "base_spd": 125,
            "base_spe": 55
        },
        {
            "name": "chansey",
            "level": 100,
            "types": [
                "normal",
                "notype"
            ],
            "base_hp": 250,
            "base_atk": 5

In [None]:
# Per-Pokemon feature table (base stats + combined type label) used by the type classifier

def type_classifier(data:list[dict]) -> pd.DataFrame:
    """
    Build a table with one row per Pokemon found in the 'p1_team_details' of each battle.

    Despite its name, this function does not fit or apply a model: it only
    flattens the battle records into features (the six base stats) and a
    label ('pokemon_type') that a classifier can later be trained on.

    Parameters:
        data: list of battle dicts. Each may hold a 'p1_team_details' list of
            Pokemon dicts with the keys 'name', 'types' and 'base_*' stats.

    Returns:
        DataFrame with the columns name, base_hp, base_attack, base_defense,
        base_special_attack, base_special_defense, base_speed, pokemon_type.
        Missing stats default to 0 and missing names to ''. 'pokemon_type' is
        the first two entries of 'types' joined with '-'; absent entries are
        filled with 'notype'. The columns are present even when no Pokemon
        were found.
    """
    columns = [
        'name',
        'base_hp',
        'base_attack',
        'base_defense',
        'base_special_attack',
        'base_special_defense',
        'base_speed',
        'pokemon_type',
    ]

    pokemon_features = []
    for battle in data:
        # `or []` also covers an explicit null team, not just a missing key.
        p1_team = battle.get('p1_team_details') or []

        # Create one row per Pokemon in the team
        for pokemon in p1_team:
            types = list(pokemon.get('types') or [])
            # Pad to two entries so a missing or single-element type list cannot raise
            # IndexError; entries beyond the first two are ignored.
            first_type, second_type = (types + ['notype', 'notype'])[:2]
            type_str = f"{first_type}-{second_type}"

            characteristics = {
                'name': pokemon.get('name', ''),
                'base_hp': pokemon.get('base_hp', 0),
                'base_attack': pokemon.get('base_atk', 0),
                'base_defense': pokemon.get('base_def', 0),
                'base_special_attack': pokemon.get('base_spa', 0),
                'base_special_defense': pokemon.get('base_spd', 0),
                'base_speed': pokemon.get('base_spe', 0),
                'pokemon_type': type_str
            }
            pokemon_features.append(characteristics)

    # Passing `columns` keeps the schema stable for empty input, so downstream
    # column selection still works.
    return pd.DataFrame(pokemon_features, columns=columns).fillna(0)

In [4]:
# Flatten the training battles into the per-Pokemon table and preview its first 11 rows.
train_df = type_classifier(data=train_data)
display(train_df.iloc[:11])

Unnamed: 0,name,base_hp,base_attack,base_defense,base_special_attack,base_special_defense,base_speed,pokemon_type
0,starmie,60,75,85,100,100,115,psychic-water
1,exeggutor,95,95,85,125,125,55,grass-psychic
2,chansey,250,5,5,105,105,50,normal-notype
3,snorlax,160,110,65,65,65,30,normal-notype
4,tauros,75,100,95,70,70,110,normal-notype
5,alakazam,55,50,45,135,135,120,notype-psychic
6,jynx,65,50,35,95,95,95,ice-psychic
7,snorlax,160,110,65,65,65,30,normal-notype
8,exeggutor,95,95,85,125,125,55,grass-psychic
9,tauros,75,100,95,70,70,110,normal-notype


In [6]:
# Features: the six base stats. Target: True where the combined type label is 'normal-notype'.
X = train_df[['base_hp', 'base_attack', 'base_defense', 'base_special_attack', 'base_special_defense', 'base_speed']]
y = train_df['pokemon_type'] == 'normal-notype'

# NOTE(review): train_df has one row per Pokemon per battle, so identical rows can fall on both
# sides of this row-level split — consider de-duplicating before splitting. TODO confirm.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)

# Fit on the training split only. Fitting on the full X, y would let the model see the
# x_test rows and make the test accuracy meaningless as a held-out estimate.
# random_state is fixed so feature tie-breaking is reproducible across runs.
tree = DecisionTreeClassifier(max_depth = 1, random_state = 42).fit(x_train, y_train)

# Accuracy on the training split (shown as the cell output).
accuracy_score(y_train, tree.predict(x_train))

0.8465238095238096

In [7]:
# Accuracy of the fitted tree's predictions on the x_test / y_test split.
accuracy_score(y_true=y_test, y_pred=tree.predict(x_test))

0.8417777777777777