# Natalia Palej A00279259
Software Design with Artificial Intelligence for Cloud Computing, Year 4

# Part 5: Visualizations with Built-in Statistical Features 

In [13]:
import pandas as pd 
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [6]:
df = pd.read_pickle('data/final_cleaned_pokemon_data.pkl')

In [7]:
df.head()

Unnamed: 0,name,national_no,species,ev_yield,catch_rate,base_friendship,base_exp,growth_rate,egg_groups,egg_cycles,...,pokemon_img_url,height_m,weight_kg,primary_type,secondary_type,male_percentage,female_percentage,gen,primary_ability,secondary_ability
0,bulbasaur,1,Seed Pokémon,1 Sp. Atk,45.0,50.0,64,Medium Slow,"Grass, Monster",20,...,https://img.pokemondb.net/artwork/bulbasaur.jpg,0.7,6.9,grass,poison,87.5,12.5,1,Overgrow,Chlorophyll
1,ivysaur,2,Seed Pokémon,"1 Sp. Atk, 1 Sp. Def",45.0,50.0,142,Medium Slow,"Grass, Monster",20,...,https://img.pokemondb.net/artwork/ivysaur.jpg,1.0,13.0,grass,poison,87.5,12.5,1,Overgrow,Chlorophyll
2,venusaur,3,Seed Pokémon,"2 Sp. Atk, 1 Sp. Def",45.0,50.0,236,Medium Slow,"Grass, Monster",20,...,https://img.pokemondb.net/artwork/venusaur.jpg,2.0,100.0,grass,poison,87.5,12.5,1,Overgrow,Chlorophyll
3,charmander,4,Lizard Pokémon,1 Speed,45.0,50.0,62,Medium Slow,"Dragon, Monster",20,...,https://img.pokemondb.net/artwork/charmander.jpg,0.6,8.5,fire,Pure,87.5,12.5,1,Blaze,Solar Power
4,charmeleon,5,Flame Pokémon,"1 Sp. Atk, 1 Speed",45.0,50.0,142,Medium Slow,"Dragon, Monster",20,...,https://img.pokemondb.net/artwork/charmeleon.jpg,1.1,19.0,fire,Pure,87.5,12.5,1,Blaze,Solar Power


In [8]:
df.columns

Index(['name', 'national_no', 'species', 'ev_yield', 'catch_rate',
       'base_friendship', 'base_exp', 'growth_rate', 'egg_groups',
       'egg_cycles', 'hp', 'attack', 'defense', 'sp_atk', 'sp_def', 'speed',
       'total', 'evo_path', 'pokemon_img_url', 'height_m', 'weight_kg',
       'primary_type', 'secondary_type', 'male_percentage',
       'female_percentage', 'gen', 'primary_ability', 'secondary_ability'],
      dtype='object')

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 76 entries, 0 to 75
Data columns (total 28 columns):
 #   Column             Non-Null Count  Dtype   
---  ------             --------------  -----   
 0   name               76 non-null     object  
 1   national_no        76 non-null     Int64   
 2   species            76 non-null     object  
 3   ev_yield           76 non-null     object  
 4   catch_rate         76 non-null     float64 
 5   base_friendship    76 non-null     float64 
 6   base_exp           76 non-null     int32   
 7   growth_rate        76 non-null     object  
 8   egg_groups         76 non-null     object  
 9   egg_cycles         76 non-null     int64   
 10  hp                 76 non-null     Int64   
 11  attack             76 non-null     Int64   
 12  defense            76 non-null     Int64   
 13  sp_atk             76 non-null     Int64   
 14  sp_def             76 non-null     Int64   
 15  speed              76 non-null     Int64   
 16  total     

## Data Grouping

### Compare basic 6 stats across various generations

In [15]:
# List of stats
stats = ['hp', 'min_hp', 'max_hp', 'attack', 'min_attack', 'max_attack', 
         'defense', 'min_defense', 'max_defense', 'sp_atk', 'min_sp_atk', 'max_sp_atk', 
         'sp_def', 'min_sp_def', 'max_sp_def', 'speed', 'min_speed', 'max_speed']

In [16]:
# Define colors for each generation
gen_colors = {
    1: '#f30d0d', 2: '#22f30d', 3: '#0db4f3', 4: '#000000',
    5: '#9b34df', 6: '#ff61fb', 7: '#f5fd33', 8: '#fdb933'
}

## Create Subplot (Facet)

In [17]:
# Define 2x3 facet containing in total 6 subplots
subplot = make_subplots(rows=2, cols=3, subplot_titles=stats)

## Main Loop for Vioolin Subplots

In [19]:
for i, stat in enumerate(stats):
    # define rows and columns
    row = i // 3 + 1
    col = i % 3 + 1

    # Create violin subplot for each gen and stat
    for gen, sub_df in df.groupby('gen'):
        subplot.add_trace(
            go.Violin(
                x=sub_df[stat],
                # Vertical separation between generations
                y0=i + 1,  
                name=f'Gen {gen}',
                fillcolor=gen_colors[gen],
                line=dict(color=gen_colors[gen]),
                opacity=0.6,
                box_visible=True,
                meanline_visible=True
            ),
            row=row,
            col=col
        )

subplot.update_layout(
    title="Pokemon Min and Max Stats Level Plot",
    height=800,
    width=1000,
    showlegend=False
)

subplot.update_yaxes(showticklabels=False)

subplot.show()

KeyError: 'min_hp'

### Correct and Enhance the Visualisation

In [None]:
for i, stat in enumerate(stats):
    # define rows and columns
    row = i // 3 + 1
    col = i % 3 + 1

    # Create violin subplot for each gen and stat
    for gen, sub_df in df.groupby('gen'):
        subplot.add_trace(
            go.Violin(
                x=sub_df[stat],
                # Position each gen along y-axis
                y0=gen,  
                name=f'Gen {gen}',
                fillcolor=gen_colors[gen],
                line=dict(color=gen_colors[gen]),
                opacity=0.6,
                box_visible=True,
                meanline_visible=True,
                width=3,
                showlegend=False
            ),
            row=row,
            col=col
        )

subplot.update_layout(
    title="Pokemon Min and Max Stats Level Plot",
    height=800,
    width=1000,
    showlegend=False
)

# Set x-axis and y-axis properties for all subplots
for axis in subplot.layout:
    if axis.startswith('xaxis'):
        subplot.layout[axis].update(range=[0, 600], dtick=100)
    elif axis.startswith('yaxis'):
        subplot.layout[axis].update(dtick=1, range=[1, 8], showticklabels=True)


# Define a title for the entire figure
for axis_title in range(1, 7):
    subplot.layout[f"xaxis{axis_title}_title"] = "Stat Value"
    subplot.layout[f"yaxis{axis_title}_title"] = "Generation"


subplot.show()