In [1]:
!pip install matplotlib seaborn plotly
!pip install --upgrade nbformat ipython



In [2]:
import plotly.express as px
import plotly.graph_objects as go
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns
import os

plt.style.use("default")
sns.set()

In [3]:
df = pd.read_csv("../data/pokemon_stats_2025.csv")
df.head()


Unnamed: 0,pokedex_id,name,height,weight,base_experience,type_1,type_2,hp,attack,defense,special_attack,special_defense,speed
0,1,Bulbasaur,7,69,64,grass,poison,45,49,49,65,65,45
1,2,Ivysaur,10,130,142,grass,poison,60,62,63,80,80,60
2,3,Venusaur,20,1000,236,grass,poison,80,82,83,100,100,80
3,4,Charmander,6,85,62,fire,,39,52,43,60,50,65
4,5,Charmeleon,11,190,142,fire,,58,64,58,80,65,80


In [6]:
df.shape

(250, 13)

In [11]:
df.columns 

Index(['pokedex_id', 'name', 'height', 'weight', 'base_experience', 'type_1',
       'type_2', 'hp', 'attack', 'defense', 'special_attack',
       'special_defense', 'speed'],
      dtype='str')

In [14]:
df.info()

df.describe

<class 'pandas.DataFrame'>
RangeIndex: 250 entries, 0 to 249
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype
---  ------           --------------  -----
 0   pokedex_id       250 non-null    int64
 1   name             250 non-null    str  
 2   height           250 non-null    int64
 3   weight           250 non-null    int64
 4   base_experience  250 non-null    int64
 5   type_1           250 non-null    str  
 6   type_2           115 non-null    str  
 7   hp               250 non-null    int64
 8   attack           250 non-null    int64
 9   defense          250 non-null    int64
 10  special_attack   250 non-null    int64
 11  special_defense  250 non-null    int64
 12  speed            250 non-null    int64
dtypes: int64(10), str(3)
memory usage: 25.5 KB


<bound method NDFrame.describe of      pokedex_id        name  height  weight  base_experience   type_1  type_2  \
0             1   Bulbasaur       7      69               64    grass  poison   
1             2     Ivysaur      10     130              142    grass  poison   
2             3    Venusaur      20    1000              236    grass  poison   
3             4  Charmander       6      85               62     fire     NaN   
4             5  Charmeleon      11     190              142     fire     NaN   
..          ...         ...     ...     ...              ...      ...     ...   
245         246    Larvitar       6     720               60     rock  ground   
246         247     Pupitar      12    1520              144     rock  ground   
247         248   Tyranitar      20    2020              270     rock    dark   
248         249       Lugia      52    2160              306  psychic  flying   
249         250       Ho-oh      38    1990              306     fire  flyi

In [4]:
df.isnull().sum()

pokedex_id           0
name                 0
height               0
weight               0
base_experience      0
type_1               0
type_2             135
hp                   0
attack               0
defense              0
special_attack       0
special_defense      0
speed                0
dtype: int64

In [5]:
# plt.figure(figsize=(10,4))
# sns.heatmap(df.isnull(), cbar=False)
# plt.title("Missing Values in Pok√©mon Dataset")
# plt.show()
# Missing Values Analysis (Interactive)
missing_df = df.isnull().sum().reset_index()
missing_df.columns = ["Column", "Missing Values"]

fig = px.bar(
    missing_df,
    x="Column",
    y="Missing Values",
    text="Missing Values",
    title="Missing Values per Column"
)

fig.update_traces(textposition="outside")
fig.show()

In [None]:
# Univariate Analysis (Single Column)

# üîπ Distribution of HP

# plt.figure(figsize=(6,4))
# sns.histplot(df['defense'],bins=30, kde = True)
# plt.title("Distribution of Pok√©mon Defense")
# plt.xlabel("Defense")
# plt.ylabel("Count")
# plt.show()
fig = px.histogram(
    df,
    x="defense",
    nbins=30,
    title="Distribution of Pok√©mon Defense"
)
fig.show()

In [12]:
# üîπ Boxplot for hp (Outliers)
# plt.figure(figsize=(6,3))
# sns.boxplot(x=df['hp'])
# plt.title("HP Stat Outliers")
# plt.show()
fig = px.box(
    df,
    x="hp",
    title="Attack Stat Outliers"
)
fig.show()


In [14]:
# Categorical Analysis (Types)

# üîπ Primary Pok√©mon Type Distribution

# plt.figure(figsize=(10,5))
# sns.countplot(x='type_1', data=df, order=df['type_1'].value_counts().index)
# plt.xticks(rotation=45)
# plt.title("Distribution of Primary Pok√©mon Types")
# plt.show()
type_counts = df["type_1"].value_counts().reset_index()
type_counts.columns = ["Type", "Count"]

fig = px.bar(
    type_counts,
    x="Type",
    y="Count",
    title="Distribution of Primary Pok√©mon Types"
)
fig.show()

In [16]:
# Bivariate Analysis
# üîπ Attack vs Defense
# plt.figure(figsize=(6,4))
# sns.scatterplot(x='attack', y='defense', data=df)
# plt.title("Attack vs Defense")
# plt.show()
fig = px.scatter(
    df,
    x="attack",
    y="defense",
    hover_name="name",
    title="Attack vs Defense"
)
fig.show()

In [18]:
# üîπ Speed vs Base Experience
# sns.scatterplot(x='speed', y='base_experience', data=df)
# plt.title("Speed vs Base Experience")
# plt.show()
fig = px.scatter(
    df,
    x="speed",
    y="base_experience",
    hover_name="name",
    title="Speed vs Base Experience"
)
fig.show()

In [19]:
# Correlation Analysis
# numeric_df = df.select_dtypes(include=['int64', 'float64'])
# plt.figure(figsize=(10,8))
# sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm')
# plt.title("Correlation Heatmap of Pok√©mon Stats")
# plt.show()

numeric_df = df.select_dtypes(include=["int64", "float64"]).drop(columns=["pokedex_id"])
corr = numeric_df.corr()

fig = px.imshow(
    corr,
    text_auto=True,
    title="Correlation Heatmap of Pok√©mon Numeric Stats"
)
fig.show()


In [20]:
# Multivariate Analysis (Interactive)
fig = px.scatter_matrix(
    df,
    dimensions=["hp", "attack", "defense", "speed"],
    hover_name="name",
    title="Multivariate Analysis of Pok√©mon Stats"
)
fig.show()