# Pokémon Dataset Cleaning and Exploration
This notebook demonstrates common data cleaning, standardization, and exploratory steps for the Pokémon Generation 1 dataset.

In [32]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read the Generation 1 Pokémon CSV and use the "Name" column as the row index.
# NOTE(review): this is an absolute, machine-specific Windows path — adjust it
# before running the notebook on another machine.
df = pd.read_csv(
    filepath_or_buffer="D:\\Code\\Python\\Class\\My Class\\DataSet\\Pokemon\\pokemonGen1.csv",
    index_col="Name",
)
# Preview the first five rows.
df.head(n=5)

Unnamed: 0_level_0,No,Type1,Type2,Height,Weight,Legendary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bulbasaur,1,grass,Poison,0.7,6.9,0
Ivysaur,2,grass,Poison,1.0,13.0,0
Venusaur,3,Grass,Poison,2.0,100.0,0
Charmander,4,Fire,,0.6,8.5,0
Charmeleon,5,Fire,,1.1,19.0,0


## 1. Drop Irrelevant Columns

In [33]:
# Build a trimmed copy without the "Legendary" and "No" columns.
# `drop` returns a new frame, so `df` keeps all of its columns.
df_clean = df.drop(["Legendary", "No"], axis="columns")
df_clean.head()

Unnamed: 0_level_0,Type1,Type2,Height,Weight
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bulbasaur,grass,Poison,0.7,6.9
Ivysaur,grass,Poison,1.0,13.0
Venusaur,Grass,Poison,2.0,100.0
Charmander,Fire,,0.6,8.5
Charmeleon,Fire,,1.1,19.0


## 2. Handle Missing Data

In [34]:
# Keep only the rows that have a value in 'Type2'.
# The result is a new frame; `df` itself is not modified.
df_dropna = df.dropna(axis="index", subset=["Type2"])
df_dropna.head(5)

Unnamed: 0_level_0,No,Type1,Type2,Height,Weight,Legendary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bulbasaur,1,grass,Poison,0.7,6.9,0
Ivysaur,2,grass,Poison,1.0,13.0,0
Venusaur,3,Grass,Poison,2.0,100.0,0
Charizard,6,Fire,Flying,1.7,90.5,0
Butterfree,12,Bug,Flying,1.1,32.0,0


In [35]:
# Alternative to dropping: substitute the placeholder text 'None'
# (a string, not Python's None) for every missing 'Type2' entry.
# Only 'Type2' is filled, and the result is a new frame; `df` is unchanged.
df_filled = df.fillna(value={"Type2": "None"})
df_filled.head(5)

Unnamed: 0_level_0,No,Type1,Type2,Height,Weight,Legendary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bulbasaur,1,grass,Poison,0.7,6.9,0
Ivysaur,2,grass,Poison,1.0,13.0,0
Venusaur,3,Grass,Poison,2.0,100.0,0
Charmander,4,Fire,,0.6,8.5,0
Charmeleon,5,Fire,,1.1,19.0,0


## 3. Handle Inconsistent Data

In [36]:
# List the distinct 'Type1' labels to spot spelling/casing inconsistencies.
print(df.loc[:, 'Type1'].unique())

['grass' 'Grass' 'Fire' 'Water' 'Bug' 'Normal' 'Poison' 'Electric'
 'Ground' 'Fairy' 'Fighting' 'Psychic' 'Rock' 'Ghost' 'Ice' 'Dragon']


In [37]:

# Fold the lowercase spelling "grass" into "Grass" so both map to one label.
# Note: this overwrites the 'Type1' column of `df` itself.
df['Type1'] = df['Type1'].replace(to_replace="grass", value="Grass")
# Re-list the distinct labels to confirm the lowercase variant is gone.
print(df['Type1'].unique())

['Grass' 'Fire' 'Water' 'Bug' 'Normal' 'Poison' 'Electric' 'Ground'
 'Fairy' 'Fighting' 'Psychic' 'Rock' 'Ghost' 'Ice' 'Dragon']


## 4. Standardize Text

In [38]:
# Lowercase the text in both type columns, one column at a time.
# Note: this overwrites the two columns of `df` itself.
df[['Type1', 'Type2']] = df.loc[:, ['Type1', 'Type2']].apply(
    lambda column: column.str.lower(),
    axis="index",
)
# Preview just the two standardized columns.
df.loc[:, ['Type1', 'Type2']].head(5)

Unnamed: 0_level_0,Type1,Type2
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Bulbasaur,grass,poison
Ivysaur,grass,poison
Venusaur,grass,poison
Charmander,fire,
Charmeleon,fire,


## 5. Fix Data Types

In [39]:
# Inspect the current storage type of the 'Legendary' flag before converting it.
df.dtypes['Legendary']


dtype('int64')

In [40]:
# Store the 'Legendary' flag as a boolean instead of an integer.
# Note: this overwrites the 'Legendary' column of `df` itself.
df['Legendary'] = df['Legendary'].astype("bool")
# Confirm the conversion took effect.
df.dtypes['Legendary']

dtype('bool')

## 6. Remove Duplicates

In [42]:
# Remove rows whose column values exactly repeat an earlier row, keeping the
# first occurrence. The index ("Name") is not part of the comparison.
# The result is a new frame; `df` is unchanged.
df_no_duplicates = df.drop_duplicates(keep="first")
# (rows, columns) remaining after de-duplication.
df_no_duplicates.shape

(150, 6)

## 7. Check for Missing Values

In [44]:
# Count the missing entries in each column of `df`.
df.isna().sum()

No            0
Type1         0
Type2        83
Height        0
Weight        0
Legendary     0
dtype: int64

## 8. Save Cleaned Dataset

In [45]:
# Persist the cleaned data to CSV; the "Name" index is written as the first column.
# Write `df_no_duplicates` (built from `df` in the "Remove Duplicates" step)
# rather than `df`: it carries the same in-place fixes (type labels, boolean
# 'Legendary') and also has duplicate rows removed, which `df` does not.
# NOTE(review): missing 'Type2' values are left as-is and are written as empty fields.
# NOTE(review): the destination is an absolute, machine-specific Windows path —
# adjust it before running the notebook on another machine.
df_no_duplicates.to_csv("D:\\Code\\Python\\Class\\My Class\\DataSet\\Pokemon\\pokemonGen1_cleaned.csv")