# Importing Files - working with file (dataPokemon.csv)

In [1]:
import pandas as pd

In [54]:
df = pd.read_csv("dataPokemon.csv")
print(df)


      No        Name   Type1   Type2  Height  Weight  Legendary
0      1   Bulbasaur   Grass  Poison     0.7     6.9          0
1      2     Ivysaur   Grass  Poison     1.0    13.0          0
2      3    Venusaur   Grass  Poison     2.0   100.0          0
3      4  Charmander    Fire     NaN     0.6     8.5          0
4      5  Charmeleon    Fire     NaN     1.1    19.0          0
..   ...         ...     ...     ...     ...     ...        ...
150    1   Bulbasaur   Grass  Poison     0.7     6.9          0
151    1   Bulbasaur   Grass  Poison     0.7     6.9          0
152    1   Bulbasaur   Grass  Poison     0.7     6.9          0
153  149   Dragonite  Dragon  Flying     2.2   210.0          0
154  149   Dragonite  Dragon  Flying     2.2   210.0          0

[155 rows x 7 columns]


### to_string

### print(df) → may truncate large DataFrames (shows only head/tail).
### df.to_string() → prints the entire DataFrame with no truncation.

In [None]:
print(df.to_string())

# Selection

## COLUMN - Single 

In [6]:
print(df["Name"])

0       Bulbasaur
1         Ivysaur
2        Venusaur
3      Charmander
4      Charmeleon
          ...    
145       Moltres
146       Dratini
147     Dragonair
148     Dragonite
149        Mewtwo
Name: Name, Length: 150, dtype: object


## COLUMN - Multiple

In [7]:
print(df[["Name", "Height", "Weight"]])

           Name  Height  Weight
0     Bulbasaur     0.7     6.9
1       Ivysaur     1.0    13.0
2      Venusaur     2.0   100.0
3    Charmander     0.6     8.5
4    Charmeleon     1.1    19.0
..          ...     ...     ...
145     Moltres     2.0    60.0
146     Dratini     1.8     3.3
147   Dragonair     4.0    16.5
148   Dragonite     2.2   210.0
149      Mewtwo     2.0   122.0

[150 rows x 3 columns]


## Row - Single/Multiple

In [9]:
print(df.loc[0])

No                   1
Name         Bulbasaur
Type1            Grass
Type2           Poison
Height             0.7
Weight             6.9
Legendary            0
Name: 0, dtype: object


In [10]:
print(df.loc[[0,3, 148]])

      No        Name   Type1   Type2  Height  Weight  Legendary
0      1   Bulbasaur   Grass  Poison     0.7     6.9          0
3      4  Charmander    Fire     NaN     0.6     8.5          0
148  149   Dragonite  Dragon  Flying     2.2   210.0          0


### We can set the index by any property like name :  by using index_col and now you can locate by that property
### df = pd.read_csv("dataPokemon.csv", index_col= "Name")
### df.loc["Pikachu"]


## Only Show particular properties in a row

In [11]:
print(df.loc[1, ["Name", "Type1", "Legendary"]])

Name         Ivysaur
Type1          Grass
Legendary          0
Name: 1, dtype: object


## Range

use iloc: [rowStart: rowEnd: step, columnStart: columnStart]

In [14]:
print(df.loc[43:47])

    No       Name  Type1   Type2  Height  Weight  Legendary
43  44      Gloom  Grass  Poison     0.8     8.6          0
44  45  Vileplume  Grass  Poison     1.2    18.6          0
45  46      Paras    Bug   Grass     0.3     5.4          0
46  47   Parasect    Bug   Grass     1.0    29.5          0
47  48    Venonat    Bug  Poison     1.0    30.0          0


In [16]:
print(df.iloc[3:10:2, 3:5])

    Type2  Height
3     NaN     0.6
5  Flying     1.7
7     NaN     1.0
9     NaN     0.3


## Exercise

### user type the name Pokemon and find it

# Filtering In DataFrame

In [20]:
print(df[df["Name"] == "Ivysaur"])

   No     Name  Type1   Type2  Height  Weight  Legendary
1   2  Ivysaur  Grass  Poison     1.0    13.0          0


In [23]:
search = input("Enter the pokemon name: ")

try:
    print(df[df["Name"]== search])
except KeyError:
    print("Not Found")


    No       Name  Type1   Type2  Height  Weight  Legendary
44  45  Vileplume  Grass  Poison     1.2    18.6          0


In [31]:
tall_pokemon= df[df["Height"]>=3]
print(tall_pokemon)

      No       Name   Type1   Type2  Height  Weight  Legendary
23    24      Arbok  Poison     NaN     3.5    65.0          0
94    95       Onix    Rock  Ground     8.8   210.0          0
129  130   Gyarados   Water  Flying     6.5   235.0          0
147  148  Dragonair  Dragon     NaN     4.0    16.5          0


# Aggregation 
### to summarize and analyze dataMean , sum, min , max, count

In [32]:
print(df.mean(numeric_only=True))

No           75.500000
Height        1.200000
Weight       46.231333
Legendary     0.026667
dtype: float64


In [36]:
print(df.sum(numeric_only=True))

No           11325.0
Height         180.0
Weight        6934.7
Legendary        4.0
dtype: float64


In [37]:
print(df.min(numeric_only=True))

No           1.0
Height       0.2
Weight       0.1
Legendary    0.0
dtype: float64


In [38]:
print(df.max(numeric_only=True))

No           150.0
Height         8.8
Weight       460.0
Legendary      1.0
dtype: float64


In [39]:
print(df.count())

No           150
Name         150
Type1        150
Type2         67
Height       150
Weight       150
Legendary    150
dtype: int64


## Aggregate for single column

In [34]:
print(df["Height"].mean())

1.2


## GroupBy

In [40]:
group = df.groupby("Type1")
print(group)

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x10ec13f10>


In [41]:
print(group["Height"].mean())

Type1
Bug         0.900000
Dragon      2.666667
Electric    0.855556
Fairy       0.950000
Fighting    1.185714
Fire        1.216667
Ghost       1.466667
Grass       1.083333
Ground      0.850000
Ice         1.550000
Normal      0.986364
Poison      1.221429
Psychic     1.371429
Rock        1.844444
Water       1.300000
Name: Height, dtype: float64


# Data Cleaning

## 1. Drop Irrelevant column

In [45]:
df = df.drop(columns=["Legendary", "No"])
print(df)

           Name    Type1   Type2  Height  Weight
0     Bulbasaur    Grass  Poison     0.7     6.9
1       Ivysaur    Grass  Poison     1.0    13.0
2      Venusaur    Grass  Poison     2.0   100.0
3    Charmander     Fire     NaN     0.6     8.5
4    Charmeleon     Fire     NaN     1.1    19.0
..          ...      ...     ...     ...     ...
145     Moltres     Fire  Flying     2.0    60.0
146     Dratini   Dragon     NaN     1.8     3.3
147   Dragonair   Dragon     NaN     4.0    16.5
148   Dragonite   Dragon  Flying     2.2   210.0
149      Mewtwo  Psychic     NaN     2.0   122.0

[150 rows x 5 columns]


## 2. Handle Missing Data

dropna() → removes rows that have missing values (NaN).
subset → lets you specify which column(s) to check for missing values.
👉 Only rows missing in that column (or columns) will be dropped.

In [None]:
df = df.dropna(subset=["Type2"])

In [47]:
print(df.to_string())

           Name     Type1     Type2  Height  Weight
0     Bulbasaur     Grass    Poison     0.7     6.9
1       Ivysaur     Grass    Poison     1.0    13.0
2      Venusaur     Grass    Poison     2.0   100.0
5     Charizard      Fire    Flying     1.7    90.5
11   Butterfree       Bug    Flying     1.1    32.0
12       Weedle       Bug    Poison     0.3     3.2
13       Kakuna       Bug    Poison     0.6    10.0
14     Beedrill       Bug    Poison     1.0    29.5
15       Pidgey    Normal    Flying     0.3     1.8
16    Pidgeotto    Normal    Flying     1.1    30.0
17      Pidgeot    Normal    Flying     1.5    39.5
20      Spearow    Normal    Flying     0.3     2.0
21       Fearow    Normal    Flying     1.2    38.0
30    Nidoqueen    Poison    Ground     1.3    60.0
33     Nidoking    Poison    Ground     1.4    62.0
38   Jigglypuff    Normal     Fairy     0.5     5.5
39   Wigglytuff    Normal     Fairy     1.0    12.0
40        Zubat    Poison    Flying     0.8     7.5
41       Gol

## 3. Replace the missing values by filling
- Replace any not available values in the column of Type2 with the folllwing values

In [49]:
df = df.fillna({"Type2": "None"})
print(df)

      No        Name    Type1   Type2  Height  Weight  Legendary
0      1   Bulbasaur    Grass  Poison     0.7     6.9          0
1      2     Ivysaur    Grass  Poison     1.0    13.0          0
2      3    Venusaur    Grass  Poison     2.0   100.0          0
3      4  Charmander     Fire    None     0.6     8.5          0
4      5  Charmeleon     Fire    None     1.1    19.0          0
..   ...         ...      ...     ...     ...     ...        ...
145  146     Moltres     Fire  Flying     2.0    60.0          1
146  147     Dratini   Dragon    None     1.8     3.3          0
147  148   Dragonair   Dragon    None     4.0    16.5          0
148  149   Dragonite   Dragon  Flying     2.2   210.0          0
149  150      Mewtwo  Psychic    None     2.0   122.0          1

[150 rows x 7 columns]


## 4. Fix Inconsistent Values

In [51]:
df["Type1"]= df["Type1"].replace({"Grass": "GRASS"})
print(df)

      No        Name    Type1   Type2  Height  Weight  Legendary     Type
0      1   Bulbasaur    GRASS  Poison     0.7     6.9          0    GRASS
1      2     Ivysaur    GRASS  Poison     1.0    13.0          0    GRASS
2      3    Venusaur    GRASS  Poison     2.0   100.0          0    GRASS
3      4  Charmander     Fire    None     0.6     8.5          0     Fire
4      5  Charmeleon     Fire    None     1.1    19.0          0     Fire
..   ...         ...      ...     ...     ...     ...        ...      ...
145  146     Moltres     Fire  Flying     2.0    60.0          1     Fire
146  147     Dratini   Dragon    None     1.8     3.3          0   Dragon
147  148   Dragonair   Dragon    None     4.0    16.5          0   Dragon
148  149   Dragonite   Dragon  Flying     2.2   210.0          0   Dragon
149  150      Mewtwo  Psychic    None     2.0   122.0          1  Psychic

[150 rows x 8 columns]


## 5. Standardize Text

In [52]:
df["Type2"]= df["Type2"].str.upper()
print(df)

      No        Name    Type1   Type2  Height  Weight  Legendary     Type
0      1   Bulbasaur    GRASS  POISON     0.7     6.9          0    GRASS
1      2     Ivysaur    GRASS  POISON     1.0    13.0          0    GRASS
2      3    Venusaur    GRASS  POISON     2.0   100.0          0    GRASS
3      4  Charmander     Fire    NONE     0.6     8.5          0     Fire
4      5  Charmeleon     Fire    NONE     1.1    19.0          0     Fire
..   ...         ...      ...     ...     ...     ...        ...      ...
145  146     Moltres     Fire  FLYING     2.0    60.0          1     Fire
146  147     Dratini   Dragon    NONE     1.8     3.3          0   Dragon
147  148   Dragonair   Dragon    NONE     4.0    16.5          0   Dragon
148  149   Dragonite   Dragon  FLYING     2.2   210.0          0   Dragon
149  150      Mewtwo  Psychic    NONE     2.0   122.0          1  Psychic

[150 rows x 8 columns]


## 6. Fix/Change data types

In [53]:
df["Legendary"]=df["Legendary"].astype(bool)
print(df)

      No        Name    Type1   Type2  Height  Weight  Legendary     Type
0      1   Bulbasaur    GRASS  POISON     0.7     6.9      False    GRASS
1      2     Ivysaur    GRASS  POISON     1.0    13.0      False    GRASS
2      3    Venusaur    GRASS  POISON     2.0   100.0      False    GRASS
3      4  Charmander     Fire    NONE     0.6     8.5      False     Fire
4      5  Charmeleon     Fire    NONE     1.1    19.0      False     Fire
..   ...         ...      ...     ...     ...     ...        ...      ...
145  146     Moltres     Fire  FLYING     2.0    60.0       True     Fire
146  147     Dratini   Dragon    NONE     1.8     3.3      False   Dragon
147  148   Dragonair   Dragon    NONE     4.0    16.5      False   Dragon
148  149   Dragonite   Dragon  FLYING     2.2   210.0      False   Dragon
149  150      Mewtwo  Psychic    NONE     2.0   122.0       True  Psychic

[150 rows x 8 columns]


## 7. Remove Duplicate Values

In [55]:
df = df.drop_duplicates()
print(df)

      No        Name    Type1   Type2  Height  Weight  Legendary
0      1   Bulbasaur    Grass  Poison     0.7     6.9          0
1      2     Ivysaur    Grass  Poison     1.0    13.0          0
2      3    Venusaur    Grass  Poison     2.0   100.0          0
3      4  Charmander     Fire     NaN     0.6     8.5          0
4      5  Charmeleon     Fire     NaN     1.1    19.0          0
..   ...         ...      ...     ...     ...     ...        ...
145  146     Moltres     Fire  Flying     2.0    60.0          1
146  147     Dratini   Dragon     NaN     1.8     3.3          0
147  148   Dragonair   Dragon     NaN     4.0    16.5          0
148  149   Dragonite   Dragon  Flying     2.2   210.0          0
149  150      Mewtwo  Psychic     NaN     2.0   122.0          1

[150 rows x 7 columns]
