In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

**Pandas Datastructures**

Pandas supports two datastructures
1. Pandas Series
2. Pandas DataFrame

In [10]:
list_1 = ['a', 'b', 'c', 'd']
labels = [1, 2, 3, 4]

ser_1 = pd.Series(data=list_1, index=labels)
# print(ser_1)

arr_1 = [1, 2, 3, 4]
ser_2 = pd.Series(arr_1)
print(ser_2[0])

dict_1 = {"name": "JIHOO",
         "age": 56}
ser_3 = pd.Series(dict_1)
print(ser_3["name"])

1
JIHOO


## Pandas Dataframe

Pandas Dataframe is a two dimensional labeled data structure. It consists of rows and columns. \
Each column in Pandas DataFrame is a Pandas Series.

**Creating Dataframes**

In [26]:
arr_2 = np.random.randint(10, 50, size=(2,3))

df_1 = pd.DataFrame(arr_2, ['A', 'B'], ['C', 'D', 'E'])

dict_3 = {'One': pd.Series([1.0, 2.0, 3.0], index=['a', 'b', 'c']),
         'Two': pd.Series([1.0, 2.0, 3.0, 4.0], index=['a', 'b', 'c', 'd'])}
df_2 = pd.DataFrame(dict_3)
df_2

Unnamed: 0,One,Two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


**Editing & Retrieving Data**

In [27]:
df_1

Unnamed: 0,C,D,E
A,37,42,14
B,47,37,28


In [29]:
df_1[['C', 'E']]

Unnamed: 0,C,E
A,37,14
B,47,28


In [30]:
df_1.loc['A']

C    37
D    42
E    14
Name: A, dtype: int32

In [32]:
df_1.iloc[0]

C    37
D    42
E    14
Name: A, dtype: int32

In [33]:
df_1.loc['A', 'C']

37

In [34]:
df_1['Total'] = df_1['C'] + df_1['D'] + df_1['E']
df_1

Unnamed: 0,C,D,E,Total
A,37,42,14,93
B,47,37,28,112


In [35]:
df_1['Multi'] = df_1['C'] * df_1['E']
df_1

Unnamed: 0,C,D,E,Total,Multi
A,37,42,14,93,518
B,47,37,28,112,1316


**Filter data by using conditional selection**

In [42]:
df_1

Unnamed: 0,C,D,E
A,35,20,45
B,49,41,16


In [44]:
arr_2 = np.array([[35,20,45],[49,41,16]])
df_1 = pd.DataFrame(arr_2, ['A', 'B'], ['C', 'D', 'E'])

# print('Greater than 40\n', df_1 > 40)

bool_1 = df_1 > 40
df_1[bool_1]

df_2 = df_1[df_1['E'] > 40]
df_2

Unnamed: 0,C,D,E
A,35,20,45


## Load data using Pandas

In [74]:
df = pd.read_csv('csvs\\pokemon_data.csv')
df.head()

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,39,52,43,60,50,65,1,False


In [65]:
df.columns

df['Name']
### multiple columns
df[['Name', 'HP']]

### Read each row
df.iloc[1:4]

### Read a specific position
df.iloc[4, 1]

### conditional selection
df.loc[df['Type 1'] == 'Fire']

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
4,4,Charmander,Fire,,39,52,43,60,50,65,1,False
5,5,Charmeleon,Fire,,58,64,58,80,65,80,1,False
6,6,Charizard,Fire,Flying,78,84,78,109,85,100,1,False
7,6,CharizardMega Charizard X,Fire,Dragon,78,130,111,130,85,100,1,False
8,6,CharizardMega Charizard Y,Fire,Flying,78,104,78,159,115,100,1,False
42,37,Vulpix,Fire,,38,41,40,50,65,65,1,False
43,38,Ninetales,Fire,,73,76,75,81,100,100,1,False
63,58,Growlithe,Fire,,55,70,45,70,50,60,1,False
64,59,Arcanine,Fire,,90,110,80,100,80,95,1,False
83,77,Ponyta,Fire,,50,85,55,65,65,90,1,False


In [66]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 800 entries, 0 to 799
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   #           800 non-null    int64 
 1   Name        800 non-null    object
 2   Type 1      800 non-null    object
 3   Type 2      414 non-null    object
 4   HP          800 non-null    int64 
 5   Attack      800 non-null    int64 
 6   Defense     800 non-null    int64 
 7   Sp. Atk     800 non-null    int64 
 8   Sp. Def     800 non-null    int64 
 9   Speed       800 non-null    int64 
 10  Generation  800 non-null    int64 
 11  Legendary   800 non-null    bool  
dtypes: bool(1), int64(8), object(3)
memory usage: 69.7+ KB


In [67]:
df.describe()

Unnamed: 0,#,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation
count,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0
mean,362.81375,69.25875,79.00125,73.8425,72.82,71.9025,68.2775,3.32375
std,208.343798,25.534669,32.457366,31.183501,32.722294,27.828916,29.060474,1.66129
min,1.0,1.0,5.0,5.0,10.0,20.0,5.0,1.0
25%,184.75,50.0,55.0,50.0,49.75,50.0,45.0,2.0
50%,364.5,65.0,75.0,70.0,65.0,70.0,65.0,3.0
75%,539.25,80.0,100.0,90.0,95.0,90.0,90.0,5.0
max,721.0,255.0,190.0,230.0,194.0,230.0,180.0,6.0


In [68]:
df.head()

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,39,52,43,60,50,65,1,False


In [75]:
df.sort_values(['HP','Type 1'], ascending=[1,0], inplace=True)

## Making changes to the data

In [77]:
df['Total'] = df['HP'] + df['Attack'] + df['Defense'] + df['Sp. Atk'] + df['Sp. Def'] + df['Speed']
df

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total
316,292,Shedinja,Bug,Ghost,1,90,45,30,30,40,3,False,236
55,50,Diglett,Ground,,10,55,25,35,45,95,1,False,265
139,129,Magikarp,Water,,20,10,55,15,20,80,1,False,200
381,349,Feebas,Water,,20,15,20,10,55,80,3,False,200
487,439,Mime Jr.,Psychic,Fairy,20,25,45,70,90,60,4,False,310
...,...,...,...,...,...,...,...,...,...,...,...,...,...
655,594,Alomomola,Water,,165,75,80,40,45,65,5,False,470
351,321,Wailord,Water,,170,90,45,90,45,60,3,False,500
217,202,Wobbuffet,Psychic,,190,33,58,33,58,33,2,False,405
121,113,Chansey,Normal,,250,5,5,35,105,50,1,False,450


In [82]:
df.drop(columns=['Total'], inplace=True)

In [83]:
df.head()

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
316,292,Shedinja,Bug,Ghost,1,90,45,30,30,40,3,False
55,50,Diglett,Ground,,10,55,25,35,45,95,1,False
139,129,Magikarp,Water,,20,10,55,15,20,80,1,False
381,349,Feebas,Water,,20,15,20,10,55,80,3,False
487,439,Mime Jr.,Psychic,Fairy,20,25,45,70,90,60,4,False


In [84]:
df['Total'] = df.iloc[:, 4:10].sum(axis=1)
df

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total
316,292,Shedinja,Bug,Ghost,1,90,45,30,30,40,3,False,236
55,50,Diglett,Ground,,10,55,25,35,45,95,1,False,265
139,129,Magikarp,Water,,20,10,55,15,20,80,1,False,200
381,349,Feebas,Water,,20,15,20,10,55,80,3,False,200
487,439,Mime Jr.,Psychic,Fairy,20,25,45,70,90,60,4,False,310
...,...,...,...,...,...,...,...,...,...,...,...,...,...
655,594,Alomomola,Water,,165,75,80,40,45,65,5,False,470
351,321,Wailord,Water,,170,90,45,90,45,60,3,False,500
217,202,Wobbuffet,Psychic,,190,33,58,33,58,33,2,False,405
121,113,Chansey,Normal,,250,5,5,35,105,50,1,False,450


In [97]:
cols = list(df.columns)

df = df[cols[0:4] + [cols[-1]] + cols[4:12]]
df.head()

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
316,292,Shedinja,Bug,Ghost,236,1,90,45,30,30,40,3,False
55,50,Diglett,Ground,,265,10,55,25,35,45,95,1,False
139,129,Magikarp,Water,,200,20,10,55,15,20,80,1,False
381,349,Feebas,Water,,200,20,15,20,10,55,80,3,False
487,439,Mime Jr.,Psychic,Fairy,310,20,25,45,70,90,60,4,False


In [104]:
set(list(df['Type 1']))

{'Bug',
 'Dark',
 'Dragon',
 'Electric',
 'Fairy',
 'Fighting',
 'Fire',
 'Flying',
 'Ghost',
 'Grass',
 'Ground',
 'Ice',
 'Normal',
 'Poison',
 'Psychic',
 'Rock',
 'Steel',
 'Water'}

## Filtering data (conditional selection)

In [117]:
df = df.fillna(-1)

In [123]:
df = df.loc[(df['Type 1'] == 'Psychic') & (df['Type 2'] != -1) & (df['HP'] >= 70)]
df.reset_index(drop=True, inplace=True)

In [124]:
df

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,561,Sigilyph,Psychic,Flying,490,72,58,80,103,80,97,5,False
1,720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,130,70,6,True
2,720,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,130,80,6,True
3,251,Celebi,Psychic,Grass,600,100,100,100,100,100,100,2,False
4,494,Victini,Psychic,Fire,600,100,100,100,100,100,100,5,True
5,150,MewtwoMega Mewtwo X,Psychic,Fighting,780,106,190,100,154,100,130,1,True
6,249,Lugia,Psychic,Flying,680,106,90,130,90,154,110,2,True
