# Basics Of Pandas

### Why Pandas?
#### -Flexibility with Python
#### -Working with large and big data

In [4]:
# Importing pandas module

import pandas as pd

# Reading dataset
df_copy = pd.read_csv('pokedex.csv')
df = pd.read_csv('pokemon_data.csv')

### Reading Data in Pandas

In [19]:
## Reading Headers
df.columns

## Reading Columns
df['Name'] # This can be achieved using df.Name
df[['#', 'Type 1']]
df['Name'][0:5]   


## Reading Rows
df.iloc[1:4] # iloc means integer locations
df.iloc[[1,5,7]]
df.iloc[1:10:2]

# Iterating through each row
'''
for index, row in df.iterrows():
    print(index, row) # To specific columns:- row['column_name']. list is also supported
'''

## Reading specific rows and columns (R,C)
df.iloc[2,1]

# loc function is used for more textual based functionality
df.loc[df['Type 1'] == 'Fire']
df.loc[(df['Type 1'] == 'Fire') & (df['HP'] >= 50)]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
5,5,Charmeleon,Fire,,58,64,58,80,65,80,1,False
6,6,Charizard,Fire,Flying,78,84,78,109,85,100,1,False
7,6,CharizardMega Charizard X,Fire,Dragon,78,130,111,130,85,100,1,False
8,6,CharizardMega Charizard Y,Fire,Flying,78,104,78,159,115,100,1,False
43,38,Ninetales,Fire,,73,76,75,81,100,100,1,False
63,58,Growlithe,Fire,,55,70,45,70,50,60,1,False
64,59,Arcanine,Fire,,90,110,80,100,80,95,1,False
83,77,Ponyta,Fire,,50,85,55,65,65,90,1,False
84,78,Rapidash,Fire,,65,100,70,80,80,105,1,False
135,126,Magmar,Fire,,65,95,57,100,85,93,1,False


### Sorting/Describing Data

In [None]:
df.describe()

df.sort_values('Name', ascending=False)
df.sort_values(['Type 1', 'HP'], ascending=[1, 1])


### Making Changes To The Data

In [27]:
df['Total'] = df.iloc[:,4:10].sum(axis=1)

cols = list(df.columns)
# You can rearrange the columns
df

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False,318
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False,405
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False,525
3,3,VenusaurMega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False,625
4,4,Charmander,Fire,,39,52,43,60,50,65,1,False,309
...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,719,Diancie,Rock,Fairy,50,100,150,100,150,50,6,True,600
796,719,DiancieMega Diancie,Rock,Fairy,50,160,110,160,110,110,6,True,700
797,720,HoopaHoopa Confined,Psychic,Ghost,80,110,60,150,130,70,6,True,600
798,720,HoopaHoopa Unbound,Psychic,Dark,80,160,60,170,130,80,6,True,680


### Saving Data (Exporting into Desired Format)

In [None]:
df.to_csv('modified.csv', index=False, sep='\t')
df.to_excel('modified.xlsx')



### Filtering Data

In [50]:
# &, |, >, <, <=, >=

df.loc[(df['Type 1'] == 'Grass') & (df['Type 2'] == 'Poison')]
df.loc[(df['Type 1'] == 'Grass') | (df['Type 2'] == 'Poison')]

new_df = df.loc[(df['Type 1'] == 'Grass') & (df['Type 2'] == 'Poison') & (df['HP'] > 70)]

new_df.reset_index(drop=True, inplace=True)
# new_df.reset_index(drop=True)
new_df

df.loc[df['Name'].str.contains('Mega')]
df.loc[-df['Name'].str.contains('Mega')]

## Importing and using regex 
import re
df.loc[df['Type 1'].str.contains('Fire|Grass', regex=True)]
df.loc[df['Type 1'].str.contains('fire|grass', flags=re.I, regex=True)]

df.loc[df['Name'].str.contains('^pi[a-z]*', flags=re.I, regex=True)]


Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total
20,16,Pidgey,Normal,Flying,40,45,40,35,35,56,1,False,251
21,17,Pidgeotto,Normal,Flying,63,60,55,50,50,71,1,False,349
22,18,Pidgeot,Normal,Flying,83,80,75,70,70,101,1,False,479
23,18,PidgeotMega Pidgeot,Normal,Flying,83,80,80,135,80,121,1,False,579
30,25,Pikachu,Electric,,35,55,40,50,50,90,1,False,320
136,127,Pinsir,Bug,,65,125,100,55,70,85,1,False,500
137,127,PinsirMega Pinsir,Bug,Flying,65,155,120,65,90,105,1,False,600
186,172,Pichu,Electric,,20,40,15,35,35,60,2,False,205
219,204,Pineco,Bug,,50,65,90,35,35,15,2,False,290
239,221,Piloswine,Ice,Ground,100,100,80,60,60,50,2,False,450


### Conditional Changes

In [53]:
df.loc[df['Type 1'] == 'Fire', 'Type 1'] = 'Flamer'
df.loc[df['Type 1'] == 'Fire', 'Legendary'] = True

# modifying multiple columns
df.loc[df['Total'] > 500, ['Generation', 'Legendary']] = 'Test value' # or modify individual column :- ['test value1', 'testvalue2']


Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False,318
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False,405
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False,525
3,3,VenusaurMega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False,625
4,4,Charmander,Flamer,,39,52,43,60,50,65,1,False,309
...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,719,Diancie,Rock,Fairy,50,100,150,100,150,50,6,True,600
796,719,DiancieMega Diancie,Rock,Fairy,50,160,110,160,110,110,6,True,700
797,720,HoopaHoopa Confined,Psychic,Ghost,80,110,60,150,130,70,6,True,600
798,720,HoopaHoopa Unbound,Psychic,Dark,80,160,60,170,130,80,6,True,680


### Aggregate Statistics (Groupby)

In [63]:
df.groupby(['Type 1']).sum(numeric_only=True)
df.groupby(['Type 1']).mean(numeric_only=True).sort_values('Attack', ascending=False)

df.groupby(['Type 1', 'Type 2']).count()


Unnamed: 0_level_0,Unnamed: 1_level_0,#,Name,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total
Type 1,Type 2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Bug,Electric,2,2,2,2,2,2,2,2,2,2,2
Bug,Fighting,2,2,2,2,2,2,2,2,2,2,2
Bug,Fire,2,2,2,2,2,2,2,2,2,2,2
Bug,Flying,14,14,14,14,14,14,14,14,14,14,14
Bug,Ghost,1,1,1,1,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...
Water,Ice,3,3,3,3,3,3,3,3,3,3,3
Water,Poison,3,3,3,3,3,3,3,3,3,3,3
Water,Psychic,5,5,5,5,5,5,5,5,5,5,5
Water,Rock,4,4,4,4,4,4,4,4,4,4,4


### Working with large amounts of data

In [None]:
# loading dataframe in chunks
new_df = pd.DataFrame(columns=df.columns)
for df in pd.read_csv('pokemon_data.csv',chunksize=5):
    results = df.groupby(['Type 1']).count()

    new_df = pd.concat([new_df, results])
new_df

# More Information on Pandas

In [65]:
import pandas as pd

### Working with Series in Pandas

In [71]:
# Creating series
    # series handles one dimensional array
data = pd.Series([2,3,4,5], index=['a', 'b', 'c', 'd'])
data.index


# indexing is done in an explicit manner
data['a':'c'] # last element is inclusive


# Creating series from dictionary
grades_dict = {'A':4, 'B':3.5, 'C':3,'D':2.5, 'E':2}
grades = pd.Series(grades_dict)
marks_dict = {'A':40, 'B':75, 'C':93,'D':88, 'E':56}
marks = pd.Series(marks_dict)

grades
marks


A    40
B    75
C    93
D    88
E    56
dtype: int64

### Working with DataFrame

In [78]:
df = pd.DataFrame({'Marks':marks, 'Grades':grades})
df

df.T
df.values
df[['Marks', 'Grades']][0:2]
df.values[1,1]

3.5

### Indexing in Pandas
#####   data[1] # explicit index, use loc instead
#####   data[1:3] # implicit index, use iloc instead

In [None]:
# Changing columns

df.rename(columns={'Old1':'new1'}, implace=True)