# Pandas
## Lecture Notes

Import necessary packages:

In [129]:
import pandas as pd
import numpy as np

## 1. Data Types
### a) Series

In [133]:
# The list mixes a string ('1') with integers, so pandas cannot choose a
# numeric dtype and stores the values as generic Python objects (dtype: object).
b = pd.Series(['1', 3, 5])

In [134]:
b

0    1
1    3
2    5
dtype: object

In [135]:
# A Series with explicit string labels instead of the default 0..n-1 index;
# every value is an int, so pandas picks an integer dtype.
s = pd.Series(data=[1, 3, 5], index=list('abc'))

In [136]:
s

a    1
b    3
c    5
dtype: int64

### b) DataFrame

In [137]:
# Rows are passed as a list of lists. No labels are supplied, so both the row
# index and the column names default to 0, 1, 2.
df = pd.DataFrame(
    data=[
        [1, 3, 'r'],
        [5, 3, 'g'],
        [3, 4, 'b'],
    ],
)

In [138]:
df

Unnamed: 0,0,1,2
0,1,3,r
1,5,3,g
2,3,4,b


In [None]:
# Same rows as before, now with named columns; the row index is still the
# default 0, 1, 2.
df = pd.DataFrame(
    data=[
        [1, 3, 'r'],
        [5, 3, 'g'],
        [3, 4, 'b'],
    ],
    columns=list('ABC'),
)

In [None]:
df

In [None]:
# Same rows again, labelled on both axes: columns A..C and row labels a..c.
df = pd.DataFrame(
    data=[
        [1, 3, 'r'],
        [5, 3, 'g'],
        [3, 4, 'b'],
    ],
    index=list('abc'),
    columns=list('ABC'),
)

In [None]:
df

In [None]:
# Dict form: each key is a column name and each list holds that *column's*
# values (not a row). Written this way the result is the same A/B/C frame as
# the list-of-rows constructor with columns=['A', 'B', 'C'], and every column
# keeps a single, homogeneous dtype.
df = pd.DataFrame({'A': [1, 5, 3], 'B': [3, 3, 4], 'C': ['r', 'g', 'b']})

In [None]:
df

## 2. IO tools

In [None]:
pokemon = pd.read_csv(r'Pokemon.csv')

In [None]:
pokemon

In [None]:
# Use the 'Name' column as the row index so rows can be selected by name with
# .loc. sep=',' and decimal='.' are pandas' defaults; they are spelled out here
# only to show where other CSV dialects would be configured.
pokemon = pd.read_csv(r'Pokemon.csv', index_col='Name', sep=',', decimal='.')

In [None]:
pokemon

In [None]:
# header=3 takes the file's fourth line (0-based row 3) as the column names and
# discards the lines before it.
# NOTE(review): this rebinds `pokemon` without index_col='Name', yet later cells
# select rows by name (e.g. .loc['Bulbasaur']) and columns by their original
# headers -- presumably the index_col='Name' cell is meant to be re-run before
# continuing; confirm the intended execution order.
pokemon = pd.read_csv(r'Pokemon.csv',header=3)

In [None]:
pokemon

## 3. Overview of Data

In [None]:
pokemon.describe()

In [None]:
# Show the current working directory: the relative paths passed to read_csv /
# to_csv in these notes are resolved against it.
import os
os.getcwd()

In [None]:
pokemon.info()

In [None]:
# Bind the summary table before displaying it; without the assignment a
# top-to-bottom run of the notebook stops on this cell with a NameError.
pokemon_describe = pokemon.describe()
pokemon_describe

In [None]:
pokemon_describe = pokemon.describe()

In [None]:
type(pokemon_describe)

In [None]:
pokemon_describe.to_csv(r'pokemon_describe.csv')

In [None]:
# Write to the same path again (replacing the file written by the previous
# cell), this time with ';' between fields and ',' as the decimal mark.
pokemon_describe.to_csv(r'pokemon_describe.csv', sep=';', decimal=',')

## 4. Indexing/selecting data

### a) Indexing with loc

In [None]:
pokemon.loc['Bulbasaur']

In [None]:
pokemon.loc[['Bulbasaur', 'Ivysaur']]

In [None]:
# .loc is label-based: this asks for the row whose index *label* is 1, not for
# the second row.
# NOTE(review): that label only exists with the default integer index; with
# index_col='Name' this lookup raises KeyError -- confirm which `pokemon` this
# cell is meant to run against.
pokemon.loc[1]

In [None]:
pokemon.loc[:,'Type 1']

In [None]:
pokemon.loc[:,['Type 1', 'Type 2']]

### b) Indexing with iloc

In [None]:
pokemon.iloc[2]

In [None]:
pokemon.iloc[[1, 3]]

In [None]:
pokemon.iloc[1:5]

In [None]:
pokemon.iloc[:,[0, 2]]

In [None]:
pokemon.iloc[0:-2]

In [None]:
# Positional slice of the first 798 rows (the end position is exclusive).
# Presumably meant to match iloc[0:-2] from the previous cell, which would
# require the table to have exactly 800 rows -- TODO confirm.
pokemon.iloc[0:798]

### c) Indexing with []

In [None]:
pokemon['Generation']

In [None]:
pokemon.loc[:,['Generation', 'Attack']]

In [None]:
pokemon[['Generation', 'Attack']]

### d) Direct access

In [None]:
# Attribute access is shorthand for pokemon['Generation']. It only works for
# column names that are valid Python identifiers, so a column such as 'Type 1'
# still needs the bracket form.
pokemon.Generation

#### Advanced indexing

In [None]:
pokemon.loc[['Bulbasaur', 'Volcanion'], ['Attack', 'Defense']]

In [None]:
pokemon.iloc[[0,2], 1:4]

In [None]:
pokemon.loc[:,['Type 1', 'Type 2']].iloc[0:2]

In [None]:
pokemon.loc[0]

In [None]:
pokemon.iloc[0]

## 4. Merge, join, concatenate

In [None]:
# Two 6x4 frames of standard-normal noise whose labels only partly overlap:
# rows c..f and columns B..D exist in both, which is what the concat examples
# below rely on.
df1 = pd.DataFrame(
    data=np.random.standard_normal(size=(6, 4)),
    index=['a', 'b', 'c', 'd', 'e', 'f'],
    columns=['A', 'B', 'C', 'D'],
)
df2 = pd.DataFrame(
    data=np.random.standard_normal(size=(6, 4)),
    index=['c', 'd', 'e', 'f', 'g', 'h'],
    columns=['B', 'C', 'D', 'E'],
)

In [None]:
df1

In [None]:
df2

### a) Concatenate

In [None]:
pd.concat([df1, df2])

In [None]:
# axis=1 places the frames side by side, aligning rows on their index labels.
# The default outer join keeps every label (a..h); cells for labels missing
# from one frame are filled with NaN.
pd.concat([df1, df2], axis = 1)

In [None]:
# join='inner' keeps only the columns both frames share (B, C, D). The rows are
# still stacked on top of each other, so the labels c..f appear twice.
pd.concat([df1, df2], join='inner')

### b) Merge

In [None]:
# Two small tables sharing a 'key' column: K1..K3 occur in both, K0 only in
# `left` and K4 only in `right`.
left = pd.DataFrame({
    'key': [f'K{i}' for i in range(4)],
    'A': [f'A{i}' for i in range(4)],
    'B': [f'B{i}' for i in range(4)],
})
right = pd.DataFrame({
    'key': [f'K{i}' for i in range(1, 5)],
    'C': [f'C{i}' for i in range(4)],
    'D': [f'D{i}' for i in range(4)],
})

In [None]:
left

In [None]:
right

In [None]:
# With no further arguments merge performs an inner join on the columns the two
# frames have in common -- here only 'key' -- so just K1..K3 survive.
pd.merge(left, right)

In [None]:
pd.merge(left, right, how='outer')

In [None]:
pd.merge(left, right, how='left')

In [None]:
# Variant of `right` that has a column 'A' instead of 'C', so it shares two
# column names ('key' and 'A') with `left`.
right2 = pd.DataFrame({
    'key': [f'K{i}' for i in range(1, 5)],
    'A': [f'A{i}' for i in range(4)],
    'D': [f'D{i}' for i in range(4)],
})
right2

In [None]:
# Without `on`, merge joins on *all* shared columns -- here both 'key' and 'A'.
# No row of `left` matches a row of `right2` on both values at once
# (e.g. K1/A1 vs. K1/A0), so the inner join comes back empty.
pd.merge(left, right2)

In [None]:
# Restrict the join to 'key'. The other shared column 'A' is then kept from
# both sides and disambiguated with the default suffixes: 'A_x' (from left)
# and 'A_y' (from right2).
pd.merge(left, right2, on='key')

In [None]:
# Left-hand table whose join column is called 'key2' rather than 'key', used to
# demonstrate left_on / right_on.
# Only `left2` is bound here: a chained `left2 = left = ...` would also replace
# the `left` frame used by the earlier merge cells, silently changing their
# results when they are re-run.
left2 = pd.DataFrame({'key2': ['K0', 'K1', 'K2', 'K3'], 'C': ['C0', 'C1', 'C2', 'C3'], 'B': ['B0', 'B1', 'B2', 'B3']})
left2

In [None]:
pd.merge(left2, right2, left_on = 'key2', right_on = 'key')

### c) Join

In [None]:
# Frames for the join examples. Here the keys live in the row index rather
# than in a column: `left2` is labelled K0..K2, the other two K0, K2, K3.
# `other_j` shares the column name 'D' with `right2`.
left2 = pd.DataFrame(
    data={'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']},
    index=['K0', 'K1', 'K2'],
)
right2 = pd.DataFrame(
    data={'C': ['C0', 'C2', 'C3'], 'D': ['D0', 'D2', 'D3']},
    index=['K0', 'K2', 'K3'],
)
other_j = pd.DataFrame(
    data={'A': ['A0', 'A2', 'A3'], 'D': ['D0', 'D2', 'D3']},
    index=['K0', 'K2', 'K3'],
)

In [None]:
left2

In [None]:
right2

In [None]:
other_j

In [None]:
left2.join(right2)

In [None]:
right2.join(left2)

In [None]:
# Both frames have a column named 'D' and no suffixes are given, so pandas
# raises ValueError (overlapping columns) instead of guessing; the next cell
# shows the fix.
right2.join(other_j)

In [None]:
# lsuffix / rsuffix are appended to the overlapping column name only, giving
# 'Dl' (from right2) and 'Dr' (from other_j); the columns 'C' and 'A' keep
# their names.
right2.join(other_j, lsuffix='l', rsuffix='r')

## 5. Categorical Data

In [None]:
pokemon['Type 1']

In [None]:
# Convert the column in place to pandas' categorical dtype: the distinct values
# become the categories and each row stores an integer code pointing at one.
pokemon['Type 1'] = pokemon['Type 1'].astype('category')

In [None]:
pokemon['Type 1']

In [None]:
pokemon['Type 1'].cat.codes

In [None]:
# Keep the integer category codes as a regular column next to the labels.
# 'Type 1.cat.codes' is just a column name here (a plain string), so it is only
# reachable with brackets, not via attribute access.
pokemon['Type 1.cat.codes'] = pokemon['Type 1'].cat.codes

In [None]:
pokemon

## 6. Group By

In [None]:
grouped_pokemon = pokemon.groupby('Type 1')

In [None]:
grouped_pokemon

In [None]:
grouped_pokemon.Attack.mean()

In [None]:
grouped_pokemon = pokemon.groupby('Type 1', sort=False)

In [None]:
grouped_pokemon.Attack.mean()

In [None]:
grouped_pokemon.Attack.mean().sort_values(ascending=False)

In [None]:
grouped_pokemon['Type 1'].groups

In [None]:
grouped_pokemon.get_group('Grass')

In [None]:
# Several aggregations in one call, keyed by the column they apply to:
#   '#'         -> number of rows in the group
#   'Attack'    -> mean and median attack
#   'Legendary' -> share of legendary entries in percent, rounded to 2 places
#                  (assumes 'Legendary' is a boolean column -- confirm)
# Series.mean() replaces the Python-level sum(x) / len(x): it is vectorized and
# yields NaN for an empty group instead of raising ZeroDivisionError.
grouped_pokemon.agg({
    '#': 'size',
    'Attack': ['mean', 'median'],
    'Legendary': lambda x: round(x.mean() * 100, 2),
})