# DataFrames
DataFrames are the workhorse of pandas and are directly inspired by the R programming language. We can think of a DataFrame as a bunch of Series objects put together to share the same index. 

In [3]:
import numpy as np
import pandas as pd
from numpy.random import randn as rd

In [4]:
# Seed NumPy's global random number generator so the random values drawn
# later in the notebook are the same on every fresh run.
np.random.seed(100)

In [12]:
# Build the example DataFrame used by the following cells: a 5x4 block of
# values drawn with randn (imported as `rd` in the notebook's import cell),
# with row labels A-E and column labels W-Z.
df = pd.DataFrame(rd(5, 4), index=list('ABCDE'), columns=list('WXYZ'))

In [13]:
# Show the whole DataFrame.
df

Unnamed: 0,W,X,Y,Z
A,-1.704651,-1.136261,-2.973315,0.033317
B,-0.248889,-0.450176,0.132428,0.022214
C,0.317368,-0.752414,-1.296392,0.095139
D,-0.423715,-1.185984,-0.365462,-1.271023
E,1.586171,0.693391,-1.958081,-0.134801


#### Selection and Indexing
How to look up data by index and display it from a DataFrame.

In [15]:
# Single brackets with one column label select that column and return it
# as a Series (note the Name/dtype footer in the output).
df['W']

A   -1.704651
B   -0.248889
C    0.317368
D   -0.423715
E    1.586171
Name: W, dtype: float64

In [19]:
# To select several columns, pass a list of labels -- hence the double
# square brackets. The result is a DataFrame with only those columns.
df[['W', 'X']]

Unnamed: 0,W,X
A,-1.704651,-1.136261
B,-0.248889,-0.450176
C,0.317368,-0.752414
D,-0.423715,-1.185984
E,1.586171,0.693391


In [21]:
# Check the Python type of df.
type(df)

pandas.core.frame.DataFrame

#### Create a new column:

In [31]:
# Assigning to a column label that does not exist yet creates a new column.
# Here 'New' is the element-wise sum of columns W and X.
df['New'] = df['W'] + df['X']

df

Unnamed: 0,W,X,Y,Z,New
A,-1.704651,-1.136261,-2.973315,0.033317,-2.840912
B,-0.248889,-0.450176,0.132428,0.022214,-0.699065
C,0.317368,-0.752414,-1.296392,0.095139,-0.435046
D,-0.423715,-1.185984,-0.365462,-1.271023,-1.609699
E,1.586171,0.693391,-1.958081,-0.134801,2.279562


#### Removing Columns


In [32]:
# drop() returns a new DataFrame and leaves df untouched unless inplace=True
# is passed. The result of this call is discarded on purpose, so the 'New'
# column is still present when df is displayed.
df.drop('New', axis=1)
df

Unnamed: 0,W,X,Y,Z,New
A,-1.704651,-1.136261,-2.973315,0.033317,-2.840912
B,-0.248889,-0.450176,0.132428,0.022214,-0.699065
C,0.317368,-0.752414,-1.296392,0.095139,-0.435046
D,-0.423715,-1.185984,-0.365462,-1.271023,-1.609699
E,1.586171,0.693391,-1.958081,-0.134801,2.279562


In [33]:
# To actually remove the column, keep the DataFrame that drop() returns by
# binding it back to df. Reassignment is used rather than inplace=True:
# inplace brings no speed benefit and prevents method chaining.
df = df.drop('New', axis=1)
df

Unnamed: 0,W,X,Y,Z
A,-1.704651,-1.136261,-2.973315,0.033317
B,-0.248889,-0.450176,0.132428,0.022214
C,0.317368,-0.752414,-1.296392,0.095139
D,-0.423715,-1.185984,-0.365462,-1.271023
E,1.586171,0.693391,-1.958081,-0.134801


In [39]:
# axis=0 drops by row label. The result is only displayed, not assigned,
# so row 'E' remains in df.
df.drop('E', axis=0)

Unnamed: 0,W,X,Y,Z
A,-1.704651,-1.136261,-2.973315,0.033317
B,-0.248889,-0.450176,0.132428,0.022214
C,0.317368,-0.752414,-1.296392,0.095139
D,-0.423715,-1.185984,-0.365462,-1.271023


#### Selecting Rows

In [41]:
# .loc selects by label: this returns row 'A' as a Series indexed by the
# column names.
df.loc['A']

W   -1.704651
X   -1.136261
Y   -2.973315
Z    0.033317
Name: A, dtype: float64

In [43]:
# .iloc selects by integer position: position 1 is the second row (label 'B').
df.iloc[1]

W   -0.248889
X   -0.450176
Y    0.132428
Z    0.022214
Name: B, dtype: float64

In [45]:
# .loc also accepts a list of row labels and a list of column labels,
# returning the sub-table at their intersection.
df.loc[['A', 'B'], ['W', 'Y']]

Unnamed: 0,W,Y
A,-1.704651,-2.973315
B,-0.248889,0.132428


#### Conditional Selection
An important feature of pandas is conditional selection using bracket notation, very similar to numpy:


In [50]:
# Show the current contents of df before filtering it.
df

Unnamed: 0,W,X,Y,Z
A,-1.704651,-1.136261,-2.973315,0.033317
B,-0.248889,-0.450176,0.132428,0.022214
C,0.317368,-0.752414,-1.296392,0.095139
D,-0.423715,-1.185984,-0.365462,-1.271023
E,1.586171,0.693391,-1.958081,-0.134801


In [52]:
# Select column Y for the rows where W is positive. A single .loc call with a
# boolean row mask and a column label replaces chained indexing
# (df[mask]['Y']), which builds an intermediate DataFrame first and is the
# pattern that triggers SettingWithCopyWarning when used for assignment.
df.loc[df['W'] > 0, 'Y']

C   -1.296392
E   -1.958081
Name: Y, dtype: float64

In [53]:
# Combine two boolean masks element-wise with `&`; each comparison is named
# so the final selection reads as a sentence. No row has both W > 0 and
# Y > 1, so the result is an empty DataFrame.
w_is_positive = df['W'] > 0
y_exceeds_one = df['Y'] > 1
df[w_is_positive & y_exceeds_one]

Unnamed: 0,W,X,Y,Z


#### More Index Details 
Let's discuss some more features of indexing, including resetting the index or setting it to something else. We'll also talk about index hierarchy!

In [55]:
# Show df again before working with its index.
df

Unnamed: 0,W,X,Y,Z
A,-1.704651,-1.136261,-2.973315,0.033317
B,-0.248889,-0.450176,0.132428,0.022214
C,0.317368,-0.752414,-1.296392,0.095139
D,-0.423715,-1.185984,-0.365462,-1.271023
E,1.586171,0.693391,-1.958081,-0.134801


In [56]:
# reset_index() returns a new DataFrame in which the old index becomes a
# column named 'index' and the rows get a default integer index.
# The result is not assigned, so df itself is unchanged.
df.reset_index()

Unnamed: 0,index,W,X,Y,Z
0,A,-1.704651,-1.136261,-2.973315,0.033317
1,B,-0.248889,-0.450176,0.132428,0.022214
2,C,0.317368,-0.752414,-1.296392,0.095139
3,D,-0.423715,-1.185984,-0.365462,-1.271023
4,E,1.586171,0.693391,-1.958081,-0.134801


In [59]:
# Add a 'States' column holding one label per row, in row order.
newIndex = ['ID', 'MY', 'CH', 'TH', 'MM']
df['States'] = newIndex
df

Unnamed: 0,W,X,Y,Z,States
A,-1.704651,-1.136261,-2.973315,0.033317,ID
B,-0.248889,-0.450176,0.132428,0.022214,MY
C,0.317368,-0.752414,-1.296392,0.095139,CH
D,-0.423715,-1.185984,-0.365462,-1.271023,TH
E,1.586171,0.693391,-1.958081,-0.134801,MM


In [61]:
# set_index() returns a new DataFrame that uses the 'States' column as its
# index. The result is only displayed here, not stored.
df.set_index('States')

Unnamed: 0_level_0,W,X,Y,Z
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ID,-1.704651,-1.136261,-2.973315,0.033317
MY,-0.248889,-0.450176,0.132428,0.022214
CH,0.317368,-0.752414,-1.296392,0.095139
TH,-0.423715,-1.185984,-0.365462,-1.271023
MM,1.586171,0.693391,-1.958081,-0.134801


In [62]:
# df still has its original A-E index and the 'States' column, because the
# set_index() result in the previous cell was not assigned.
df

Unnamed: 0,W,X,Y,Z,States
A,-1.704651,-1.136261,-2.973315,0.033317,ID
B,-0.248889,-0.450176,0.132428,0.022214,MY
C,0.317368,-0.752414,-1.296392,0.095139,CH
D,-0.423715,-1.185984,-0.365462,-1.271023,TH
E,1.586171,0.693391,-1.958081,-0.134801,MM


In [None]:
# Make 'States' the index permanently by binding the DataFrame returned by
# set_index() back to df. Reassignment is used rather than inplace=True:
# inplace brings no speed benefit and prevents method chaining.
df = df.set_index('States')

In [66]:
# df is now indexed by 'States'.
df

Unnamed: 0_level_0,W,X,Y,Z
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ID,-1.704651,-1.136261,-2.973315,0.033317
MY,-0.248889,-0.450176,0.132428,0.022214
CH,0.317368,-0.752414,-1.296392,0.095139
TH,-0.423715,-1.185984,-0.365462,-1.271023
MM,1.586171,0.693391,-1.958081,-0.134801


#### Multi-Index and Index Hierarchy
Let us go over how to work with a Multi-Index. First, we'll create a quick example of what a Multi-Indexed DataFrame looks like.