## Pandas - Data Frame

In [7]:
import numpy as np
from numpy.random import randn

import pandas as pd

In [8]:
# Seed NumPy's global random generator so the randn() calls below
# produce the same numbers on every run.
np.random.seed(101)

In [9]:
# Draw 10 samples from the standard normal distribution (returned as a 1-D array).
randn(10)

array([ 2.70684984,  0.62813271,  0.90796945,  0.50382575,  0.65111795,
       -0.31931804, -0.84807698,  0.60596535, -2.01816824,  0.74012206])

In [10]:
# Build a 5x4 DataFrame of standard-normal draws:
# rows are labelled A-E and columns are labelled W-Z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)
df

Unnamed: 0,W,X,Y,Z
A,0.528813,-0.589001,0.188695,-0.758872
B,-0.933237,0.955057,0.190794,1.978757
C,2.605967,0.683509,0.302665,1.693723
D,-1.706086,-1.159119,-0.134841,0.390528
E,0.166905,0.184502,0.807706,0.07296


In [11]:
# (number of rows, number of columns)
df.shape

(5, 4)

In [12]:
# Bracket selection with a single column label returns that column as a Series.
df['W']

A    0.528813
B   -0.933237
C    2.605967
D   -1.706086
E    0.166905
Name: W, dtype: float64

In [13]:
# Attribute-style access returns the same Series as df['W'], but it is not
# recommended: it only works when the column name is a valid Python identifier
# that does not clash with an existing DataFrame attribute or method.
df.W

A    0.528813
B   -0.933237
C    2.605967
D   -1.706086
E    0.166905
Name: W, dtype: float64

In [14]:
# Passing a list of column labels returns a DataFrame with just those columns.
df[['W', 'Z']]

Unnamed: 0,W,Z
A,0.528813,-0.758872
B,-0.933237,1.978757
C,2.605967,1.693723
D,-1.706086,0.390528
E,0.166905,0.07296


In [15]:
# Assigning to a label that does not exist yet adds a new column to df;
# here NEW is filled with the values of W.
df['NEW'] = df['W']
df

Unnamed: 0,W,X,Y,Z,NEW
A,0.528813,-0.589001,0.188695,-0.758872,0.528813
B,-0.933237,0.955057,0.190794,1.978757,-0.933237
C,2.605967,0.683509,0.302665,1.693723,2.605967
D,-1.706086,-1.159119,-0.134841,0.390528,-1.706086
E,0.166905,0.184502,0.807706,0.07296,0.166905


In [16]:
# Drop the NEW column. drop() returns a new DataFrame; because the result is
# not assigned back, df itself still contains NEW afterwards.
df.drop(columns='NEW')

Unnamed: 0,W,X,Y,Z
A,0.528813,-0.589001,0.188695,-0.758872
B,-0.933237,0.955057,0.190794,1.978757
C,2.605967,0.683509,0.302665,1.693723
D,-1.706086,-1.159119,-0.134841,0.390528
E,0.166905,0.184502,0.807706,0.07296


In [17]:
# Remove row 'A' from df by rebinding the name to the frame returned by drop().
# This gives the same result as df.drop('A', inplace=True), but avoids in-place
# mutation, and errors='ignore' makes the cell safe to re-run (the in-place
# version raises KeyError on a second run because 'A' is already gone).
df = df.drop(index='A', errors='ignore')
df

Unnamed: 0,W,X,Y,Z,NEW
B,-0.933237,0.955057,0.190794,1.978757,-0.933237
C,2.605967,0.683509,0.302665,1.693723,2.605967
D,-1.706086,-1.159119,-0.134841,0.390528,-1.706086
E,0.166905,0.184502,0.807706,0.07296,0.166905


In [18]:
# Select a single column by label (returns a Series).
df['Z']

B    1.978757
C    1.693723
D    0.390528
E    0.072960
Name: Z, dtype: float64

In [19]:
# .loc selects by label: this returns row 'D' as a Series indexed by column name.
df.loc['D']

W     -1.706086
X     -1.159119
Y     -0.134841
Z      0.390528
NEW   -1.706086
Name: D, dtype: float64

In [20]:
# .iloc selects by 0-based integer position rather than by label:
# position 1 is the second row of the current frame.
df.iloc[1]

W      2.605967
X      0.683509
Y      0.302665
Z      1.693723
NEW    2.605967
Name: C, dtype: float64

In [21]:
# .loc[row_label, column_label] returns the single value at that intersection.
df.loc['B', 'Y']

0.19079432237171562

In [22]:
# Lists of row labels and column labels select the sub-DataFrame
# made up of those rows and columns.
df.loc[['B', 'D'], ['Y', 'NEW']]

Unnamed: 0,Y,NEW
B,0.190794,-0.933237
D,-0.134841,-1.706086
