# DataFrames
We can think of a DataFrame as a bunch of Series objects put together to share the same index.

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Seed NumPy's global random generator so the "random" numbers drawn in the
# cells below are the same on every run of the notebook.
SEED = 101
np.random.seed(SEED)

In [3]:
# CREATING DATAFRAMES
# Build a 5x4 frame of standard-normal draws: one row per label A-E,
# one column per label W-Z.
row_labels = ['A', 'B', 'C', 'D', 'E']
column_labels = ['W', 'X', 'Y', 'Z']
values = np.random.randn(len(row_labels), len(column_labels))
df = pd.DataFrame(data=values, index=row_labels, columns=column_labels)
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [4]:
# INDEXING and SELECTION
# Bracket indexing with a single column label returns that column as a Series
# (indexed by the frame's row labels).
df['W'] # select column 'W' by label

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [5]:
# Attribute access: same result as df['W']. This shortcut only works when the
# column name is a valid Python identifier and does not clash with an existing
# DataFrame attribute or method, so bracket indexing is the safer general form.
df.W

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [6]:
# Passing a list of column labels returns a DataFrame with just those columns.
df[['W','Z']] # multiple columns: note the inner list

Unnamed: 0,W,Z
A,2.70685,0.503826
B,0.651118,0.605965
C,-2.018168,-0.589001
D,0.188695,0.955057
E,0.190794,0.683509


In [7]:
# CREATING A NEW COLUMN
# Assigning to df['new'] creates the column if it does not exist yet;
# if it already exists, its values are overwritten.
# The sum is element-wise, aligned on the row labels.
df['new'] = df['W'] + df['Y']
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [8]:
# DELETING A ROW
# drop() returns a NEW DataFrame without row 'A' (axis=0, i.e. rows, is the
# default). The result is not assigned here, so df itself is left unchanged,
# as the output below shows. To keep the change, reassign the result or pass
# inplace=True.
df.drop('A',axis=0) # result is discarded; df still contains row 'A'
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [9]:
# DELETING A COLUMN
# axis=1 targets columns; inplace=True modifies df itself instead of returning
# a new frame, which is why this cell displays no output.
# NOTE: this cell is not re-runnable on its own - once 'new' has been removed,
# running it again raises a KeyError until the column is re-created.
df.drop('new',axis=1,inplace=True)

In [10]:
# SELECTING ROWS
# .loc is label-based: this returns row 'A' as a Series whose index is the
# frame's column labels.
df.loc['A']

W    2.706850
X    0.628133
Y    0.907969
Z    0.503826
Name: A, dtype: float64

In [11]:
# .iloc is integer-position based: 2 means the third row (here the one
# labelled 'C'), regardless of what the index labels are.
df.iloc[2]   # select by position, not by label

W   -2.018168
X    0.740122
Y    0.528813
Z   -0.589001
Name: C, dtype: float64

In [12]:
# .loc[row_label, column_label] with two single labels returns one scalar value.
df.loc['B','Y'] # row label, column label

-0.8480769834036315

In [13]:
# Lists of row labels and column labels select a sub-DataFrame:
# rows 'A' and 'C' restricted to columns 'W' and 'Y'.
df.loc[['A','C'],['W','Y']]

Unnamed: 0,W,Y
A,2.70685,0.907969
C,-2.018168,0.528813
