# DataFrames

In [1]:
import numpy as np
import pandas as pd

In [2]:
# randn draws samples from the standard normal distribution.
from numpy.random import randn
# Fix NumPy's global seed so the random values below are reproducible on a fresh run.
np.random.seed(110)

In [3]:
# Build a 5x4 frame of standard-normal draws with row labels A-E and column labels W-Z.
df = pd.DataFrame(data=randn(5, 4), index=list("ABCDE"), columns=list("WXYZ"))

In [4]:
# Display the whole frame: 5 rows (A-E) by 4 columns (W-Z).
df

Unnamed: 0,W,X,Y,Z
A,0.328597,-0.796199,1.403124,-1.547793
B,1.16673,1.147723,0.130109,0.431422
C,-0.86832,0.027159,-1.518739,-0.815561
D,-0.223637,-2.631096,0.046867,-0.376922
E,-0.16284,-0.744918,1.0693,-0.734714


# Selection and Indexing
Let's look at the various ways to select data from a DataFrame.

In [5]:
# Bracket notation with a single column label returns that column as a Series.
df["W"]

A    0.328597
B    1.166730
C   -0.868320
D   -0.223637
E   -0.162840
Name: W, dtype: float64

In [6]:
# Passing a list of column labels returns a DataFrame with just those columns.
df[["W", "Y"]]

Unnamed: 0,W,Y
A,0.328597,1.403124
B,1.16673,0.130109
C,-0.86832,-1.518739
D,-0.223637,0.046867
E,-0.16284,1.0693


In [7]:
# Attribute access returns the same column as df["W"]. It only works when the
# label is a valid Python identifier that does not clash with an existing
# DataFrame attribute or method, so bracket notation is the safer default.
df.W

A    0.328597
B    1.166730
C   -0.868320
D   -0.223637
E   -0.162840
Name: W, dtype: float64

# DataFrame columns are just Series

In [8]:
# Confirm that a single selected column is a pandas Series.
type(df["W"])

pandas.core.series.Series

In [9]:
# Create a new column as the element-wise sum of two existing columns.
df["Kotaro"] = df["W"].add(df["Y"])

In [10]:
# The frame now carries the new "Kotaro" column.
df

Unnamed: 0,W,X,Y,Z,Kotaro
A,0.328597,-0.796199,1.403124,-1.547793,1.731721
B,1.16673,1.147723,0.130109,0.431422,1.29684
C,-0.86832,0.027159,-1.518739,-0.815561,-2.387059
D,-0.223637,-2.631096,0.046867,-0.376922,-0.176771
E,-0.16284,-0.744918,1.0693,-0.734714,0.90646


In [11]:
# drop() returns a new frame without the column; df itself is left untouched.
df.drop(columns="Kotaro")

Unnamed: 0,W,X,Y,Z
A,0.328597,-0.796199,1.403124,-1.547793
B,1.16673,1.147723,0.130109,0.431422
C,-0.86832,0.027159,-1.518739,-0.815561
D,-0.223637,-2.631096,0.046867,-0.376922
E,-0.16284,-0.744918,1.0693,-0.734714


In [12]:
# "Kotaro" is still present: the previous drop() was not applied in place.
df

Unnamed: 0,W,X,Y,Z,Kotaro
A,0.328597,-0.796199,1.403124,-1.547793,1.731721
B,1.16673,1.147723,0.130109,0.431422,1.29684
C,-0.86832,0.027159,-1.518739,-0.815561,-2.387059
D,-0.223637,-2.631096,0.046867,-0.376922,-0.176771
E,-0.16284,-0.744918,1.0693,-0.734714,0.90646


In [13]:
# Pass inplace=True to remove the column from df itself instead of returning a new frame.
df.drop(columns="Kotaro", inplace=True)

In [14]:
# "Kotaro" is gone now that the drop was applied in place.
df

Unnamed: 0,W,X,Y,Z
A,0.328597,-0.796199,1.403124,-1.547793
B,1.16673,1.147723,0.130109,0.431422
C,-0.86832,0.027159,-1.518739,-0.815561
D,-0.223637,-2.631096,0.046867,-0.376922
E,-0.16284,-0.744918,1.0693,-0.734714


# Can also drop rows this way:

In [15]:
# Drop a row by its index label; this returns a new frame and does not modify df.
df.drop(index="D")

Unnamed: 0,W,X,Y,Z
A,0.328597,-0.796199,1.403124,-1.547793
B,1.16673,1.147723,0.130109,0.431422
C,-0.86832,0.027159,-1.518739,-0.815561
E,-0.16284,-0.744918,1.0693,-0.734714


# **Selecting Rows**

In [16]:
# .loc selects by index label; a single label returns that row as a Series.
df.loc["A"]

W    0.328597
X   -0.796199
Y    1.403124
Z   -1.547793
Name: A, dtype: float64

In [17]:
# .iloc selects by integer position; position 2 is the third row (label "C").
df.iloc[2]

W   -0.868320
X    0.027159
Y   -1.518739
Z   -0.815561
Name: C, dtype: float64

In [18]:
# A row label plus a column label returns the single value at that position.
df.loc["A", "Y"]

1.4031238284318888

In [19]:
# Lists of row labels and column labels return the matching sub-frame.
df.loc[["A", "C"], ["W", "Y"]]

Unnamed: 0,W,Y
A,0.328597,1.403124
C,-0.86832,-1.518739


# Conditional Selection

In [20]:
# Show the frame again as a reference for the conditional selections below.
df

Unnamed: 0,W,X,Y,Z
A,0.328597,-0.796199,1.403124,-1.547793
B,1.16673,1.147723,0.130109,0.431422
C,-0.86832,0.027159,-1.518739,-0.815561
D,-0.223637,-2.631096,0.046867,-0.376922
E,-0.16284,-0.744918,1.0693,-0.734714


In [21]:
# Comparing the frame with a scalar gives a same-shaped frame of booleans.
df > 0

Unnamed: 0,W,X,Y,Z
A,True,False,True,False
B,True,True,True,True
C,False,True,False,False
D,False,False,True,False
E,False,False,True,False


In [22]:
# Indexing with a boolean frame keeps values where the mask is True and shows NaN elsewhere.
df[df > 0]

Unnamed: 0,W,X,Y,Z
A,0.328597,,1.403124,
B,1.16673,1.147723,0.130109,0.431422
C,,0.027159,,
D,,,0.046867,
E,,,1.0693,


In [23]:
# A boolean Series built from one column filters rows: keep those where W exceeds 0.2.
df[df["W"] > 0.2]

Unnamed: 0,W,X,Y,Z
A,0.328597,-0.796199,1.403124,-1.547793
B,1.16673,1.147723,0.130109,0.431422


In [24]:
# Filter rows where W is positive and pick columns Y and X in a single .loc call.
df.loc[df["W"] > 0, ["Y", "X"]]

Unnamed: 0,Y,X
A,1.403124,-0.796199
B,0.130109,1.147723


# For two conditions you can use | and & with parentheses

In [25]:
# Combine conditions element-wise with & (and) or | (or); wrap each condition in
# parentheses because & and | bind more tightly than the comparison operators.
df[(df["W"] > 0) & (df["Y"] > 1)]



Unnamed: 0,W,X,Y,Z
A,0.328597,-0.796199,1.403124,-1.547793


# More Index Details

In [26]:
# Show the frame again before changing its index.
df

Unnamed: 0,W,X,Y,Z
A,0.328597,-0.796199,1.403124,-1.547793
B,1.16673,1.147723,0.130109,0.431422
C,-0.86832,0.027159,-1.518739,-0.815561
D,-0.223637,-2.631096,0.046867,-0.376922
E,-0.16284,-0.744918,1.0693,-0.734714


In [27]:
# Move the current index into a regular column named "index" and number the
# rows 0..4 instead. This returns a new frame; df is not modified.
df.reset_index()

Unnamed: 0,index,W,X,Y,Z
0,A,0.328597,-0.796199,1.403124,-1.547793
1,B,1.16673,1.147723,0.130109,0.431422
2,C,-0.86832,0.027159,-1.518739,-0.815561
3,D,-0.223637,-2.631096,0.046867,-0.376922
4,E,-0.16284,-0.744918,1.0693,-0.734714


In [28]:
# One label per row, to be used as a replacement index.
newind = ["CA", "NY", "WY", "OR", "CO"]

In [29]:
# Attach the labels as a new column; the list length must match the number of rows.
df["states"] = newind

In [30]:
# The frame now has a "states" column alongside W-Z.
df

Unnamed: 0,W,X,Y,Z,states
A,0.328597,-0.796199,1.403124,-1.547793,CA
B,1.16673,1.147723,0.130109,0.431422,NY
C,-0.86832,0.027159,-1.518739,-0.815561,WY
D,-0.223637,-2.631096,0.046867,-0.376922,OR
E,-0.16284,-0.744918,1.0693,-0.734714,CO


In [31]:
# Use the "states" column as the index. This returns a new frame; df keeps its A-E index.
df.set_index("states")

Unnamed: 0_level_0,W,X,Y,Z
states,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,0.328597,-0.796199,1.403124,-1.547793
NY,1.16673,1.147723,0.130109,0.431422
WY,-0.86832,0.027159,-1.518739,-0.815561
OR,-0.223637,-2.631096,0.046867,-0.376922
CO,-0.16284,-0.744918,1.0693,-0.734714


In [32]:
# df is unchanged: the previous set_index() call was not applied in place.
df

Unnamed: 0,W,X,Y,Z,states
A,0.328597,-0.796199,1.403124,-1.547793,CA
B,1.16673,1.147723,0.130109,0.431422,NY
C,-0.86832,0.027159,-1.518739,-0.815561,WY
D,-0.223637,-2.631096,0.046867,-0.376922,OR
E,-0.16284,-0.744918,1.0693,-0.734714,CO


In [33]:
# Pass inplace=True to replace df's own index with the "states" column.
df.set_index("states", inplace=True)

In [34]:
# The index is now "states", and "states" is no longer a regular column.
df

Unnamed: 0_level_0,W,X,Y,Z
states,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,0.328597,-0.796199,1.403124,-1.547793
NY,1.16673,1.147723,0.130109,0.431422
WY,-0.86832,0.027159,-1.518739,-0.815561
OR,-0.223637,-2.631096,0.046867,-0.376922
CO,-0.16284,-0.744918,1.0693,-0.734714
