In [9]:
import pandas as pd
import numpy as np
from numpy.random import randn

In [14]:
# Seed NumPy's global RNG so the randn() draws below start from a fixed state.
# NOTE(review): the execution counts in this notebook are non-consecutive
# (9 -> 14 -> 19), so the saved outputs may reflect extra draws made after
# seeding -- confirm with Restart Kernel -> Run All.
np.random.seed(101)

In [19]:
# Preview a 5x4 array of standard-normal samples.
# This call consumes values from the global RNG, so the DataFrame built in the
# next cell is filled by a *separate* randn(5, 4) draw, not by this array.
randn(5,4)

array([[ 0.38603031,  2.08401853, -0.37651868,  0.23033634],
       [ 0.68120929,  1.03512507, -0.03116048,  1.93993231],
       [-1.00518692, -0.7417897 ,  0.18712452, -0.73284515],
       [-1.3829201 ,  1.4824955 ,  0.96145816, -2.14121229],
       [ 0.99257345,  1.19224064, -1.04677954,  1.29276458]])

In [20]:
# Build a 5x4 frame of standard-normal draws: rows labelled A-E, columns W-Z.
row_labels = list("ABCDE")
col_labels = list("WXYZ")
df = pd.DataFrame(randn(5, 4), index=row_labels, columns=col_labels)

In [21]:
df

Unnamed: 0,W,X,Y,Z
A,-1.467514,-0.494095,-0.162535,0.485809
B,0.392489,0.221491,-0.855196,1.54199
C,0.666319,-0.538235,-0.568581,1.407338
D,0.641806,-0.9051,-0.391157,1.028293
E,-1.972605,-0.866885,0.720788,-1.223082


### Selection and Indexing

In [22]:
df["W"]

A   -1.467514
B    0.392489
C    0.666319
D    0.641806
E   -1.972605
Name: W, dtype: float64

In [23]:
df[['W', 'Z']]

Unnamed: 0,W,Z
A,-1.467514,0.485809
B,0.392489,1.54199
C,0.666319,1.407338
D,0.641806,1.028293
E,-1.972605,-1.223082


In [24]:
# Not recommended: attribute-style access; prefer the bracket form df['W']
df.W

A   -1.467514
B    0.392489
C    0.666319
D    0.641806
E   -1.972605
Name: W, dtype: float64

In [26]:
type(df['W'])

pandas.core.series.Series

### Creating a new column

In [27]:
df['new'] = df['W'] + df['Y']

In [28]:
df

Unnamed: 0,W,X,Y,Z,new
A,-1.467514,-0.494095,-0.162535,0.485809,-1.630049
B,0.392489,0.221491,-0.855196,1.54199,-0.462707
C,0.666319,-0.538235,-0.568581,1.407338,0.097738
D,0.641806,-0.9051,-0.391157,1.028293,0.250649
E,-1.972605,-0.866885,0.720788,-1.223082,-1.251818


### Removing columns

In [30]:
# Returns a new frame without the 'new' column; df itself still has it
# (see the next cell) because the result is not assigned back.
df.drop(columns='new')

Unnamed: 0,W,X,Y,Z
A,-1.467514,-0.494095,-0.162535,0.485809
B,0.392489,0.221491,-0.855196,1.54199
C,0.666319,-0.538235,-0.568581,1.407338
D,0.641806,-0.9051,-0.391157,1.028293
E,-1.972605,-0.866885,0.720788,-1.223082


In [31]:
df

Unnamed: 0,W,X,Y,Z,new
A,-1.467514,-0.494095,-0.162535,0.485809,-1.630049
B,0.392489,0.221491,-0.855196,1.54199,-0.462707
C,0.666319,-0.538235,-0.568581,1.407338,0.097738
D,0.641806,-0.9051,-0.391157,1.028293,0.250649
E,-1.972605,-0.866885,0.720788,-1.223082,-1.251818


In [32]:
# Make the removal stick by rebinding df to the returned frame rather than
# using inplace=True: the displayed result is the same, but the state change
# is explicit in the code and the call remains chainable.
# (No other reference to this frame is visible in the notebook, so rebinding
# the name is equivalent here.)
df = df.drop(columns='new')

In [33]:
df

Unnamed: 0,W,X,Y,Z
A,-1.467514,-0.494095,-0.162535,0.485809
B,0.392489,0.221491,-0.855196,1.54199
C,0.666319,-0.538235,-0.568581,1.407338
D,0.641806,-0.9051,-0.391157,1.028293
E,-1.972605,-0.866885,0.720788,-1.223082


In [34]:
# Drop a row by its index label. This returns a new frame; df keeps row 'E'
# because the result is not assigned back.
df.drop(index='E')

Unnamed: 0,W,X,Y,Z
A,-1.467514,-0.494095,-0.162535,0.485809
B,0.392489,0.221491,-0.855196,1.54199
C,0.666319,-0.538235,-0.568581,1.407338
D,0.641806,-0.9051,-0.391157,1.028293


In [35]:
df

Unnamed: 0,W,X,Y,Z
A,-1.467514,-0.494095,-0.162535,0.485809
B,0.392489,0.221491,-0.855196,1.54199
C,0.666319,-0.538235,-0.568581,1.407338
D,0.641806,-0.9051,-0.391157,1.028293
E,-1.972605,-0.866885,0.720788,-1.223082


### Selecting Rows

In [37]:
df.loc['A']

W   -1.467514
X   -0.494095
Y   -0.162535
Z    0.485809
Name: A, dtype: float64

### Selecting a Subset of Rows and Columns

In [38]:
df.loc['B','Y']

-0.8551960407780934

In [39]:
df.loc[['A','B'],['W','Y']]

Unnamed: 0,W,Y
A,-1.467514,-0.162535
B,0.392489,-0.855196


### Conditional Selection

In [40]:
df

Unnamed: 0,W,X,Y,Z
A,-1.467514,-0.494095,-0.162535,0.485809
B,0.392489,0.221491,-0.855196,1.54199
C,0.666319,-0.538235,-0.568581,1.407338
D,0.641806,-0.9051,-0.391157,1.028293
E,-1.972605,-0.866885,0.720788,-1.223082


In [41]:
df>0

Unnamed: 0,W,X,Y,Z
A,False,False,False,True
B,True,True,False,True
C,True,False,False,True
D,True,False,False,True
E,False,False,True,False


In [42]:
# Indexing with a boolean DataFrame keeps the original shape: cells where the
# mask is False are shown as missing values instead of being dropped.
df[df>0]

Unnamed: 0,W,X,Y,Z
A,,,,0.485809
B,0.392489,0.221491,,1.54199
C,0.666319,,,1.407338
D,0.641806,,,1.028293
E,,,0.720788,


In [43]:
df[df['W']>0]

Unnamed: 0,W,X,Y,Z
B,0.392489,0.221491,-0.855196,1.54199
C,0.666319,-0.538235,-0.568581,1.407338
D,0.641806,-0.9051,-0.391157,1.028293


In [44]:
# Select column 'Z' for the rows where W is positive.
# A single .loc[row_mask, column] lookup replaces the chained df[mask]['Z'],
# which first materialises an intermediate frame and then indexes it again.
df.loc[df['W'] > 0, 'Z']

B    1.541990
C    1.407338
D    1.028293
Name: Z, dtype: float64

In [46]:
# Select columns 'Z' and 'Y' (in that order) for the rows where W is positive.
# One .loc[row_mask, columns] lookup instead of chaining two [] operations.
df.loc[df['W'] > 0, ['Z', 'Y']]

Unnamed: 0,Z,Y
B,1.54199,-0.855196
C,1.407338,-0.568581
D,1.028293,-0.391157


In [52]:
df[df['Z']>0]

Unnamed: 0,W,X,Y,Z
A,-1.467514,-0.494095,-0.162535,0.485809
B,0.392489,0.221491,-0.855196,1.54199
C,0.666319,-0.538235,-0.568581,1.407338
D,0.641806,-0.9051,-0.391157,1.028293


In [55]:
# Keep rows where both W and X are positive.
# Conditions on Series are combined element-wise with & (| for "or"); each
# comparison is parenthesised because & binds more tightly than >.
df[(df['W']>0) & (df['X']>0)]

Unnamed: 0,W,X,Y,Z
B,0.392489,0.221491,-0.855196,1.54199


### More Index Details

In [56]:
df

Unnamed: 0,W,X,Y,Z
A,-1.467514,-0.494095,-0.162535,0.485809
B,0.392489,0.221491,-0.855196,1.54199
C,0.666319,-0.538235,-0.568581,1.407338
D,0.641806,-0.9051,-0.391157,1.028293
E,-1.972605,-0.866885,0.720788,-1.223082


In [57]:
# Returns a new frame with a default 0..n-1 index and the old labels moved
# into an 'index' column; df itself is unchanged (the result is not assigned).
df.reset_index()

Unnamed: 0,index,W,X,Y,Z
0,A,-1.467514,-0.494095,-0.162535,0.485809
1,B,0.392489,0.221491,-0.855196,1.54199
2,C,0.666319,-0.538235,-0.568581,1.407338
3,D,0.641806,-0.9051,-0.391157,1.028293
4,E,-1.972605,-0.866885,0.720788,-1.223082


In [58]:
# One label per row of df, used below as a new 'States' column / index.
# NOTE(review): 'XY' is not a US state abbreviation -- possibly a typo (e.g.
# 'WY'); kept as-is so the saved outputs in the following cells still match.
newind = ["CA", "NY", "XY", "OR", "CO"]

In [59]:
df['States'] = newind

In [60]:
df

Unnamed: 0,W,X,Y,Z,States
A,-1.467514,-0.494095,-0.162535,0.485809,CA
B,0.392489,0.221491,-0.855196,1.54199,NY
C,0.666319,-0.538235,-0.568581,1.407338,XY
D,0.641806,-0.9051,-0.391157,1.028293,OR
E,-1.972605,-0.866885,0.720788,-1.223082,CO


In [61]:
# Returns a new frame that uses the 'States' column as its index; df is not
# modified (the next cell still shows the A-E index and the States column).
df.set_index('States')

Unnamed: 0_level_0,W,X,Y,Z
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,-1.467514,-0.494095,-0.162535,0.485809
NY,0.392489,0.221491,-0.855196,1.54199
XY,0.666319,-0.538235,-0.568581,1.407338
OR,0.641806,-0.9051,-0.391157,1.028293
CO,-1.972605,-0.866885,0.720788,-1.223082


In [62]:
df

Unnamed: 0,W,X,Y,Z,States
A,-1.467514,-0.494095,-0.162535,0.485809,CA
B,0.392489,0.221491,-0.855196,1.54199,NY
C,0.666319,-0.538235,-0.568581,1.407338,XY
D,0.641806,-0.9051,-0.391157,1.028293,OR
E,-1.972605,-0.866885,0.720788,-1.223082,CO


In [63]:
# Persist the new index by rebinding df to the returned frame instead of
# using inplace=True: same displayed result, but the state change is explicit
# and the call stays chainable.
# Note: re-running this cell raises KeyError because 'States' is no longer a
# column once it has become the index.
df = df.set_index('States')

In [64]:
df

Unnamed: 0_level_0,W,X,Y,Z
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,-1.467514,-0.494095,-0.162535,0.485809
NY,0.392489,0.221491,-0.855196,1.54199
XY,0.666319,-0.538235,-0.568581,1.407338
OR,0.641806,-0.9051,-0.391157,1.028293
CO,-1.972605,-0.866885,0.720788,-1.223082


### DataFrame Summaries

In [65]:
df.describe()

Unnamed: 0,W,X,Y,Z
count,5.0,5.0,5.0,5.0
mean,-0.347901,-0.516565,-0.251336,0.64807
std,1.269797,0.452599,0.599531,1.123353
min,-1.972605,-0.9051,-0.855196,-1.223082
25%,-1.467514,-0.866885,-0.568581,0.485809
50%,0.392489,-0.538235,-0.391157,1.028293
75%,0.641806,-0.494095,-0.162535,1.407338
max,0.666319,0.221491,0.720788,1.54199


In [66]:
df.dtypes

W    float64
X    float64
Y    float64
Z    float64
dtype: object

In [67]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, CA to CO
Data columns (total 4 columns):
W    5 non-null float64
X    5 non-null float64
Y    5 non-null float64
Z    5 non-null float64
dtypes: float64(4)
memory usage: 200.0+ bytes
