In [1]:
import numpy as np
import pandas as pd

In [2]:
# Sample inputs for the Series constructor demos below:
# index labels, a plain list, a NumPy array and a dict.
labels = list('abc')
lst = [10, 20, 30]
arr = np.array([11, 12, 13])
d = dict(a=40, b=50, c=60)

In [4]:
# Build a Series from a plain list, using `labels` as the index instead of 0..n-1.
pd.Series(data=lst, index=labels)

a    10
b    20
c    30
dtype: int64

In [5]:
# A NumPy array works as data too; with no index given, positions 0..n-1 are used.
pd.Series(data=arr)

0    11
1    12
2    13
dtype: int32

In [6]:
# From a dict: the keys become the index and the values become the data.
pd.Series(data=d)

a    40
b    50
c    60
dtype: int64

In [7]:
# Series are not limited to numbers: a list of strings is stored with dtype object.
pd.Series(labels)

0    a
1    b
2    c
dtype: object

In [12]:
# Two Series whose indexes only partly overlap ('USA' and 'Brazil' are shared).
ser1 = pd.Series(
    data=[1, 2, 3, 4],
    index=['USA', 'Brazil', 'France', 'Germany'],
)
ser2 = pd.Series(
    data=[10, 20, 30, 40],
    index=['USA', 'Brazil', 'Spain', 'Canada'],
)

In [14]:
# Addition aligns on index labels: a label present in only one operand yields NaN,
# which is why the result comes back as float64.
ser1.add(ser2)

Brazil     22.0
Canada      NaN
France      NaN
Germany     NaN
Spain       NaN
USA        11.0
dtype: float64

In [16]:
from numpy.random import randn

In [22]:
# Seed NumPy's global random generator so the randn() draws below are repeatable.
np.random.seed(101)

In [23]:
# 5x4 frame of random normal draws: rows labelled A-E, columns labelled W-Z.
df = pd.DataFrame(
    randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)

In [24]:
df # Each column of the DataFrame is a Series; all columns share the row index A-E

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [25]:
df['W'] # or df.W (not recommended: attribute access can clash with DataFrame method/attribute names)

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [26]:
# Confirm that selecting a single column returns a Series object.
type(df['W'])

pandas.core.series.Series

In [28]:
# A list of column names selects several columns and returns a DataFrame.
df[['W','Z']]

Unnamed: 0,W,Z
A,2.70685,0.503826
B,0.651118,0.605965
C,-2.018168,-0.589001
D,0.188695,0.955057
E,0.190794,0.683509


In [29]:
# Assigning to a label that does not exist yet adds a column; values are the row-wise W + Y.
df['new'] = df['W'] + df['Y']

In [30]:
# Show the frame with the added 'new' column.
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [31]:
# axis=1 means "column". Rebind df to the returned frame instead of mutating
# with inplace=True, which pandas discourages and which cannot be chained.
df = df.drop('new', axis=1)

In [34]:
# Confirm the frame no longer has the 'new' column.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [33]:
# axis=0 (the default) targets rows. The result is a new frame; df itself keeps row 'E'.
df.drop('E', axis=0)

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


In [35]:
# Label-based row lookup: the row comes back as a Series indexed by column name.
df.loc['A', :]

W    2.706850
X    0.628133
Y    0.907969
Z    0.503826
Name: A, dtype: float64

In [36]:
# Position-based row lookup: position 0 is the same row as label 'A'.
df.iloc[0, :]

W    2.706850
X    0.628133
Y    0.907969
Z    0.503826
Name: A, dtype: float64

In [37]:
# .loc[row_label, column_label] returns the single value at that cell.
df.loc['B','Y']

-0.8480769834036315

In [38]:
# Lists of row labels and column labels select a sub-frame (rows B and E, columns X and Y).
df.loc[['B','E'],['X','Y']]

Unnamed: 0,X,Y
B,-0.319318,-0.848077
E,1.978757,2.605967


In [39]:
# Show the full frame again as a reference for the boolean-filtering examples below.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [40]:
# Element-wise mask: entries that are not > 0 become NaN (a less commonly used form).
df.where(df > 0)

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [41]:
# Row filter, the more common use of conditions: keep only rows where column W is positive.
df.loc[df['W'] > 0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [43]:
# Filter rows (W > 0) and pick columns X and Y in a single .loc call rather than
# chaining two [] lookups, which builds a throwaway intermediate frame.
df.loc[df['W'] > 0, ['X', 'Y']]

Unnamed: 0,X,Y
A,0.628133,0.907969
B,-0.319318,-0.848077
D,-0.758872,-0.933237
E,1.978757,2.605967


In [44]:
# Combine conditions with the bitwise '&' (each comparison in its own parentheses),
# not the logical 'and', which cannot operate element-wise on Series.
df.loc[(df['W'] > 0) & (df['Y'] > 1)]

Unnamed: 0,W,X,Y,Z
E,0.190794,1.978757,2.605967,0.683509


In [49]:
# Build a two-level row index by pairing each outer group label with an inner number.
outside = ['G1'] * 3 + ['G2'] * 3
inside = [1, 2, 3] * 2
hier_index = pd.MultiIndex.from_tuples(list(zip(outside, inside)))

In [48]:
# Inspect the outer-level labels that feed the MultiIndex.
outside

['G1', 'G1', 'G1', 'G2', 'G2', 'G2']

In [50]:
# Inspect the resulting MultiIndex object.
hier_index

MultiIndex(levels=[['G1', 'G2'], [1, 2, 3]],
           labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])

In [51]:
# 6x2 frame of random draws whose rows carry the two-level index built above.
# Note: this rebinds `df`, replacing the earlier single-index frame.
df = pd.DataFrame(data=randn(6, 2), index=hier_index, columns=['A', 'B'])

In [52]:
# Show the frame; each row is identified by a (group, number) pair.
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,0.302665,1.693723
G1,2,-1.706086,-1.159119
G1,3,-0.134841,0.390528
G2,1,0.166905,0.184502
G2,2,0.807706,0.07296
G2,3,0.638787,0.329646


In [56]:
# The first .loc selects outer label 'G1', leaving a frame indexed by the inner level;
# the second .loc then selects inner label 2 and returns that row as a Series.
df.loc['G1'].loc[2]

A   -1.706086
B   -1.159119
Name: 2, dtype: float64

In [60]:
# Name the two index levels so they can be referred to by name (e.g. xs(..., level='num')).
df.index.names = ['Groups', 'num']

In [61]:
# The level names now appear as headers above the index columns.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Groups,num,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,0.302665,1.693723
G1,2,-1.706086,-1.159119
G1,3,-0.134841,0.390528
G2,1,0.166905,0.184502
G2,2,0.807706,0.07296
G2,3,0.638787,0.329646


In [62]:
# Fetch one cell with a single .loc call: the tuple addresses the
# (outer, inner) row and 'B' the column. This replaces three chained lookups,
# each of which built an intermediate object; the scalar result is the same.
df.loc[('G1', 2), 'B']

-1.1591194155484297

In [63]:
# Cross-section on the outer level: all rows of group 'G1', indexed by the remaining 'num' level.
df.xs(key='G1')

Unnamed: 0_level_0,A,B
num,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.302665,1.693723
2,-1.706086,-1.159119
3,-0.134841,0.390528


In [65]:
# Cross-section on the inner level: the rows with num == 2 from every group.
df.xs(key=2, level='num')

Unnamed: 0_level_0,A,B
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,-1.706086,-1.159119
G2,0.807706,0.07296
