# Pandas Series

In [2]:
import pandas as pd

In [3]:
import numpy as np

In [4]:
# Sample inputs shared by the Series-construction cells that follow:
# index labels, the same values as a list and as a NumPy array, and a dict.
labels = ['a', 'b', 'c']
my_data = [10, 20, 30]
d = dict(zip(labels, my_data))
arr = np.array(my_data)

In [5]:
# With no index given, the values are labelled 0..n-1.
pd.Series(data = my_data)

0    10
1    20
2    30
dtype: int64

In [6]:
# Passing index= replaces the default integer labels with 'a', 'b', 'c'.
pd.Series(data = my_data, index = labels )

a    10
b    20
c    30
dtype: int64

In [7]:
# Positional form: the first argument is the data, the second is the index.
pd.Series(my_data, labels)

a    10
b    20
c    30
dtype: int64

In [8]:
# A NumPy array works as data too. The dtype reported in this run is int32
# (versus int64 for the plain list) -- presumably inherited from the array.
pd.Series(arr, labels)

a    10
b    20
c    30
dtype: int32

In [9]:
# From a dict: the keys become the index and the values become the data.
pd.Series(d)

a    10
b    20
c    30
dtype: int64

In [10]:
# A Series can hold strings; the dtype is then reported as object.
pd.Series(labels)

0    a
1    b
2    c
dtype: object

In [11]:
# Arbitrary Python objects can be stored as well -- here built-in functions (dtype object).
pd.Series(data = [sum, print, len])

0      <built-in function sum>
1    <built-in function print>
2      <built-in function len>
dtype: object

In [12]:
# Integer values labelled by country name.
ser1 = pd.Series(
    data=[1, 2, 3, 4],
    index=['USA', 'Germany', 'USSR', 'Japan'],
)

In [13]:
# Display ser1 to check its labels and values.
ser1

USA        1
Germany    2
USSR       3
Japan      4
dtype: int64

In [14]:
# Second country-labelled Series; it has 'Italy' where ser1 has 'USSR',
# so the two indexes only partly overlap.
ser2 = pd.Series(
    data=[1, 2, 5, 4],
    index=['USA', 'Germany', 'Italy', 'Japan'],
)

In [15]:
# Display ser2 to check its labels and values.
ser2

USA        1
Germany    2
Italy      5
Japan      4
dtype: int64

In [16]:
# Look up a single value by its index label.
ser1['USA']

1

In [17]:
# Series of the label strings themselves, with the default integer index.
ser3 = pd.Series(labels)

In [18]:
# Display ser3: string values labelled 0, 1, 2.
ser3

0    a
1    b
2    c
dtype: object

In [19]:
# ser3 has the default 0..n-1 index, so 2 is the label of its last element.
ser3[2]

'c'

In [20]:
# Addition aligns on index labels. Labels present in only one Series
# ('Italy', 'USSR') produce NaN, and the result is float64 even though
# both inputs hold integers.
ser1 + ser2

Germany    4.0
Italy      NaN
Japan      8.0
USA        2.0
USSR       NaN
dtype: float64

# Data Frames

In [21]:
import numpy as np
import pandas as pd

In [22]:
from numpy.random import randn

In [23]:
np.random.seed(101) # seed NumPy's global random generator so the random values generated below are reproducible across runs

In [24]:
# 5x4 frame of randn draws: rows labelled A-E, columns labelled W-Z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=['A', 'B', 'C', 'D', 'E'],
    columns=['W', 'X', 'Y', 'Z'],
)

In [25]:
# Display the freshly built frame.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [26]:
# Bracket selection with a single column name returns that column as a Series.
df['W']

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [27]:
# Confirm that a single selected column is a pandas Series.
type(df['W'])

pandas.core.series.Series

In [28]:
# ...whereas the whole table is a DataFrame.
type(df)

pandas.core.frame.DataFrame

In [29]:
# A list of column names returns a DataFrame containing just those columns.
df[['W','Z']]

Unnamed: 0,W,Z
A,2.70685,0.503826
B,0.651118,0.605965
C,-2.018168,-0.589001
D,0.188695,0.955057
E,0.190794,0.683509


In [30]:
# Assigning to a column name that does not exist yet adds the column;
# here it is the element-wise sum of W and Y.
df['new'] = df['W'] + df['Y']

In [31]:
# df now has the extra 'new' column.
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [32]:
# axis=1 targets columns. The displayed result is a new frame without 'new';
# df itself is not modified by this call.
df.drop('new', axis = 1)

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [33]:
# df still contains 'new': the drop() call did not change it.
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [34]:
# inplace=True modifies df directly instead of returning a new frame,
# so after this cell the 'new' column is really gone from df.
df.drop('new', axis = 1, inplace = True)

In [35]:
# 'new' is no longer part of df after the in-place drop.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [36]:
# With no axis given, drop() removes a row by label. The result is a new
# frame; df keeps all five rows.
df.drop('E')

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


In [37]:
# (rows, columns) -- still 5 rows, because the row drop was not in place.
df.shape

(5, 4)

In [38]:
df.loc['A'] # label-based row access on a pandas DataFrame; a single row is returned as a Series

W    2.706850
X    0.628133
Y    0.907969
Z    0.503826
Name: A, dtype: float64

In [39]:
df.iloc[2] # position-based row access (0-based integer position, regardless of the index labels); 2 is row 'C' here

W   -2.018168
X    0.740122
Y    0.528813
Z   -0.589001
Name: C, dtype: float64

In [40]:
# .loc[row_label, column_label] returns a single scalar.
df.loc['B','Y']

-0.8480769834036315

In [41]:
# Lists of row labels and column labels select a sub-frame.
df.loc[['A','B'],['W','Y']]

Unnamed: 0,W,Y
A,2.70685,0.907969
B,0.651118,-0.848077


# Selection and Indexing

In [42]:
import numpy as np
import pandas as pd

In [43]:
# Starting point for this section: the 5x4 frame with rows A-E and columns W-Z.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [44]:
# Element-wise comparison of every cell against 0; the result is used as a mask below.
booldf = df > 0

In [45]:
# Indexing with the boolean frame keeps values where the mask is True
# and shows NaN everywhere else.
df[booldf]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [46]:
# Same masking written inline, without the intermediate variable.
df[df>0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [47]:
# The masking expressions did not modify df; the negative values are still there.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [48]:
# Comparing a single column gives a boolean Series with one entry per row.
df['W'] > 0

A     True
B     True
C    False
D     True
E     True
Name: W, dtype: bool

In [49]:
 df[df['Y'] > 0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
C,-2.018168,0.740122,0.528813,-0.589001
E,0.190794,1.978757,2.605967,0.683509


In [50]:
# Keep the filtered rows (Y > 0) under a name so they can be reused.
resultdf = df.loc[df['Y'] > 0]

In [51]:
# Display the filtered frame: only the rows with Y > 0.
resultdf

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
C,-2.018168,0.740122,0.528813,-0.589001
E,0.190794,1.978757,2.605967,0.683509


In [52]:
# Column selection works on the filtered frame like on any other DataFrame.
resultdf['X']

A    0.628133
C    0.740122
E    1.978757
Name: X, dtype: float64

In [53]:
# Row filter (Y > 0) and column selection (X, Y) in a single .loc lookup.
df.loc[df['Y'] > 0, ['X', 'Y']]


Unnamed: 0,X,Y
A,0.628133,0.907969
C,0.740122,0.528813
E,1.978757,2.605967


In [54]:
# The same kind of selection broken into named steps:
# build the row mask, filter the rows, then pick the columns.
mycols = ['W', 'X']
bolster = df['W'] > 0  # boolean Series, one flag per row
result = df[bolster]
result[mycols]

Unnamed: 0,W,X
A,2.70685,0.628133
B,0.651118,-0.319318
D,0.188695,-0.758872
E,0.190794,1.978757


In [55]:
# Row filter on W > 0, keeping every column.
df[df['W'] > 0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [56]:
# Combine two conditions with & (element-wise AND). Each comparison needs its
# own parentheses because & binds more tightly than > and <.
df[(df['W'] > 0) & (df['Y'] < 0)]

Unnamed: 0,W,X,Y,Z
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057


In [57]:
# | is the element-wise OR: a row is kept if either condition holds.
df[(df['W'] > 0) | (df['Y'] < 0)]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [59]:
df.reset_index() # returns a frame whose index is the conventional 0..n-1 and whose old index is kept as an 'index' column

Unnamed: 0,index,W,X,Y,Z
0,A,2.70685,0.628133,0.907969,0.503826
1,B,0.651118,-0.319318,-0.848077,0.605965
2,C,-2.018168,0.740122,0.528813,-0.589001
3,D,0.188695,-0.758872,-0.933237,0.955057
4,E,0.190794,1.978757,2.605967,0.683509


In [61]:
# One state code per row of df, in row order.
newind = ['CA', 'NY', 'WY', 'OR', 'CO']

In [63]:
# Attach the state codes as a new 'States' column (one code per row, in row order).
df['States'] = newind

In [64]:
# df now carries the extra 'States' column.
df

Unnamed: 0,W,X,Y,Z,States
A,2.70685,0.628133,0.907969,0.503826,CA
B,0.651118,-0.319318,-0.848077,0.605965,NY
C,-2.018168,0.740122,0.528813,-0.589001,WY
D,0.188695,-0.758872,-0.933237,0.955057,OR
E,0.190794,1.978757,2.605967,0.683509,CO


In [65]:
# Use the 'States' column as the row index. The displayed result no longer has
# 'States' as a regular column. The result is not assigned here, so it is only displayed.
df.set_index('States')

Unnamed: 0_level_0,W,X,Y,Z
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,2.70685,0.628133,0.907969,0.503826
NY,0.651118,-0.319318,-0.848077,0.605965
WY,-2.018168,0.740122,0.528813,-0.589001
OR,0.188695,-0.758872,-0.933237,0.955057
CO,0.190794,1.978757,2.605967,0.683509


# Multi-Index and Index Hierarchy

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Index levels: `outside` is the outer (group) level, `inside` the inner one.
outside = ['G1'] * 3 + ['G2'] * 3
inside = [1, 2, 3] * 2
# Pair the levels element-wise into (outer, inner) tuples and build the MultiIndex.
hier_index = pd.MultiIndex.from_tuples(list(zip(outside, inside)))

In [4]:
# Display the MultiIndex: one (outer, inner) tuple per row.
hier_index

MultiIndex([('G1', 1),
            ('G1', 2),
            ('G1', 3),
            ('G2', 1),
            ('G2', 2),
            ('G2', 3)],
           )

In [6]:
# 6x2 frame of random draws on the two-level index; this rebinds `df`.
# NOTE(review): no np.random.seed call is visible in this section, so the
# values presumably change on every run -- confirm or add a seed.
df = pd.DataFrame(
    data=np.random.randn(6, 2),
    index=hier_index,
    columns=['A', 'B'],
)

In [7]:
# Display the frame with its two-level row index.
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,-1.109045,0.669196
G1,2,0.735831,1.238117
G1,3,-1.508037,-0.733686
G2,1,-2.362523,-0.908625
G2,2,0.20878,-0.544658
G2,3,-0.800627,0.440164


In [9]:
# Selecting an outer-level label returns the sub-frame for that group,
# indexed by the inner level.
df.loc['G1']

Unnamed: 0,A,B
1,-1.109045,0.669196
2,0.735831,1.238117
3,-1.508037,-0.733686


In [10]:
# Chain .loc calls to drill down: outer label first, then inner label.
# The result is a single row returned as a Series.
df.loc['G1'].loc[1]

A   -1.109045
B    0.669196
Name: 1, dtype: float64

In [11]:
# The index levels have no names yet (both are None).
df.index.names

FrozenList([None, None])

In [12]:
# Name the two index levels so they can be referred to by name (e.g. level='Num').
df.index.names = ['Group','Num']

In [13]:
# The index levels are now shown with their names, Group and Num.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Group,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,-1.109045,0.669196
G1,2,0.735831,1.238117
G1,3,-1.508037,-0.733686
G2,1,-2.362523,-0.908625
G2,2,0.20878,-0.544658
G2,3,-0.800627,0.440164


In [14]:
# Cross-section for one outer-level label: the rows of group G1, indexed by Num.
df.xs('G1')

Unnamed: 0_level_0,A,B
Num,Unnamed: 1_level_1,Unnamed: 2_level_1
1,-1.109045,0.669196
2,0.735831,1.238117
3,-1.508037,-0.733686


In [15]:
# Cross-section for one (Group, Num) pair; the result is that row as a Series.
# The key must be a tuple: passing a list here was deprecated in pandas 1.x
# and raises TypeError from pandas 2.0 on.
df.xs(('G1', 1))

A   -1.109045
B    0.669196
Name: (G1, 1), dtype: float64

In [16]:
# Cross-section on the inner level: the rows whose 'Num' is 1, one per group.
# Selecting the level by name relies on the index level names set on df.
df.xs(1,level='Num')

Unnamed: 0_level_0,A,B
Group,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,-1.109045,0.669196
G2,-2.362523,-0.908625
