# Panda Series Creation

In [3]:
import pandas as pd

In [5]:
X = pd.Series([10,20,30,40])
X

0    10
1    20
2    30
3    40
dtype: int64

In [6]:
print(X.index, X.values, sep = "\n")

RangeIndex(start=0, stop=4, step=1)
[10 20 30 40]


In [9]:
print(X[1], X[[1,2,3]], X[1:4], sep = "\n\n")

20

1    20
2    30
3    40
dtype: int64

1    20
2    30
3    40
dtype: int64


# Pandas Series creation with index values

In [11]:
X = pd.Series([10,20,30,40], index = ['a', 'b', 'c', 'd'])
X

a    10
b    20
c    30
d    40
dtype: int64

In [12]:
print(X.index, X.values, sep = "\n")

Index(['a', 'b', 'c', 'd'], dtype='object')
[10 20 30 40]


In [13]:
print(X['b'], X[['b', 'c', 'd']], X['b':'d'], sep = "\n\n")

20

b    20
c    30
d    40
dtype: int64

b    20
c    30
d    40
dtype: int64


# Filter operation using index

In [14]:
X = pd.Series([10,-10,20,-20,30,-30,40,-40,50,60,-65], index = ['a', 'b', 'c', 'd','e','f','g','h','i','j','k'])
X

a    10
b   -10
c    20
d   -20
e    30
f   -30
g    40
h   -40
i    50
j    60
k   -65
dtype: int64

In [17]:
print(X[X<0], X[X<-20], X[X>-40], sep = "\n\n")

b   -10
d   -20
f   -30
h   -40
k   -65
dtype: int64

f   -30
h   -40
k   -65
dtype: int64

a    10
b   -10
c    20
d   -20
e    30
f   -30
g    40
i    50
j    60
dtype: int64


In [18]:
X<0

a    False
b     True
c    False
d     True
e    False
f     True
g    False
h     True
i    False
j    False
k     True
dtype: bool

In [19]:
X = pd.Series([10,-10,20,-20,30,-30,40,-40,50,60,-65])
X

0     10
1    -10
2     20
3    -20
4     30
5    -30
6     40
7    -40
8     50
9     60
10   -65
dtype: int64

In [20]:
X<0

0     False
1      True
2     False
3      True
4     False
5      True
6     False
7      True
8     False
9     False
10     True
dtype: bool

In [21]:
print(X[X<0], X[X<-20], X[X>-40], sep = "\n\n")

1    -10
3    -20
5    -30
7    -40
10   -65
dtype: int64

5    -30
7    -40
10   -65
dtype: int64

0    10
1   -10
2    20
3   -20
4    30
5   -30
6    40
8    50
9    60
dtype: int64


# Mathematical Operation on Series Object

In [22]:
X1 = pd.Series([10,20,30,40], index = ['a', 'b', 'c', 'd'])
X2 = pd.Series([10,20,30,50], index = ['b', 'c', 'd', 'e'])

X1 + X2

a     NaN
b    30.0
c    50.0
d    70.0
e     NaN
dtype: float64

In [25]:
print([X1 + X1], [X1 - X1], [X1 * X1], [X1/X1], sep = "\n\n")

[a    20
b    40
c    60
d    80
dtype: int64]

[a    0
b    0
c    0
d    0
dtype: int64]

[a     100
b     400
c     900
d    1600
dtype: int64]

[a    1.0
b    1.0
c    1.0
d    1.0
dtype: float64]


In [24]:
print([X1 * X2], [X1 - X2], [X1/X2], sep ="\n\n")

[a       NaN
b     200.0
c     600.0
d    1200.0
e       NaN
dtype: float64]

[a     NaN
b    10.0
c    10.0
d    10.0
e     NaN
dtype: float64]

[a         NaN
b    2.000000
c    1.500000
d    1.333333
e         NaN
dtype: float64]


# Panda Series Object Creation Using Dictionary

In [26]:
D = {'a':2, 'b':4, 'c':6, 'd':8, 'e':10}
X = pd.Series(D)
X

a     2
b     4
c     6
d     8
e    10
dtype: int64

In [27]:
X = pd.Series(D, index = ['e', 'd','c', 'b', 'a', 'f'])
X

e    10.0
d     8.0
c     6.0
b     4.0
a     2.0
f     NaN
dtype: float64

In [30]:
print(X.isnull(), X.notnull(), sep ="\n\n")

e    False
d    False
c    False
b    False
a    False
f     True
dtype: bool

e     True
d     True
c     True
b     True
a     True
f    False
dtype: bool


# Dataframe Object Creation

In [31]:
import numpy as np
import pandas as pd

In [36]:
D ={
    "State":["MP", "UP", "AP", "AS","KA", "KL"],
    "Name":["Sutapa", "Shaunav", "Subhod", "Shayan", "Bapi", "Manti"],
    "Age":[40, 10, 44, 24, 35, 40],
    "Year":[1983, 2013, 1978, 1995, 1988, 1984]
}
X = pd.DataFrame(D)
X

Unnamed: 0,State,Name,Age,Year
0,MP,Sutapa,40,1983
1,UP,Shaunav,10,2013
2,AP,Subhod,44,1978
3,AS,Shayan,24,1995
4,KA,Bapi,35,1988
5,KL,Manti,40,1984


In [38]:
X = pd.DataFrame(D, columns = ["Year","State", "Name"])
X

Unnamed: 0,Year,State,Name
0,1983,MP,Sutapa
1,2013,UP,Shaunav
2,1978,AP,Subhod
3,1995,AS,Shayan
4,1988,KA,Bapi
5,1984,KL,Manti


In [43]:
X = pd.DataFrame(D, columns = ["Year","State", "Population"])
X

Unnamed: 0,Year,State,Population
0,1983,MP,
1,2013,UP,
2,1978,AP,
3,1995,AS,
4,1988,KA,
5,1984,KL,


# Access column Details

In [46]:
X = pd.DataFrame(D, columns = ["Year","State","Age","Population"])
X

Unnamed: 0,Year,State,Age,Population
0,1983,MP,40,
1,2013,UP,10,
2,1978,AP,44,
3,1995,AS,24,
4,1988,KA,35,
5,1984,KL,40,


In [48]:
print(X['Year'], X.Year, sep ="\n\n")

0    1983
1    2013
2    1978
3    1995
4    1988
5    1984
Name: Year, dtype: int64

0    1983
1    2013
2    1978
3    1995
4    1988
5    1984
Name: Year, dtype: int64


In [55]:
print(X[['Year','State','Age']], sep ="\n\n")

   Year State  Age
0  1983    MP   40
1  2013    UP   10
2  1978    AP   44
3  1995    AS   24
4  1988    KA   35
5  1984    KL   40


In [56]:
X.columns

Index(['Year', 'State', 'Age', 'Population'], dtype='object')

In [57]:
X['Population'] = 10
X

Unnamed: 0,Year,State,Age,Population
0,1983,MP,40,10
1,2013,UP,10,10
2,1978,AP,44,10
3,1995,AS,24,10
4,1988,KA,35,10
5,1984,KL,40,10


In [58]:
X['Population'] = [10,100,1000,10000,100000,1000000]
X

Unnamed: 0,Year,State,Age,Population
0,1983,MP,40,10
1,2013,UP,10,100
2,1978,AP,44,1000
3,1995,AS,24,10000
4,1988,KA,35,100000
5,1984,KL,40,1000000


In [59]:
X

Unnamed: 0,Year,State,Age,Population
0,1983,MP,40,10
1,2013,UP,10,100
2,1978,AP,44,1000
3,1995,AS,24,10000
4,1988,KA,35,100000
5,1984,KL,40,1000000


In [61]:
X['Age >25']= X['Age']>25
X

Unnamed: 0,Year,State,Age,Population,Age >25
0,1983,MP,40,10,True
1,2013,UP,10,100,False
2,1978,AP,44,1000,True
3,1995,AS,24,10000,False
4,1988,KA,35,100000,True
5,1984,KL,40,1000000,True


# Update Column Value, Transpose and Delete Column

In [62]:
X = pd.DataFrame(D, columns = ["Year","State","Age","Population"],index = ['a','b','c','d','e','f'])
X

Unnamed: 0,Year,State,Age,Population
a,1983,MP,40,
b,2013,UP,10,
c,1978,AP,44,
d,1995,AS,24,
e,1988,KA,35,
f,1984,KL,40,


In [64]:
Xpop = pd.Series([100, 200, 300, 400, 500,600], index = ['a','b','c','d','e','f'])
Xpop

a    100
b    200
c    300
d    400
e    500
f    600
dtype: int64

In [65]:
X['Population']= Xpop
X

Unnamed: 0,Year,State,Age,Population
a,1983,MP,40,100
b,2013,UP,10,200
c,1978,AP,44,300
d,1995,AS,24,400
e,1988,KA,35,500
f,1984,KL,40,600


In [66]:
print(X.transpose(), X.T, sep="\n\n")

               a     b     c     d     e     f
Year        1983  2013  1978  1995  1988  1984
State         MP    UP    AP    AS    KA    KL
Age           40    10    44    24    35    40
Population   100   200   300   400   500   600

               a     b     c     d     e     f
Year        1983  2013  1978  1995  1988  1984
State         MP    UP    AP    AS    KA    KL
Age           40    10    44    24    35    40
Population   100   200   300   400   500   600


In [67]:
del X["Population"]
X

Unnamed: 0,Year,State,Age
a,1983,MP,40
b,2013,UP,10
c,1978,AP,44
d,1995,AS,24
e,1988,KA,35
f,1984,KL,40


# Properties of Index Object(Can be Repeated, Immutable)

In [68]:
X = pd.DataFrame(D, columns = ["Year","State","Age","Population"],index = ['a','b','c','d','e','f'])
X

Unnamed: 0,Year,State,Age,Population
a,1983,MP,40,
b,2013,UP,10,
c,1978,AP,44,
d,1995,AS,24,
e,1988,KA,35,
f,1984,KL,40,


In [69]:
print(X.index, X.columns, sep ="\n\n")

Index(['a', 'b', 'c', 'd', 'e', 'f'], dtype='object')

Index(['Year', 'State', 'Age', 'Population'], dtype='object')


In [70]:
X = pd.DataFrame(D, columns = ["Year","State","Age","Population"],index = ['a','a','a','a','e','f'])
X

Unnamed: 0,Year,State,Age,Population
a,1983,MP,40,
a,2013,UP,10,
a,1978,AP,44,
a,1995,AS,24,
e,1988,KA,35,
f,1984,KL,40,


In [73]:
X.index[1]='b'

TypeError: Index does not support mutable operations

In [74]:
X.columns[0]="Col1"

TypeError: Index does not support mutable operations

# Reindexing (Series, DataFrame Row/Column)

In [75]:
X = pd.Series([10, 20, 30, 40], index = ['a', 'b', 'c', 'd'])
X

a    10
b    20
c    30
d    40
dtype: int64

In [76]:
X1 = X.reindex(['b', 'c', 'd', 'e', 'f'])
X1

b    20.0
c    30.0
d    40.0
e     NaN
f     NaN
dtype: float64

In [77]:
X = pd.DataFrame(D, columns = ["Year","State","Age","Population"],index = ['a','b','c','d','e','f'])
X

Unnamed: 0,Year,State,Age,Population
a,1983,MP,40,
b,2013,UP,10,
c,1978,AP,44,
d,1995,AS,24,
e,1988,KA,35,
f,1984,KL,40,


In [78]:
X1 = X.reindex(['b','c','d','e','f','g','h'])
X1

Unnamed: 0,Year,State,Age,Population
b,2013.0,UP,10.0,
c,1978.0,AP,44.0,
d,1995.0,AS,24.0,
e,1988.0,KA,35.0,
f,1984.0,KL,40.0,
g,,,,
h,,,,


In [80]:
X2 = X.reindex(columns = ["Year","State","Col1", "Col2"])
X2

Unnamed: 0,Year,State,Col1,Col2
a,1983,MP,,
b,2013,UP,,
c,1978,AP,,
d,1995,AS,,
e,1988,KA,,
f,1984,KL,,


In [81]:
X

Unnamed: 0,Year,State,Age,Population
a,1983,MP,40,
b,2013,UP,10,
c,1978,AP,44,
d,1995,AS,24,
e,1988,KA,35,
f,1984,KL,40,


# Dropping Index

In [82]:
X = pd.Series([10, 20, 30, 40], index = ['a', 'b', 'c', 'd'])
X

a    10
b    20
c    30
d    40
dtype: int64

In [84]:
X.drop('a')

b    20
c    30
d    40
dtype: int64

In [85]:
X.drop(['a','b'])

c    30
d    40
dtype: int64

In [86]:
X = pd.DataFrame(D, columns = ["Year","State","Age","Population"],index = ['a','b','c','d','e','f'])
X

Unnamed: 0,Year,State,Age,Population
a,1983,MP,40,
b,2013,UP,10,
c,1978,AP,44,
d,1995,AS,24,
e,1988,KA,35,
f,1984,KL,40,


In [87]:
X.drop(['a','b'])

Unnamed: 0,Year,State,Age,Population
c,1978,AP,44,
d,1995,AS,24,
e,1988,KA,35,
f,1984,KL,40,


In [88]:
X.drop(columns=["Year", "Population"])

Unnamed: 0,State,Age
a,MP,40
b,UP,10
c,AP,44
d,AS,24
e,KA,35
f,KL,40


In [89]:
X

Unnamed: 0,Year,State,Age,Population
a,1983,MP,40,
b,2013,UP,10,
c,1978,AP,44,
d,1995,AS,24,
e,1988,KA,35,
f,1984,KL,40,


In [90]:
X.drop(columns=["Year", "Population"], inplace = True)

In [91]:
X

Unnamed: 0,State,Age
a,MP,40
b,UP,10
c,AP,44
d,AS,24
e,KA,35
f,KL,40


# Indexing, Selection & Filtering

In [92]:
X = pd.Series([10,20,30,40,50], index = ['a','b','c','d','e'])
X

a    10
b    20
c    30
d    40
e    50
dtype: int64

In [94]:
print(X['a'], X[0],X['d'], X[3], sep =",")

10,10,40,40


In [98]:
print(X['a':'d'], X[0:4], sep = "\n")

a    10
b    20
c    30
d    40
dtype: int64
a    10
b    20
c    30
d    40
dtype: int64


In [99]:
print(X, X[X>20], sep = "\n")

a    10
b    20
c    30
d    40
e    50
dtype: int64
c    30
d    40
e    50
dtype: int64


In [102]:
X[X>20] = 99
X
 

a    10
b    20
c    99
d    99
e    99
dtype: int64

In [103]:
X['c':'e'] = 199
X

a     10
b     20
c    199
d    199
e    199
dtype: int64

In [106]:
X = pd.DataFrame(np.arange(15).reshape(5,3), index = ['a', 'b', 'c', 'd', 'e'], columns = ['col1', 'col2', 'col3'])
X

Unnamed: 0,col1,col2,col3
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11
e,12,13,14


In [110]:
print(X['col1'], X.loc['a'],X.iloc[4], sep = "\n\n")


a     0
b     3
c     6
d     9
e    12
Name: col1, dtype: int32

col1    0
col2    1
col3    2
Name: a, dtype: int32

col1    12
col2    13
col3    14
Name: e, dtype: int32


In [111]:
print(X.loc['a':'d'],X.iloc[0:4], sep = "\n\n")

   col1  col2  col3
a     0     1     2
b     3     4     5
c     6     7     8
d     9    10    11

   col1  col2  col3
a     0     1     2
b     3     4     5
c     6     7     8
d     9    10    11


In [112]:
print(X.loc['a':'d', 'col1':'col2'],X.iloc[0:4,0:2], sep = "\n\n")

   col1  col2
a     0     1
b     3     4
c     6     7
d     9    10

   col1  col2
a     0     1
b     3     4
c     6     7
d     9    10


In [113]:
print(X.loc[['a','d'],['col1','col2']],X.iloc[[0,4],[0,2]], sep = "\n\n")

   col1  col2
a     0     1
d     9    10

   col1  col3
a     0     2
e    12    14


In [114]:
X

Unnamed: 0,col1,col2,col3
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11
e,12,13,14


In [115]:
X[X['col3']>5]

Unnamed: 0,col1,col2,col3
c,6,7,8
d,9,10,11
e,12,13,14


In [116]:
X

Unnamed: 0,col1,col2,col3
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11
e,12,13,14


In [117]:
print(X.at['b','col3'], X.iat[1,2], sep = ",")

5,5


# Hierarchial Indexing

In [118]:
X = pd.DataFrame(np.arange(15).reshape(5,3), index = ['a', 'b', 'c', 'd', 'e'], columns = ['col1', 'col2', 'col3'])
X

Unnamed: 0,col1,col2,col3
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11
e,12,13,14


In [122]:
X.loc[:'d',:]

Unnamed: 0,col1,col2,col3
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11


In [123]:
X.loc[:'d',:][X.col3>5]

  X.loc[:'d',:][X.col3>5]


Unnamed: 0,col1,col2,col3
c,6,7,8
d,9,10,11


In [124]:
X.loc[:'d',:][X.col3>5][X.col2>7]

  X.loc[:'d',:][X.col3>5][X.col2>7]


Unnamed: 0,col1,col2,col3
d,9,10,11


# Addition

In [125]:
X1 = pd.Series([10, 20, 30], index = ['a', 'b', 'c'])
X2 = pd.Series([10, 20, 30], index = ['b', 'c','d'])

X1 + X2

a     NaN
b    30.0
c    50.0
d     NaN
dtype: float64

In [126]:
X1.add(X2, fill_value = 200)

a    210.0
b     30.0
c     50.0
d    230.0
dtype: float64

In [131]:
X1 = pd.DataFrame(np.arange(15).reshape(5,3), index = ['a', 'b', 'c','d','e'], columns =['col1', 'col2', 'col3'])
X2 = pd.DataFrame(np.arange(15).reshape(5,3), index = ['b', 'c','d','e','f'], columns =['col2', 'col3', 'col4'])
                                                                                        
print(X1, X2, X1 + X2, sep = "\n\n")                                                                                       

   col1  col2  col3
a     0     1     2
b     3     4     5
c     6     7     8
d     9    10    11
e    12    13    14

   col2  col3  col4
b     0     1     2
c     3     4     5
d     6     7     8
e     9    10    11
f    12    13    14

   col1  col2  col3  col4
a   NaN   NaN   NaN   NaN
b   NaN   4.0   6.0   NaN
c   NaN  10.0  12.0   NaN
d   NaN  16.0  18.0   NaN
e   NaN  22.0  24.0   NaN
f   NaN   NaN   NaN   NaN


In [130]:
X1.add(X2, fill_value = 10)

Unnamed: 0,col1,col2,col3,col4
a,10.0,11.0,12.0,
b,13.0,4.0,6.0,12.0
c,16.0,10.0,12.0,15.0
d,19.0,16.0,18.0,18.0
e,22.0,22.0,24.0,21.0
f,,22.0,23.0,24.0


In [136]:
X1 = pd.DataFrame(np.arange(15).reshape(5,3), index = ['a', 'b', 'c','d','e'], columns =['col1', 'col2', 'col3'])
X2 = pd.DataFrame(np.arange(10,25).reshape(5,3), index = ['a', 'b', 'c','d','e'], columns =['col1', 'col2', 'col3'])

In [138]:
print(X1, X2, X1.sub(X2), X1.rsub(X2), sep = "\n\n")

   col1  col2  col3
a     0     1     2
b     3     4     5
c     6     7     8
d     9    10    11
e    12    13    14

   col1  col2  col3
a    10    11    12
b    13    14    15
c    16    17    18
d    19    20    21
e    22    23    24

   col1  col2  col3
a   -10   -10   -10
b   -10   -10   -10
c   -10   -10   -10
d   -10   -10   -10
e   -10   -10   -10

   col1  col2  col3
a    10    10    10
b    10    10    10
c    10    10    10
d    10    10    10
e    10    10    10


# Function Mapping

In [139]:
X1 = pd.DataFrame(np.arange(12).reshape(4,3), index = ['a','b','c','d'], columns = ['col1', 'col2', 'col3'])
X1

Unnamed: 0,col1,col2,col3
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11


In [140]:
X2 = X1.apply(lambda x:x * 100 )
X2

Unnamed: 0,col1,col2,col3
a,0,100,200
b,300,400,500
c,600,700,800
d,900,1000,1100


In [141]:
def fn(x):
    return (pd.Series([max(x), min(x), sum(x)], index=['max', 'min', 'sum']))

X2 = X1.apply(fn)
print(X1, X2, sep = "\n\n")
            

   col1  col2  col3
a     0     1     2
b     3     4     5
c     6     7     8
d     9    10    11

     col1  col2  col3
max     9    10    11
min     0     1     2
sum    18    22    26


In [142]:
X2 = X1.apply(fn, axis = "columns")
print(X1, X2, sep = "\n\n")
            

   col1  col2  col3
a     0     1     2
b     3     4     5
c     6     7     8
d     9    10    11

   max  min  sum
a    2    0    3
b    5    3   12
c    8    6   21
d   11    9   30


In [143]:
def fn(x):
    return x*10

X2 = X1.applymap(fn)
print(X1, X2, sep = "\n\n")

   col1  col2  col3
a     0     1     2
b     3     4     5
c     6     7     8
d     9    10    11

   col1  col2  col3
a     0    10    20
b    30    40    50
c    60    70    80
d    90   100   110


In [144]:
def fn(x):
    return x/10

X2 = X1.applymap(fn)
print(X1, X2, sep = "\n\n")

   col1  col2  col3
a     0     1     2
b     3     4     5
c     6     7     8
d     9    10    11

   col1  col2  col3
a   0.0   0.1   0.2
b   0.3   0.4   0.5
c   0.6   0.7   0.8
d   0.9   1.0   1.1


# Sorting and Ranking

In [150]:
X1 = pd.DataFrame([[50, 20, 40], [10, 30, 50],[20, 8, 10], [5, 13, 15]], index = ['b', 'a', 'd', 'c'], columns = ['col1', 'col3', 'col2'])
X1

Unnamed: 0,col1,col3,col2
b,50,20,40
a,10,30,50
d,20,8,10
c,5,13,15


In [151]:
X1.sort_index()

Unnamed: 0,col1,col3,col2
a,10,30,50
b,50,20,40
c,5,13,15
d,20,8,10


In [152]:
X1.sort_index(axis=1)

Unnamed: 0,col1,col2,col3
b,50,40,20
a,10,50,30
d,20,10,8
c,5,15,13


In [153]:
X1.sort_values(by = "col1")

Unnamed: 0,col1,col3,col2
c,5,13,15
a,10,30,50
d,20,8,10
b,50,20,40


In [154]:
X1.sort_values(by = ["col1","col3"])

Unnamed: 0,col1,col3,col2
c,5,13,15
a,10,30,50
d,20,8,10
b,50,20,40


In [155]:
X1

Unnamed: 0,col1,col3,col2
b,50,20,40
a,10,30,50
d,20,8,10
c,5,13,15


In [156]:
X1.rank()

Unnamed: 0,col1,col3,col2
b,4.0,3.0,3.0
a,2.0,4.0,4.0
d,3.0,1.0,1.0
c,1.0,2.0,2.0


In [157]:
X1.rank(axis = 1)

Unnamed: 0,col1,col3,col2
b,3.0,1.0,2.0
a,1.0,2.0,3.0
d,3.0,1.0,2.0
c,1.0,2.0,3.0


In [161]:
X = pd.Series(np.random.rand(12), index =['a','b','c','d','e','f','g','h','i','j','k','l'])
X

a    0.498646
b    0.013109
c    0.645263
d    0.306879
e    0.075255
f    0.097983
g    0.104139
h    0.212990
i    0.265970
j    0.327000
k    0.627143
l    0.011473
dtype: float64

In [165]:
X.rank(ascending = False)

a     3.0
b    11.0
c     1.0
d     5.0
e    10.0
f     9.0
g     8.0
h     7.0
i     6.0
j     4.0
k     2.0
l    12.0
dtype: float64

# Summarizing Data

In [181]:
X = pd.DataFrame(np.random.rand(8,5), columns = ['col1','col2', 'col3', 'col4', 'col5'])
X

Unnamed: 0,col1,col2,col3,col4,col5
0,0.959397,0.111669,0.530688,0.305971,0.959754
1,0.236559,0.840884,0.521272,0.30175,0.875042
2,0.241134,0.237397,0.11169,0.379113,0.075638
3,0.930785,0.281568,0.83717,0.033122,0.91245
4,0.358379,0.970793,0.243826,0.513723,0.382115
5,0.68856,0.278896,0.973386,0.960229,0.50325
6,0.325632,0.52352,0.247764,0.759337,0.77951
7,0.31646,0.771447,0.190206,0.380721,0.8209


In [182]:
X.describe()

Unnamed: 0,col1,col2,col3,col4,col5
count,8.0,8.0,8.0,8.0,8.0
mean,0.507113,0.502022,0.457,0.454246,0.663582
std,0.305271,0.322582,0.316315,0.289402,0.312402
min,0.236559,0.111669,0.11169,0.033122,0.075638
25%,0.297629,0.268521,0.230421,0.304915,0.472966
50%,0.342005,0.402544,0.384518,0.379917,0.800205
75%,0.749117,0.788806,0.607309,0.575126,0.884394
max,0.959397,0.970793,0.973386,0.960229,0.959754


In [183]:
print(X.mean(), X.max(), X.sum(), sep = "\n\n")

col1    0.507113
col2    0.502022
col3    0.457000
col4    0.454246
col5    0.663582
dtype: float64

col1    0.959397
col2    0.970793
col3    0.973386
col4    0.960229
col5    0.959754
dtype: float64

col1    4.056907
col2    4.016173
col3    3.656002
col4    3.633966
col5    5.308659
dtype: float64


In [184]:
print(X.mean(axis = 1), X.max(axis = 1), X.sum(axis = 1), sep = "\n\n")

0    0.573496
1    0.555101
2    0.208994
3    0.599019
4    0.493767
5    0.680864
6    0.527152
7    0.495947
dtype: float64

0    0.959754
1    0.875042
2    0.379113
3    0.930785
4    0.970793
5    0.973386
6    0.779510
7    0.820900
dtype: float64

0    2.867478
1    2.775506
2    1.044972
3    2.995097
4    2.468836
5    3.404322
6    2.635762
7    2.479734
dtype: float64


In [185]:
print(X.corr(), X.cov(), sep = "\n\n")

          col1      col2      col3      col4      col5
col1  1.000000 -0.662566  0.714705 -0.230629  0.449842
col2 -0.662566  1.000000 -0.407337  0.031611  0.007758
col3  0.714705 -0.407337  1.000000  0.077251  0.352789
col4 -0.230629  0.031611  0.077251  1.000000 -0.333175
col5  0.449842  0.007758  0.352789 -0.333175  1.000000

          col1      col2      col3      col4      col5
col1  0.093190 -0.065246  0.069013 -0.020375  0.042900
col2 -0.065246  0.104059 -0.041564  0.002951  0.000782
col3  0.069013 -0.041564  0.100055  0.007072  0.034862
col4 -0.020375  0.002951  0.007072  0.083753 -0.030122
col5  0.042900  0.000782  0.034862 -0.030122  0.097595


In [187]:
X["col1"].corr(X["col2"])


-0.6625663793192692

In [188]:
X["col1"].cov(X["col2"])

-0.06524609668711036

In [189]:
print(X, X.idxmax(), X.idxmin(), X.idxmax(axis = 1), X.idxmin(axis = 1), sep = "\n\n")

       col1      col2      col3      col4      col5
0  0.959397  0.111669  0.530688  0.305971  0.959754
1  0.236559  0.840884  0.521272  0.301750  0.875042
2  0.241134  0.237397  0.111690  0.379113  0.075638
3  0.930785  0.281568  0.837170  0.033122  0.912450
4  0.358379  0.970793  0.243826  0.513723  0.382115
5  0.688560  0.278896  0.973386  0.960229  0.503250
6  0.325632  0.523520  0.247764  0.759337  0.779510
7  0.316460  0.771447  0.190206  0.380721  0.820900

col1    0
col2    4
col3    5
col4    5
col5    0
dtype: int64

col1    1
col2    0
col3    2
col4    3
col5    2
dtype: int64

0    col5
1    col5
2    col4
3    col1
4    col2
5    col3
6    col5
7    col5
dtype: object

0    col2
1    col1
2    col5
3    col4
4    col3
5    col2
6    col3
7    col3
dtype: object


In [190]:
X = pd.DataFrame(np.arange(16).reshape(4,4), index = ['a','b','c','d'], columns = ['a1','b1','c1','d1'])
print(X)

   a1  b1  c1  d1
a   0   1   2   3
b   4   5   6   7
c   8   9  10  11
d  12  13  14  15


In [191]:
print(X - X.loc['a'])


   a1  b1  c1  d1
a   0   0   0   0
b   4   4   4   4
c   8   8   8   8
d  12  12  12  12


In [192]:
X = pd.DataFrame(np.arange(16).reshape(4,4), index = ['a','b','c','d'], columns = ['a1','b1','c1','d1'])
print(X,sep="\n\n")

   a1  b1  c1  d1
a   0   1   2   3
b   4   5   6   7
c   8   9  10  11
d  12  13  14  15


In [193]:
def fn(x):
    return x.max()-x.min()
 
X.apply(fn)

a1    12
b1    12
c1    12
d1    12
dtype: int64

In [194]:
X = pd.DataFrame(np.arange(16).reshape(4,4), index = ['a','b','c','d'], columns = ['a1','b1','c1','d1'])
print(X,sep="\n\n")

   a1  b1  c1  d1
a   0   1   2   3
b   4   5   6   7
c   8   9  10  11
d  12  13  14  15


In [195]:
def fn(x):
    return x.max()
print(X.applymap(fn))

AttributeError: 'int' object has no attribute 'max'

In [196]:
X = pd.DataFrame(np.arange(16).reshape(4,4), index = ['d','b','c','a'], columns = ['d1','b1','c1','a1'])
print(X,sep="\n\n")

   d1  b1  c1  a1
d   0   1   2   3
b   4   5   6   7
c   8   9  10  11
a  12  13  14  15


In [197]:
print(X.mean(axis = 0), X.mean(axis =1), sep="\n\n")

d1    6.0
b1    7.0
c1    8.0
a1    9.0
dtype: float64

d     1.5
b     5.5
c     9.5
a    13.5
dtype: float64
