### NUMPY

NumPy (Numerical Python) is an open source Python library that's widely used in science and engineering. The NumPy library contains multidimensional array data structures, such as homogeneous, N-dimensional ndarray, and a large library of functions that operate efficiently on these data structures.

In [1]:
import numpy as np

In [2]:
a = np.array([1, 4, 3, 5, 7, 9, 2])

In [3]:
a.shape

(7,)

In [4]:
a[0]

np.int64(1)

In [5]:
a[0] = 2

In [6]:
a

array([2, 4, 3, 5, 7, 9, 2])

In [7]:
b = a[::-1]

In [8]:
b

array([2, 9, 7, 5, 3, 4, 2])

In [9]:
b[0] = 13

In [10]:
b

array([13,  9,  7,  5,  3,  4,  2])

In [11]:
a

array([ 2,  4,  3,  5,  7,  9, 13])

In [12]:
a

array([ 2,  4,  3,  5,  7,  9, 13])

In [13]:
c = a[:3].copy()

In [14]:
c

array([2, 4, 3])

In [15]:
c[0] = 17

In [16]:
c

array([17,  4,  3])

In [17]:
a

array([ 2,  4,  3,  5,  7,  9, 13])

In [18]:
a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

In [19]:
a

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [20]:
a.ndim

2

In [21]:
a.shape

(3, 3)

In [22]:
a.size

9

In [23]:
a.size == a.shape[0] * a.shape[1]

True

In [24]:
a.dtype

dtype('int64')

In [25]:
np.zeros(3)

array([0., 0., 0.])

In [26]:
np.ones(3)

array([1., 1., 1.])

In [27]:
np.arange(2, 6)

array([2, 3, 4, 5])

In [28]:
np.arange(1, 10, 2)

array([1, 3, 5, 7, 9])

In [29]:
np.linspace(0, 10, 2)

array([ 0., 10.])

In [30]:
a

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [31]:
np.sort(a)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [32]:
b

array([13,  9,  7,  5,  3,  4,  2])

In [33]:
np.sort(b)

array([ 2,  3,  4,  5,  7,  9, 13])

In [34]:
b

array([13,  9,  7,  5,  3,  4,  2])

In [35]:
c

array([17,  4,  3])

In [36]:
np.concatenate((b, c))

array([13,  9,  7,  5,  3,  4,  2, 17,  4,  3])

In [37]:
b

array([13,  9,  7,  5,  3,  4,  2])

In [38]:
b.sort()

In [39]:
b

array([ 2,  3,  4,  5,  7,  9, 13])

In [40]:
a

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [41]:
a[0:3:2, 0:3:2]

array([[1, 3],
       [7, 9]])

In [42]:
a[a%2==0]

array([2, 4, 6, 8])

In [43]:
a

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [44]:
a%2==0

array([[False,  True, False],
       [ True, False,  True],
       [False,  True, False]])

In [45]:
m = np.array([1, 2, 3, 4])
n = np.array([5, 6, 7, 8])

In [46]:
coor = list(zip(m, n))

In [47]:
for cord in coor:
        print(cord)

(np.int64(1), np.int64(5))
(np.int64(2), np.int64(6))
(np.int64(3), np.int64(7))
(np.int64(4), np.int64(8))


In [48]:
z = np.linspace(1, 10, 9).reshape(3, 3)

In [49]:
z

array([[ 1.   ,  2.125,  3.25 ],
       [ 4.375,  5.5  ,  6.625],
       [ 7.75 ,  8.875, 10.   ]])

In [50]:
a

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [51]:
np.vstack((a, z))

array([[ 1.   ,  2.   ,  3.   ],
       [ 4.   ,  5.   ,  6.   ],
       [ 7.   ,  8.   ,  9.   ],
       [ 1.   ,  2.125,  3.25 ],
       [ 4.375,  5.5  ,  6.625],
       [ 7.75 ,  8.875, 10.   ]])

In [52]:
np.hstack((a, z))

array([[ 1.   ,  2.   ,  3.   ,  1.   ,  2.125,  3.25 ],
       [ 4.   ,  5.   ,  6.   ,  4.375,  5.5  ,  6.625],
       [ 7.   ,  8.   ,  9.   ,  7.75 ,  8.875, 10.   ]])

In [53]:
a

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [54]:
a.sum(axis = 1)

array([ 6, 15, 24])

In [55]:
a.min()

np.int64(1)

In [56]:
a.max()

np.int64(9)

In [57]:
a.mean()

np.float64(5.0)

In [58]:
a.min(axis = 0)

array([1, 2, 3])

In [59]:
a

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [60]:
a[1:3: 2, 0:2]

array([[4, 5]])

In [61]:
a.flatten()

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [62]:
a

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [63]:
a.ravel()

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [64]:
a

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [65]:
a.flatten()[0] = 99

In [66]:
a

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [67]:
a.ravel()[0] = 99

In [68]:
a

array([[99,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9]])

In [69]:
np.full((2, 3), 3)

array([[3, 3, 3],
       [3, 3, 3]])

In [70]:
np.random.rand(3, 2)

array([[0.54670192, 0.59827822],
       [0.70458245, 0.58031376],
       [0.7238098 , 0.09638222]])

In [71]:
np.random.randint(2, 1000, 3)

array([405, 870,  92], dtype=int32)

In [72]:
np.random.randn(2, 3, 2)

array([[[ 0.28172725,  0.59223315],
        [-1.28596682,  0.44914136],
        [-0.98221888,  0.09700801]],

       [[ 0.05218262,  0.9305554 ],
        [ 1.80589278, -0.23894767],
        [ 0.37914526, -0.4263    ]]])

In [73]:
np.random.random((2, 3))

array([[0.70168617, 0.99471731, 0.09782953],
       [0.9127286 , 0.45559582, 0.8559838 ]])

In [74]:
np.empty((2, 2))

array([[2.37858534e+184, 1.77296457e+160],
       [4.27073521e-090, 1.81501156e-052]])

In [75]:
a

array([[99,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9]])

In [76]:
a.view()

array([[99,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9]])

In [77]:
a.dtype.name

'int64'

In [78]:
a**2

array([[9801,    4,    9],
       [  16,   25,   36],
       [  49,   64,   81]])

In [79]:
np.square(a)

array([[9801,    4,    9],
       [  16,   25,   36],
       [  49,   64,   81]])

In [80]:
a.sum()

np.int64(143)

In [81]:
a.cumsum(axis = 0)

array([[ 99,   2,   3],
       [103,   7,   9],
       [110,  15,  18]])

In [82]:
a.sum(axis = 0)

array([110,  15,  18])

In [83]:
b


array([ 2,  3,  4,  5,  7,  9, 13])

In [84]:
np.append(b, 99)

array([ 2,  3,  4,  5,  7,  9, 13, 99])

In [85]:
b

array([ 2,  3,  4,  5,  7,  9, 13])

In [86]:
z

array([[ 1.   ,  2.125,  3.25 ],
       [ 4.375,  5.5  ,  6.625],
       [ 7.75 ,  8.875, 10.   ]])

In [87]:
np.r_[a, z]

array([[99.   ,  2.   ,  3.   ],
       [ 4.   ,  5.   ,  6.   ],
       [ 7.   ,  8.   ,  9.   ],
       [ 1.   ,  2.125,  3.25 ],
       [ 4.375,  5.5  ,  6.625],
       [ 7.75 ,  8.875, 10.   ]])

In [88]:
np.c_[a, z]

array([[99.   ,  2.   ,  3.   ,  1.   ,  2.125,  3.25 ],
       [ 4.   ,  5.   ,  6.   ,  4.375,  5.5  ,  6.625],
       [ 7.   ,  8.   ,  9.   ,  7.75 ,  8.875, 10.   ]])

In [89]:
a

array([[99,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9]])

In [90]:
a[[0, 1, 2],[2, 1, 0]]

array([3, 5, 7])

In [91]:
a[[0, 1, 1]][:,[0, 2, 0]]

array([[99,  3, 99],
       [ 4,  6,  4],
       [ 4,  6,  4]])

In [92]:
a[[0, 1, 1]]

array([[99,  2,  3],
       [ 4,  5,  6],
       [ 4,  5,  6]])

### PANDAS

Pandas is an open source python package that provides fast, flexible and expressive data structures designed to make working with relational or labelled data both easy and intuiutive.

In [93]:
import pandas as pd

In [94]:
s = pd.Series([1, 2, 3, np.nan])

In [95]:
s

0    1.0
1    2.0
2    3.0
3    NaN
dtype: float64

In [96]:
df = pd.DataFrame(np.random.randn(6, 4), index = np.arange(1, 13, 2), columns = list("ABCD"))

In [97]:
df

Unnamed: 0,A,B,C,D
1,-0.917598,0.75022,2.490811,-1.428422
3,0.484263,-0.377896,-0.323125,-0.712956
5,-0.115701,0.916827,-0.98043,1.246285
7,0.988036,-0.61568,-0.557416,-0.934982
9,1.570974,-1.784536,-0.91211,-0.845125
11,1.219863,0.57421,0.743674,-0.327234


In [98]:
s1 = pd.Series(np.random.randn(100))

In [99]:
s1

0     0.492046
1    -1.167517
2    -0.738657
3     0.803280
4     0.618619
        ...   
95   -0.296199
96    0.037243
97   -2.214895
98   -1.802340
99   -0.039655
Length: 100, dtype: float64

In [100]:
df1 = pd.DataFrame(np.linspace(1, 10, 20).reshape(4, 5), index = np.arange(1, 5), columns = list("ABCDE"))

In [101]:
df1

Unnamed: 0,A,B,C,D,E
1,1.0,1.473684,1.947368,2.421053,2.894737
2,3.368421,3.842105,4.315789,4.789474,5.263158
3,5.736842,6.210526,6.684211,7.157895,7.631579
4,8.105263,8.578947,9.052632,9.526316,10.0


In [102]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 1 to 4
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   A       4 non-null      float64
 1   B       4 non-null      float64
 2   C       4 non-null      float64
 3   D       4 non-null      float64
 4   E       4 non-null      float64
dtypes: float64(5)
memory usage: 192.0 bytes


In [103]:
df1.dtypes

A    float64
B    float64
C    float64
D    float64
E    float64
dtype: object

In [104]:
df1.index

Index([1, 2, 3, 4], dtype='int64')

In [105]:
df1.columns

Index(['A', 'B', 'C', 'D', 'E'], dtype='object')

In [106]:
df1

Unnamed: 0,A,B,C,D,E
1,1.0,1.473684,1.947368,2.421053,2.894737
2,3.368421,3.842105,4.315789,4.789474,5.263158
3,5.736842,6.210526,6.684211,7.157895,7.631579
4,8.105263,8.578947,9.052632,9.526316,10.0


In [107]:
df1.to_numpy()

array([[ 1.        ,  1.47368421,  1.94736842,  2.42105263,  2.89473684],
       [ 3.36842105,  3.84210526,  4.31578947,  4.78947368,  5.26315789],
       [ 5.73684211,  6.21052632,  6.68421053,  7.15789474,  7.63157895],
       [ 8.10526316,  8.57894737,  9.05263158,  9.52631579, 10.        ]])

In [108]:
df1.describe()

Unnamed: 0,A,B,C,D,E
count,4.0,4.0,4.0,4.0,4.0
mean,4.552632,5.026316,5.5,5.973684,6.447368
std,3.057618,3.057618,3.057618,3.057618,3.057618
min,1.0,1.473684,1.947368,2.421053,2.894737
25%,2.776316,3.25,3.723684,4.197368,4.671053
50%,4.552632,5.026316,5.5,5.973684,6.447368
75%,6.328947,6.802632,7.276316,7.75,8.223684
max,8.105263,8.578947,9.052632,9.526316,10.0


In [109]:
df1.T

Unnamed: 0,1,2,3,4
A,1.0,3.368421,5.736842,8.105263
B,1.473684,3.842105,6.210526,8.578947
C,1.947368,4.315789,6.684211,9.052632
D,2.421053,4.789474,7.157895,9.526316
E,2.894737,5.263158,7.631579,10.0


In [110]:
df1

Unnamed: 0,A,B,C,D,E
1,1.0,1.473684,1.947368,2.421053,2.894737
2,3.368421,3.842105,4.315789,4.789474,5.263158
3,5.736842,6.210526,6.684211,7.157895,7.631579
4,8.105263,8.578947,9.052632,9.526316,10.0


In [111]:
df.sort_index(axis = 1, ascending =  False)

Unnamed: 0,D,C,B,A
1,-1.428422,2.490811,0.75022,-0.917598
3,-0.712956,-0.323125,-0.377896,0.484263
5,1.246285,-0.98043,0.916827,-0.115701
7,-0.934982,-0.557416,-0.61568,0.988036
9,-0.845125,-0.91211,-1.784536,1.570974
11,-0.327234,0.743674,0.57421,1.219863


In [112]:
df

Unnamed: 0,A,B,C,D
1,-0.917598,0.75022,2.490811,-1.428422
3,0.484263,-0.377896,-0.323125,-0.712956
5,-0.115701,0.916827,-0.98043,1.246285
7,0.988036,-0.61568,-0.557416,-0.934982
9,1.570974,-1.784536,-0.91211,-0.845125
11,1.219863,0.57421,0.743674,-0.327234


In [113]:
df.sort_values(by = "B")

Unnamed: 0,A,B,C,D
9,1.570974,-1.784536,-0.91211,-0.845125
7,0.988036,-0.61568,-0.557416,-0.934982
3,0.484263,-0.377896,-0.323125,-0.712956
11,1.219863,0.57421,0.743674,-0.327234
1,-0.917598,0.75022,2.490811,-1.428422
5,-0.115701,0.916827,-0.98043,1.246285


In [114]:
df

Unnamed: 0,A,B,C,D
1,-0.917598,0.75022,2.490811,-1.428422
3,0.484263,-0.377896,-0.323125,-0.712956
5,-0.115701,0.916827,-0.98043,1.246285
7,0.988036,-0.61568,-0.557416,-0.934982
9,1.570974,-1.784536,-0.91211,-0.845125
11,1.219863,0.57421,0.743674,-0.327234


In [115]:
df[0:1]

Unnamed: 0,A,B,C,D
1,-0.917598,0.75022,2.490811,-1.428422


In [116]:
df.iloc[:, 1:3]

Unnamed: 0,B,C
1,0.75022,2.490811
3,-0.377896,-0.323125
5,0.916827,-0.98043
7,-0.61568,-0.557416
9,-1.784536,-0.91211
11,0.57421,0.743674


In [117]:
df.loc[1:3, 'A':'C']

Unnamed: 0,A,B,C
1,-0.917598,0.75022,2.490811
3,0.484263,-0.377896,-0.323125


## LOC is used for selection by labels whereas ILOC is used for selection by position.

In [118]:
df


Unnamed: 0,A,B,C,D
1,-0.917598,0.75022,2.490811,-1.428422
3,0.484263,-0.377896,-0.323125,-0.712956
5,-0.115701,0.916827,-0.98043,1.246285
7,0.988036,-0.61568,-0.557416,-0.934982
9,1.570974,-1.784536,-0.91211,-0.845125
11,1.219863,0.57421,0.743674,-0.327234


In [119]:
df>0

Unnamed: 0,A,B,C,D
1,False,True,True,False
3,True,False,False,False
5,False,True,False,True
7,True,False,False,False
9,True,False,False,False
11,True,True,True,False


In [120]:
df[df>0]

Unnamed: 0,A,B,C,D
1,,0.75022,2.490811,
3,0.484263,,,
5,,0.916827,,1.246285
7,0.988036,,,
9,1.570974,,,
11,1.219863,0.57421,0.743674,


In [121]:
df[df['A']>0]

Unnamed: 0,A,B,C,D
3,0.484263,-0.377896,-0.323125,-0.712956
7,0.988036,-0.61568,-0.557416,-0.934982
9,1.570974,-1.784536,-0.91211,-0.845125
11,1.219863,0.57421,0.743674,-0.327234


In [122]:
df

Unnamed: 0,A,B,C,D
1,-0.917598,0.75022,2.490811,-1.428422
3,0.484263,-0.377896,-0.323125,-0.712956
5,-0.115701,0.916827,-0.98043,1.246285
7,0.988036,-0.61568,-0.557416,-0.934982
9,1.570974,-1.784536,-0.91211,-0.845125
11,1.219863,0.57421,0.743674,-0.327234


In [123]:
df.at[1, 'A'] = 1 

In [124]:
df

Unnamed: 0,A,B,C,D
1,1.0,0.75022,2.490811,-1.428422
3,0.484263,-0.377896,-0.323125,-0.712956
5,-0.115701,0.916827,-0.98043,1.246285
7,0.988036,-0.61568,-0.557416,-0.934982
9,1.570974,-1.784536,-0.91211,-0.845125
11,1.219863,0.57421,0.743674,-0.327234


In [125]:
df.loc[:, 'D'] = np.array([5] * len(df))

In [126]:
df

Unnamed: 0,A,B,C,D
1,1.0,0.75022,2.490811,5.0
3,0.484263,-0.377896,-0.323125,5.0
5,-0.115701,0.916827,-0.98043,5.0
7,0.988036,-0.61568,-0.557416,5.0
9,1.570974,-1.784536,-0.91211,5.0
11,1.219863,0.57421,0.743674,5.0


In [127]:
df

Unnamed: 0,A,B,C,D
1,1.0,0.75022,2.490811,5.0
3,0.484263,-0.377896,-0.323125,5.0
5,-0.115701,0.916827,-0.98043,5.0
7,0.988036,-0.61568,-0.557416,5.0
9,1.570974,-1.784536,-0.91211,5.0
11,1.219863,0.57421,0.743674,5.0


In [128]:
df.index

Index([1, 3, 5, 7, 9, 11], dtype='int64')

In [129]:
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [130]:
df2 = df.reindex(index = df.index, columns = list(df.columns) + ['E'])

In [131]:
df2.loc[1:5, 'E'] = 1

In [132]:
df2

Unnamed: 0,A,B,C,D,E
1,1.0,0.75022,2.490811,5.0,1.0
3,0.484263,-0.377896,-0.323125,5.0,1.0
5,-0.115701,0.916827,-0.98043,5.0,1.0
7,0.988036,-0.61568,-0.557416,5.0,
9,1.570974,-1.784536,-0.91211,5.0,
11,1.219863,0.57421,0.743674,5.0,


In [133]:
df2.loc[5:7, 'D'] = np.nan

In [134]:
df2

Unnamed: 0,A,B,C,D,E
1,1.0,0.75022,2.490811,5.0,1.0
3,0.484263,-0.377896,-0.323125,5.0,1.0
5,-0.115701,0.916827,-0.98043,,1.0
7,0.988036,-0.61568,-0.557416,,
9,1.570974,-1.784536,-0.91211,5.0,
11,1.219863,0.57421,0.743674,5.0,


In [135]:
df2.dropna(axis = 0, thresh = 4)

Unnamed: 0,A,B,C,D,E
1,1.0,0.75022,2.490811,5.0,1.0
3,0.484263,-0.377896,-0.323125,5.0,1.0
5,-0.115701,0.916827,-0.98043,,1.0
9,1.570974,-1.784536,-0.91211,5.0,
11,1.219863,0.57421,0.743674,5.0,


In [136]:
pd.isnull(df2)

Unnamed: 0,A,B,C,D,E
1,False,False,False,False,False
3,False,False,False,False,False
5,False,False,False,True,False
7,False,False,False,True,True
9,False,False,False,False,True
11,False,False,False,False,True


In [137]:
pd.isna(df2)

Unnamed: 0,A,B,C,D,E
1,False,False,False,False,False
3,False,False,False,False,False
5,False,False,False,True,False
7,False,False,False,True,True
9,False,False,False,False,True
11,False,False,False,False,True


In [138]:
df

Unnamed: 0,A,B,C,D
1,1.0,0.75022,2.490811,5.0
3,0.484263,-0.377896,-0.323125,5.0
5,-0.115701,0.916827,-0.98043,5.0
7,0.988036,-0.61568,-0.557416,5.0
9,1.570974,-1.784536,-0.91211,5.0
11,1.219863,0.57421,0.743674,5.0


In [139]:
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.857906,-0.089476,0.076901,5.0
std,0.59399,1.038414,1.336414,0.0
min,-0.115701,-1.784536,-0.98043,5.0
25%,0.610207,-0.556234,-0.823437,5.0
50%,0.994018,0.098157,-0.440271,5.0
75%,1.164897,0.706217,0.476974,5.0
max,1.570974,0.916827,2.490811,5.0


In [140]:
df.mean(axis = 1)

1     2.310258
3     1.195811
5     1.205174
7     1.203735
9     0.968582
11    1.884437
dtype: float64

In [141]:
s = pd.Series(np.random.randint(0, 7, 20))

In [142]:
s

0     6
1     1
2     0
3     4
4     3
5     3
6     2
7     5
8     3
9     6
10    0
11    0
12    6
13    4
14    3
15    2
16    3
17    2
18    3
19    3
dtype: int32

In [143]:
s.value_counts()

3    7
0    3
6    3
2    3
4    2
1    1
5    1
Name: count, dtype: int64

In [144]:
df3 = pd.DataFrame(np.random.randint(0, 7, 12).reshape(3, 4))

In [145]:
df3

Unnamed: 0,0,1,2,3
0,0,0,4,1
1,3,5,0,3
2,4,1,5,1


In [146]:
s = pd.Series(np.random.randint(7, 8, 3))

In [147]:
s

0    7
1    7
2    7
dtype: int32

In [148]:
s.info

<bound method Series.info of 0    7
1    7
2    7
dtype: int32>

In [149]:
df3.info

<bound method DataFrame.info of    0  1  2  3
0  0  0  4  1
1  3  5  0  3
2  4  1  5  1>

In [150]:
pd.concat([df3, s], axis= 1)

Unnamed: 0,0,1,2,3,0.1
0,0,0,4,1,7
1,3,5,0,3,7
2,4,1,5,1,7


In [151]:
df3


Unnamed: 0,0,1,2,3
0,0,0,4,1
1,3,5,0,3
2,4,1,5,1


In [152]:
df = pd.DataFrame(
    {
        "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
        "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
        "C": np.random.randn(8),
        "D": np.random.randn(8),
    }
)

In [153]:
df

Unnamed: 0,A,B,C,D
0,foo,one,0.399486,0.195168
1,bar,one,-1.566453,-0.520549
2,foo,two,0.427047,0.245133
3,bar,three,-0.769731,-1.561727
4,foo,two,0.170324,0.151721
5,bar,two,-0.6825,-1.84312
6,foo,one,0.773847,-0.086628
7,foo,three,0.199398,-0.037084


In [154]:
df.groupby("A", sort = False).sum()

Unnamed: 0_level_0,B,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
foo,onetwotwoonethree,1.970102,0.468309
bar,onethreetwo,-3.018684,-3.925396


In [155]:
df

Unnamed: 0,A,B,C,D
0,foo,one,0.399486,0.195168
1,bar,one,-1.566453,-0.520549
2,foo,two,0.427047,0.245133
3,bar,three,-0.769731,-1.561727
4,foo,two,0.170324,0.151721
5,bar,two,-0.6825,-1.84312
6,foo,one,0.773847,-0.086628
7,foo,three,0.199398,-0.037084


In [156]:
df.stack()

0  A         foo
   B         one
   C    0.399486
   D    0.195168
1  A         bar
   B         one
   C   -1.566453
   D   -0.520549
2  A         foo
   B         two
   C    0.427047
   D    0.245133
3  A         bar
   B       three
   C   -0.769731
   D   -1.561727
4  A         foo
   B         two
   C    0.170324
   D    0.151721
5  A         bar
   B         two
   C     -0.6825
   D    -1.84312
6  A         foo
   B         one
   C    0.773847
   D   -0.086628
7  A         foo
   B       three
   C    0.199398
   D   -0.037084
dtype: object

In [157]:
df

Unnamed: 0,A,B,C,D
0,foo,one,0.399486,0.195168
1,bar,one,-1.566453,-0.520549
2,foo,two,0.427047,0.245133
3,bar,three,-0.769731,-1.561727
4,foo,two,0.170324,0.151721
5,bar,two,-0.6825,-1.84312
6,foo,one,0.773847,-0.086628
7,foo,three,0.199398,-0.037084


In [158]:
df.unstack()

A  0         foo
   1         bar
   2         foo
   3         bar
   4         foo
   5         bar
   6         foo
   7         foo
B  0         one
   1         one
   2         two
   3       three
   4         two
   5         two
   6         one
   7       three
C  0    0.399486
   1   -1.566453
   2    0.427047
   3   -0.769731
   4    0.170324
   5     -0.6825
   6    0.773847
   7    0.199398
D  0    0.195168
   1   -0.520549
   2    0.245133
   3   -1.561727
   4    0.151721
   5    -1.84312
   6   -0.086628
   7   -0.037084
dtype: object

In [159]:
df

Unnamed: 0,A,B,C,D
0,foo,one,0.399486,0.195168
1,bar,one,-1.566453,-0.520549
2,foo,two,0.427047,0.245133
3,bar,three,-0.769731,-1.561727
4,foo,two,0.170324,0.151721
5,bar,two,-0.6825,-1.84312
6,foo,one,0.773847,-0.086628
7,foo,three,0.199398,-0.037084


In [160]:
df.unstack(2)

A  0         foo
   1         bar
   2         foo
   3         bar
   4         foo
   5         bar
   6         foo
   7         foo
B  0         one
   1         one
   2         two
   3       three
   4         two
   5         two
   6         one
   7       three
C  0    0.399486
   1   -1.566453
   2    0.427047
   3   -0.769731
   4    0.170324
   5     -0.6825
   6    0.773847
   7    0.199398
D  0    0.195168
   1   -0.520549
   2    0.245133
   3   -1.561727
   4    0.151721
   5    -1.84312
   6   -0.086628
   7   -0.037084
dtype: object

In [170]:
s = pd.Series(np.random.randint(0, 7, 3), index = np.arange(1, 6, 2))

In [171]:
s

1    3
3    3
5    2
dtype: int32

In [174]:
s.reindex(range(6), method='ffill')

0    NaN
1    3.0
2    3.0
3    3.0
4    3.0
5    2.0
dtype: float64

In [175]:
s

1    3
3    3
5    2
dtype: int32

In [176]:
df = pd.DataFrame(np.random.randint(1, 10, 12).reshape(3, 4), columns = list("abcd"), index = list("PQR"))

In [177]:
df

Unnamed: 0,a,b,c,d
P,9,6,2,4
Q,9,3,3,7
R,2,1,2,9


In [184]:
df.a.corr(df.c)

np.float64(0.5)

In [185]:
df.corr()

Unnamed: 0,a,b,c,d
a,1.0,0.802955,0.5,-0.802955
b,0.802955,1.0,-0.114708,-1.0
c,0.5,-0.114708,1.0,0.114708
d,-0.802955,-1.0,0.114708,1.0


In [186]:
 obj = pd.Series(['c', 'a', 'd', 'a', 'a', 'b', 'b', 'c', 'c'])

In [187]:
obj

0    c
1    a
2    d
3    a
4    a
5    b
6    b
7    c
8    c
dtype: object

In [196]:
obj.value_counts()

c    3
a    3
b    2
d    1
Name: count, dtype: int64

In [192]:
df.index

Index(['P', 'Q', 'R'], dtype='object')

In [190]:
df.columns

Index(['a', 'b', 'c', 'd'], dtype='object')

In [202]:
obj.unique()

array(['c', 'a', 'd', 'b'], dtype=object)

In [205]:
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
z = zip(a, b)

In [206]:
for x in z:
    print(x)

(np.int64(1), np.int64(4))
(np.int64(2), np.int64(5))
(np.int64(3), np.int64(6))


In [207]:
(1+7+3.5)/3

3.8333333333333335

In [208]:
obj

0    c
1    a
2    d
3    a
4    a
5    b
6    b
7    c
8    c
dtype: object

In [209]:
obj.duplicated()

0    False
1    False
2    False
3     True
4     True
5    False
6     True
7     True
8     True
dtype: bool

In [216]:
obj.drop_duplicates(inplace=True)

In [217]:
obj

0    c
1    a
2    d
5    b
dtype: object

In [220]:
obj.reindex(np.arange(0, 6), method = 'ffill')

0    c
1    a
2    d
3    d
4    d
5    b
dtype: object

In [221]:
obj

0    c
1    a
2    d
5    b
dtype: object

In [222]:
obj.rename(index = {5:3}, inplace = True)

In [223]:
obj

0    c
1    a
2    d
3    b
dtype: object

In [227]:
obj.info

<bound method Series.info of 0    c
1    a
2    d
3    b
dtype: object>

In [226]:
obj.values

array(['c', 'a', 'd', 'b'], dtype=object)

In [228]:
 df = pd.DataFrame({'key1' : ['a', 'a', 'b', 'b', 'a'],              
'key2' : ['one', 'two', 'one', 'two', 'one'],   
'data1' : np.random.randn(5),
 'data2' : np.random.randn(5)})

In [229]:
df

Unnamed: 0,key1,key2,data1,data2
0,a,one,-0.097825,0.115906
1,a,two,0.481601,1.173341
2,b,one,0.829511,-1.27357
3,b,two,2.717847,0.121548
4,a,one,-0.043533,-0.389565


In [239]:
df['data1'].groupby(df.key1).mean()

key1
a    0.113414
b    1.773679
Name: data1, dtype: float64

In [242]:
df['data1'].mean(axis = 0)

np.float64(0.7775202807373465)

In [245]:
df.groupby(['key1', 'key2']).size()

key1  key2
a     one     2
      two     1
b     one     1
      two     1
dtype: int64

In [248]:
for name, group in df.groupby('key1'):
    print(name)    
    print(group)

a
  key1 key2     data1     data2
0    a  one -0.097825  0.115906
1    a  two  0.481601  1.173341
4    a  one -0.043533 -0.389565
b
  key1 key2     data1     data2
2    b  one  0.829511 -1.273570
3    b  two  2.717847  0.121548


In [249]:
df['data1'].groupby(df.key1).mean()

key1
a    0.113414
b    1.773679
Name: data1, dtype: float64

In [257]:
df.drop('key2', axis = 1, inplace=True)

In [258]:
df

Unnamed: 0,key1,data1,data2
0,a,-0.097825,0.115906
1,a,0.481601,1.173341
2,b,0.829511,-1.27357
3,b,2.717847,0.121548
4,a,-0.043533,-0.389565


In [261]:
df.groupby('key1')['data1'].mean()

key1
a    0.113414
b    1.773679
Name: data1, dtype: float64

In [263]:
df.groupby('key1').max()

Unnamed: 0_level_0,data1,data2
key1,Unnamed: 1_level_1,Unnamed: 2_level_1
a,0.481601,1.173341
b,2.717847,0.121548
