# 1. How to import pandas and check the version?

In [1]:
import pandas as pd

pd.__version__

'1.3.3'

# 2. How to create a series from a list, numpy array and dict?

In [12]:
import numpy as np

mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))

ser_list = pd.Series(mylist)
ser_array = pd.Series(myarr)
ser_dict = pd.Series(mydict)



# 3. How to convert the index of a series into a column of a dataframe?

In [19]:
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))
ser = pd.Series(mydict)

print(ser)
ser.index

df = ser.to_frame().reset_index()

print(df)

a     0
b     1
c     2
e     3
d     4
f     5
g     6
h     7
i     8
j     9
k    10
l    11
m    12
n    13
o    14
p    15
q    16
r    17
s    18
t    19
u    20
v    21
w    22
x    23
y    24
z    25
dtype: int32
   index   0
0      a   0
1      b   1
2      c   2
3      e   3
4      d   4
5      f   5
6      g   6
7      h   7
8      i   8
9      j   9
10     k  10
11     l  11
12     m  12
13     n  13
14     o  14
15     p  15
16     q  16
17     r  17
18     s  18
19     t  19
20     u  20
21     v  21
22     w  22
23     x  23
24     y  24
25     z  25


# 4. How to combine many series to form a dataframe?

In [20]:
ser1 = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))
ser2 = pd.Series(np.arange(26))

In [25]:
df = pd.concat([ser1, ser2], axis=1)
print(df)b

    0   1
0   a   0
1   b   1
2   c   2
3   e   3
4   d   4
5   f   5
6   g   6
7   h   7
8   i   8
9   j   9
10  k  10
11  l  11
12  m  12
13  n  13
14  o  14
15  p  15
16  q  16
17  r  17
18  s  18
19  t  19
20  u  20
21  v  21
22  w  22
23  x  23
24  y  24
25  z  25


In [27]:
df = pd.DataFrame({'col1': ser1, 'col2': ser2})
print(df.head())

  col1  col2
0    a     0
1    b     1
2    c     2
3    e     3
4    d     4


# 5. How to assign name to the series’ index?

In [36]:
ser = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))

ser.name = "alphabets"

ser.head


<bound method NDFrame.head of 0     a
1     b
2     c
3     e
4     d
5     f
6     g
7     h
8     i
9     j
10    k
11    l
12    m
13    n
14    o
15    p
16    q
17    r
18    s
19    t
20    u
21    v
22    w
23    x
24    y
25    z
Name: alphabets, dtype: object>

# 6. How to get the items of series A not present in series B?

In [37]:
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

In [42]:
ser1[~ser1.isin(ser2)]b

0    1
1    2
2    3
dtype: int64

# 7. How to get the items not common to both series A and series B?

In [46]:
ser3 = pd.concat([ser1[~ser1.isin(ser2)], ser2[~ser2.isin(ser1)]])

print(ser3)

0    1
1    2
2    3
2    6
3    7
4    8
dtype: int64


# 8 How to get the minimum, 25th percentile, median, 75th, and max of a numeric series?

In [78]:
ser = pd.Series(np.random.normal(10, 5, 25))

print(ser)

ser_min = print(ser.min())
ser_25 = ser.quantile(q=0.25)
ser_50 = print(ser.quantile(q=0.5))
ser_75 = ser.quantile(q=0.75)
ser_max = ser.max()

np.percentile(ser, q=[0, 25, 50, 75, 100])

0     14.982151
1     17.540535
2     11.075103
3     16.273433
4     -2.311948
5      6.192250
6     17.346257
7      8.253103
8      4.227809
9     11.344694
10    13.033269
11    -3.452792
12     4.385037
13    17.504560
14    11.059109
15    10.375147
16    17.923753
17    15.612901
18    14.414858
19    16.751450
20     3.276853
21     8.570307
22     4.650163
23    13.865059
24     8.751165
dtype: float64
-3.4527922028500058
11.07510250928306


array([-3.4527922 ,  6.19225032, 11.07510251, 15.61290058, 17.92375256])

# 9. How to get frequency counts of unique items of a series?

In [50]:
ser = pd.Series(np.take(list('abcdefgh'), np.random.randint(8, size=30)))

print(ser)


0     g
1     d
2     b
3     b
4     b
5     g
6     e
7     g
8     b
9     g
10    f
11    d
12    e
13    e
14    b
15    d
16    c
17    g
18    f
19    d
20    e
21    f
22    f
23    h
24    a
25    f
26    a
27    h
28    e
29    e
dtype: object


In [55]:
ser.value_counts()

e    6
g    5
b    5
f    5
d    4
h    2
a    2
c    1
dtype: int64

# 10. How to keep only top 2 most frequent values as it is and replace everything else as ‘Other’?

In [69]:
np.random.RandomState(100)
ser = pd.Series(np.random.randint(1, 5, [12]))

In [70]:
ser[~ser.isin(ser.value_counts().index[:2])] = "Others"
ser

0          2
1          2
2     Others
3          2
4          2
5          1
6     Others
7          2
8          2
9          1
10         1
11         1
dtype: object

# 11. How to bin a numeric series to 10 groups of equal size?

In [81]:
ser = pd.Series(np.random.random(20))

print(ser)

0     0.561665
1     0.839222
2     0.760673
3     0.842627
4     0.829856
5     0.200273
6     0.348989
7     0.896200
8     0.711909
9     0.986756
10    0.163716
11    0.811505
12    0.353508
13    0.249590
14    0.420824
15    0.634122
16    0.265340
17    0.821906
18    0.752347
19    0.467999
dtype: float64


In [85]:
pd.qcut(ser, 10, labels = ['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th'])

0      5th
1      9th
2      7th
3      9th
4      8th
5      1st
6      3rd
7     10th
8      6th
9     10th
10     1st
11     7th
12     3rd
13     2nd
14     4th
15     5th
16     2nd
17     8th
18     6th
19     4th
dtype: category
Categories (10, object): ['1st' < '2nd' < '3rd' < '4th' ... '7th' < '8th' < '9th' < '10th']

# 12. How to convert a numpy array to a dataframe of given shape? (L1)

In [92]:
ser = pd.Series(np.random.randint(1, 10, 35))

df = pd.DataFrame(ser.values.reshape(7,5))

print(df)

   0  1  2  3  4
0  8  4  1  3  9
1  3  6  4  2  7
2  7  8  2  7  4
3  4  8  7  3  4
4  7  8  3  3  7
5  7  6  2  7  1
6  3  8  2  8  5


# 13. How to find the positions of numbers that are multiples of 3 from a series?

In [102]:
ser = pd.Series(np.random.randint(1, 10, 7))
print(ser)

0    6
1    4
2    4
3    7
4    4
5    8
6    6
dtype: int32


In [107]:
res = ser[ser % 3 == 0]

In [115]:
ser % 3 == 0

0     True
1    False
2    False
3    False
4    False
5    False
6     True
dtype: bool

# 15. How to stack two series vertically and horizontally ?

In [110]:
ser1 = pd.Series(range(5))
ser2 = pd.Series(list('abcde'))

In [114]:
# Vertically

pd.concat([ser1, ser2])



0    0
1    1
2    2
3    3
4    4
0    a
1    b
2    c
3    d
4    e
dtype: object

In [118]:
# Horizontally
pd.concat([ser1, ser2])

0    0
1    1
2    2
3    3
4    4
0    a
1    b
2    c
3    d
4    e
dtype: object

In [117]:
pd.concat([ser1, ser2], axis = 1)

Unnamed: 0,0,1
0,0,a
1,1,b
2,2,c
3,3,d
4,4,e


# 16. How to get the positions of items of series A in another series B?

In [119]:
ser1 = pd.Series([10, 9, 6, 5, 3, 1, 12, 8, 13])
ser2 = pd.Series([1, 3, 10, 13])

In [121]:
ser1.index(9)

TypeError: 'RangeIndex' object is not callable