In [10]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [None]:
# https://www.machinelearningplus.com/python/101-pandas-exercises-python/

In [3]:
# 1. How to import pandas and check the version?
pd.__version__

'1.1.3'

In [20]:
# How to convert the index of a series into a column of a dataframe?
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))
ser = pd.Series(mydict)
ser

a     0
b     1
c     2
e     3
d     4
f     5
g     6
h     7
i     8
j     9
k    10
l    11
m    12
n    13
o    14
p    15
q    16
r    17
s    18
t    19
u    20
v    21
w    22
x    23
y    24
z    25
dtype: int64

In [32]:
dframe = ser.to_frame()
dframe.reset_index(inplace = True)
dframe

Unnamed: 0,index,0
0,a,0
1,b,1
2,c,2
3,e,3
4,d,4
5,f,5
6,g,6
7,h,7
8,i,8
9,j,9


In [45]:
# How to combine many series to form a dataframe?
ser1 = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))
ser2 = pd.Series(np.arange(26))

pd.concat([ser1,ser2], axis=1)
# or 
dframe = DataFrame({'col1':ser1, 'col2':ser2})
dframe

Unnamed: 0,col1,col2
0,a,0
1,b,1
2,c,2
3,e,3
4,d,4
5,f,5
6,g,6
7,h,7
8,i,8
9,j,9


In [51]:
# 5. How to assign name to the series’ index?
ser = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))
ser.index.name = 'NAME'
ser

NAME
0     a
1     b
2     c
3     e
4     d
5     f
6     g
7     h
8     i
9     j
10    k
11    l
12    m
13    n
14    o
15    p
16    q
17    r
18    s
19    t
20    u
21    v
22    w
23    x
24    y
25    z
dtype: object

In [58]:
# 6. How to get the items of series A not present in series B?

ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])
# ~ operator is inverse
ser1[~ser1.isin(ser2)]

0    1
1    2
2    3
dtype: int64

In [78]:
#  How to get the items not common to both series A and series B?
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

# solution 1
# AuB -(AnB)
union = pd.Series(np.union1d(ser1,ser2))
intersect = pd.Series(np.intersect1d(ser1,ser2))
not_common = union[~union.isin(intersect)]
not_common

0    1
1    2
2    3
5    6
6    7
7    8
dtype: int64

In [81]:
# 8. How to get the minimum, 25th percentile, median, 75th, and max of a numeric series?
ser = pd.Series(np.random.normal(10, 5, 25))
ser
# ser.describe()

0      3.918442
1      9.968635
2      9.528528
3     10.502172
4     10.508859
5      6.984142
6     13.979576
7     18.763097
8     10.207779
9     10.339145
10     9.075308
11    12.532689
12    12.982853
13     8.737806
14     7.699987
15     6.661851
16    11.632816
17    18.761957
18     9.928600
19     2.116245
20    14.019705
21     7.391842
22    12.270907
23     8.752457
24    10.585996
dtype: float64

In [83]:
ser.describe()

count    25.000000
mean     10.314056
std       3.780122
min       2.116245
25%       8.737806
50%      10.207779
75%      12.270907
max      18.763097
dtype: float64

In [85]:
# 9. How to get frequency counts of unique items of a series?
ser = pd.Series(np.take(list('abcdefgh'), np.random.randint(8, size=30)))

0     c
1     d
2     e
3     f
4     c
5     g
6     g
7     d
8     e
9     a
10    c
11    a
12    c
13    b
14    c
15    g
16    a
17    c
18    e
19    d
20    h
21    d
22    g
23    d
24    b
25    b
26    f
27    e
28    e
29    h
dtype: object

In [88]:
ser.value_counts()

c    6
d    5
e    5
g    4
b    3
a    3
f    2
h    2
dtype: int64

In [89]:
# 10. How to keep only top 2 most frequent values as it is and replace everything else as ‘Other’?
np.random.RandomState(100)
ser = pd.Series(np.random.randint(1, 5, [12]))
ser

0     3
1     1
2     4
3     2
4     3
5     2
6     1
7     1
8     3
9     1
10    1
11    1
dtype: int64

In [96]:
val_counts = ser.value_counts()
ser[~ser.isin(val_counts.index[:2])] = 'Other'
ser

0         3
1         1
2     Other
3     Other
4         3
5     Other
6         1
7         1
8         3
9         1
10        1
11        1
dtype: object