In [2]:
import pandas as pd

# 2. How to create a series from a list, numpy array and dict?
Create a pandas series from each of the items below: a list, numpy and a dictionary

In [3]:
import numpy as np
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))

In [4]:
ser1 = pd.Series(mylist)
ser2 = pd.Series(myarr)
ser3 = pd.Series(mydict)
print(ser3.head())

a    0
b    1
c    2
e    3
d    4
dtype: int64


# 3. How to convert the index of a series into a column of a dataframe?
Difficulty Level: L1

Convert the series ser into a dataframe with its index as another column on the dataframe.

In [5]:
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))
ser = pd.Series(mydict)

In [6]:
df=pd.DataFrame(ser).reset_index()
df

Unnamed: 0,index,0
0,a,0
1,b,1
2,c,2
3,e,3
4,d,4
5,f,5
6,g,6
7,h,7
8,i,8
9,j,9


# 4. How to combine many series to form a dataframe?
Difficulty Level: L1

Combine ser1 and ser2 to form a dataframe.

In [7]:
import numpy as np
ser1 = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))
ser2 = pd.Series(np.arange(26))

In [8]:
df3=pd.concat([ser1,ser2],ignore_index=True)
df3

0      a
1      b
2      c
3      e
4      d
5      f
6      g
7      h
8      i
9      j
10     k
11     l
12     m
13     n
14     o
15     p
16     q
17     r
18     s
19     t
20     u
21     v
22     w
23     x
24     y
25     z
26     0
27     1
28     2
29     3
30     4
31     5
32     6
33     7
34     8
35     9
36    10
37    11
38    12
39    13
40    14
41    15
42    16
43    17
44    18
45    19
46    20
47    21
48    22
49    23
50    24
51    25
dtype: object

# 5. How to assign name to the series’ index?
Difficulty Level: L1

Give a name to the series ser calling it ‘alphabets’.

In [9]:
ser = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))

In [10]:
ser.name="alphabets"
ser.head()

0    a
1    b
2    c
3    e
4    d
Name: alphabets, dtype: object

# 6. How to get the items of series A not present in series B?
Difficulty Level: L2

From ser1 remove items present in ser2.

In [11]:
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

In [12]:
ser1[~ser1.isin(ser2)]

0    1
1    2
2    3
dtype: int64

# 7. How to get the items not common to both series A and series B?
Difficulty Level: L2

Get all items of ser1 and ser2 not common to both.

In [13]:
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])


In [14]:
union = pd.Series(np.union1d(ser1, ser2))

intersect = pd.Series(np.intersect1d(ser1, ser2))

notcommonseries = union[~union.isin(intersect)]

print(notcommonseries)

0    1
1    2
2    3
5    6
6    7
7    8
dtype: int64


# 8. How to get the minimum, 25th percentile, median, 75th, and max of a numeric series?
Difficuty Level: L2

Compute the minimum, 25th percentile, median, 75th, and maximum of ser.

In [15]:
ser = pd.Series(np.random.normal(10, 5, 25))

In [16]:
state = np.random.RandomState(100)
ser = pd.Series(state.normal(10, 5, 25))
np.percentile(ser, q=[0, 25, 50, 75, 100])

array([ 1.25117263,  7.70986507, 10.92259345, 13.36360403, 18.0949083 ])

# 9. How to get frequency counts of unique items of a series?
Difficulty Level: L1

Calculte the frequency counts of each unique value ser.

In [17]:
ser = pd.Series(np.take(list('abcdefgh'), np.random.randint(8, size=30)))

In [18]:
ser.value_counts()

a    8
c    7
b    5
g    3
d    3
f    2
h    1
e    1
Name: count, dtype: int64

# 10. How to keep only top 2 most frequent values as it is and replace everything else as ‘Other’?
Difficulty Level: L2

From ser, keep the top 2 most frequent items as it is and replace everything else as ‘Other’.

In [19]:
np.random.RandomState(100)
ser = pd.Series(np.random.randint(1, 5, [12]))

In [20]:
print("Top 2 Freq:", ser.value_counts())
ser[~ser.isin(ser.value_counts().index[:2])] = 'Other'
ser

Top 2 Freq: 3    5
4    3
2    2
1    2
Name: count, dtype: int64


0         3
1         3
2     Other
3         4
4         4
5         3
6     Other
7     Other
8         3
9         4
10    Other
11        3
dtype: object

# 11. How to bin a numeric series to 10 groups of equal size?
Difficulty Level: L2

Bin the series ser into 10 equal deciles and replace the values with the bin name.

In [21]:
ser = pd.Series(np.random.random(20))

In [22]:
pd.qcut(ser, q=[0, .10, .20, .3, .4, .5, .6, .7, .8, .9, 1],
        labels=['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th']).head()

0     9th
1     2nd
2     8th
3     1st
4    10th
dtype: category
Categories (10, object): ['1st' < '2nd' < '3rd' < '4th' ... '7th' < '8th' < '9th' < '10th']

# 12. How to convert a numpy array to a dataframe of given shape? (L1)
Difficulty Level: L1
Reshape the series ser into a dataframe with 7 rows and 5 columns

In [23]:
ser = pd.Series(np.random.randint(1, 10, 35))

In [24]:
df = pd.DataFrame(ser.values.reshape(7,5))
df

Unnamed: 0,0,1,2,3,4
0,2,4,1,8,7
1,7,5,7,8,4
2,9,2,3,3,4
3,2,8,1,5,5
4,1,3,5,9,8
5,3,8,2,5,8
6,8,3,4,4,3


# 13. How to find the positions of numbers that are multiples of 3 from a series?
Difficulty Level: L2

Find the positions of numbers that are multiples of 3 from ser.

In [25]:
ser = pd.Series(np.random.randint(1, 10, 7))

In [26]:
np.argwhere(ser % 3==0)

array([[0],
       [1],
       [5]])

# 14. How to extract items at given positions from a series
Difficulty Level: L1

From ser, extract the items at positions in list pos.

In [27]:
ser = pd.Series(list('abcdefghijklmnopqrstuvwxyz'))
pos = [0, 4, 8, 14, 20]

In [28]:
ser.take(pos)

0     a
4     e
8     i
14    o
20    u
dtype: object

# 15. How to stack two series vertically and horizontally ?
Difficulty Level: L1

Stack ser1 and ser2 vertically and horizontally (to form a dataframe).

In [29]:
ser1 = pd.Series(range(5))
ser2 = pd.Series(list('abcde'))

In [30]:
df = pd.concat([ser1, ser2], axis=1)
df

Unnamed: 0,0,1
0,0,a
1,1,b
2,2,c
3,3,d
4,4,e


# 16. How to get the positions of items of series A in another series B?
Difficulty Level: L2

Get the positions of items of ser2 in ser1 as a list.

In [31]:
ser1 = pd.Series([10, 9, 6, 5, 3, 1, 12, 8, 13])
ser2 = pd.Series([1, 3, 10, 13])

In [32]:
[pd.Index(ser1).get_loc(i) for i in ser2]

[5, 4, 0, 8]

# 17. How to compute the mean squared error on a truth and predicted series?
Difficulty Level: L2
Compute the mean squared error of truth and pred series.

In [33]:
truth = pd.Series(range(10))
pred = pd.Series(range(10)) + np.random.random(10)

In [34]:
np.mean((truth-pred)**2)

0.20540319395855028

# 18. How to convert the first character of each element in a series to uppercase?
Difficulty Level: L2

Change the first character of each word to upper case in each word of ser

In [35]:
ser = pd.Series(['how', 'to', 'kick', 'ass?'])

In [36]:
pd.Series([i.title() for i in ser])

0     How
1      To
2    Kick
3    Ass?
dtype: object

# 19. How to calculate the number of characters in each word in a series?
Difficulty Level: L2

In [37]:
ser = pd.Series(['how', 'to', 'kick', 'ass?'])

In [38]:
ser.map(lambda x: len(x))

0    3
1    2
2    4
3    4
dtype: int64

# 20. How to compute difference of differences between consequtive numbers of a series?
Difficulty Level: L1

Difference of differences between the consequtive numbers of ser

In [39]:
ser = pd.Series([1, 3, 6, 10, 15, 21, 27, 35])

In [40]:
print(ser.diff().tolist())
print(ser.diff().diff().tolist())

[nan, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0, 8.0]
[nan, nan, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0]
