This notebook contains the first part of exercises from:
https://www.w3resource.com/python-exercises/pandas/index.php
It covers essential operations on `Series`, one of the basic objects of the pandas library.

In [1]:
import pandas as pd
import numpy as np

# Pandas Data Series [40 exercises with solution]

In [2]:
# 1 Create and display a one-dimensional array-like object holding
# an array of data, using the pandas module.
words = ['one', 'two', 'tree', 'for', 'five']
series = pd.Series(data=words)
series

0     one
1     two
2    tree
3     for
4    five
dtype: object

In [3]:
# 2 Convert a pandas Series to a Python list and show its type.
# Convert once and reuse the result for both prints.
as_list = series.to_list()
print(as_list)
print(type(as_list))

['one', 'two', 'tree', 'for', 'five']
<class 'list'>


In [4]:
# 3 Add, subtract, multiply and divide two pandas Series.
# The named methods are the element-wise equivalents of +, -, * and /.
series1 = pd.Series([2, 4, 6, 8, 10])
series2 = pd.Series([1, 3, 5, 7, 10])
series_add = series1.add(series2)
series_sub = series1.sub(series2)
series_mult = series1.mul(series2)
series_div = series1.div(series2)
print(
    f'addition:\n{series_add},\n'
    f'subtraction:\n{series_sub},\n'
    f'multiplication:\n{series_mult},\n'
    f'division:\n{series_div}'
)

addition:
0     3
1     7
2    11
3    15
4    20
dtype: int64,
subtraction:
0    1
1    1
2    1
3    1
4    0
dtype: int64,
multiplication:
0      2
1     12
2     30
3     56
4    100
dtype: int64,
division:
0    2.000000
1    1.333333
2    1.200000
3    1.142857
4    1.000000
dtype: float64


In [5]:
# 4 Compare the elements of two pandas Series.
# Element-wise "not equal"; True marks positions where the values differ.
series2.ne(series1)

0     True
1     True
2     True
3     True
4    False
dtype: bool

In [6]:
# 5 Convert a dictionary to a pandas Series.
# Dictionary keys become the index labels, values become the data.
value_by_label = {'a': 100, 'b': 200, 'c': 300, 'd': 400, 'e': 800}
pd.Series(value_by_label)

a    100
b    200
c    300
d    400
e    800
dtype: int64

In [7]:
# 6 Convert a NumPy array to a pandas Series.
tens = np.arange(start=10, stop=60, step=10)  # 10, 20, ..., 50 (stop is exclusive)
pd.Series(tens)

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [8]:
# 7 Change the data type of a given column or Series.
# errors='coerce' turns values that cannot be parsed as numbers into NaN.
mixed_values = [100, 200, 'python', 300.12, 400]
series7 = pd.Series(mixed_values)
pd.to_numeric(series7, errors='coerce')

0    100.00
1    200.00
2       NaN
3    300.12
4    400.00
dtype: float64

In [9]:
# 8 Convert the first column of a DataFrame to a Series.
columns = {
    'col1': [1, 2, 3, 4, 7, 11],
    'col2': [4, 5, 6, 9, 5, 0],
    'col3': [7, 5, 8, 12, 1, 11],
}
df = pd.DataFrame(columns)
# Position 0 selects the first column regardless of its name.
df.iloc[:, 0]

0     1
1     2
2     3
3     4
4     7
5    11
Name: col1, dtype: int64

In [10]:
# 9 Convert a given Series to an array.
# NOTE(review): this yields a plain Python list; use `series9.to_numpy()`
# if a NumPy array is what the exercise is after.
raw_items = ['100', '200', 'python', '300.12', '400']
series9 = pd.Series(raw_items)
series9.tolist()

['100', '200', 'python', '300.12', '400']

In [11]:
# 10 Convert a Series of lists to one Series.
series10 = pd.Series([
    ['Red', 'Green', 'White'],
    ['Red', 'Black'],
    ['Yellow'],
])
list10 = series10.to_list()
# Flatten one level: walk each inner list in order and collect its items.
new_list = [colour for colours in list10 for colour in colours]
pd.Series(new_list)

0       Red
1     Green
2     White
3       Red
4     Black
5    Yellow
dtype: object

In [12]:
# Shorter solution: flatten a Series of lists with `explode`.
# `explode` emits one row per list element. The previous
# `apply(pd.Series).stack()` route built a NaN-padded DataFrame and relied on
# `stack()` silently dropping those NaN cells, which the new `stack`
# implementation no longer does.
series10_ = pd.Series([
    ['Red', 'Green', 'White'],
    ['Red', 'Black'],
    ['Yellow']]
)
print(series10_)
# Exploded rows repeat the source row label, so rebuild a clean 0..n-1 index.
series10_.explode().reset_index(drop=True)

0    [Red, Green, White]
1           [Red, Black]
2               [Yellow]
dtype: object


0       Red
1     Green
2     White
3       Red
4     Black
5    Yellow
dtype: object

In [13]:
# 11 Sort a given Series.
# The values are strings, so the order is lexicographic, not numeric.
series9.sort_values(ascending=True)

0       100
1       200
3    300.12
4       400
2    python
dtype: object

In [14]:
# 12 Add some data to an existing Series.
# `Series.append` was removed in pandas 2.0; `pd.concat` is the supported way
# to join Series end to end. Each input keeps its own index labels, so the
# result repeats labels 0 and 1.
pd.concat([series9, pd.Series([500, 'php'])])

0       100
1       200
2    python
3    300.12
4       400
0       500
1       php
dtype: object

In [15]:
# 13 Create a subset of a given Series based on value and condition.
series13 = pd.Series(range(11))
below_six = series13.lt(6)  # boolean mask: True where the value is < 6
series13.loc[below_six]

0    0
1    1
2    2
3    3
4    4
5    5
dtype: int64

In [16]:
# 14 Change the order of the index of a given Series.
labels = ['A', 'B', 'C', 'D', 'E']
series14 = pd.Series([1, 2, 3, 4, 5], index=labels)
# Reindexing with existing labels reorders the rows and keeps label/value pairs.
series14.reindex(['B', 'A', 'C', 'D', 'E'])

B    2
A    1
C    3
D    4
E    5
dtype: int64

In [17]:
# 15 Compute the mean and standard deviation of the data of a given Series.
# `Series.append` was removed in pandas 2.0, so the two pieces are joined with
# `pd.concat` (same values and index labels as before).
series15 = pd.concat([pd.Series(range(1, 10, 1)), pd.Series([5, 3])])
print(series15.mean())
print(series15.std())  # sample standard deviation (pandas default ddof=1)

4.818181818181818
2.522624895547565


In [18]:
# 16 Get the items of a given Series that are not present in another Series.
sr1 = pd.Series(range(1, 6))
sr2 = pd.Series(range(2, 11, 2))
missing_from_sr2 = ~sr1.isin(sr2)  # True where the sr1 value does not occur in sr2
sr1.loc[missing_from_sr2]

0    1
2    3
4    5
dtype: int64

In [19]:
# 17 Get the items which are not common to two given Series.
# `Series.append` was removed in pandas 2.0; `pd.concat` joins the two
# one-sided differences and keeps each side's original index labels.
pd.concat([sr1[~sr1.isin(sr2)], sr2[~sr2.isin(sr1)]])

0     1
2     3
4     5
2     6
3     8
4    10
dtype: int64

In [20]:
# Alternative: take everything in the union that is not in the intersection.
sr_un = pd.Series(np.union1d(sr1, sr2))
sr_inter = pd.Series(np.intersect1d(sr1, sr2))
in_both = sr_un.isin(sr_inter)
sr_un.loc[~in_both]

0     1
2     3
4     5
5     6
6     8
7    10
dtype: int64

In [21]:
# 18 Compute the minimum, 25th percentile, median, 75th percentile and
# maximum of a given Series.
num_state = np.random.RandomState(100)  # fixed seed keeps the sample reproducible
num_series = pd.Series(num_state.normal(loc=10, scale=4, size=20))
five_number_points = [0, 25, 50, 75, 100]
np.percentile(num_series, q=five_number_points)

array([ 3.00093811,  8.09463867, 10.23353705, 12.21537733, 14.61214321])

In [22]:
# 19 Calculate the frequency counts of each unique value of a given Series.
# Draws are unseeded, so the counts differ from run to run.
random_values = np.random.randint(low=0, high=11, size=40)  # integers in [0, 10]
pd.Series(random_values).value_counts()

5     6
3     6
9     5
6     5
8     4
4     4
1     3
10    2
7     2
0     2
2     1
dtype: int64

In [23]:
# 20 Display the most frequent value in a given Series and replace
# everything else with 'Other'.
sr20 = pd.Series(np.random.randint(1, 4, 15))  # unseeded: values vary per run
top = sr20.value_counts().idxmax()
# `where` keeps entries equal to `top` and substitutes 'Other' elsewhere,
# returning a new object-dtype Series. Assigning strings into the int64 Series
# through `.loc` relies on silent upcasting, which pandas has deprecated.
sr20 = sr20.where(sr20 == top, 'Other')
sr20

0     Other
1         2
2     Other
3         2
4         2
5         2
6         2
7     Other
8     Other
9         2
10        2
11    Other
12    Other
13    Other
14    Other
dtype: object

In [24]:
# 21 Find the positions of the numbers that are multiples of 5 in a Series.
sr21 = pd.Series(np.random.randint(1, 100, 9))  # unseeded: values vary per run
is_multiple_of_five = sr21.mod(5).eq(0)
# The index of the filtered result gives the positions of the matches.
sr21[is_multiple_of_five]

1    25
2    85
3     5
dtype: int64

In [25]:
# 22 Extract the items at given positions of a given Series.
sr22 = pd.Series(np.random.randint(0, 11, 22))  # unseeded: values vary per run
mapping = [0, 2, 6, 11, 21]
# `take` selects by integer position, like `.iloc` with a list.
sr22.take(mapping)

0     6
2     8
6     5
11    7
21    7
dtype: int64

In [26]:
# 23 Get the positions of the items of one Series within another Series.
sr23_1 = pd.Series(range(1, 11))
sr23_2 = pd.Series(range(1, 11, 2))
found = sr23_1.isin(sr23_2)  # True where the sr23_1 value also occurs in sr23_2
sr23_1[found].index.tolist()

[0, 2, 4, 6, 8]

In [27]:
# 24 Convert the first and last character of each word of a given Series
# to upper case.
def capitalize_ends(word):
    """Return `word` with its first and last characters upper-cased.

    Words shorter than two characters are upper-cased as a whole, so a
    single letter is not duplicated ('a' -> 'A', not 'AA') and the empty
    string is returned unchanged instead of raising IndexError.
    """
    if len(word) < 2:
        return word.upper()
    return word[0].upper() + word[1:-1] + word[-1].upper()


sr24 = pd.Series(['php', 'python', 'java', 'c#', 'css'])
sr24.apply(capitalize_ends)

0       PhP
1    PythoN
2      JavA
3        C#
4       CsS
dtype: object

In [28]:
# 25 Calculate the number of characters in each word of a given Series.
# `len` is passed directly; no wrapping lambda is needed.
sr24.map(len)

0    3
1    6
2    4
3    2
4    3
dtype: int64

In [29]:
# 26 Compute the difference of differences between consecutive numbers
# of a given Series.
sr26 = pd.Series([1, 3, 5, 8, 10, 11, 15])
first_diff = sr26.diff()          # leading NaN: the first item has no predecessor
second_diff = first_diff.diff()   # hence two leading NaNs here
print(first_diff.tolist())
print(second_diff.tolist())

[nan, 2.0, 2.0, 3.0, 2.0, 1.0, 4.0]
[nan, nan, 0.0, 1.0, -1.0, -1.0, 3.0]


In [30]:
# 27 Convert a Series of date strings to a timeseries.
date_strings = [
    '01 Jan 2015',
    '10-02-2016',
    '20180307',
    '2014/05/06',
    '2016-04-12',
    '2019-04-06T11:20',
]
# Each string has a different layout, so every element is parsed on its own.
sr27 = pd.Series(date_strings).map(pd.to_datetime)
sr27

0   2015-01-01 00:00:00
1   2016-10-02 00:00:00
2   2018-03-07 00:00:00
3   2014-05-06 00:00:00
4   2016-04-12 00:00:00
5   2019-04-06 11:20:00
dtype: datetime64[ns]

In [31]:
# 28 Get the day of month, day of year, week number and day of week
# from a given Series of dates.
date_parts = (
    sr27.dt.day,                 # day of month
    sr27.dt.day_of_year,         # day of year
    sr27.dt.isocalendar().week,  # ISO week number
    sr27.dt.day_name(),          # weekday name
)
for part in date_parts:
    print(part.to_list())

[1, 2, 7, 6, 12, 6]
[1, 276, 66, 126, 103, 96]
[1, 39, 10, 19, 15, 14]
['Thursday', 'Sunday', 'Wednesday', 'Tuesday', 'Tuesday', 'Saturday']


In [32]:
# 29 Convert year-month strings to dates, adding a specified day of the month.
sr29 = pd.Series(['Jan 2015', 'Feb 2016', 'Mar 2017', 'Apr 2018', 'May 2019'])
# Prefix the day to every string first, then parse each result into a timestamp.
('11 ' + sr29).map(pd.to_datetime)

0   2015-01-11
1   2016-02-11
2   2017-03-11
3   2018-04-11
4   2019-05-11
dtype: datetime64[ns]

In [33]:
# 30 Filter the words of a given Series that contain at least two vowels.
from collections import Counter


def count_vowels(word):
    """Return how many characters of `word` are vowels (a, e, i, o, u), ignoring case."""
    letter_counts = Counter(word.lower())
    # A Counter reports 0 for letters that never occurred.
    return sum(letter_counts[vowel] for vowel in 'aeiou')


sr30 = pd.Series(['Red', 'Green', 'Orange', 'Pink', 'Yellow', 'White'])
result = sr30.map(lambda word: count_vowels(word) >= 2)
sr30[result]

1     Green
2    Orange
4    Yellow
5     White
dtype: object

In [34]:
# 31 Compute the Euclidean distance between two given Series.
import math
sr31_1 = pd.Series(range(1, 11))
sr31_2 = pd.Series([11, 8, 7, 5, 6, 5, 3, 4, 7, 1])
# Square root of the summed squared element-wise differences.
squared_gaps = (sr31_1 - sr31_2) ** 2
math.sqrt(squared_gaps.sum())

16.492422502470642

In [35]:
# 32 Find the positions of the values neighboured by smaller values
# on both sides in a given Series.
sr32 = pd.Series([1, 8, 7, 5, 6, 5, 3, 4, 7, 1])
# shift(1) aligns each value with its left neighbour, shift(-1) with its right.
# The two end values compare against NaN, so they are never selected.
above_left = sr32.gt(sr32.shift(1))
above_right = sr32.gt(sr32.shift(-1))
sr32[above_left & above_right].index.to_list()

[1, 4, 8]

In [36]:
# 33 Replace the white spaces in a given string with its least frequent character.
def least_frequent_char(text):
    """Return the least frequent non-space character of `text`.

    Spaces are left out of the count: they are what gets replaced, so they
    must never be chosen as the replacement. Returns None when `text` has
    no non-space characters.
    """
    letters = [ch for ch in text if ch != ' ']
    if not letters:
        return None
    return pd.Series(letters).value_counts().idxmin()


def fill_spaces(text):
    """Return `text` with every space replaced by its least frequent character.

    `text` is returned unchanged when it has no non-space characters.
    """
    filler = least_frequent_char(text)
    if filler is None:
        return text
    return text.replace(' ', filler)


string = 'abc def abcdef icd'
char = least_frequent_char(string)
fill_spaces(string)

'abcidefiabcdefiicd'

In [37]:
# 34 Compute the autocorrelation of a given numeric Series.
# Draws are unseeded, so the coefficient differs from run to run.
sr34 = pd.Series(np.random.uniform(low=-10, high=20, size=14))
sr34.autocorr(lag=1)  # lag 1 is the default

0.3368706495153645

In [38]:
# 35 Create a TimeSeries to display all the Sundays of a given year.
def days_of_year(year):
    """Return a Series with every calendar day of `year`, Jan 1 to Dec 31.

    The range is bounded by explicit start and end dates instead of a fixed
    365 periods, so leap years keep their 366th day.
    """
    return pd.Series(
        pd.date_range(f'{year:04d}-01-01', f'{year:04d}-12-31', freq='D')
    )


def sundays_of_year(year):
    """Return the Sundays of `year`, indexed by their zero-based day offset in the year."""
    days = days_of_year(year)
    return days[days.dt.day_name() == 'Sunday']


sr35 = days_of_year(2022)
sundays_of_year(2022)

1     2022-01-02
8     2022-01-09
15    2022-01-16
22    2022-01-23
29    2022-01-30
36    2022-02-06
43    2022-02-13
50    2022-02-20
57    2022-02-27
64    2022-03-06
71    2022-03-13
78    2022-03-20
85    2022-03-27
92    2022-04-03
99    2022-04-10
106   2022-04-17
113   2022-04-24
120   2022-05-01
127   2022-05-08
134   2022-05-15
141   2022-05-22
148   2022-05-29
155   2022-06-05
162   2022-06-12
169   2022-06-19
176   2022-06-26
183   2022-07-03
190   2022-07-10
197   2022-07-17
204   2022-07-24
211   2022-07-31
218   2022-08-07
225   2022-08-14
232   2022-08-21
239   2022-08-28
246   2022-09-04
253   2022-09-11
260   2022-09-18
267   2022-09-25
274   2022-10-02
281   2022-10-09
288   2022-10-16
295   2022-10-23
302   2022-10-30
309   2022-11-06
316   2022-11-13
323   2022-11-20
330   2022-11-27
337   2022-12-04
344   2022-12-11
351   2022-12-18
358   2022-12-25
dtype: datetime64[ns]

In [39]:
# 36 Convert a given Series into a DataFrame with its index
# as another column of the DataFrame.
lettered = pd.Series(list(range(5)), index=list('ABCDE'))
# to_frame() wraps the Series as a single column; reset_index() then moves
# the labels into a regular column named 'index'.
lettered.to_frame().reset_index()

Unnamed: 0,index,0
0,A,0
1,B,1
2,C,2
3,D,3
4,E,4


In [40]:
# 37 Stack two given Series vertically and horizontally.
# NOTE(review): only the horizontal (side-by-side) stacking is shown here.
sr37_1 = pd.Series(list(range(10)))
sr37_2 = pd.Series(list('pqrstuvwzy'))
# Column labels are the integer 0 and the string '1'.
df = pd.DataFrame({0: sr37_1, '1': sr37_2})
df

Unnamed: 0,0,1
0,0,p
1,1,q
2,2,r
3,3,s
4,4,t
5,5,u
6,6,v
7,7,w
8,8,z
9,9,y


In [41]:
# 38 Check the equality of two given Series.
shared_values = [1, 8, 7, 5, 6, 5, 3, 4, 7, 1]
sr38_1 = pd.Series(shared_values)
sr38_2 = pd.Series(shared_values)
# Element-wise comparison: one boolean per position.
sr38_1.eq(sr38_2)

0    True
1    True
2    True
3    True
4    True
5    True
6    True
7    True
8    True
9    True
dtype: bool

In [42]:
# 39 Find the index of the first occurrence of the smallest and the
# largest value of a given Series.
sr39 = pd.Series([1, 3, 7, 12, 88, 23, 3, 1, 9, 0])
# Print the label of the minimum first, then the label of the maximum.
for locate in (sr39.idxmin, sr39.idxmax):
    print(locate())

9
4


In [43]:
# 40 Check inequality over the index axis of a given DataFrame and a given Series.
# The mapping is named `data` rather than `dict`: rebinding `dict` would shadow
# the builtin for every cell executed afterwards.
# NOTE(review): 'NaN' below is a plain string, not a missing value, so
# 'NaN' != 'NaN' evaluates to False; use np.nan if real missing data is intended.
data = {
    'W': [68.0, 75.0, 86.0, 80.0, 'NaN'],
    'X': [78.0, 75.0, 'NaN', 80.0, 86.0],
    'Y': [84, 94, 89, 86, 86],
    'Z': [86, 97, 96, 72, 83]
}
df = pd.DataFrame(data)
sr40 = pd.Series([68.0, 75.0, 86.0, 80.0, 'NaN'])
# axis=0 aligns the Series with the row index and compares it to each column.
df.ne(sr40, axis=0)

Unnamed: 0,W,X,Y,Z
0,False,True,True,True
1,False,False,True,True
2,False,True,True,True
3,False,False,True,True
4,False,True,True,True
