This notebook contains the first part of exercises from:
https://www.w3resource.com/python-exercises/pandas/index.php
It covers essential operations with three basic objects of pandas library: `Series`, `DataFrame`, and `Index`.

In [1]:
import pandas as pd
import numpy as np

# Pandas Data Series [40 exercises with solution]

In [2]:
# 1 Write a Pandas program to create and display a one-dimensional array-like object 
# containing an array of data using Pandas module.
series = pd.Series(['one', 'two', 'tree', 'for', 'five'])
series

0     one
1     two
2    tree
3     for
4    five
dtype: object

In [3]:
# 2 Write a Pandas program to convert a Panda module Series to Python list and it's type.
print(series.to_list())
print(type(series.to_list()))

['one', 'two', 'tree', 'for', 'five']
<class 'list'>


In [4]:
# 3 Write a Pandas program to add, subtract, multiple and divide two Pandas Series.
series1 = pd.Series([2, 4, 6, 8, 10])
series2 = pd.Series([1, 3, 5, 7, 10])
series_add = series1 + series2
series_sub = series1 - series2
series_mult = series1 * series2
series_div = series1 / series2
print(f'addition:\n{series_add},\nsubtraction:\n{series_sub},\n\
multiplication:\n{series_mult},\ndivision:\n{series_div}')

addition:
0     3
1     7
2    11
3    15
4    20
dtype: int64,
subtraction:
0    1
1    1
2    1
3    1
4    0
dtype: int64,
multiplication:
0      2
1     12
2     30
3     56
4    100
dtype: int64,
division:
0    2.000000
1    1.333333
2    1.200000
3    1.142857
4    1.000000
dtype: float64


In [5]:
# 4 Write a Pandas program to compare the elements of the two Pandas Series.
series2 != series1

0     True
1     True
2     True
3     True
4    False
dtype: bool

In [6]:
# 5 Write a Pandas program to convert a dictionary to a Pandas series. 
pd.Series({'a': 100, 'b': 200, 'c': 300, 'd': 400, 'e': 800})

a    100
b    200
c    300
d    400
e    800
dtype: int64

In [7]:
# 6 Write a Pandas program to convert a NumPy array to a Pandas series.
pd.Series(np.arange(10, 60, 10))

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [8]:
# 7 Write a Pandas program to change the data type of given a column or a Series. 
series7 = pd.Series([100, 200, 'python', 300.12, 400])
pd.to_numeric(series7, errors='coerce')

0    100.00
1    200.00
2       NaN
3    300.12
4    400.00
dtype: float64

In [9]:
# 8 Write a Pandas program to convert the first column of a DataFrame as a Series.
df = pd.DataFrame(
    {'col1': [1, 2, 3, 4, 7 ,11],
     'col2': [4, 5, 6, 9, 5, 0],
     'col3': [7, 5, 8, 12, 1, 11]}
)
df.col1

0     1
1     2
2     3
3     4
4     7
5    11
Name: col1, dtype: int64

In [10]:
# 9 Write a Pandas program to convert a given Series to an array.
series9 = pd.Series(['100', '200', 'python', '300.12', '400'])
series9.to_list()

['100', '200', 'python', '300.12', '400']

In [11]:
# 10 Write a Pandas program to convert Series of lists to one Series.
series10 = pd.Series([
    ['Red', 'Green', 'White'], 
    ['Red', 'Black'], 
    ['Yellow']]
)
list10 = series10.to_list()
new_list = []
for i in range(len(list10)):
    a = list10[i]
    for j in range(len(a)):
          new_list.append(a[j])
pd.Series(new_list)

0       Red
1     Green
2     White
3       Red
4     Black
5    Yellow
dtype: object

In [12]:
# shorter solution
series10_ = pd.Series([
    ['Red', 'Green', 'White'], 
    ['Red', 'Black'], 
    ['Yellow']]
)
print(series10_)
series10_.apply(pd.Series).stack().reset_index(drop=True)

0    [Red, Green, White]
1           [Red, Black]
2               [Yellow]
dtype: object


0       Red
1     Green
2     White
3       Red
4     Black
5    Yellow
dtype: object

In [13]:
# 11 Write a Pandas program to sort a given Series.
series9.sort_values()

0       100
1       200
3    300.12
4       400
2    python
dtype: object

In [14]:
# 12 Write a Pandas program to add some data to an existing Series.
series9.append(pd.Series([500, 'php']))

0       100
1       200
2    python
3    300.12
4       400
0       500
1       php
dtype: object

In [15]:
# 13 Write a Pandas program to create a subset of a given series based on value and condition.
series13 = pd.Series(range(11))
series13[series13 < 6]

0    0
1    1
2    2
3    3
4    4
5    5
dtype: int64

In [16]:
# 14 Write a Pandas program to change the order of index of a given series.
series14 = pd.Series(data=[1, 2, 3, 4, 5], index=['A', 'B', 'C', 'D', 'E'])
series14.reindex(index=['B', 'A', 'C', 'D', 'E'])

B    2
A    1
C    3
D    4
E    5
dtype: int64

In [17]:
# 15 Write a Pandas program to create the mean and standard deviation of the data of a given Series.
series15 = pd.Series(range(1, 10, 1)).append(pd.Series([5, 3]))
print(series15.mean())
print(series15.std())

4.818181818181818
2.522624895547565


In [18]:
# 16 Write a Pandas program to get the items of a given series not present in another given series.
sr1 = pd.Series(range(1, 6, 1))
sr2 = pd.Series(range(2, 11, 2))
sr1[~ sr1.isin(sr2)]

0    1
2    3
4    5
dtype: int64

In [19]:
# 17 Write a Pandas program to get the items which are not common of two given series. 
sr1[~ sr1.isin(sr2)].append(sr2[~ sr2.isin(sr1)])

0     1
2     3
4     5
2     6
3     8
4    10
dtype: int64

In [20]:
# the other way
sr_un = pd.Series(np.union1d(sr1, sr2))
sr_inter = pd.Series(np.intersect1d(sr1, sr2))
sr_un[~ sr_un.isin(sr_inter)]

0     1
2     3
4     5
5     6
6     8
7    10
dtype: int64

In [21]:
# 18 Write a Pandas program to compute the minimum, 25th percentile, median, 75th, and maximum of a given series.
num_state = np.random.RandomState(100)
num_series = pd.Series(num_state.normal(10, 4, 20))
np.percentile(num_series, q=[0, 25 ,50, 75, 100])

array([ 3.00093811,  8.09463867, 10.23353705, 12.21537733, 14.61214321])

In [22]:
# 19 Write a Pandas program to calculate the frequency counts of each unique value of a given series.
pd.Series(np.random.randint(0, 11, 40)).value_counts()

5     6
3     6
9     5
6     5
8     4
4     4
1     3
10    2
7     2
0     2
2     1
dtype: int64

In [23]:
# 20 Write a Pandas program to display most frequent value in a given series 
# and replace everything else as 'Other' in the series. 
sr20 = pd.Series(np.random.randint(1, 4, 15))
top = sr20.value_counts().idxmax()
sr20.loc[~ (sr20 == top)] = 'Other'
sr20

0     Other
1         2
2     Other
3         2
4         2
5         2
6         2
7     Other
8     Other
9         2
10        2
11    Other
12    Other
13    Other
14    Other
dtype: object

In [24]:
# 21 Write a Pandas program to find the positions of numbers that are multiples of 5 of a given series. 
sr21 = pd.Series(np.random.randint(1, 100, 9))
sr21[sr21 % 5 == 0]

1    25
2    85
3     5
dtype: int64

In [25]:
# 22 Write a Pandas program to extract items at given positions of a given series.
sr22 = pd.Series(np.random.randint(0, 11, 22))
mapping = [0, 2, 6, 11, 21]
sr22.iloc[mapping]

0     6
2     8
6     5
11    7
21    7
dtype: int64

In [26]:
# 23 Write a Pandas program to get the positions of items of a given series in another given series.
sr23_1 = pd.Series(range(1, 11))
sr23_2 = pd.Series(range(1, 11, 2))
list(sr23_1[sr23_1.isin(sr23_2)].index)

[0, 2, 4, 6, 8]

In [27]:
# 24 Write a Pandas program convert the first and last character of each word 
# to upper case in each word of a given series.
sr24 = pd.Series(['php', 'python', 'java', 'c#', 'css'])
sr24.apply(lambda x: x[0].upper() + x[1:-1] + x[-1].upper())

0       PhP
1    PythoN
2      JavA
3        C#
4       CsS
dtype: object

In [28]:
# 25 Write a Pandas program to calculate the number of characters in each word in a given series.
sr24.apply(lambda x: len(x))

0    3
1    6
2    4
3    2
4    3
dtype: int64

In [29]:
# 26 Write a Pandas program to compute difference of differences between consecutive numbers of a given series.
sr26 = pd.Series([1, 3, 5, 8, 10, 11, 15])
print(sr26.diff().tolist())
print(sr26.diff().diff().tolist())

[nan, 2.0, 2.0, 3.0, 2.0, 1.0, 4.0]
[nan, nan, 0.0, 1.0, -1.0, -1.0, 3.0]


In [30]:
# 27 Write a Pandas program to convert a series of date strings to a timeseries.
sr27 = pd.Series([
    '01 Jan 2015', 
    '10-02-2016', 
    '20180307', 
    '2014/05/06', 
    '2016-04-12', 
    '2019-04-06T11:20']
)
sr27 = sr27.apply(lambda x: pd.to_datetime(x))
sr27

0   2015-01-01 00:00:00
1   2016-10-02 00:00:00
2   2018-03-07 00:00:00
3   2014-05-06 00:00:00
4   2016-04-12 00:00:00
5   2019-04-06 11:20:00
dtype: datetime64[ns]

In [31]:
# 28 Write a Pandas program to get the day of month, day of year, week number and day of week 
# from a given series of date strings.
print(sr27.dt.day.to_list())
print(sr27.dt.day_of_year.to_list())
print(sr27.dt.isocalendar().week.to_list())
print(sr27.dt.day_name().to_list())

[1, 2, 7, 6, 12, 6]
[1, 276, 66, 126, 103, 96]
[1, 39, 10, 19, 15, 14]
['Thursday', 'Sunday', 'Wednesday', 'Tuesday', 'Tuesday', 'Saturday']


In [32]:
# 29 Write a Pandas program to convert year-month string to dates adding a specified day of the month.
sr29 = pd.Series(['Jan 2015', 'Feb 2016', 'Mar 2017', 'Apr 2018', 'May 2019'])
sr29.apply(lambda x: pd.to_datetime('11 ' + x))

0   2015-01-11
1   2016-02-11
2   2017-03-11
3   2018-04-11
4   2019-05-11
dtype: datetime64[ns]

In [33]:
# 30 Write a Pandas program to filter words from a given series that contain atleast two vowels.
from collections import Counter

sr30 = pd.Series(['Red', 'Green', 'Orange', 'Pink', 'Yellow', 'White'])
result = sr30.map(lambda x: sum([Counter(x.lower()).get(i, 0) for i in list('aeiou')]) >= 2)
sr30[result]

1     Green
2    Orange
4    Yellow
5     White
dtype: object

In [34]:
# 31 Write a Pandas program to compute the Euclidean distance between two given series.
import math
sr31_1 = pd.Series(range(1, 11))
sr31_2 = pd.Series([11, 8, 7, 5, 6, 5, 3, 4, 7, 1])
math.sqrt(sum((sr31_1 - sr31_2) ** 2))

16.492422502470642

In [35]:
# 32 Write a Pandas program to find the positions of the values neighboured by smaller values 
# on both sides in a given series.
sr32 = pd.Series([1, 8, 7, 5, 6, 5, 3, 4, 7, 1])
sr32[(sr32 > sr32.shift(1)) & (sr32 > sr32.shift(-1))].index.to_list()

[1, 4, 8]

In [36]:
# 33 Write a Pandas program to replace missing white spaces in a given string with the least frequent character.
string = 'abc def abcdef icd'
char = pd.Series(list(string)).value_counts().idxmin()
string.replace(' ', char)

'abcidefiabcdefiicd'

In [37]:
# 34 Write a Pandas program to compute the autocorrelations of a given numeric series.
sr34 = pd.Series(np.random.uniform(-10, 20, 14))
sr34.autocorr()

0.3368706495153645

In [38]:
# 35 Write a Pandas program to create a TimeSeries to display all the Sundays of given year.
sr35 = pd.Series(pd.date_range('2022-01-01', periods=365, freq='d'))
sr35[sr35.dt.day_name() == 'Sunday']

1     2022-01-02
8     2022-01-09
15    2022-01-16
22    2022-01-23
29    2022-01-30
36    2022-02-06
43    2022-02-13
50    2022-02-20
57    2022-02-27
64    2022-03-06
71    2022-03-13
78    2022-03-20
85    2022-03-27
92    2022-04-03
99    2022-04-10
106   2022-04-17
113   2022-04-24
120   2022-05-01
127   2022-05-08
134   2022-05-15
141   2022-05-22
148   2022-05-29
155   2022-06-05
162   2022-06-12
169   2022-06-19
176   2022-06-26
183   2022-07-03
190   2022-07-10
197   2022-07-17
204   2022-07-24
211   2022-07-31
218   2022-08-07
225   2022-08-14
232   2022-08-21
239   2022-08-28
246   2022-09-04
253   2022-09-11
260   2022-09-18
267   2022-09-25
274   2022-10-02
281   2022-10-09
288   2022-10-16
295   2022-10-23
302   2022-10-30
309   2022-11-06
316   2022-11-13
323   2022-11-20
330   2022-11-27
337   2022-12-04
344   2022-12-11
351   2022-12-18
358   2022-12-25
dtype: datetime64[ns]

In [39]:
# 36 Write a Pandas program to convert given series into a dataframe with its index 
# as another column on the dataframe.
pd.DataFrame(pd.Series(list(range(5)), index=list('ABCDE'))).reset_index()

Unnamed: 0,index,0
0,A,0
1,B,1
2,C,2
3,D,3
4,E,4


In [40]:
# 37 Write a Pandas program to stack two given series vertically and horizontally.
sr37_1 = pd.Series(list(range(10)))
sr37_2 = pd.Series(list('pqrstuvwzy'))
df = sr37_1.to_frame()
df['1'] = sr37_2
df

Unnamed: 0,0,1
0,0,p
1,1,q
2,2,r
3,3,s
4,4,t
5,5,u
6,6,v
7,7,w
8,8,z
9,9,y


In [41]:
# 38 Write a Pandas program to check the equality of two given series.
sr38_1 = pd.Series([1, 8, 7, 5, 6, 5, 3, 4, 7, 1])
sr38_2 = pd.Series([1, 8, 7, 5, 6, 5, 3, 4, 7, 1])
sr38_1 == sr38_2

0    True
1    True
2    True
3    True
4    True
5    True
6    True
7    True
8    True
9    True
dtype: bool

In [42]:
# 39 Write a Pandas program to find the index of the first occurrence 
# of the smallest and largest value of a given series.
sr39 = pd.Series([1, 3, 7, 12, 88, 23, 3, 1, 9, 0])
print(sr39.idxmin())
print(sr39.idxmax())

9
4


In [43]:
# 40 Write a Pandas program to check inequality over the index axis of a given dataframe and a given series. 
dict = {
    'W': [68.0, 75.0, 86.0, 80.0, 'NaN'], 
    'X': [78.0, 75.0, 'NaN', 80.0, 86.0],
    'Y': [84, 94, 89, 86, 86],
    'Z': [86, 97, 96, 72, 83]
}
df = pd.DataFrame(dict)
sr40 = pd.Series([68.0, 75.0, 86.0, 80.0, 'NaN'])
df.ne(sr40, axis=0)

Unnamed: 0,W,X,Y,Z
0,False,True,True,True
1,False,False,True,True
2,False,True,True,True
3,False,False,True,True
4,False,True,True,True


# Pandas DataFrame [81 exercises with solution]

In [44]:
# 1 Write a Pandas program to create a DF from a dict.
pd.DataFrame({'X':[78,85,96,80,86], 'Y':[84,94,89,83,86],'Z':[86,97,96,72,83]})

Unnamed: 0,X,Y,Z
0,78,84,86
1,85,94,97
2,96,89,96
3,80,83,72
4,86,86,83


In [45]:
# 2 Write a Pandas program to create and display a DataFrame from a specified 
# dictionary data which has the index labels.
exam_data = {
    'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew', 'Laura', 'Kevin', 'Jonas'],
    'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
    'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
    'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']}
labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
columns = ['attempts', 'name', 'qualify', 'score']
df2 = pd.DataFrame(exam_data, columns=columns, index=labels)
df2

Unnamed: 0,attempts,name,qualify,score
a,1,Anastasia,yes,12.5
b,3,Dima,no,9.0
c,2,Katherine,yes,16.5
d,3,James,no,
e,2,Emily,no,9.0
f,3,Michael,yes,20.0
g,1,Matthew,yes,14.5
h,1,Laura,no,
i,2,Kevin,no,8.0
j,1,Jonas,yes,19.0


In [46]:
# 3 Write a Pandas program to display a summary of the basic information about a specified DataFrame and its data.
df2.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, a to j
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   attempts  10 non-null     int64  
 1   name      10 non-null     object 
 2   qualify   10 non-null     object 
 3   score     8 non-null      float64
dtypes: float64(1), int64(1), object(2)
memory usage: 400.0+ bytes


In [47]:
# 4 Write a Pandas program to get the first 3 rows of a given DataFrame.
df2.head(3)

Unnamed: 0,attempts,name,qualify,score
a,1,Anastasia,yes,12.5
b,3,Dima,no,9.0
c,2,Katherine,yes,16.5


In [48]:
# 5 Write a Pandas program to select the 'name' and 'score' columns from the following DataFrame.
df2[['name', 'score']]

Unnamed: 0,name,score
a,Anastasia,12.5
b,Dima,9.0
c,Katherine,16.5
d,James,
e,Emily,9.0
f,Michael,20.0
g,Matthew,14.5
h,Laura,
i,Kevin,8.0
j,Jonas,19.0


In [49]:
# 6 Write a Pandas program to select the specified columns and rows from a given data frame.
df2.loc[['b', 'd', 'f', 'g']][['score', 'qualify']]

Unnamed: 0,score,qualify
b,9.0,no
d,,no
f,20.0,yes
g,14.5,yes


In [50]:
# 7 Write a Pandas program to select the rows where the number of attempts in the examination is greater than 2.
df2[df2['attempts'] > 2]

Unnamed: 0,attempts,name,qualify,score
b,3,Dima,no,9.0
d,3,James,no,
f,3,Michael,yes,20.0


In [51]:
# 8 Write a Pandas program to count the number of rows and columns of a DataFrame. 
print('Number of rows: ', df2.shape[0])
print('Number of columns: ', df2.shape[1])

Number of rows:  10
Number of columns:  4


In [52]:
# 9 Write a Pandas program to select the rows where the score is missing, i.e. is NaN.
df2[df2['score'].isna()]

Unnamed: 0,attempts,name,qualify,score
d,3,James,no,
h,1,Laura,no,


In [53]:
# 10 Write a Pandas program to select the rows the score is between 15 and 20 (inclusive)
df2[(df2['score'] >= 15) & (df2['score'] <= 20)]

Unnamed: 0,attempts,name,qualify,score
c,2,Katherine,yes,16.5
f,3,Michael,yes,20.0
j,1,Jonas,yes,19.0


In [54]:
# 11 Write a Pandas program to select the rows where number of attempts 
# in the examination is less than 2 and score greater than 15.
df2[(df2['attempts'] < 2) & (df2['score'] > 15)]

Unnamed: 0,attempts,name,qualify,score
j,1,Jonas,yes,19.0


In [55]:
# 12 Write a Pandas program to change the score in row 'd' to 11.5. 
df2.loc['d', 'score'] = 11.5
df2

Unnamed: 0,attempts,name,qualify,score
a,1,Anastasia,yes,12.5
b,3,Dima,no,9.0
c,2,Katherine,yes,16.5
d,3,James,no,11.5
e,2,Emily,no,9.0
f,3,Michael,yes,20.0
g,1,Matthew,yes,14.5
h,1,Laura,no,
i,2,Kevin,no,8.0
j,1,Jonas,yes,19.0


In [56]:
# 13 Write a Pandas program to calculate the sum of the examination attempts by the students.
df2['attempts'].sum()

19

In [57]:
# 14 Write a Pandas program to calculate the mean score for each different student in DataFrame.
df2['score'].mean()

13.333333333333334

In [58]:
# 15 Write a Pandas program to append a new row 'k' to data frame with given values for each column. 
# Now delete the new row and return the original DataFrame. 
new_row = {'name': 'Suresh', 'score': 15.5, 'attempts': 1, 'qualify': 'yes'}
new_row = pd.DataFrame(new_row, index=['k'])
df15 = df2.append(new_row)
df15 = df15.drop(labels='k')
df15
# another way to add a new row 'k'
# df2.loc['k'] = [1, 'Suresh', 'yes', 15.5]

Unnamed: 0,attempts,name,qualify,score
a,1,Anastasia,yes,12.5
b,3,Dima,no,9.0
c,2,Katherine,yes,16.5
d,3,James,no,11.5
e,2,Emily,no,9.0
f,3,Michael,yes,20.0
g,1,Matthew,yes,14.5
h,1,Laura,no,
i,2,Kevin,no,8.0
j,1,Jonas,yes,19.0


In [59]:
# 16 Write a Pandas program to sort the DataFrame first by 'name' in descending order, 
# then by 'score' in ascending order. 
df2.loc['k'] = [1, 'Anastasia', 'yes', 15.5]
df2.sort_values(['name', 'score'], ascending=[True, False])

Unnamed: 0,attempts,name,qualify,score
k,1,Anastasia,yes,15.5
a,1,Anastasia,yes,12.5
b,3,Dima,no,9.0
e,2,Emily,no,9.0
d,3,James,no,11.5
j,1,Jonas,yes,19.0
c,2,Katherine,yes,16.5
i,2,Kevin,no,8.0
h,1,Laura,no,
g,1,Matthew,yes,14.5


In [60]:
# 17 Write a Pandas program to replace the 'qualify' column contains the values 'yes' and 'no' with True and False.
mapping = {'yes': True, 'no': False}
df17 = df2.copy(deep=True)
df17['qualify'] = df2['qualify'].replace(mapping)
df17
# another way
# df2['qualify'] = df2['qualify'].map({'yes': True, 'no': False})

Unnamed: 0,attempts,name,qualify,score
a,1,Anastasia,True,12.5
b,3,Dima,False,9.0
c,2,Katherine,True,16.5
d,3,James,False,11.5
e,2,Emily,False,9.0
f,3,Michael,True,20.0
g,1,Matthew,True,14.5
h,1,Laura,False,
i,2,Kevin,False,8.0
j,1,Jonas,True,19.0


In [61]:
# 18 Write a Pandas program to change the name 'James' to 'Suresh' in name column of the DataFrame.
df2.loc[df2['name'] == 'James', 'name'] = 'Suresh'
df2
# another way
# df['name'] = df['name'].replace('James', 'Suresh')

Unnamed: 0,attempts,name,qualify,score
a,1,Anastasia,yes,12.5
b,3,Dima,no,9.0
c,2,Katherine,yes,16.5
d,3,Suresh,no,11.5
e,2,Emily,no,9.0
f,3,Michael,yes,20.0
g,1,Matthew,yes,14.5
h,1,Laura,no,
i,2,Kevin,no,8.0
j,1,Jonas,yes,19.0


In [62]:
# 19 Write a Pandas program to delete the 'attempts' column from the DataFrame.
df2.drop('attempts', axis=1)

Unnamed: 0,name,qualify,score
a,Anastasia,yes,12.5
b,Dima,no,9.0
c,Katherine,yes,16.5
d,Suresh,no,11.5
e,Emily,no,9.0
f,Michael,yes,20.0
g,Matthew,yes,14.5
h,Laura,no,
i,Kevin,no,8.0
j,Jonas,yes,19.0


In [63]:
# 20 Write a Pandas program to insert a new column in existing DataFrame.
df2['color'] = ['Red', 'Orange', 'Yellow', 'Green', 'Blue', 'Violet', 'White', 'Black', 'Pink', 'Magenta', 'Red']
df2

Unnamed: 0,attempts,name,qualify,score,color
a,1,Anastasia,yes,12.5,Red
b,3,Dima,no,9.0,Orange
c,2,Katherine,yes,16.5,Yellow
d,3,Suresh,no,11.5,Green
e,2,Emily,no,9.0,Blue
f,3,Michael,yes,20.0,Violet
g,1,Matthew,yes,14.5,White
h,1,Laura,no,,Black
i,2,Kevin,no,8.0,Pink
j,1,Jonas,yes,19.0,Magenta


In [64]:
# 21 Write a Pandas program to iterate over rows in a DataFrame.
exam_data = [{'name':'Anastasia', 'score':12.5}, {'name':'Dima','score':9}, {'name':'Katherine','score':16.5}]
df21 = pd.DataFrame(exam_data)
for index, row in df21.iterrows():
    print(row['name'], row['score'])

Anastasia 12.5
Dima 9.0
Katherine 16.5


In [65]:
# 22 Write a Pandas program to get list from DataFrame column headers.
list(df2.columns)

['attempts', 'name', 'qualify', 'score', 'color']

In [66]:
# 23 Write a Pandas program to rename columns of a given DataFrame
# mapping = {'attempts': 'Attempts', 'name': 'Name', 'qualify': 'Qualify', 'score': 'Score', 'color': 'Color'}
df2.rename(str.capitalize, axis='columns')

Unnamed: 0,Attempts,Name,Qualify,Score,Color
a,1,Anastasia,yes,12.5,Red
b,3,Dima,no,9.0,Orange
c,2,Katherine,yes,16.5,Yellow
d,3,Suresh,no,11.5,Green
e,2,Emily,no,9.0,Blue
f,3,Michael,yes,20.0,Violet
g,1,Matthew,yes,14.5,White
h,1,Laura,no,,Black
i,2,Kevin,no,8.0,Pink
j,1,Jonas,yes,19.0,Magenta


In [67]:
# 24 Write a Pandas program to select rows from a given DataFrame based on values in some columns.
df2.query('attempts == 2')

Unnamed: 0,attempts,name,qualify,score,color
c,2,Katherine,yes,16.5,Yellow
e,2,Emily,no,9.0,Blue
i,2,Kevin,no,8.0,Pink


In [68]:
# 25 Write a Pandas program to change the order of a DataFrame columns.
df2[['name', 'score', 'qualify', 'attempts']]

Unnamed: 0,name,score,qualify,attempts
a,Anastasia,12.5,yes,1
b,Dima,9.0,no,3
c,Katherine,16.5,yes,2
d,Suresh,11.5,no,3
e,Emily,9.0,no,2
f,Michael,20.0,yes,3
g,Matthew,14.5,yes,1
h,Laura,,no,1
i,Kevin,8.0,no,2
j,Jonas,19.0,yes,1


In [69]:
# 26 Write a Pandas program to add one row in an existing DataFrame.
df2.loc['k'] = [1, 'Tatiana', 'yes', 22, 'Red']
df2

Unnamed: 0,attempts,name,qualify,score,color
a,1,Anastasia,yes,12.5,Red
b,3,Dima,no,9.0,Orange
c,2,Katherine,yes,16.5,Yellow
d,3,Suresh,no,11.5,Green
e,2,Emily,no,9.0,Blue
f,3,Michael,yes,20.0,Violet
g,1,Matthew,yes,14.5,White
h,1,Laura,no,,Black
i,2,Kevin,no,8.0,Pink
j,1,Jonas,yes,19.0,Magenta


In [70]:
# 27 Write a Pandas program to write a DataFrame to CSV file using tab separator. 
df2.reset_index().to_csv('df2.csv', sep='\t', index=False)

In [71]:
# 28 Write a Pandas program to count city wise number of people 
# from a given of data set (city, name of the person).
df28 = df2.copy(deep=True)
df28['state'] = ['New York', 'Arisona', 'California', 
                 'Texas', 'California', 'Los Angeles', 
                 'Los Angeles', 'California', 'Arisona',
                 'New York', 'California']
df28.state.value_counts()

California     4
New York       2
Arisona        2
Los Angeles    2
Texas          1
Name: state, dtype: int64

In [72]:
# 29 Write a Pandas program to delete DataFrame row(s) based on given column value.
df2.drop(df2.query('name == "Michael"').index)
# another way
# new_df = df2[df2.name != 'Michael']

Unnamed: 0,attempts,name,qualify,score,color
a,1,Anastasia,yes,12.5,Red
b,3,Dima,no,9.0,Orange
c,2,Katherine,yes,16.5,Yellow
d,3,Suresh,no,11.5,Green
e,2,Emily,no,9.0,Blue
g,1,Matthew,yes,14.5,White
h,1,Laura,no,,Black
i,2,Kevin,no,8.0,Pink
j,1,Jonas,yes,19.0,Magenta
k,1,Tatiana,yes,22.0,Red


In [73]:
# 31 Write a Pandas program to select a row of series/dataframe by given integer index.
df2.loc[['f']]

Unnamed: 0,attempts,name,qualify,score,color
f,3,Michael,yes,20.0,Violet


In [74]:
# 32 Write a Pandas program to replace all the NaN values with Zero's in a column of a dataframe.
df2.fillna(0)

Unnamed: 0,attempts,name,qualify,score,color
a,1,Anastasia,yes,12.5,Red
b,3,Dima,no,9.0,Orange
c,2,Katherine,yes,16.5,Yellow
d,3,Suresh,no,11.5,Green
e,2,Emily,no,9.0,Blue
f,3,Michael,yes,20.0,Violet
g,1,Matthew,yes,14.5,White
h,1,Laura,no,0.0,Black
i,2,Kevin,no,8.0,Pink
j,1,Jonas,yes,19.0,Magenta


In [75]:
# 33 Write a Pandas program to convert index in a column of the given dataframe.
df2.reset_index()

Unnamed: 0,index,attempts,name,qualify,score,color
0,a,1,Anastasia,yes,12.5,Red
1,b,3,Dima,no,9.0,Orange
2,c,2,Katherine,yes,16.5,Yellow
3,d,3,Suresh,no,11.5,Green
4,e,2,Emily,no,9.0,Blue
5,f,3,Michael,yes,20.0,Violet
6,g,1,Matthew,yes,14.5,White
7,h,1,Laura,no,,Black
8,i,2,Kevin,no,8.0,Pink
9,j,1,Jonas,yes,19.0,Magenta


In [76]:
# 34 Write a Pandas program to set a given value for particular cell in  DataFrame using index value. 
df2.at['i', 'score'] = 10.2
df2

Unnamed: 0,attempts,name,qualify,score,color
a,1,Anastasia,yes,12.5,Red
b,3,Dima,no,9.0,Orange
c,2,Katherine,yes,16.5,Yellow
d,3,Suresh,no,11.5,Green
e,2,Emily,no,9.0,Blue
f,3,Michael,yes,20.0,Violet
g,1,Matthew,yes,14.5,White
h,1,Laura,no,,Black
i,2,Kevin,no,10.2,Pink
j,1,Jonas,yes,19.0,Magenta


In [77]:
# 35 Write a Pandas program to count the NaN values in one or more columns in DataFrame.
df2.isna().sum()

attempts    0
name        0
qualify     0
score       1
color       0
dtype: int64

In [78]:
# 36 Write a Pandas program to drop a list of rows from a specified DataFrame.
df2.drop(['b', 'e', 'h'])

Unnamed: 0,attempts,name,qualify,score,color
a,1,Anastasia,yes,12.5,Red
c,2,Katherine,yes,16.5,Yellow
d,3,Suresh,no,11.5,Green
f,3,Michael,yes,20.0,Violet
g,1,Matthew,yes,14.5,White
i,2,Kevin,no,10.2,Pink
j,1,Jonas,yes,19.0,Magenta
k,1,Tatiana,yes,22.0,Red


In [79]:
# 39 Write a Pandas program to combining two series into a DataFrame.
sr39_1 = pd.Series([100, 200, 'python', 300.12, 400])
sr39_2 = pd.Series([10, 20, 'php', 30.12, 40])
pd.concat([sr39_1, sr39_2], axis=1)

Unnamed: 0,0,1
0,100,10
1,200,20
2,python,php
3,300.12,30.12
4,400,40


In [80]:
# 40 Write a Pandas program to shuffle a given DataFrame rows.
df2.sample(frac=1)

Unnamed: 0,attempts,name,qualify,score,color
h,1,Laura,no,,Black
f,3,Michael,yes,20.0,Violet
c,2,Katherine,yes,16.5,Yellow
b,3,Dima,no,9.0,Orange
a,1,Anastasia,yes,12.5,Red
e,2,Emily,no,9.0,Blue
d,3,Suresh,no,11.5,Green
k,1,Tatiana,yes,22.0,Red
j,1,Jonas,yes,19.0,Magenta
i,2,Kevin,no,10.2,Pink


In [81]:
# 41 Write a Pandas program to convert DataFrame column type from string to datetime.
df41 = pd.DataFrame(['3/11/2000', '3/12/2000', '3/13/2000'])
df41[0] = pd.to_datetime(df41[0])
df41

Unnamed: 0,0
0,2000-03-11
1,2000-03-12
2,2000-03-13


In [82]:
# 42 Write a Pandas program to rename a specific column name in a given DataFrame.
df2.rename({'attempts': 'Attempts'}, axis='columns')

Unnamed: 0,Attempts,name,qualify,score,color
a,1,Anastasia,yes,12.5,Red
b,3,Dima,no,9.0,Orange
c,2,Katherine,yes,16.5,Yellow
d,3,Suresh,no,11.5,Green
e,2,Emily,no,9.0,Blue
f,3,Michael,yes,20.0,Violet
g,1,Matthew,yes,14.5,White
h,1,Laura,no,,Black
i,2,Kevin,no,10.2,Pink
j,1,Jonas,yes,19.0,Magenta


In [83]:
# 43 Write a Pandas program to get a list of a specified column of a DataFrame. 
df2.name.to_list()

['Anastasia',
 'Dima',
 'Katherine',
 'Suresh',
 'Emily',
 'Michael',
 'Matthew',
 'Laura',
 'Kevin',
 'Jonas',
 'Tatiana']

In [84]:
# 44 Write a Pandas program to create a DataFrame from a Numpy array and specify the index 
# column and column headers.
arr = {'Column1': np.ndarray(15),
       'Column2': np.ndarray(15),
       'Column3': np.ndarray(15)
      }
index = ['Index' + str(i) for i in range(1, 16)]
df44_1 = pd.DataFrame(arr, index=index)
df44_1

Unnamed: 0,Column1,Column2,Column3
Index1,1.039778e-312,0.0,0.0
Index2,9.761181e-313,2.4e-322,2.376635e-312
Index3,1.018558e-312,0.0,2.419075e-312
Index4,1.018558e-312,0.0,2.376635e-312
Index5,1.018558e-312,0.0,2.228096e-312
Index6,1.209538e-312,0.0,2.461515e-312
Index7,1.018558e-312,0.0,6.790387e-313
Index8,1.018558e-312,0.0,2.355415e-312
Index9,1.018558e-312,0.0,2.461515e-312
Index10,2.37e-322,0.0,6.790387e-313


In [85]:
dtype = [('Column1', 'int32'), ('Column2', 'float32'), ('Column3', 'float32')]
values = np.zeros(15, dtype=dtype)
index = ['Index' + str(i) for i in range(1, len(values)+1)]
df44_2 = pd.DataFrame(values, index=index)
df44_2

Unnamed: 0,Column1,Column2,Column3
Index1,0,0.0,0.0
Index2,0,0.0,0.0
Index3,0,0.0,0.0
Index4,0,0.0,0.0
Index5,0,0.0,0.0
Index6,0,0.0,0.0
Index7,0,0.0,0.0
Index8,0,0.0,0.0
Index9,0,0.0,0.0
Index10,0,0.0,0.0


In [86]:
# 45 Write a Pandas program to find the row for where the value of a given column is maximum. 
print(f'max value in Column1 is on {df44_1.Column1.idxmax()} position')
print(f'max value in Column2 is on {df44_1.Column2.idxmax()} position')
print(f'max value in Column3 is on {df44_1.Column3.idxmax()} position')

max value in Column1 is on Index6 position
max value in Column2 is on Index2 position
max value in Column3 is on Index9 position


In [87]:
# 46 Write a Pandas program to check whether a given column is present in a DataFrame or not.
col = 'score'
if col in df2.columns:
    print(f'{col} is present in DataFrame')
else:
    print(f'{col} is not present in DataFrame')

score is present in DataFrame


In [88]:
# 47 Write a Pandas program to get the specified row value of a given DataFrame. 
df2.loc['a']

attempts            1
name        Anastasia
qualify           yes
score            12.5
color             Red
Name: a, dtype: object

In [89]:
# 48 Write a Pandas program to get the datatypes of columns of a DataFrame. 
df2.dtypes

attempts      int64
name         object
qualify      object
score       float64
color        object
dtype: object

In [90]:
# 49 Write a Pandas program to append data to an empty DataFrame.
empty = pd.DataFrame()
data = pd.DataFrame({'col1': range(3), 'col2': range(3)})
df49 = empty.append(data)
df49

Unnamed: 0,col1,col2
0,0,0
1,1,1
2,2,2


In [91]:
# 50 Write a Pandas program to sort a given DataFrame by two or more columns.
df2.sort_values(['attempts', 'name'])

Unnamed: 0,attempts,name,qualify,score,color
a,1,Anastasia,yes,12.5,Red
j,1,Jonas,yes,19.0,Magenta
h,1,Laura,no,,Black
g,1,Matthew,yes,14.5,White
k,1,Tatiana,yes,22.0,Red
e,2,Emily,no,9.0,Blue
c,2,Katherine,yes,16.5,Yellow
i,2,Kevin,no,10.2,Pink
b,3,Dima,no,9.0,Orange
f,3,Michael,yes,20.0,Violet


In [92]:
# 51 Write a Pandas program to convert the datatype of a given column (floats to ints).
df51 = df2.copy(deep=True)
df51.score = df51.score.fillna(0).astype(int)
df51

Unnamed: 0,attempts,name,qualify,score,color
a,1,Anastasia,yes,12,Red
b,3,Dima,no,9,Orange
c,2,Katherine,yes,16,Yellow
d,3,Suresh,no,11,Green
e,2,Emily,no,9,Blue
f,3,Michael,yes,20,Violet
g,1,Matthew,yes,14,White
h,1,Laura,no,0,Black
i,2,Kevin,no,10,Pink
j,1,Jonas,yes,19,Magenta


In [93]:
# 52 Write a Pandas program to remove infinite values from a given DataFrame.
df52 = pd.DataFrame([1000.000000, 2000.000000, 3000.000000, 4000.000000, np.inf, -np.inf])
df52 = df52.replace([np.inf, -np.inf], np.nan)
df52

Unnamed: 0,0
0,1000.0
1,2000.0
2,3000.0
3,4000.0
4,
5,


In [94]:
# 53 Write a Pandas program to insert a given column at a specific column index in a DataFrame.
df53 = pd.DataFrame({'col2': [4, 5, 6, 9, 5], 'col3': [7, 8, 12, 1, 11]})
df53.insert(0, 'col1', [1, 2, 3, 4, 7])
df53

Unnamed: 0,col1,col2,col3
0,1,4,7
1,2,5,8
2,3,6,12
3,4,9,1
4,7,5,11


In [95]:
# 54 Write a Pandas program to convert a given list of lists into a Dataframe.
source = [[2, 4], [1, 3]]
pd.DataFrame(source)

Unnamed: 0,0,1
0,2,4
1,1,3


In [96]:
# 55 Write a Pandas program to group by the first column and get second column as lists in rows.
df55 = pd.DataFrame({'col1': ['C1', 'C1', 'C2', 'C2', 'C2', 'C3', 'C2'],
                     'col2': [1, 2, 3, 3, 4, 6, 5]})
df55.groupby('col1')['col2'].apply(list)

col1
C1          [1, 2]
C2    [3, 3, 4, 5]
C3             [6]
Name: col2, dtype: object

In [97]:
# 56 Write a Pandas program to get column index from column name of a given DataFrame.
# list(df55.columns).index('col2')
df55.columns.get_loc('col2')

1

In [98]:
# 57 Write a Pandas program to count number of columns of a DataFrame.
# df55.shape[1]
# len(df55.columns)
df55.columns.size

2

In [99]:
# 58 Write a Pandas program to select all columns, except one given column in a DataFrame.
df53.loc[:, df53.columns != 'col3']

Unnamed: 0,col1,col2
0,1,4
1,2,5
2,3,6
3,4,9
4,7,5


In [100]:
# 59 Write a Pandas program to get first n records of a DataFrame.
df53.head(3)

Unnamed: 0,col1,col2,col3
0,1,4,7
1,2,5,8
2,3,6,12


In [101]:
# 60 Write a Pandas program to get last n records of a DataFrame.
df53.tail(3)

Unnamed: 0,col1,col2,col3
2,3,6,12
3,4,9,1
4,7,5,11


In [102]:
# 61 Write a Pandas program to get topmost n records within each group of a DataFrame. 
print(df53.sort_values('col1', ascending=False).head(3))
print(df53.sort_values('col2', ascending=False).head(3))
print(df53.sort_values('col3', ascending=False).head(3))

   col1  col2  col3
4     7     5    11
3     4     9     1
2     3     6    12
   col1  col2  col3
3     4     9     1
2     3     6    12
1     2     5     8
   col1  col2  col3
2     3     6    12
4     7     5    11
1     2     5     8


In [103]:
# 62 Write a Pandas program to remove first n rows of a given DataFrame. 
# df53.drop([0, 1, 2])
df53.iloc[3:]

Unnamed: 0,col1,col2,col3
3,4,9,1
4,7,5,11


In [104]:
# 63 Write a Pandas program to remove last n rows of a given DataFrame.
df53[:3]

Unnamed: 0,col1,col2,col3
0,1,4,7
1,2,5,8
2,3,6,12


In [105]:
# 64 Write a Pandas program to add a prefix or suffix to all columns of a given DataFrame
df64 = pd.DataFrame({
    'W': [68, 75, 86, 80, 66],
    'X': [78, 85, 96, 80, 86],
    'Y': [84, 94, 89, 83, 86], 
    'Z': [86, 97, 96, 72, 83]
})
# df64.columns = ['A_' + column for column in df64.columns]
df64.add_prefix('A_')
df64.add_suffix('_1')

Unnamed: 0,W_1,X_1,Y_1,Z_1
0,68,78,84,86
1,75,85,94,97
2,86,96,89,96
3,80,80,83,72
4,66,86,86,83


In [106]:
# 65 Write a Pandas program to reverse order (rows, columns) of a given DataFrame.
df64.iloc[:, ::-1]
df64.iloc[::-1, :]

Unnamed: 0,W,X,Y,Z
4,66,86,86,83
3,80,80,83,72
2,86,96,89,96
1,75,85,94,97
0,68,78,84,86


In [107]:
# 66 Write a Pandas program to select columns by data type of a given DataFrame.
df66 = pd.DataFrame({
    'name': ['Alberto Franco', 'Gino Mcneill', 'Ryan Parkes', 'Eesha Hinton', 'Syed Wharton'],
    'date_of_birth': ['17/05/2002', '16/02/1999', '25/09/1998', '11/05/2002', '15/09/1997'],
    'age': [18, 21, 22, 22, 23],
    'weight': [66.5, 55.4, 48.2, 80.1, 75.4]
})
df66.select_dtypes(include='float64')  # only float
df66.select_dtypes(include='object')  # only string
df66.select_dtypes(include='number')  # all numbers (float + integer)

Unnamed: 0,age,weight
0,18,66.5
1,21,55.4
2,22,48.2
3,22,80.1
4,23,75.4


In [108]:
# 67 Write a Pandas program to split a given DataFrame into two random subsets. 
df66_1 = df66.sample(frac=0.6)
df66_2 = df66.drop(df66_1.index)
print(df66_1)
print(df66_2)

             name date_of_birth  age  weight
1    Gino Mcneill    16/02/1999   21    55.4
3    Eesha Hinton    11/05/2002   22    80.1
0  Alberto Franco    17/05/2002   18    66.5
           name date_of_birth  age  weight
2   Ryan Parkes    25/09/1998   22    48.2
4  Syed Wharton    15/09/1997   23    75.4


In [109]:
# 68 Write a Pandas program to rename all columns with the same pattern of a given DataFrame.
df66 = df66.rename(str.capitalize, axis='columns').add_suffix('  ')
# df66 = df66.rename(str.lower, axis='columns').rename(str.strip, axis='columns')
df66.columns = df66.columns.str.lower().str.rstrip()
df66

Unnamed: 0,name,date_of_birth,age,weight
0,Alberto Franco,17/05/2002,18,66.5
1,Gino Mcneill,16/02/1999,21,55.4
2,Ryan Parkes,25/09/1998,22,48.2
3,Eesha Hinton,11/05/2002,22,80.1
4,Syed Wharton,15/09/1997,23,75.4


In [110]:
# 69 Write a Pandas program to merge datasets and check uniqueness.
df69 = pd.DataFrame({
    'name': ['Ryan Parkes', 'Eesha Hinton', 'Syed Wharton'],
    'date_of_birth': ['25/09/1998', '11/05/2002', '15/09/1997'],
    'age': [22, 22, 23],
    'weight': [48.2, 80.1, 75.4]
})
pd.concat([df66, df69])
pd.concat([df66, df69], verify_integrity=True, ignore_index=True)
pd.merge(df66, df69, validate='one_to_one')
pd.merge(df66, df69, validate='one_to_many')

Unnamed: 0,name,date_of_birth,age,weight
0,Ryan Parkes,25/09/1998,22,48.2
1,Eesha Hinton,11/05/2002,22,80.1
2,Syed Wharton,15/09/1997,23,75.4


In [111]:
# 70 Write a Pandas program to convert continuous values of a column in a given DataFrame to categorical.
df70 = pd.DataFrame({
    'name': ['Alberto Franco','Gino Mcneill','Ryan Parkes', 'Eesha Hinton', 'Syed Wharton'],
    'age': [18, 22, 40, 80, 5]
})
pd.cut(df70.age, bins=[0, 18, 65, 99], labels=['kids', 'adult', 'elderly'])

0       kids
1      adult
2      adult
3    elderly
4       kids
Name: age, dtype: category
Categories (3, object): ['kids' < 'adult' < 'elderly']

In [112]:
# 71 Write a Pandas program to display memory usage of a given DataFrame and every column of the DataFrame.
df66.info()
df66.memory_usage(deep=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   name           5 non-null      object 
 1   date_of_birth  5 non-null      object 
 2   age            5 non-null      int64  
 3   weight         5 non-null      float64
dtypes: float64(1), int64(1), object(2)
memory usage: 288.0+ bytes


Index            128
name             346
date_of_birth    335
age               40
weight            40
dtype: int64

In [113]:
# 72 Write a Pandas program to combine many given series to create a DataFrame.
sr71_1 = pd.Series(['php', 'python', 'java', 'c#', 'c++'])
sr71_2 = pd.Series(range(1, 6))
pd.concat([sr71_2, sr71_1], axis='columns')
pd.DataFrame({'col1': sr71_1, 'col2': sr71_2})

Unnamed: 0,col1,col2
0,php,1
1,python,2
2,java,3
3,c#,4
4,c++,5


In [114]:
# 73 Write a Pandas program to create DataFrames that contains random values, contains missing values, contains datetime values and contains mixed values.
pd.util.testing.makeDataFrame()
pd.util.testing.makeMissingDataframe()
pd.util.testing.makeTimeDataFrame()
pd.util.testing.makeMixedDataFrame()

  import pandas.util.testing


Unnamed: 0,A,B,C,D
0,0.0,0.0,foo1,2009-01-01
1,1.0,1.0,foo2,2009-01-02
2,2.0,0.0,foo3,2009-01-05
3,3.0,1.0,foo4,2009-01-06
4,4.0,0.0,foo5,2009-01-07


In [115]:
# 74 Write a Pandas program to fill missing values in time series data.
df74 = pd.DataFrame(
    {
        'c1': [120.0, 130.0, 140.0, 150.0, np.nan, 170.0 ],
        'c2': [7.0, np.nan, 10.0, np.nan, 5.5, 16.5]
    }, 
    index=pd.date_range(start='2000-01-03', end='2000-01-8')
)
# df74.apply(lambda x: x.interpolate())
df74.interpolate()

Unnamed: 0,c1,c2
2000-01-03,120.0,7.0
2000-01-04,130.0,8.5
2000-01-05,140.0,10.0
2000-01-06,150.0,7.75
2000-01-07,160.0,5.5
2000-01-08,170.0,16.5


In [116]:
# 75 Write a Pandas program to use a local variable within a query. 
df64.query('W < @df64.W.max()')

Unnamed: 0,W,X,Y,Z
0,68,78,84,86
1,75,85,94,97
3,80,80,83,72
4,66,86,86,83


In [117]:
# 76 Write a Pandas program to clean object column with mixed data of a given DataFrame using regular expression.
df76 = pd.DataFrame({
    'agent': ['a001', 'a002', 'a003', 'a003', 'a004'],
    'purchase': [4500, 7500, '$3000.25', '$1250', 9000.00]
})
# df76.purchase.apply(lambda x: str(x).replace('$', '')).apply(lambda x: pd.to_numeric(x))
df76.purchase.replace('[$, ]', '', regex=True).astype(float)

0    4500.00
1    7500.00
2    3000.25
3    1250.00
4    9000.00
Name: purchase, dtype: float64

In [118]:
# 77 Write a Pandas program to get the numeric representation of an array 
# by identifying distinct values of a given column of a dataframe.
pd.factorize(df66.name)

# Encode the object as an enumerated type or categorical variable.
# This method is useful for obtaining a numeric representation of an array when all that matters 
# is identifying distinct values.

(array([0, 1, 2, 3, 4]),
 Index(['Alberto Franco', 'Gino Mcneill', 'Ryan Parkes', 'Eesha Hinton',
        'Syed Wharton'],
       dtype='object'))

In [119]:
# 78 Write a Pandas program to replace the current value in a dataframe column based on last largest value. 
# If the current value is less than last largest value replaces the value with 0.
df78 = pd.DataFrame({'rnum': np.random.randint(1, 35, 15)})
df78.rnum = df78.rnum.where(df78.rnum.eq(df78.rnum.cummax()), 0) # Replace values where the condition is False.
df78

Unnamed: 0,rnum
0,21
1,0
2,30
3,0
4,33
5,0
6,0
7,0
8,0
9,0


In [120]:
# 79 Write a Pandas program to create a DataFrame from the clipboard 
# (data from an Excel spreadsheet or a Google Sheet).
df79 = pd.read_clipboard()
df79

Unnamed: 0,0,s001,V,Alberto,Franco,15/05/2002,35,street1,t1
0,1,s002,V,Gino,Mcneill,17/05/2002,32,street2,t2
1,2,s003,VI,Ryan,Parkes,16/02/1999,33,street3,t3
2,3,s001,VI,Eesha,Hinton,25/09/1998,30,street1,t4
3,4,s002,V,Gino,Mcneill,11/05/2002,31,street2,t5
4,5,s004,VI,David,Parkes,15/09/1997,32,street4,t6


In [121]:
# 80 Write a Pandas program to check for inequality of two given DataFrames. 
df64.loc[4, 'W'] = np.nan
df64.loc[2, 'X'] = np.nan
df80 = pd.DataFrame({
    'W': [78, 75, 86, 80, np.nan],
    'X': [78, 85, 96, 80, 76],
    'Y': [84, 84, 89, 83, 86], 
    'Z': [86, 97, 96, 72, 83]
})
# df64 != df80
df64.ne(df80)

Unnamed: 0,W,X,Y,Z
0,True,False,False,False
1,False,False,True,False
2,False,True,False,False
3,False,False,False,False
4,True,True,False,False


In [122]:
# 81 Write a Pandas program to get lowest n records within each group of a given DataFrame.
# df53.iloc[df53.col1.sort_values().head(3).index]
# df53.iloc[df53.col2.sort_values().head(3).index]
# df53.iloc[df53.col3.sort_values().head(3).index]
df53.nsmallest(3, 'col1')
df53.nsmallest(3, 'col2')
df53.nsmallest(3, 'col3')

Unnamed: 0,col1,col2,col3
3,4,9,1
0,1,4,7
1,2,5,8


# Pandas Indexing [ 26 exercises with solution]

In [143]:
#  1 Write a Pandas program to display the default index and set a column as an Index in a given dataframe.
dfi_1 = pd.read_clipboard()
dfi_1.loc[:, 'date'] = pd.to_datetime(dfi_1.date)
dfi_1.rename(columns={'date': 'birthday'}, inplace=True)
dfi_1.set_index('birthday', inplace=True)
dfi_1

Unnamed: 0_level_0,s,class,first_name,last_name,age,address,mark
birthday,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2002-05-15,s001,V,Alberto,Franco,35,street1,t1
2002-05-17,s002,V,Gino,Mcneill,32,street2,t2
1999-02-16,s003,VI,Ryan,Parkes,33,street3,t3
1998-09-25,s001,VI,Eesha,Hinton,30,street1,t4
2002-11-05,s002,V,Gino,Mcneill,31,street2,t5
1997-09-15,s004,VI,David,Parkes,32,street4,t6


In [162]:
# 2 Write a Pandas program to create a multi Index frame using two columns and using an Index and a column.
dfi_2 = dfi_1.reset_index()
dfi_2 = dfi_2.rename(columns={
    'birthday': 'day_of_birth',
    's': 'school_code',
    'age': 'weight',
    'mark': 't_id'
})
dfi_2['name'] = dfi_2['first_name'] + ' ' + dfi_2['last_name']
dfi_2 = dfi_2.drop(columns = ['first_name', 'last_name'])
dfi_2.set_index(['t_id', 'school_code'])
dfi_2.set_index([pd.Index([0, 1, 2, 3, 4, 5]), 't_id'])

Unnamed: 0_level_0,Unnamed: 1_level_0,day_of_birth,school_code,class,weight,address,name
Unnamed: 0_level_1,t_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,t1,2002-05-15,s001,V,35,street1,Alberto Franco
1,t2,2002-05-17,s002,V,32,street2,Gino Mcneill
2,t3,1999-02-16,s003,VI,33,street3,Ryan Parkes
3,t4,1998-09-25,s001,VI,30,street1,Eesha Hinton
4,t5,2002-11-05,s002,V,31,street2,Gino Mcneill
5,t6,1997-09-15,s004,VI,32,street4,David Parkes


In [168]:
# 3 Write a Pandas program to display the default index and set a column 
# as an Index in a given dataframe and then reset the index.
dfi_3 = dfi_2.copy(deep=True)
list(dfi_3.index)
dfi_3 = dfi_3.set_index('t_id')
dfi_3.reset_index()

Unnamed: 0,t_id,day_of_birth,school_code,class,weight,address,name
0,t1,2002-05-15,s001,V,35,street1,Alberto Franco
1,t2,2002-05-17,s002,V,32,street2,Gino Mcneill
2,t3,1999-02-16,s003,VI,33,street3,Ryan Parkes
3,t4,1998-09-25,s001,VI,30,street1,Eesha Hinton
4,t5,2002-11-05,s002,V,31,street2,Gino Mcneill
5,t6,1997-09-15,s004,VI,32,street4,David Parkes


In [180]:
# 6 Write a Pandas program to create a dataframe indexing by date and time.
start = pd.to_datetime('2022-01-01', format='%Y-%m-%d')
dfi_2.set_index(pd.date_range(start=start, periods=len(dfi_2.index)))

Unnamed: 0,day_of_birth,school_code,class,weight,address,t_id,name
2022-01-01,2002-05-15,s001,V,35,street1,t1,Alberto Franco
2022-01-02,2002-05-17,s002,V,32,street2,t2,Gino Mcneill
2022-01-03,1999-02-16,s003,VI,33,street3,t3,Ryan Parkes
2022-01-04,1998-09-25,s001,VI,30,street1,t4,Eesha Hinton
2022-01-05,2002-11-05,s002,V,31,street2,t5,Gino Mcneill
2022-01-06,1997-09-15,s004,VI,32,street4,t6,David Parkes


In [182]:
# 7 Write a Pandas program to create a dataframe and set a title or name of the index column.
dfi_2.index.name = 'index'
dfi_2

Unnamed: 0_level_0,day_of_birth,school_code,class,weight,address,t_id,name
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,2002-05-15,s001,V,35,street1,t1,Alberto Franco
1,2002-05-17,s002,V,32,street2,t2,Gino Mcneill
2,1999-02-16,s003,VI,33,street3,t3,Ryan Parkes
3,1998-09-25,s001,VI,30,street1,t4,Eesha Hinton
4,2002-11-05,s002,V,31,street2,t5,Gino Mcneill
5,1997-09-15,s004,VI,32,street4,t6,David Parkes


In [184]:
# 8 Write a Pandas program to set value in a specific cell in a given dataframe using index.
dfi_2.at[3, 'weight'] = 29
dfi_2

Unnamed: 0_level_0,day_of_birth,school_code,class,weight,address,t_id,name
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,2002-05-15,s001,V,35,street1,t1,Alberto Franco
1,2002-05-17,s002,V,32,street2,t2,Gino Mcneill
2,1999-02-16,s003,VI,33,street3,t3,Ryan Parkes
3,1998-09-25,s001,VI,29,street1,t4,Eesha Hinton
4,2002-11-05,s002,V,31,street2,t5,Gino Mcneill
5,1997-09-15,s004,VI,32,street4,t6,David Parkes


In [186]:
# 9 Write a Pandas program to convert index of a given dataframe into a column.
dfi_2.reset_index()

Unnamed: 0,index,day_of_birth,school_code,class,weight,address,t_id,name
0,0,2002-05-15,s001,V,35,street1,t1,Alberto Franco
1,1,2002-05-17,s002,V,32,street2,t2,Gino Mcneill
2,2,1999-02-16,s003,VI,33,street3,t3,Ryan Parkes
3,3,1998-09-25,s001,VI,29,street1,t4,Eesha Hinton
4,4,2002-11-05,s002,V,31,street2,t5,Gino Mcneill
5,5,1997-09-15,s004,VI,32,street4,t6,David Parkes


In [217]:
# 10 Write a Pandas program to convert 1st and 3rd levels in the index into columns 
# from a multiple level of index frame of a given dataframe.
dfi_10 = dfi_3.reset_index().set_index([pd.Index(range(dfi_3.shape[0])), 't_id', 'school_code'])
dfi_10
dfi_10.reset_index(level=[0, 'school_code'])

Unnamed: 0_level_0,level_0,school_code,day_of_birth,class,weight,address,name
t_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
t1,0,s001,2002-05-15,V,35,street1,Alberto Franco
t2,1,s002,2002-05-17,V,32,street2,Gino Mcneill
t3,2,s003,1999-02-16,VI,33,street3,Ryan Parkes
t4,3,s001,1998-09-25,VI,30,street1,Eesha Hinton
t5,4,s002,2002-11-05,V,31,street2,Gino Mcneill
t6,5,s004,1997-09-15,VI,32,street4,David Parkes


In [240]:
# 11 Write a Pandas program to check if a specified value exists in single and multiple column index dataframe. 
's001' in dfi_10.index.levels[2]
't1' in dfi_10.index.levels[1]
5 in dfi_10.index.levels[0]
0 in dfi_2.index

True

In [247]:
# 12 Write a Pandas program to construct a series using the MultiIndex levels as the column and index.
pd.Series(data=dfi_10.index.levels[1], index=dfi_10.index.levels[0])

0    t1
1    t2
2    t3
3    t4
4    t5
5    t6
Name: t_id, dtype: object

In [253]:
# 13 Write a Pandas program to construct a DataFrame using the MultiIndex levels as the column and index.
sales_arrays = [['sale1', 'sale1', 'sale2', 'sale2', 'sale3', 'sale3', 'sale4', 'sale4'],
                ['city1', 'city2', 'city1', 'city2', 'city1', 'city2', 'city1', 'city2']]
sales_tuples = list(zip(*sales_arrays))
sales_index = pd.MultiIndex.from_tuples(sales_tuples, names=['sale', 'city'])
dfi_13 = pd.DataFrame(np.random.randn(8, 5), index=sales_index)
dfi_13

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2,3,4
sale,city,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
sale1,city1,1.951141,1.002342,0.173293,0.77779,-0.660995
sale1,city2,-2.342093,0.244515,-1.144848,0.237128,-0.741705
sale2,city1,-1.018008,0.674206,0.034493,-1.069721,2.378523
sale2,city2,0.556246,0.383582,-0.402176,-0.755101,1.36092
sale3,city1,0.288444,0.579547,0.155274,-2.00091,0.063818
sale3,city2,-0.39634,-0.067944,-0.895915,0.822342,-0.044722
sale4,city1,-1.229995,1.150978,-0.144325,0.570774,-0.434149
sale4,city2,-1.861166,0.334462,0.559051,-0.138844,0.509765


In [267]:
# 14 Write a Pandas program to extract a single row, rows and a specific value from a MultiIndex levels DataFrame.
dfi_13.loc[('sale3', 'city2')]
dfi_13.loc['sale3']
dfi_13.loc[('sale4', 'city1'), 4]

-0.43414915788899705

In [277]:
# 15 Write a Pandas program to rename names of columns and specific labels of the Main Index 
# of the MultiIndex dataframe. 
dfi_13.rename(columns={0: 'col1', 1: 'col2', 2: 'col3', 3: 'col4', 4: 'col5'})\
        .rename(index={'sale2': 'S2', 'city2': 'C2'})

Unnamed: 0_level_0,Unnamed: 1_level_0,col1,col2,col3,col4,col5
sale,city,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
sale1,city1,1.951141,1.002342,0.173293,0.77779,-0.660995
sale1,C2,-2.342093,0.244515,-1.144848,0.237128,-0.741705
S2,city1,-1.018008,0.674206,0.034493,-1.069721,2.378523
S2,C2,0.556246,0.383582,-0.402176,-0.755101,1.36092
sale3,city1,0.288444,0.579547,0.155274,-2.00091,0.063818
sale3,C2,-0.39634,-0.067944,-0.895915,0.822342,-0.044722
sale4,city1,-1.229995,1.150978,-0.144325,0.570774,-0.434149
sale4,C2,-1.861166,0.334462,0.559051,-0.138844,0.509765


In [285]:
# 16 Write a Pandas program to sort a MultiIndex of a DataFrame. Also sort on various levels of index.
dfi_13.sort_index(ascending=False)
dfi_13.sort_index(level=1, ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2,3,4
sale,city,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
sale4,city2,-1.861166,0.334462,0.559051,-0.138844,0.509765
sale3,city2,-0.39634,-0.067944,-0.895915,0.822342,-0.044722
sale2,city2,0.556246,0.383582,-0.402176,-0.755101,1.36092
sale1,city2,-2.342093,0.244515,-1.144848,0.237128,-0.741705
sale4,city1,-1.229995,1.150978,-0.144325,0.570774,-0.434149
sale3,city1,0.288444,0.579547,0.155274,-2.00091,0.063818
sale2,city1,-1.018008,0.674206,0.034493,-1.069721,2.378523
sale1,city1,1.951141,1.002342,0.173293,0.77779,-0.660995


In [292]:
# 17 Write a Pandas program to extract elements in the given positional indices along an axis of a dataframe.
dfi_13.take([1, 3, 5])
dfi_13.take([1, 2], axis=1)
dfi_13.take([-1, -2], axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,4,3
sale,city,Unnamed: 2_level_1,Unnamed: 3_level_1
sale1,city1,-0.660995,0.77779
sale1,city2,-0.741705,0.237128
sale2,city1,2.378523,-1.069721
sale2,city2,1.36092,-0.755101
sale3,city1,0.063818,-2.00091
sale3,city2,-0.044722,0.822342
sale4,city1,-0.434149,0.570774
sale4,city2,0.509765,-0.138844


In [300]:
# 18 Write a Pandas program to get the index of an element of a given Series.
ds = pd.Series([1,3,5,7,9,11,13,15], index=[0,1,2,3,4,5,7,8])
ds[ds == 11].index[0]

5

In [310]:
# 19 Write a Pandas program to select a specific row of given series/dataframe by integer index. 
dfi_10.iloc[[5]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,day_of_birth,class,weight,address,name
Unnamed: 0_level_1,t_id,school_code,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
5,t6,s004,1997-09-15,VI,32,street4,David Parkes


In [315]:
# 20 Write a Pandas program to find the indexes of rows of a specified value of a given column in a DataFrame.
dfi_10[dfi_10.day_of_birth == '2002-11-05'].index[0]

(4, 't5', 's002')

In [317]:
# 21 Write a Pandas program to drop a index level from a multi-level column index of a dataframe.
dfi_10.reset_index(level='school_code')

Unnamed: 0_level_0,Unnamed: 1_level_0,school_code,day_of_birth,class,weight,address,name
Unnamed: 0_level_1,t_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,t1,s001,2002-05-15,V,35,street1,Alberto Franco
1,t2,s002,2002-05-17,V,32,street2,Gino Mcneill
2,t3,s003,1999-02-16,VI,33,street3,Ryan Parkes
3,t4,s001,1998-09-25,VI,30,street1,Eesha Hinton
4,t5,s002,2002-11-05,V,31,street2,Gino Mcneill
5,t6,s004,1997-09-15,VI,32,street4,David Parkes


In [319]:
cols = pd.MultiIndex.from_tuples([('a', 'x'), ('a', 'y'), ('a', 'z')])
dfi_11 = pd.DataFrame([[1, 2, 3], [3, 4, 5], [5, 6, 7]], columns=cols)
dfi_11

Unnamed: 0_level_0,a,a,a
Unnamed: 0_level_1,x,y,z
0,1,2,3
1,3,4,5
2,5,6,7


In [320]:
dfi_11.droplevel(0, axis=1)

Unnamed: 0,x,y,z
0,1,2,3
1,3,4,5
2,5,6,7


In [321]:
dfi_11.droplevel(1, axis=1)

Unnamed: 0,a,a.1,a.2
0,1,2,3
1,3,4,5
2,5,6,7


In [357]:
# 22 Write a Pandas program to insert a column at a specific index in a given DataFrame.
dfi_2.insert(loc=3, column='color', value=['red', 'green', 'blue', 'cyan', 'magenta', 'yellow'])
dfi_2

Unnamed: 0_level_0,day_of_birth,school_code,class,color,weight,address,t_id,name
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,2002-05-15,s001,V,red,35,street1,t1,Alberto Franco
1,2002-05-17,s002,V,green,32,street2,t2,Gino Mcneill
2,1999-02-16,s003,VI,blue,33,street3,t3,Ryan Parkes
3,1998-09-25,s001,VI,cyan,29,street1,t4,Eesha Hinton
4,2002-11-05,s002,V,magenta,31,street2,t5,Gino Mcneill
5,1997-09-15,s004,VI,yellow,32,street4,t6,David Parkes


In [360]:
# 23 Write a Pandas program to print a DataFrame without index.
print(dfi_2.to_string(index=False))

day_of_birth school_code class   color  weight address t_id           name
  2002-05-15        s001     V     red      35 street1   t1 Alberto Franco
  2002-05-17        s002     V   green      32 street2   t2   Gino Mcneill
  1999-02-16        s003    VI    blue      33 street3   t3    Ryan Parkes
  1998-09-25        s001    VI    cyan      29 street1   t4   Eesha Hinton
  2002-11-05        s002     V magenta      31 street2   t5   Gino Mcneill
  1997-09-15        s004    VI  yellow      32 street4   t6   David Parkes


In [379]:
# 24 Write a Pandas program to find integer index of rows with missing data in a given dataframe.
dfi_24 = df74.reset_index().set_index(pd.Index(['t1', 't2', 't3', 't4', 't5', 't6']))
index = dfi_24.c1.index[df74.c1.isna()]
df_index = dfi_24.index.values.tolist()
print([df_index.index(i) for i in index])

[4]


In [383]:
# 25 Write a Pandas program to start index with different value rather than 0 in a given DataFrame. 
dfi_25 = dfi_24.reset_index()
dfi_25.index += 10
dfi_25.index
dfi_25

Unnamed: 0,level_0,index,c1,c2
10,t1,2000-01-03,120.0,7.0
11,t2,2000-01-04,130.0,
12,t3,2000-01-05,140.0,10.0
13,t4,2000-01-06,150.0,
14,t5,2000-01-07,,5.5
15,t6,2000-01-08,170.0,16.5


In [389]:
# 26 Write a Pandas program to select rows by filtering on one or more column(s) in a multi-index dataframe.
dfi_13.query('sale == "sale3"')
dfi_13.query(('sale == "sale4"') and ('city == "city2"'))

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2,3,4
sale,city,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
sale1,city2,-2.342093,0.244515,-1.144848,0.237128,-0.741705
sale2,city2,0.556246,0.383582,-0.402176,-0.755101,1.36092
sale3,city2,-0.39634,-0.067944,-0.895915,0.822342,-0.044722
sale4,city2,-1.861166,0.334462,0.559051,-0.138844,0.509765


# 101 Pandas Exercises for Data Analysis

https://www.machinelearningplus.com/python/101-pandas-exercises-python/

In [124]:
pd.__version__

'1.3.4'

In [125]:
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))

TypeError: 'dict' object is not callable

In [None]:
mylist_series = pd.Series(mylist)
myarr_series = pd.Series(myarr)
mydict_series = pd.Series(mydict)

In [None]:
class Polygon():
    """
    Base shapes class not intended for direct use.
    """
    
    def __init__(self, num_sides):
        """Create a 'Polygon' object for storing shape properties and methods."""
        self.num_sides = num_sides
        
    @property
    def perimeter(self):
        """Compute the perimeter of the shape."""
        return self.perimeter 
    
    def __str__(self):
        print(f'Polygon with {self.num_sides} sides')
        
class Rectangle(Polygon):
    """Class for rectangle shape."""
    
    def __init__(self, height, width):
        """Create a rectangle shape with given sides' sizes."""
        super().__init__(4)
        self.height = height
        self.width = width
        
    @property
    def area(self):
        """Compute the area of the rectangle."""
        return self.height * self.width
        
    @property
    def perimeter(self):
        return (self.height + self.width) * 2

In [None]:
rectangle1 = Rectangle(5, 12)
print(rectangle1.height)
print(rectangle1.width)
print(rectangle1.num_sides)
print(rectangle1.area)
print(rectangle1.perimeter)

In [None]:
rectangle1.__str__()

In [None]:
class Square(Polygon):
    """Class for square shape."""
    
    def __init__(self, side):
        """Create a square shape with given side size."""
        super().__init__(4)
        self.side = side
        
    @property
    def area(self):
        """Compute the area of the square."""
        return self.side ** 2
        
    @property
    def perimeter(self):
        return self.side * 4

In [None]:
square1 = Square(5)
print(square1.side)
print(square1.num_sides)
print(square1.area)
print(square1.perimeter)

In [None]:
square1.side = 6
square1.area