In [1]:
import pandas as pd
import numpy as np

# Raw exam records, stored column-wise; np.nan marks a missing score.
exam_data = {
    'name': [
        'Anastasia', 'Dima', 'Katherine', 'James', 'Emily',
        'Michael', 'Matthew', 'Laura', 'Kevin', 'Jonas',
    ],
    'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
    'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
    'qualify': [
        'yes', 'no', 'yes', 'no', 'no',
        'yes', 'yes', 'no', 'no', 'yes',
    ],
}

# Row labels 'a' through 'j', one per record above.
labels = list('abcdefghij')

In [2]:
# Q1: build the exam DataFrame from the column dict, using `labels` as the row index
df = pd.DataFrame(
    data=exam_data,
    index=pd.Index(labels),
)

In [3]:
# Q2 Basic info
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, a to j
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   name      10 non-null     object 
 1   score     8 non-null      float64
 2   attempts  10 non-null     int64  
 3   qualify   10 non-null     object 
dtypes: float64(1), int64(1), object(2)
memory usage: 400.0+ bytes
None


In [4]:
# Q3: show the first three rows via a positional slice
print(df.iloc[:3])

        name  score  attempts qualify
a  Anastasia   12.5         1     yes
b       Dima    9.0         3      no
c  Katherine   16.5         2     yes


In [5]:
# Q4: keep every row, but only the 'name' and 'score' columns
print(df.loc[:, ['name', 'score']])

        name  score
a  Anastasia   12.5
b       Dima    9.0
c  Katherine   16.5
d      James    NaN
e      Emily    9.0
f    Michael   20.0
g    Matthew   14.5
h      Laura    NaN
i      Kevin    8.0
j      Jonas   19.0


In [6]:
# Q5: narrow to the 'name'/'score' columns first, then pick rows 'a' and 'b' by label
print(df[['name', 'score']].loc[['a', 'b']])

        name  score
a  Anastasia   12.5
b       Dima    9.0


In [7]:
# Q6: rows whose 'attempts' value is strictly greater than 2
print(df.loc[df['attempts'].gt(2)])

      name  score  attempts qualify
b     Dima    9.0         3      no
d    James    NaN         3      no
f  Michael   20.0         3     yes


In [8]:
# Q7: (number of rows, number of columns) as a tuple
print((len(df.index), len(df.columns)))

(10, 4)


In [9]:
# Q8: rows where the score is missing (NaN), all columns kept
print(df.loc[df['score'].isna(), :])

    name  score  attempts qualify
d  James    NaN         3      no
h  Laura    NaN         1      no


In [10]:
# Q9: rows with 15 <= score <= 20 (both bounds inclusive; NaN scores never match)
print(df[(df['score'] >= 15) & (df['score'] <= 20)])

        name  score  attempts qualify
c  Katherine   16.5         2     yes
f    Michael   20.0         3     yes
j      Jonas   19.0         1     yes


In [11]:
# Q10: rows with fewer than 2 attempts AND a score above 15
print(df.loc[df['attempts'].lt(2) & df['score'].gt(15)])

    name  score  attempts qualify
j  Jonas   19.0         1     yes


In [12]:
# Q11: overwrite the score in row 'd' with 11.5 (single-cell scalar assignment)
df.at['d', 'score'] = 11.5

In [13]:
# Q12: total of the 'attempts' column
print(df.loc[:, 'attempts'].sum())

19


In [14]:
# Q13: average score; NaN entries are skipped (skipna=True is the pandas default)
print(df.loc[:, 'score'].mean(skipna=True))

13.333333333333334


In [15]:
# Q14: add a row labelled 'k' by assigning to a label that does not exist yet,
# then remove it again so the frame is back to its ten original labels
df.loc['k'] = ['NewStudent', 10, 1, 'yes']
df = df.drop(index='k')

In [16]:
# Q15: order rows by name (descending); equal names are ordered by score (ascending)
df = df.sort_values(
    by=['name', 'score'],
    ascending=[False, True],
)

In [17]:
# Q16: turn the 'qualify' strings into booleans ('yes' -> True, 'no' -> False).
# Series.map yields NaN for any value that is not a key of the mapping, so
# re-running this cell on the already-converted column would blank it out.
df = df.assign(qualify=df['qualify'].map({'yes': True, 'no': False}))

In [18]:
# Q17: swap the exact name value 'James' for 'Suresh'; other names are untouched
df['name'] = df['name'].replace({'James': 'Suresh'})

In [19]:
# Q18: remove the 'attempts' column, rebinding df to the reduced frame
df = df.drop('attempts', axis='columns')

In [20]:
# Q19: add a 'Grade' column.
# NOTE(review): a plain list is assigned by position, so these grades follow
# the frame's current row order (sorted in Q15), not labels a..j - confirm
# that this pairing is intended.
df = df.assign(
    Grade=['B', 'C', 'A', 'B', 'C', 'A', 'B', 'C', 'C', 'A'],
)

In [21]:
# Q20: walk the rows in their current order, printing each index label
# together with that row's name and score
for label, student, score in zip(df.index, df['name'], df['score']):
    print(label, student, score)

f Michael 20.0
g Matthew 14.5
h Laura nan
i Kevin 8.0
c Katherine 16.5
j Jonas 19.0
d Suresh 11.5
e Emily 9.0
b Dima 9.0
a Anastasia 12.5


In [22]:
# Q21: column headers as a plain Python list
print(df.columns.tolist())

['name', 'score', 'qualify', 'Grade']


In [23]:
# Final state: heading surrounded by blank lines, then the whole frame
print("\nFinal DataFrame:\n", df, sep="\n")


Final DataFrame:

        name  score  qualify Grade
f    Michael   20.0     True     B
g    Matthew   14.5     True     C
h      Laura    NaN    False     A
i      Kevin    8.0    False     B
c  Katherine   16.5     True     C
j      Jonas   19.0     True     A
d     Suresh   11.5    False     B
e      Emily    9.0    False     C
b       Dima    9.0    False     C
a  Anastasia   12.5     True     A
