In [1]:
import pandas as pd
import numpy as np

# np.nan is NumPy's marker for a missing numeric value; pandas treats it as
# missing data, so every mark that was not recorded is entered as np.nan.
student_names = ['Glenn', 'Maria', 'Tim', 'Robert', 'Eric']
marks = {
    'ID': ['H342', 'H543', 'H129', 'H309', 'H219'],
    'Mathematics': [np.nan, 18, np.nan, 19, 20],
    'Physics': [19, 18, np.nan, 10, np.nan],
    'Arts': [17, np.nan, 18, 10, 15],
}

# One row per student (index = student name), one column per field.
df = pd.DataFrame(marks, index=student_names)
df

Unnamed: 0,ID,Mathematics,Physics,Arts
Glenn,H342,,19.0,17.0
Maria,H543,18.0,18.0,
Tim,H129,,,18.0
Robert,H309,19.0,10.0,10.0
Eric,H219,20.0,,15.0


### Task1: Locate all missing data

In [2]:
# Boolean frame with the same shape as df: True wherever a value is missing.
t1 = pd.isna(df)
t1

Unnamed: 0,ID,Mathematics,Physics,Arts
Glenn,False,True,False,False
Maria,False,False,False,True
Tim,False,True,True,False
Robert,False,False,False,False
Eric,False,False,True,False


### Task2: For which students are there missing marks? For which courses are there missing marks? 
#### Find the rows (or columns) with missing data, and give a list of them.

In [23]:
# Reduce across the columns of each row: True if the student has at least
# one missing value.
t2 = df.isna().any(axis='columns')
t2

Glenn      True
Maria      True
Tim        True
Robert    False
Eric       True
dtype: bool

In [24]:
# t2 is already a boolean Series, so it can be used directly as its own mask;
# comparing it to True is redundant. Keeps only the students flagged True.
t2a = t2[t2]
t2a

Glenn    True
Maria    True
Tim      True
Eric     True
dtype: bool

In [25]:
# Names (index labels) of the students with at least one missing value.
# The boolean Series t2 is used directly as the mask; no `== True` needed.
t2b = t2[t2].index.tolist()
t2b

['Glenn', 'Maria', 'Tim', 'Eric']

### Task3: How many exams did each student miss? Or how many students missed a specific exam?
#### Count the number of missing data in each row or column

In [6]:
# Summing the boolean mask across the columns counts the missing values
# in each row (True counts as 1).
t3 = df.isna().sum(axis='columns')
t3

Glenn     1
Maria     1
Tim       2
Robert    0
Eric      1
dtype: int64

In [7]:
# Students with more than one missing value.
t3[t3 > 1]

Tim    2
dtype: int64

In [26]:
# Number of missing values recorded for Eric.
t3['Eric']

1

### Task4: Find out for which courses Tim did not receive a mark?
#### Find out which columns the missing data in a given row belong to

In [9]:
# Take Tim's row out of the missing-value mask: one boolean per column,
# True where Tim has no value.
t4 = df.isna().loc['Tim']
t4

ID             False
Mathematics     True
Physics         True
Arts           False
Name: Tim, dtype: bool

In [10]:
# Column names for which Tim's value is missing. t4 is already boolean, so it
# serves as its own mask without the redundant `== True` comparison.
t4b = t4[t4].index.tolist()
t4b

['Mathematics', 'Physics']

### Task5: Find out which students did not receive a mark for Arts?
#### Find out which rows the missing data in a given column belong to

In [11]:
# One boolean per student: True where the Arts value is missing.
t5 = df.loc[:, 'Arts'].isna()
t5

Glenn     False
Maria      True
Tim       False
Robert    False
Eric      False
Name: Arts, dtype: bool

In [12]:
# Names of the students whose Arts value is missing. The boolean Series t5 is
# used directly as the mask instead of being compared to True.
t5a = t5[t5].index.tolist()
t5a

['Maria']

### Task 6: Give a list of courses for which each student did not receive a mark
#### Same as task 4 but for all rows

In [27]:
# For every student, print the columns in which their value is missing.
names = df.index.tolist()

# Compute the missing-value mask once instead of re-deriving it per row.
missing_mask = df.isna()
for name, row_mask in missing_mask.iterrows():
    # row_mask is boolean, so it selects its own True entries directly.
    missing = row_mask[row_mask].index.tolist()
    print('{}: {}'.format(name, missing))

Glenn: ['Mathematics']
Maria: ['Arts']
Tim: ['Mathematics', 'Physics']
Robert: []
Eric: ['Physics']


### Task 7: Give a list of students with missing marks for each course
#### Same as task 5 but for all columns

In [28]:
# For every column, print the students whose value in that column is missing.
# Note: the loop covers all columns of df, so 'ID' is reported too (with an
# empty list, since no ID is missing).
cols = df.columns.tolist()

# Compute the missing-value mask once instead of calling isna() per column.
missing_mask = df.isna()
for col in cols:
    col_mask = missing_mask[col]
    # col_mask is boolean, so it selects its own True entries directly.
    missing = col_mask[col_mask].index.tolist()
    print('{}: {}'.format(col, missing))

ID: []
Mathematics: ['Glenn', 'Tim']
Physics: ['Tim', 'Eric']
Arts: ['Maria']
