# In [None]:
# 1. Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# 2. Define missing value formats and read the dataset
# These strings are passed as extra NaN markers to read_csv via `na_values`.
missing_value_formats = ["n.a.", "?", "NA", "n/a", "na", "--"]
df = pd.read_csv("academic_data.csv", na_values=missing_value_formats)

# 3. View the first few rows of the dataset
print("First 5 rows:\n", df.head())

# 4. Check for nulls in 'Gender' column
print("\nGender column:\n", df['Gender'].head(10))
print("\nIs Null:\n", df['Gender'].isnull().head(10))
print("\nIs Not Null:\n", df['Gender'].notnull().head(10))

# 5. Filter rows where 'Gender' is NOT null
# Boolean mask: True where 'Gender' holds a value, used to index the frame.
null_filter = df['Gender'].notnull()
print("\nRows with Gender not null:\n", df[null_filter])

# 6. Drop rows/columns with missing values
df.dropna(axis=0, inplace=True)  # drop rows with any null value
# Alternative options:
# df.dropna(axis=0, how='all')     # Drop rows where all values are null
# df.dropna(axis=1, how='any')     # Drop columns with any nulls
# df.dropna(axis=1, how='all')     # Drop columns with all nulls

# 7. Fill missing values
# NOTE(review): step 6 already removed every row containing a null, so the
# fills below have nothing left to fill. Skip step 6 (or run it on a copy)
# if these fills are meant to take effect.
# Each result is assigned back to the column instead of calling
# `fillna(..., inplace=True)` on `df[col]`: that chained in-place form warns
# in pandas 2.x and does not update `df` under Copy-on-Write. `ffill()` and
# `bfill()` replace the deprecated `fillna(method=...)` spelling.
df['SPOS'] = df['SPOS'].fillna(0)       # Fill SPOS nulls with 0
df['DSBDA'] = df['DSBDA'].ffill()       # Forward fill for DSBDA
df['SPOS'] = df['SPOS'].bfill()         # Backward fill for SPOS

# 8. Detect outliers using box plot
plt.boxplot(df['SPOS'])
plt.title("Boxplot of SPOS")
plt.show()
