In [1]:
import pandas as pd

def clean_unicef_mn(input_path='mn.csv', output_csv='mn_clean.csv', output_json='mn_clean.json'):
    df = pd.read_csv(input_path, header=0, dtype=str)

    #drop empty rows
    df.dropna(how='all', inplace=True)

    #remove whitespace
    df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)

    #remove duplicates
    df.drop_duplicates(inplace=True)

    #convert numeric columns
    for col in df.columns:
        if col not in ['Country', 'Year']:
            df[col] = pd.to_numeric(df[col], errors='coerce')

    #handle missing numeric data
    num_cols = df.select_dtypes(include=['float', 'int']).columns
    df[num_cols] = df[num_cols].fillna(df[num_cols].median())
    #index reset
    df.reset_index(drop=True, inplace=True)

    #save cleaned data
    df.to_csv(output_csv, index=False)
    df.to_json(output_json, orient='records', indent=2)

    print(f"Cleaned data saved to '{output_csv}' and '{output_json}'")

if __name__ == '__main__':
    clean_unicef_mn()

  df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)


✅ Cleaned data saved to 'mn_clean.csv' and 'mn_clean.json'


In [2]:
df = pd.read_csv("mn_clean.csv")

#print summary statistics
print(df.describe())

#check remaining missing values
print(df.isnull().sum())

#view structure
print(df.head())

        Unnamed: 0          HH1          HH2           LN         MWM1  \
count  9008.000000  9008.000000  9008.000000  9008.000000  9008.000000   
mean   4504.500000   346.074267    13.161412     2.150977   346.074267   
std    2600.529946   198.366238     6.918993     1.679250   198.366238   
min       1.000000     1.000000     1.000000     1.000000     1.000000   
25%    2252.750000   174.000000     7.000000     1.000000   174.000000   
50%    4504.500000   342.000000    13.000000     1.000000   342.000000   
75%    6756.250000   515.250000    19.000000     3.000000   515.250000   
max    9008.000000   682.000000    25.000000    16.000000   682.000000   

              MWM2         MWM4         MWM5        MWM6D        MWM6M  ...  \
count  9008.000000  9008.000000  9008.000000  9008.000000  9008.000000  ...   
mean     13.161412     2.150977   488.643206    14.587145     3.199156  ...   
std       6.918993     1.679250   296.319073     9.200779     0.601127  ...   
min       1.00000