In [5]:
import pandas as pd

# Read the dementia CSV into a DataFrame
file_path = 'dementia_dataset.csv'
dementia_data = pd.read_csv(filepath_or_buffer=file_path)

# Preview the top five rows to sanity-check the load
dementia_data.head(n=5)

Unnamed: 0,Subject ID,MRI ID,Group,Visit,MR Delay,M/F,Hand,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,OAS2_0001,OAS2_0001_MR1,Nondemented,1,0,M,R,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,OAS2_0001,OAS2_0001_MR2,Nondemented,2,457,M,R,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,OAS2_0002,OAS2_0002_MR1,Demented,1,0,M,R,75,12,,23.0,0.5,1678,0.736,1.046
3,OAS2_0002,OAS2_0002_MR2,Demented,2,560,M,R,76,12,,28.0,0.5,1738,0.713,1.01
4,OAS2_0002,OAS2_0002_MR3,Demented,3,1895,M,R,80,12,,22.0,0.5,1698,0.701,1.034


In [6]:
# Data preparation

# Step 1: missing data
# Count the null entries in each of SES, MMSE, and nWBV
missing_values = dementia_data.loc[:, ['SES', 'MMSE', 'nWBV']].isna().sum()
missing_values


SES     19
MMSE     2
nWBV     0
dtype: int64

In [7]:
# Step 2: data cleaning
# Look up the storage dtype of the SES column
ses_types = dementia_data.dtypes['SES']
ses_types


dtype('float64')

In [8]:
# Distinct values present in the SES column (missing values show up as NaN)
unique_ses = pd.unique(dementia_data['SES'])
unique_ses

array([ 2., nan,  3.,  4.,  1.,  5.])

In [11]:
# Imputation strategy: a missing SES is filled with the median SES of the rows
# that share the same education level (EDUC).
# Step one: build the per-EDUC median SES lookup
median_ses_per_educ = dementia_data['SES'].groupby(dementia_data['EDUC']).median()
median_ses_per_educ


EDUC
6     4.0
8     5.0
11    4.0
12    3.0
13    2.0
14    3.0
15    2.0
16    2.0
17    1.0
18    2.0
20    1.0
23    1.0
Name: SES, dtype: float64

In [12]:

# Function to impute SES based on EDUC
def impute_ses(row, medians=None):
    """Return the SES value for one row, imputing it from EDUC when it is missing.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row passed by ``apply(axis=1)``)
        Must provide ``'SES'`` and ``'EDUC'`` entries.
    medians : Series or dict, optional
        Lookup from EDUC value to the SES value to impute. Defaults to the
        notebook-level ``median_ses_per_educ``.

    Returns
    -------
    The row's own SES when it is present, or when EDUC is missing or has no
    entry in ``medians``; otherwise the looked-up value for the row's EDUC.
    """
    if medians is None:
        medians = median_ses_per_educ

    ses = row['SES']
    educ = row['EDUC']
    if pd.isna(ses) and not pd.isna(educ):
        # .get() keeps the value missing instead of raising KeyError when this
        # EDUC level has no entry in the lookup.
        return medians.get(educ, ses)
    return ses

# Applying the imputation: fill only the missing SES entries with the median SES
# of the row's EDUC level. Series.map looks each EDUC value up in
# median_ses_per_educ (giving NaN when EDUC is missing or has no median), and
# fillna leaves observed SES values untouched. This is the vectorized
# equivalent of calling impute_ses on every row.
dementia_data['SES'] = dementia_data['SES'].fillna(
    dementia_data['EDUC'].map(median_ses_per_educ)
)

# Checking if there are any remaining missing SES values
remaining_missing_ses = dementia_data['SES'].isnull().sum()
remaining_missing_ses


0