In [1]:
import pandas as pd
import numpy as np

# Dataset: one record per individual, listed in column order.
column_names = ("individual", "sex", "age", "IQ", "depression", "health", "weight")
records = [
    (1, "Male", 21, 120, "Yes", "Very good", 150),
    (2, "Male", 43, np.nan, "No", "Very good", 160),
    (3, "Male", 22, 135, "No", "Average", 135),
    (4, "Male", 86, 150, "No", "Very poor", 140),
    (5, "Male", 60, 92, "Yes", "Good", 110),
    (6, "Female", 100, 130, "Yes", "Good", 110),
    (7, "Female", np.nan, 150, "Yes", "Very good", 120),
    (8, "Female", 43, np.nan, "Yes", "Average", 120),
    (9, "Female", 22, 84, "No", "Average", 105),
    (10, "Female", 80, 70, "No", "Good", 100),
]

# Transpose the records into a column-name -> list-of-values mapping;
# np.nan marks the missing age and IQ measurements.
data = {name: list(values) for name, values in zip(column_names, zip(*records))}

df = pd.DataFrame(data)

In [2]:
# Display the raw DataFrame, including the NaN entries in the age and IQ columns.
df

Unnamed: 0,individual,sex,age,IQ,depression,health,weight
0,1,Male,21.0,120.0,Yes,Very good,150
1,2,Male,43.0,,No,Very good,160
2,3,Male,22.0,135.0,No,Average,135
3,4,Male,86.0,150.0,No,Very poor,140
4,5,Male,60.0,92.0,Yes,Good,110
5,6,Female,100.0,130.0,Yes,Good,110
6,7,Female,,150.0,Yes,Very good,120
7,8,Female,43.0,,Yes,Average,120
8,9,Female,22.0,84.0,No,Average,105
9,10,Female,80.0,70.0,No,Good,100


In [3]:
# Encode categorical variables as integer codes so they can enter the
# correlation / covariance computations.
sex_codes = {"Male": 0, "Female": 1}
depression_codes = {"No": 0, "Yes": 1}
# Health is treated as an ordinal scale (higher code = better health).
# NOTE(review): Series.map turns any label missing from a mapping into NaN
# without raising; confirm that no other labels (e.g. "Poor") can occur.
health_codes = {"Very poor": 1, "Average": 2, "Good": 3, "Very good": 4}

# assign() returns a new frame, so the raw `df` keeps its string labels.
df_encoded = df.assign(
    sex=df["sex"].map(sex_codes),
    depression=df["depression"].map(depression_codes),
    health=df["health"].map(health_codes),
)

print("=== Original Data with NA ===")
print(df_encoded)

=== Original Data with NA ===
   individual  sex    age     IQ  depression  health  weight
0           1    0   21.0  120.0           1       4     150
1           2    0   43.0    NaN           0       4     160
2           3    0   22.0  135.0           0       2     135
3           4    0   86.0  150.0           0       1     140
4           5    0   60.0   92.0           1       3     110
5           6    1  100.0  130.0           1       3     110
6           7    1    NaN  150.0           1       4     120
7           8    1   43.0    NaN           1       2     120
8           9    1   22.0   84.0           0       2     105
9          10    1   80.0   70.0           0       3     100


In [8]:
# ---- Q1: Correlation & Covariance (without filling NA) ----
# The NaNs in age and IQ are left untouched here; pandas drops missing
# values pair by pair when computing each matrix entry.
# NOTE(review): `individual` looks like a row identifier; confirm whether it
# should take part in these matrices at all.
cov_matrix_q1 = df_encoded.cov(numeric_only=True)
corr_matrix_q1 = df_encoded.corr(numeric_only=True)

# Last expression of the cell: render the correlation matrix.
corr_matrix_q1


Unnamed: 0,individual,sex,age,IQ,depression,health,weight
individual,1.0,0.8703883,0.282856,-0.563507,-0.034816,-0.2487342,-0.877837
sex,0.870388,1.0,0.26049,-0.274175,0.2,3.456006e-16,-0.727825
age,0.282856,0.2604902,1.0,0.120651,0.094724,-0.1497714,-0.345642
IQ,-0.563507,-0.2741749,0.120651,1.0,0.230655,-0.07303901,0.657183
depression,-0.034816,0.2,0.094724,0.230655,1.0,0.4082483,-0.155963
health,-0.248734,3.456006e-16,-0.149771,-0.073039,0.408248,1.0,0.212238
weight,-0.877837,-0.7278253,-0.345642,0.657183,-0.155963,0.2122382,1.0


In [9]:
# Display the Q1 covariance matrix (computed before any missing values were filled).
cov_matrix_q1

Unnamed: 0,individual,sex,age,IQ,depression,health,weight
individual,9.166667,1.388889,26.875,-52.267857,-0.055556,-0.7777778,-53.888889
sex,1.388889,0.2777778,4.125,-4.5,0.055556,1.881211e-16,-7.777778
age,26.875,4.125,902.75,122.261905,1.5,-4.5,-222.5
IQ,-52.267857,-4.5,122.261905,942.839286,3.785714,-2.321429,368.75
depression,-0.055556,0.05555556,1.5,3.785714,0.277778,0.2222222,-1.666667
health,-0.777778,1.881211e-16,-4.5,-2.321429,0.222222,1.066667,4.444444
weight,-53.888889,-7.777778,-222.5,368.75,-1.666667,4.444444,411.111111


In [10]:
# ---- Q2: Fill missing values with mean ----
# Each NaN is replaced by the mean of its own column; fillna aligns the
# Series of means to the frame by column label.
column_means_q2 = df_encoded.mean(numeric_only=True)
df_filled = df_encoded.fillna(column_means_q2)

# Same statistics as Q1, now on the completed data.
cov_matrix_q2 = df_filled.cov(numeric_only=True)
corr_matrix_q2 = df_filled.corr(numeric_only=True)

In [11]:
# Display the frame after the missing values were replaced by column means.
df_filled

Unnamed: 0,individual,sex,age,IQ,depression,health,weight
0,1,0,21.0,120.0,1,4,150
1,2,0,43.0,116.375,0,4,160
2,3,0,22.0,135.0,0,2,135
3,4,0,86.0,150.0,0,1,140
4,5,0,60.0,92.0,1,3,110
5,6,1,100.0,130.0,1,3,110
6,7,1,53.0,150.0,1,4,120
7,8,1,43.0,116.375,1,2,120
8,9,1,22.0,84.0,0,2,105
9,10,1,80.0,70.0,0,3,100


In [12]:
# Display the Q2 correlation matrix (computed on the mean-filled data).
corr_matrix_q2

Unnamed: 0,individual,sex,age,IQ,depression,health,weight
individual,1.0,0.8703883,0.278537,-0.495836,-0.034816,-0.2487342,-0.877837
sex,0.870388,1.0,0.245593,-0.245229,0.2,3.456006e-16,-0.727825
age,0.278537,0.2455926,1.0,0.092339,0.089306,-0.1367219,-0.344342
IQ,-0.495836,-0.2452295,0.092339,1.0,0.206304,-0.06455797,0.52235
depression,-0.034816,0.2,0.089306,0.206304,1.0,0.4082483,-0.155963
health,-0.248734,3.456006e-16,-0.136722,-0.064558,0.408248,1.0,0.212238
weight,-0.877837,-0.7278253,-0.344342,0.52235,-0.155963,0.2122382,1.0


In [13]:
# Display the Q2 covariance matrix (computed on the mean-filled data).
cov_matrix_q2

Unnamed: 0,individual,sex,age,IQ,depression,health,weight
individual,9.166667,1.388889,23.888889,-40.652778,-0.055556,-0.777778,-53.888889
sex,1.388889,0.277778,3.666667,-3.5,0.055556,0.0,-7.777778
age,23.888889,3.666667,802.444444,70.833333,1.333333,-4.0,-197.777778
IQ,-40.652778,-3.5,70.833333,733.319444,2.944444,-1.805556,286.805556
depression,-0.055556,0.055556,1.333333,2.944444,0.277778,0.222222,-1.666667
health,-0.777778,0.0,-4.0,-1.805556,0.222222,1.066667,4.444444
weight,-53.888889,-7.777778,-197.777778,286.805556,-1.666667,4.444444,411.111111
