In [None]:
#Direct Lingam helps us understand the correct causal structure among observed variables in a dataset.
#It is particularly useful in scenarios where we suspect that there are no hidden confounders and the relationships are linear with non-Gaussian noise.
#Assumption of non-Gaussianity is crucial as it allows the algorithm to identify the correct causal directions, which is not possible with Gaussian noise.

In [15]:
# --- Imports ---
import pandas as pd
import numpy as np
from lingam import DirectLiNGAM

In [16]:
#import graphutilz as guz  # Assuming this package is installed
import networkx as nx

In [32]:
data = pd.read_csv("C:/Users/sahil/Downloads/archive/Student_Performance.csv")

In [33]:
test = data.copy()

In [34]:
test = test.drop(['Extracurricular Activities'], axis = 1)
def standardise(x):
    return (x -x.mean())/(x.max() - x.min())
test = test.apply(standardise, axis = 1)
test.corr()

Unnamed: 0,Hours Studied,Previous Scores,Sleep Hours,Sample Question Papers Practiced,Performance Index
Hours Studied,1.0,0.036465,0.324943,-0.176584,-0.322022
Previous Scores,0.036465,1.0,0.581578,0.749652,-0.836131
Sleep Hours,0.324943,0.581578,1.0,0.442572,-0.859907
Sample Question Papers Practiced,-0.176584,0.749652,0.442572,1.0,-0.770127
Performance Index,-0.322022,-0.836131,-0.859907,-0.770127,1.0


In [35]:
import random


def introduce_noise(x):
    return x + random.randint(0,10)/100

test = test.apply(introduce_noise, axis = 0)

In [36]:
test_np = test.to_numpy()
test_np

array([[-0.30102041,  0.6677551 , -0.27061224, -0.4022449 ,  0.57612245],
       [-0.2925    ,  0.7125    , -0.2825    , -0.3575    ,  0.49      ],
       [-0.24795918,  0.65959184, -0.25836735, -0.41040816,  0.52714286],
       ...,
       [-0.32435897,  0.69282051, -0.28871795, -0.37717949,  0.5674359 ],
       [-0.28608247,  0.65113402, -0.29670103, -0.41886598,  0.62051546],
       [-0.2760274 ,  0.67178082, -0.25232877, -0.39821918,  0.52479452]])

In [37]:
# --- 2. Run DirectLiNGAM analysis ---
model = DirectLiNGAM()
model.fit(test_np)

<lingam.direct_lingam.DirectLiNGAM at 0x2b381a6b560>

In [38]:
# --- 3. Get results ---
causal_order = model.causal_order_
adjacency_matrix = model.adjacency_matrix_

print("Causal order:", causal_order)
print("Adjacency matrix:\n", adjacency_matrix)


Causal order: [2, 1, 3, 0, 4]
Adjacency matrix:
 [[ 0.          0.12261344  0.25273849 -0.26585005  0.        ]
 [ 0.          0.          0.31288919  0.          0.        ]
 [ 0.          0.          0.          0.          0.        ]
 [ 0.          1.34993536  0.          0.          0.        ]
 [-1.         -1.         -1.         -1.          0.        ]]


In [39]:
for i in range(len(adjacency_matrix)):
    for j in range(len(adjacency_matrix[i])):
        if adjacency_matrix[i][j] != 0:
            print(list(test)[i], '->', list(test)[j], "\t\t\t\tWeight", round(adjacency_matrix[i][j],1))

Hours Studied -> Previous Scores 				Weight 0.1
Hours Studied -> Sleep Hours 				Weight 0.3
Hours Studied -> Sample Question Papers Practiced 				Weight -0.3
Previous Scores -> Sleep Hours 				Weight 0.3
Sample Question Papers Practiced -> Previous Scores 				Weight 1.3
Performance Index -> Hours Studied 				Weight -1.0
Performance Index -> Previous Scores 				Weight -1.0
Performance Index -> Sleep Hours 				Weight -1.0
Performance Index -> Sample Question Papers Practiced 				Weight -1.0
