In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

In [None]:
# Ask the user for the CSV location. Paths pasted from a file manager or a shell
# are frequently wrapped in quotes, which would make read_csv fail with a
# "file not found" error, so remove enclosing quotes as well as whitespace.
data_path = input("Enter dataset CSV file path: ").strip().strip("\"'").strip()
if not data_path:
    # Fail with a clear message instead of an obscure read_csv error on "".
    raise ValueError("No dataset path entered.")
df = pd.read_csv(data_path)
print(" Dataset loaded with shape:", df.shape)

# Keep only numeric columns; `df` is rebound to the numeric-only view of the
# freshly loaded file.
df = df.select_dtypes(include=[np.number])
numeric_cols = df.columns.tolist()
print("Numeric columns available:", numeric_cols)

# Create folder for histogram images (no error if it already exists)
os.makedirs("histograms", exist_ok=True)

✅ Dataset loaded with shape: (768, 9)
Numeric columns available: ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome']


In [None]:
# Compute a fixed-width histogram for every column of `df`.
# `histograms` maps column name -> {"counts": per-bin counts, "bins": bin edges},
# where "bins" holds num_bins + 1 edges.
num_bins = 10
histograms = {}
for col in df.columns:
    # np.histogram raises ValueError when the data contains NaN (the
    # auto-detected range is not finite), so bin only the observed values.
    observed = df[col].dropna()
    counts, bins = np.histogram(observed, bins=num_bins)
    histograms[col] = {"counts": counts, "bins": bins}

print("\n Histograms computed for numeric columns.")


✅ Histograms computed for numeric columns.


In [None]:
# Build a signed "relation matrix" from the precision (inverse covariance) matrix:
# entry (i, j) is +1 / -1 / 0 according to the sign of precision_matrix[i, j],
# with the diagonal forced to 0. The result is written to relation_matrix.csv.

# Use complete rows only: a single NaN would otherwise propagate into the
# covariance matrix and make the pseudo-inverse fail.
complete_rows = df.dropna()

# np.cov returns a 0-d array when there is only one column; atleast_2d keeps the
# matrix shape that pinv requires.
cov_matrix = np.atleast_2d(np.cov(complete_rows.values, rowvar=False))
precision_matrix = np.linalg.pinv(cov_matrix)  # Pseudo-inverse for stability

# NOTE(review): the sign of an off-diagonal precision entry is the opposite of
# the sign of the corresponding partial correlation. If +1 is meant to denote a
# positive partial relationship, the sign should be flipped — confirm intent.
relation_signs = np.sign(precision_matrix).astype(int)
np.fill_diagonal(relation_signs, 0)  # a column has no relation with itself

relation_matrix = pd.DataFrame(relation_signs, index=df.columns, columns=df.columns)
relation_matrix.to_csv("relation_matrix.csv", index=True)

print("\n Relation matrix computed and saved as relation_matrix.csv")


✅ Relation matrix computed and saved as relation_matrix.csv
