In [1]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import csv
from pyvis.network import Network
import os

In [10]:
class Visualization:
    """Build an interactive pyvis network from a table of attribute p-values.

    Every feature listed in ``self.clusters`` becomes a node, coloured by its
    cluster id. Rows of the p-value table contribute candidate edges between a
    depression (or CVD) attribute and an NMR attribute; ``create_graph`` keeps
    the candidates that pass the threshold filter.

    Typical call order: ``load_data`` -> ``process_data`` -> ``create_graph``
    -> ``visualize_network`` -> ``save_visualization``.
    """

    def __init__(self, p_values_file='atr_p_values.csv', threshold=0.8):
        """Read the p-value table and set up empty graph containers.

        Args:
            p_values_file: CSV path read with ``pd.read_csv``. ``process_data``
                reads the columns 'Depression Attribute', 'NMR Attribute',
                'CVD Attribute' and 'P-Value' from it.
            threshold: cut-off used by ``create_graph``; only candidate edges
                whose p-value is strictly greater than this are drawn.
        """
        self.df = pd.read_csv(p_values_file)

        # Cluster id -> feature names. Every listed feature becomes a graph node.
        self.clusters = {
            0: ['Depressed.At.Baseline', 'Loneliness', 'Social.Isolation', 'PHQ9.No.Info', 'PHQ9.Screen', 'PHQ9.Items', 'PHQ9.Severity', 'CIDI.MDD.No.Info', 'CIDI.MDD.Screen', 'CIDI.MDD.Response', 'CIDI.MDD.Severity', 'GAD.CIDI.Somatic'],
            1: ['CVD'],
            2: ['Apolipoprotein_A1', 'Apolipoprotein_B'],
            3: ['Concentration_of_Chylomicrons_and_Extremely_Large_VLDL_Particles', 'Concentration_of_HDL_Particles', 'Concentration_of_IDL_Particles', 'Concentration_of_Large_HDL_Particles', 'Concentration_of_Large_LDL_Particles', 'Concentration_of_Large_VLDL_Particles', 'Concentration_of_LDL_Particles', 'Concentration_of_Medium_HDL_Particles', 'Concentration_of_Medium_LDL_Particles', 'Concentration_of_Medium_VLDL_Particles', 'Concentration_of_Small_HDL_Particles', 'Concentration_of_Small_LDL_Particles', 'Concentration_of_Small_VLDL_Particles', 'Concentration_of_Very_Large_HDL_Particles', 'Concentration_of_Very_Large_VLDL_Particles', 'Concentration_of_Very_Small_VLDL_Particles', 'Concentration_of_VLDL_Particles', 'Total_Concentration_of_Lipoprotein_Particles'],
            4: ['Average_Diameter_for_HDL_Particles', 'Average_Diameter_for_LDL_Particles', 'Average_Diameter_for_VLDL_Particles'],
            5: ['Cholesterol_in_Chylomicrons_and_Extremely_Large_VLDL', 'Cholesterol_in_IDL', 'Cholesterol_in_Large_HDL', 'Cholesterol_in_Large_LDL', 'Cholesterol_in_Large_VLDL', 'Cholesterol_in_Medium_HDL', 'Cholesterol_in_Medium_LDL', 'Cholesterol_in_Medium_VLDL', 'Cholesterol_in_Small_HDL', 'Cholesterol_in_Small_LDL', 'Cholesterol_in_Small_VLDL', 'Cholesterol_in_Very_Large_HDL', 'Cholesterol_in_Very_Large_VLDL', 'Cholesterol_in_Very_Small_VLDL', 'Clinical_LDL_Cholesterol', 'HDL_Cholesterol', 'LDL_Cholesterol', 'VLDL_Cholesterol', 'Remnant_Cholesterol_(Non-HDL,_Non-LDL_-Cholesterol)', 'Total_Cholesterol', 'Total_Cholesterol_Minus_HDL-C'],
            6: ['Cholesteryl_Esters_in_Chylomicrons_and_Extremely_Large_VLDL', 'Cholesteryl_Esters_in_HDL', 'Cholesteryl_Esters_in_IDL', 'Cholesteryl_Esters_in_Large_HDL', 'Cholesteryl_Esters_in_Large_LDL', 'Cholesteryl_Esters_in_Large_VLDL', 'Cholesteryl_Esters_in_LDL', 'Cholesteryl_Esters_in_Medium_HDL', 'Cholesteryl_Esters_in_Medium_LDL', 'Cholesteryl_Esters_in_Medium_VLDL', 'Cholesteryl_Esters_in_Small_HDL', 'Cholesteryl_Esters_in_Small_LDL', 'Cholesteryl_Esters_in_Small_VLDL', 'Cholesteryl_Esters_in_Very_Large_HDL', 'Cholesteryl_Esters_in_Very_Large_VLDL', 'Cholesteryl_Esters_in_Very_Small_VLDL', 'Cholesteryl_Esters_in_VLDL', 'Total_Esterified_Cholesterol'],
            7: ['Free_Cholesterol_in_Chylomicrons_and_Extremely_Large_VLDL', 'Free_Cholesterol_in_HDL', 'Free_Cholesterol_in_IDL', 'Free_Cholesterol_in_Large_HDL', 'Free_Cholesterol_in_Large_LDL', 'Free_Cholesterol_in_Large_VLDL', 'Free_Cholesterol_in_LDL', 'Free_Cholesterol_in_Medium_HDL', 'Free_Cholesterol_in_Medium_LDL', 'Free_Cholesterol_in_Medium_VLDL', 'Free_Cholesterol_in_Small_HDL', 'Free_Cholesterol_in_Small_LDL', 'Free_Cholesterol_in_Small_VLDL', 'Free_Cholesterol_in_Very_Large_HDL', 'Free_Cholesterol_in_Very_Large_VLDL', 'Free_Cholesterol_in_Very_Small_VLDL', 'Free_Cholesterol_in_VLDL', 'Total_Free_Cholesterol'],
            8: ['Phospholipids_in_Chylomicrons_and_Extremely_Large_VLDL', 'Phospholipids_in_HDL', 'Phospholipids_in_IDL', 'Phospholipids_in_Large_HDL', 'Phospholipids_in_Large_LDL', 'Phospholipids_in_Large_VLDL', 'Phospholipids_in_LDL', 'Phospholipids_in_Medium_HDL', 'Phospholipids_in_Medium_LDL', 'Phospholipids_in_Medium_VLDL', 'Phospholipids_in_Small_HDL', 'Phospholipids_in_Small_LDL', 'Phospholipids_in_Small_VLDL', 'Phospholipids_in_Very_Large_HDL', 'Phospholipids_in_Very_Large_VLDL', 'Phospholipids_in_Very_Small_VLDL', 'Phospholipids_in_VLDL', 'Total_Phospholipids_in_Lipoprotein_Particles'],
            9: ['Triglycerides_in_Chylomicrons_and_Extremely_Large_VLDL', 'Triglycerides_in_HDL', 'Triglycerides_in_IDL', 'Triglycerides_in_Large_HDL', 'Triglycerides_in_Large_LDL', 'Triglycerides_in_Large_VLDL', 'Triglycerides_in_LDL', 'Triglycerides_in_Medium_HDL', 'Triglycerides_in_Medium_LDL', 'Triglycerides_in_Medium_VLDL', 'Triglycerides_in_Small_HDL', 'Triglycerides_in_Small_LDL', 'Triglycerides_in_Small_VLDL', 'Triglycerides_in_Very_Large_HDL', 'Triglycerides_in_Very_Large_VLDL', 'Triglycerides_in_Very_Small_VLDL', 'Triglycerides_in_VLDL', 'Total_Triglycerides'],
            10: ['Total_Lipids_in_Chylomicrons_and_Extremely_Large_VLDL', 'Total_Lipids_in_HDL', 'Total_Lipids_in_IDL', 'Total_Lipids_in_Large_HDL', 'Total_Lipids_in_Large_LDL', 'Total_Lipids_in_Large_VLDL', 'Total_Lipids_in_LDL', 'Total_Lipids_in_Lipoprotein_Particles', 'Total_Lipids_in_Medium_HDL', 'Total_Lipids_in_Medium_LDL', 'Total_Lipids_in_Medium_VLDL', 'Total_Lipids_in_Small_HDL', 'Total_Lipids_in_Small_LDL', 'Total_Lipids_in_Small_VLDL', 'Total_Lipids_in_Very_Large_HDL', 'Total_Lipids_in_Very_Large_VLDL', 'Total_Lipids_in_Very_Small_VLDL', 'Total_Lipids_in_VLDL'],
            11: ['Glycoprotein_Acetyls']
        }

        self.nx_graph = nx.Graph()
        self.edges_with_p_values = []
        self.threshold = threshold
        self.pyvis_net = Network(notebook=True, height="750px", width="100%")

    @staticmethod
    def _node_id(feature):
        """Return the graph node id for a feature name (drops characters UTF-8 cannot encode)."""
        return feature.encode('utf-8', 'ignore').decode('utf-8')

    def load_data(self, p_values_file='data/atr_p_values.csv'):
        """Replace ``self.df`` with the table read from ``p_values_file``.

        Note that the default path here differs from the constructor's default.
        """
        self.df = pd.read_csv(p_values_file)

    def process_data(self):
        """Collect candidate edges ``(attribute, nmr_attribute, p_value)`` from ``self.df``.

        A row contributes an edge for its depression attribute and/or its CVD
        attribute when both that attribute and the row's NMR attribute are
        known features. Endpoints are checked against the feature names in
        ``self.clusters`` rather than against ``self.nx_graph``: the graph has
        no nodes until ``create_graph`` runs, so checking the graph here would
        reject every row when this method is called first.

        The result replaces ``self.edges_with_p_values`` (so re-running is
        idempotent) and is sorted by ascending p-value.
        """
        known_nodes = {
            self._node_id(feature)
            for features in self.clusters.values()
            for feature in features
        }

        edges = []
        rows = zip(
            self.df['Depression Attribute'],
            self.df['NMR Attribute'],
            self.df['CVD Attribute'],
            self.df['P-Value'],
        )
        for dep_node, nmr_node, cvd_node, p_value in rows:
            if nmr_node not in known_nodes:
                continue
            # Missing attributes (NaN/None) are never in known_nodes, so they are skipped.
            if dep_node in known_nodes:
                edges.append((dep_node, nmr_node, p_value))
            if cvd_node in known_nodes:
                edges.append((cvd_node, nmr_node, p_value))

        edges.sort(key=lambda edge: edge[2])
        self.edges_with_p_values = edges

    def create_graph(self):
        """Add one coloured node per feature and the edges that pass the threshold.

        Node colours are taken from matplotlib's ``tab20`` colormap, indexed by
        ``cluster_id / number_of_clusters`` and written as an ``rgb(r, g, b)``
        string; the node ``title`` is the feature name.
        """
        n_clusters = len(self.clusters)
        for cluster_id, features in self.clusters.items():
            # The colour depends only on the cluster, so compute it once per cluster.
            red, green, blue = (
                int(channel * 255)
                for channel in plt.cm.tab20(cluster_id / n_clusters)[:3]
            )
            colour = 'rgb({}, {}, {})'.format(red, green, blue)
            for feature in features:
                self.nx_graph.add_node(self._node_id(feature), color=colour, title=feature)

        # NOTE(review): this keeps edges whose p-value is ABOVE the threshold.
        # If 'P-Value' is a conventional significance p-value the comparison
        # may be intended the other way round - confirm before changing.
        for node_i, node_j, p_value in self.edges_with_p_values:
            if p_value > self.threshold:
                self.nx_graph.add_edge(node_i, node_j, p_value=p_value)

    def visualize_network(self):
        """Copy the networkx graph into pyvis and colour the edges.

        Edges touching the 'CVD' node are pink, all others light blue. The
        pyvis physics control panel is enabled.
        """
        self.pyvis_net.from_nx(self.nx_graph)

        for edge in self.pyvis_net.edges:
            touches_cvd = 'CVD' in (edge['from'], edge['to'])
            edge['color'] = 'pink' if touches_cvd else 'lightblue'

        self.pyvis_net.show_buttons(filter_=['physics'])

    def save_visualization(self, output_file="network_visualization.html"):
        """Write the pyvis network to ``output_file`` as HTML and report the path."""
        self.pyvis_net.show(output_file)
        print(f"Visualization saved to {output_file}")


outputs/network_visualization.html
Visualization saved to outputs/network_visualization.html


In [None]:
if __name__ == "__main__":
    # Driver cell: run the network-visualization pipeline stage by stage.
    # The constructor already reads a p-values CSV; load_data() then reads
    # the table again from its own path.
    visualizer = Visualization()
    visualizer.load_data()
    # Collect candidate edges, build the graph, then hand it to pyvis.
    visualizer.process_data()
    visualizer.create_graph()
    visualizer.visualize_network()
    # Writes the HTML file and prints where it was saved.
    visualizer.save_visualization()

CVD Significant Clusters

In [3]:
class CVD_Sig_Clusters:
    """Average p-value and mutual information per NMR cluster for CVD rows.

    The working table lives in ``self.cvd_df``. ``self.df`` is kept as a
    reference to the table as originally read, for callers that used that
    attribute.
    """

    # Category label written to the CSV for each NMR cluster id in ``clusters``.
    CLUSTER_CATEGORIES = {
        2: 'Apolipoproteins',
        3: 'Lipoprotein Particle Concentrations',
        4: 'Lipoprotein Particle Diameters',
        5: 'Cholesterol in Different Lipoproteins',
        6: 'Cholesteryl Esters in Different Lipoproteins',
        7: 'Free Cholesterol in Different Lipoproteins',
        8: 'Phospholipids in Different Lipoproteins',
        9: 'Triglycerides in Different Lipoproteins',
        10: 'Total Lipids in Different Lipoproteins',
        11: 'Other Specific Markers',
    }

    def __init__(self, p_values_file='atr_p_values.csv'):
        """Read the p-value table from ``p_values_file``.

        The methods below use the columns 'CVD Attribute', 'NMR Attribute',
        'P-Value' and 'Mutual Information'.
        """
        # Every method reads self.cvd_df; it must be set here or filter_data()
        # fails with AttributeError.
        self.cvd_df = pd.read_csv(p_values_file)
        self.df = self.cvd_df
        self.clusters = {
            0: ['Depressed.At.Baseline', 'Loneliness', 'Social.Isolation', 'PHQ9.No.Info', 'PHQ9.Screen', 'PHQ9.Items', 'PHQ9.Severity', 'CIDI.MDD.No.Info', 'CIDI.MDD.Screen', 'CIDI.MDD.Response', 'CIDI.MDD.Severity', 'GAD.CIDI.Somatic'],
            1: ['CVD'],
            2: ['Apolipoprotein_A1', 'Apolipoprotein_B'],
            3: ['Concentration_of_Chylomicrons_and_Extremely_Large_VLDL_Particles', 'Concentration_of_HDL_Particles', 'Concentration_of_IDL_Particles', 'Concentration_of_Large_HDL_Particles', 'Concentration_of_Large_LDL_Particles', 'Concentration_of_Large_VLDL_Particles', 'Concentration_of_LDL_Particles', 'Concentration_of_Medium_HDL_Particles', 'Concentration_of_Medium_LDL_Particles', 'Concentration_of_Medium_VLDL_Particles', 'Concentration_of_Small_HDL_Particles', 'Concentration_of_Small_LDL_Particles', 'Concentration_of_Small_VLDL_Particles', 'Concentration_of_Very_Large_HDL_Particles', 'Concentration_of_Very_Large_VLDL_Particles', 'Concentration_of_Very_Small_VLDL_Particles', 'Concentration_of_VLDL_Particles', 'Total_Concentration_of_Lipoprotein_Particles'],
            4: ['Average_Diameter_for_HDL_Particles', 'Average_Diameter_for_LDL_Particles', 'Average_Diameter_for_VLDL_Particles'],
            5: ['Cholesterol_in_Chylomicrons_and_Extremely_Large_VLDL', 'Cholesterol_in_IDL', 'Cholesterol_in_Large_HDL', 'Cholesterol_in_Large_LDL', 'Cholesterol_in_Large_VLDL', 'Cholesterol_in_Medium_HDL', 'Cholesterol_in_Medium_LDL', 'Cholesterol_in_Medium_VLDL', 'Cholesterol_in_Small_HDL', 'Cholesterol_in_Small_LDL', 'Cholesterol_in_Small_VLDL', 'Cholesterol_in_Very_Large_HDL', 'Cholesterol_in_Very_Large_VLDL', 'Cholesterol_in_Very_Small_VLDL', 'Clinical_LDL_Cholesterol', 'HDL_Cholesterol', 'LDL_Cholesterol', 'VLDL_Cholesterol', 'Remnant_Cholesterol_(Non-HDL,_Non-LDL_-Cholesterol)', 'Total_Cholesterol', 'Total_Cholesterol_Minus_HDL-C'],
            6: ['Cholesteryl_Esters_in_Chylomicrons_and_Extremely_Large_VLDL', 'Cholesteryl_Esters_in_HDL', 'Cholesteryl_Esters_in_IDL', 'Cholesteryl_Esters_in_Large_HDL', 'Cholesteryl_Esters_in_Large_LDL', 'Cholesteryl_Esters_in_Large_VLDL', 'Cholesteryl_Esters_in_LDL', 'Cholesteryl_Esters_in_Medium_HDL', 'Cholesteryl_Esters_in_Medium_LDL', 'Cholesteryl_Esters_in_Medium_VLDL', 'Cholesteryl_Esters_in_Small_HDL', 'Cholesteryl_Esters_in_Small_LDL', 'Cholesteryl_Esters_in_Small_VLDL', 'Cholesteryl_Esters_in_Very_Large_HDL', 'Cholesteryl_Esters_in_Very_Large_VLDL', 'Cholesteryl_Esters_in_Very_Small_VLDL', 'Cholesteryl_Esters_in_VLDL', 'Total_Esterified_Cholesterol'],
            7: ['Free_Cholesterol_in_Chylomicrons_and_Extremely_Large_VLDL', 'Free_Cholesterol_in_HDL', 'Free_Cholesterol_in_IDL', 'Free_Cholesterol_in_Large_HDL', 'Free_Cholesterol_in_Large_LDL', 'Free_Cholesterol_in_Large_VLDL', 'Free_Cholesterol_in_LDL', 'Free_Cholesterol_in_Medium_HDL', 'Free_Cholesterol_in_Medium_LDL', 'Free_Cholesterol_in_Medium_VLDL', 'Free_Cholesterol_in_Small_HDL', 'Free_Cholesterol_in_Small_LDL', 'Free_Cholesterol_in_Small_VLDL', 'Free_Cholesterol_in_Very_Large_HDL', 'Free_Cholesterol_in_Very_Large_VLDL', 'Free_Cholesterol_in_Very_Small_VLDL', 'Free_Cholesterol_in_VLDL', 'Total_Free_Cholesterol'],
            8: ['Phospholipids_in_Chylomicrons_and_Extremely_Large_VLDL', 'Phospholipids_in_HDL', 'Phospholipids_in_IDL', 'Phospholipids_in_Large_HDL', 'Phospholipids_in_Large_LDL', 'Phospholipids_in_Large_VLDL', 'Phospholipids_in_LDL', 'Phospholipids_in_Medium_HDL', 'Phospholipids_in_Medium_LDL', 'Phospholipids_in_Medium_VLDL', 'Phospholipids_in_Small_HDL', 'Phospholipids_in_Small_LDL', 'Phospholipids_in_Small_VLDL', 'Phospholipids_in_Very_Large_HDL', 'Phospholipids_in_Very_Large_VLDL', 'Phospholipids_in_Very_Small_VLDL', 'Phospholipids_in_VLDL', 'Total_Phospholipids_in_Lipoprotein_Particles'],
            9: ['Triglycerides_in_Chylomicrons_and_Extremely_Large_VLDL', 'Triglycerides_in_HDL', 'Triglycerides_in_IDL', 'Triglycerides_in_Large_HDL', 'Triglycerides_in_Large_LDL', 'Triglycerides_in_Large_VLDL', 'Triglycerides_in_LDL', 'Triglycerides_in_Medium_HDL', 'Triglycerides_in_Medium_LDL', 'Triglycerides_in_Medium_VLDL', 'Triglycerides_in_Small_HDL', 'Triglycerides_in_Small_LDL', 'Triglycerides_in_Small_VLDL', 'Triglycerides_in_Very_Large_HDL', 'Triglycerides_in_Very_Large_VLDL', 'Triglycerides_in_Very_Small_VLDL', 'Triglycerides_in_VLDL', 'Total_Triglycerides'],
            10: ['Total_Lipids_in_Chylomicrons_and_Extremely_Large_VLDL', 'Total_Lipids_in_HDL', 'Total_Lipids_in_IDL', 'Total_Lipids_in_Large_HDL', 'Total_Lipids_in_Large_LDL', 'Total_Lipids_in_Large_VLDL', 'Total_Lipids_in_LDL', 'Total_Lipids_in_Lipoprotein_Particles', 'Total_Lipids_in_Medium_HDL', 'Total_Lipids_in_Medium_LDL', 'Total_Lipids_in_Medium_VLDL', 'Total_Lipids_in_Small_HDL', 'Total_Lipids_in_Small_LDL', 'Total_Lipids_in_Small_VLDL', 'Total_Lipids_in_Very_Large_HDL', 'Total_Lipids_in_Very_Large_VLDL', 'Total_Lipids_in_Very_Small_VLDL', 'Total_Lipids_in_VLDL'],
            11: ['Glycoprotein_Acetyls']
        }

    def filter_data(self):
        """Keep only rows that have a value in 'CVD Attribute'."""
        self.cvd_df = self.cvd_df.dropna(subset=['CVD Attribute'])

    def _cluster_mean(self, column):
        """Tag each row with its NMR cluster id and return the per-cluster mean of ``column``.

        Side effect: (re)writes the 'Clusters' column of ``self.cvd_df``. Rows
        whose 'NMR Attribute' is in no cluster get ``None`` and are left out
        of the grouping.
        """
        cluster_of = {}
        for cluster, values in self.clusters.items():
            for value in values:
                # setdefault keeps the first cluster if a name were listed twice.
                cluster_of.setdefault(value, cluster)

        self.cvd_df['Clusters'] = self.cvd_df['NMR Attribute'].apply(cluster_of.get)
        return self.cvd_df.groupby('Clusters')[column].mean()

    def calculate_avg_p_values(self):
        """Return a Series of mean 'P-Value' indexed by cluster id."""
        return self._cluster_mean('P-Value')

    def calculate_avg_mi_values(self):
        """Return a Series of mean 'Mutual Information' indexed by cluster id."""
        return self._cluster_mean('Mutual Information')

    def save_to_csv(self, output_file='data/cvd_avg_p_mi.csv'):
        """Write one row per cluster (Category, Average_MI, Average_P) to ``output_file``.

        Category labels are looked up by cluster id, so a cluster with no rows
        in the data cannot shift the labels of the clusters after it. Cluster
        ids without a known label are written as 'Cluster <id>'.
        """
        avg_p_values = self.calculate_avg_p_values()
        avg_mi_values = self.calculate_avg_mi_values()
        # Align the p-values on the MI index so both columns describe the same cluster.
        avg_p_aligned = avg_p_values.reindex(avg_mi_values.index)

        rows = [
            (self.CLUSTER_CATEGORIES.get(cluster, f'Cluster {cluster}'), avg_mi, avg_p)
            for cluster, avg_mi, avg_p in zip(avg_mi_values.index, avg_mi_values, avg_p_aligned)
        ]

        with open(output_file, 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['Category', 'Average_MI', 'Average_P'])
            writer.writerows(rows)


CVD
Avg P-value per cluster Clusters
2     0.504950
3     0.514301
4     0.471947
5     0.503065
6     0.511001
7     0.511001
8     0.507701
9     0.536854
10    0.510451
11    0.504950
Name: P-Value, dtype: float64
Avg MI per cluster Clusters
2     0.002272
3     0.001842
4     0.002475
5     0.003076
6     0.003026
7     0.003023
8     0.002046
9     0.000994
10    0.002006
11    0.003388
Name: Mutual Information, dtype: float64


In [None]:
if __name__ == "__main__":
    # Driver cell: per-cluster averages for the CVD rows.
    cvd_evaluator = CVD_Sig_Clusters()
    cvd_evaluator.filter_data()
    avg_p_values = cvd_evaluator.calculate_avg_p_values()
    avg_mi_values = cvd_evaluator.calculate_avg_mi_values()
    cvd_evaluator.save_to_csv()
    # Header so this printout can be told apart from the depression one,
    # which prints "DEPRESSION" before the same two labels.
    print("CVD")
    print("Avg P-value per cluster:\n", avg_p_values)
    print("Avg MI per cluster:\n", avg_mi_values)

Depression Significant Clusters

In [None]:
class DEPR_Sig_Clusters:
    """Average p-value and mutual information per NMR cluster for depression rows."""

    # Category label written to the CSV for each NMR cluster id in ``clusters``.
    CLUSTER_CATEGORIES = {
        2: 'Apolipoproteins',
        3: 'Lipoprotein Particle Concentrations',
        4: 'Lipoprotein Particle Diameters',
        5: 'Cholesterol in Different Lipoproteins',
        6: 'Cholesteryl Esters in Different Lipoproteins',
        7: 'Free Cholesterol in Different Lipoproteins',
        8: 'Phospholipids in Different Lipoproteins',
        9: 'Triglycerides in Different Lipoproteins',
        10: 'Total Lipids in Different Lipoproteins',
        11: 'Other Specific Markers',
    }

    def __init__(self, p_values_file='data/atr_p_values.csv'):
        """Read the p-value table from ``p_values_file`` into ``self.depr_df``.

        The methods below use the columns 'Depression Attribute',
        'NMR Attribute', 'P-Value' and 'Mutual Information'.
        """
        self.depr_df = pd.read_csv(p_values_file)
        self.clusters = {
            0: ['Depressed.At.Baseline', 'Loneliness', 'Social.Isolation', 'PHQ9.No.Info', 'PHQ9.Screen', 'PHQ9.Items', 'PHQ9.Severity', 'CIDI.MDD.No.Info', 'CIDI.MDD.Screen', 'CIDI.MDD.Response', 'CIDI.MDD.Severity', 'GAD.CIDI.Somatic'],
            1: ['CVD'],
            2: ['Apolipoprotein_A1', 'Apolipoprotein_B'],
            3: ['Concentration_of_Chylomicrons_and_Extremely_Large_VLDL_Particles', 'Concentration_of_HDL_Particles', 'Concentration_of_IDL_Particles', 'Concentration_of_Large_HDL_Particles', 'Concentration_of_Large_LDL_Particles', 'Concentration_of_Large_VLDL_Particles', 'Concentration_of_LDL_Particles', 'Concentration_of_Medium_HDL_Particles', 'Concentration_of_Medium_LDL_Particles', 'Concentration_of_Medium_VLDL_Particles', 'Concentration_of_Small_HDL_Particles', 'Concentration_of_Small_LDL_Particles', 'Concentration_of_Small_VLDL_Particles', 'Concentration_of_Very_Large_HDL_Particles', 'Concentration_of_Very_Large_VLDL_Particles', 'Concentration_of_Very_Small_VLDL_Particles', 'Concentration_of_VLDL_Particles', 'Total_Concentration_of_Lipoprotein_Particles'],
            4: ['Average_Diameter_for_HDL_Particles', 'Average_Diameter_for_LDL_Particles', 'Average_Diameter_for_VLDL_Particles'],
            5: ['Cholesterol_in_Chylomicrons_and_Extremely_Large_VLDL', 'Cholesterol_in_IDL', 'Cholesterol_in_Large_HDL', 'Cholesterol_in_Large_LDL', 'Cholesterol_in_Large_VLDL', 'Cholesterol_in_Medium_HDL', 'Cholesterol_in_Medium_LDL', 'Cholesterol_in_Medium_VLDL', 'Cholesterol_in_Small_HDL', 'Cholesterol_in_Small_LDL', 'Cholesterol_in_Small_VLDL', 'Cholesterol_in_Very_Large_HDL', 'Cholesterol_in_Very_Large_VLDL', 'Cholesterol_in_Very_Small_VLDL', 'Clinical_LDL_Cholesterol', 'HDL_Cholesterol', 'LDL_Cholesterol', 'VLDL_Cholesterol', 'Remnant_Cholesterol_(Non-HDL,_Non-LDL_-Cholesterol)', 'Total_Cholesterol', 'Total_Cholesterol_Minus_HDL-C'],
            6: ['Cholesteryl_Esters_in_Chylomicrons_and_Extremely_Large_VLDL', 'Cholesteryl_Esters_in_HDL', 'Cholesteryl_Esters_in_IDL', 'Cholesteryl_Esters_in_Large_HDL', 'Cholesteryl_Esters_in_Large_LDL', 'Cholesteryl_Esters_in_Large_VLDL', 'Cholesteryl_Esters_in_LDL', 'Cholesteryl_Esters_in_Medium_HDL', 'Cholesteryl_Esters_in_Medium_LDL', 'Cholesteryl_Esters_in_Medium_VLDL', 'Cholesteryl_Esters_in_Small_HDL', 'Cholesteryl_Esters_in_Small_LDL', 'Cholesteryl_Esters_in_Small_VLDL', 'Cholesteryl_Esters_in_Very_Large_HDL', 'Cholesteryl_Esters_in_Very_Large_VLDL', 'Cholesteryl_Esters_in_Very_Small_VLDL', 'Cholesteryl_Esters_in_VLDL', 'Total_Esterified_Cholesterol'],
            7: ['Free_Cholesterol_in_Chylomicrons_and_Extremely_Large_VLDL', 'Free_Cholesterol_in_HDL', 'Free_Cholesterol_in_IDL', 'Free_Cholesterol_in_Large_HDL', 'Free_Cholesterol_in_Large_LDL', 'Free_Cholesterol_in_Large_VLDL', 'Free_Cholesterol_in_LDL', 'Free_Cholesterol_in_Medium_HDL', 'Free_Cholesterol_in_Medium_LDL', 'Free_Cholesterol_in_Medium_VLDL', 'Free_Cholesterol_in_Small_HDL', 'Free_Cholesterol_in_Small_LDL', 'Free_Cholesterol_in_Small_VLDL', 'Free_Cholesterol_in_Very_Large_HDL', 'Free_Cholesterol_in_Very_Large_VLDL', 'Free_Cholesterol_in_Very_Small_VLDL', 'Free_Cholesterol_in_VLDL', 'Total_Free_Cholesterol'],
            8: ['Phospholipids_in_Chylomicrons_and_Extremely_Large_VLDL', 'Phospholipids_in_HDL', 'Phospholipids_in_IDL', 'Phospholipids_in_Large_HDL', 'Phospholipids_in_Large_LDL', 'Phospholipids_in_Large_VLDL', 'Phospholipids_in_LDL', 'Phospholipids_in_Medium_HDL', 'Phospholipids_in_Medium_LDL', 'Phospholipids_in_Medium_VLDL', 'Phospholipids_in_Small_HDL', 'Phospholipids_in_Small_LDL', 'Phospholipids_in_Small_VLDL', 'Phospholipids_in_Very_Large_HDL', 'Phospholipids_in_Very_Large_VLDL', 'Phospholipids_in_Very_Small_VLDL', 'Phospholipids_in_VLDL', 'Total_Phospholipids_in_Lipoprotein_Particles'],
            9: ['Triglycerides_in_Chylomicrons_and_Extremely_Large_VLDL', 'Triglycerides_in_HDL', 'Triglycerides_in_IDL', 'Triglycerides_in_Large_HDL', 'Triglycerides_in_Large_LDL', 'Triglycerides_in_Large_VLDL', 'Triglycerides_in_LDL', 'Triglycerides_in_Medium_HDL', 'Triglycerides_in_Medium_LDL', 'Triglycerides_in_Medium_VLDL', 'Triglycerides_in_Small_HDL', 'Triglycerides_in_Small_LDL', 'Triglycerides_in_Small_VLDL', 'Triglycerides_in_Very_Large_HDL', 'Triglycerides_in_Very_Large_VLDL', 'Triglycerides_in_Very_Small_VLDL', 'Triglycerides_in_VLDL', 'Total_Triglycerides'],
            10: ['Total_Lipids_in_Chylomicrons_and_Extremely_Large_VLDL', 'Total_Lipids_in_HDL', 'Total_Lipids_in_IDL', 'Total_Lipids_in_Large_HDL', 'Total_Lipids_in_Large_LDL', 'Total_Lipids_in_Large_VLDL', 'Total_Lipids_in_LDL', 'Total_Lipids_in_Lipoprotein_Particles', 'Total_Lipids_in_Medium_HDL', 'Total_Lipids_in_Medium_LDL', 'Total_Lipids_in_Medium_VLDL', 'Total_Lipids_in_Small_HDL', 'Total_Lipids_in_Small_LDL', 'Total_Lipids_in_Small_VLDL', 'Total_Lipids_in_Very_Large_HDL', 'Total_Lipids_in_Very_Large_VLDL', 'Total_Lipids_in_Very_Small_VLDL', 'Total_Lipids_in_VLDL'],
            11: ['Glycoprotein_Acetyls']
        }

    def filter_data(self):
        """Keep only rows that have a value in 'Depression Attribute'."""
        self.depr_df = self.depr_df.dropna(subset=['Depression Attribute'])

    def _cluster_mean(self, column):
        """Tag each row with its NMR cluster id and return the per-cluster mean of ``column``.

        Side effect: (re)writes the 'Clusters' column of ``self.depr_df``. Rows
        whose 'NMR Attribute' is in no cluster get ``None`` and are left out
        of the grouping.
        """
        cluster_of = {}
        for cluster, values in self.clusters.items():
            for value in values:
                # setdefault keeps the first cluster if a name were listed twice.
                cluster_of.setdefault(value, cluster)

        self.depr_df['Clusters'] = self.depr_df['NMR Attribute'].apply(cluster_of.get)
        return self.depr_df.groupby('Clusters')[column].mean()

    def calculate_avg_p_values(self):
        """Return a Series of mean 'P-Value' indexed by cluster id."""
        return self._cluster_mean('P-Value')

    def calculate_avg_mi_values(self):
        """Return a Series of mean 'Mutual Information' indexed by cluster id."""
        return self._cluster_mean('Mutual Information')

    def save_to_csv(self, output_file='data/depr_avg_p_mi.csv'):
        """Write one row per cluster (Category, Average_MI, Average_P) to ``output_file``.

        Category labels are looked up by cluster id, so a cluster with no rows
        in the data cannot shift the labels of the clusters after it. Cluster
        ids without a known label are written as 'Cluster <id>'.
        """
        avg_p_values = self.calculate_avg_p_values()
        avg_mi_values = self.calculate_avg_mi_values()
        # Align the p-values on the MI index so both columns describe the same cluster.
        avg_p_aligned = avg_p_values.reindex(avg_mi_values.index)

        rows = [
            (self.CLUSTER_CATEGORIES.get(cluster, f'Cluster {cluster}'), avg_mi, avg_p)
            for cluster, avg_mi, avg_p in zip(avg_mi_values.index, avg_mi_values, avg_p_aligned)
        ]

        with open(output_file, 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['Category', 'Average_MI', 'Average_P'])
            writer.writerows(rows)

In [None]:
if __name__ == "__main__":
    # Driver cell: per-cluster averages for the depression rows.
    depr_evaluator = DEPR_Sig_Clusters()
    # Drop rows without a 'Depression Attribute' before averaging.
    depr_evaluator.filter_data()
    avg_p_values = depr_evaluator.calculate_avg_p_values()
    avg_mi_values = depr_evaluator.calculate_avg_mi_values()
    # save_to_csv() recomputes both averages itself before writing the file.
    depr_evaluator.save_to_csv()
    print("DEPRESSION")
    print("Avg P-value per cluster:\n", avg_p_values)
    print("Avg MI per cluster:\n", avg_mi_values)

In [None]:
class VisualizeNetwork:
    """Scatter-plot the per-category average p-values for depression against CVD."""

    def __init__(self, depr_avg_file='data/depr_avg_p_mi.csv', cvd_avg_file='data/cvd_avg_p_mi.csv'):
        """Read the two per-category average tables.

        Both CSVs are expected to share a 'Category' column and to carry an
        'Average_P' column (see ``merge_dataframes`` / ``plot_scatter``).
        """
        self.depr_df_avg = pd.read_csv(depr_avg_file)
        self.cvd_df_avg = pd.read_csv(cvd_avg_file)
        # Filled in by merge_dataframes(); None until then.
        self.merged_df = None

    def merge_dataframes(self):
        """Join the CVD and depression tables on 'Category'.

        Overlapping column names get the suffixes '_CVD' and '_Depression'.
        The result is stored in ``self.merged_df``.
        """
        self.merged_df = pd.merge(self.cvd_df_avg, self.depr_df_avg, on='Category', suffixes=('_CVD', '_Depression'))

    def plot_scatter(self, output_file='plots/cvd_v_depr_p_val.png'):
        """Plot 'Average_P_Depression' (x) against 'Average_P_CVD' (y), one labelled point per category.

        The figure is saved to ``output_file`` at 300 dpi and then shown. The
        output directory is created if it does not exist, and the tables are
        merged on demand if ``merge_dataframes`` has not been called yet.
        """
        if getattr(self, 'merged_df', None) is None:
            self.merge_dataframes()

        depression_p = self.merged_df['Average_P_Depression']
        cvd_p = self.merged_df['Average_P_CVD']

        fig, ax = plt.subplots(figsize=(10, 6))
        ax.scatter(depression_p, cvd_p, color='blue', alpha=0.6)
        ax.set_title('Average P: Depression vs CVD')
        ax.set_xlabel('Average P for Depression')
        ax.set_ylabel('Average P for CVD')
        ax.grid(True)

        # Iterate the values directly instead of indexing by a positional
        # counter, which only works while the frame's index is 0..n-1.
        for category, x_value, y_value in zip(self.merged_df['Category'], depression_p, cvd_p):
            ax.annotate(category, (x_value, y_value),
                        textcoords="offset points", xytext=(5, 5), ha='center')

        # savefig does not create missing directories.
        output_dir = os.path.dirname(output_file)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        fig.savefig(output_file, dpi=300)
        plt.show()


In [None]:
if __name__ == "__main__":
    # Driver cell: read the two per-category average CSVs, join them on
    # 'Category', then draw and save the depression-vs-CVD scatter plot.
    visualizer = VisualizeNetwork()
    visualizer.merge_dataframes()
    visualizer.plot_scatter()