In [1]:
# Mount Google Drive at /content/drive; the input pickle and the result CSVs
# referenced further down are addressed through this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


PSI Raw global features - RF

In [2]:
import numpy as np
import networkx as nx

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, f1_score, recall_score, confusion_matrix
from sklearn.metrics import roc_curve, auc
from sklearn import metrics as mt
from sklearn.preprocessing import MinMaxScaler

import os
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from matplotlib import pyplot as plt

import csv

In [3]:
# Number of cross-validation folds; read as a global by driver() when it builds KFold.
n_splits = 5

In [4]:
def get_model(num_classes, choice):
    """
    Build an unfitted Random Forest classifier for one experiment setup.

    Args:
        num_classes (int): Number of classes in the target variable. The value
            2 selects class_weight='balanced'; any other value selects
            class_weight='balanced_subsample'.
        choice (str): Experiment key, one of 'A', 'B', 'C' or 'D'. It only
            determines the random_state given to the forest.

    Returns:
        RandomForestClassifier: A forest with a single tree and the entropy
        split criterion.

    Raises:
        ValueError: If choice is not one of the four supported keys.
    """
    # (experiment key, random_state) pairs; criterion and tree count are the
    # same for every experiment.
    seed_presets = (('A', 91), ('B', 76), ('C', 26), ('D', 61))
    seed = next((value for key, value in seed_presets if choice == key), None)
    if seed is None:
        raise ValueError("Invalid choice. Must be 'A', 'B', 'C', or 'D'.")

    weighting = 'balanced' if num_classes == 2 else 'balanced_subsample'
    return RandomForestClassifier(
        criterion='entropy',
        n_estimators=1,
        random_state=seed,
        class_weight=weighting,
    )

In [5]:
def compile_fit(model, X_train, y_train):
    """
    Fit `model` on the training split.

    The value returned by model.fit() is ignored; the object that was passed
    in is handed back so the call can be chained.
    """
    model.fit(
        X_train,
        y_train,
    )
    return model

In [6]:
def eval_model(num_classes, model, X_val, y_val, y_cols):
    """
    Score a fitted classifier on one validation fold.

    Args:
        num_classes (int): 2 or 3. Any other value yields an empty result.
        model: Fitted estimator. Its predict() output is reduced with
            argmax over axis 1, so it is assumed to have one column per class
            (same layout as y_val).
        X_val: Validation features passed to model.predict().
        y_val: Label array with one column per class; reduced to a class index
            with argmax over axis 1.
        y_cols (list of str): Class names in the column order of y_val. Used
            as row names of the 3-class report.

    Returns:
        dict: Always contains 'acc', 'conf_mat', 'sens (recall)', 'f1' and
        'prec' for 2 or 3 classes. For 3 classes the last three are unweighted
        means over the per-class report rows. For 2 classes they refer to class
        index 1 (the second entry of y_cols) and 'tn', 'tp', 'fn', 'fp' are
        added. An empty dict is returned for any other num_classes.
    """
    y_pred = model.predict(X_val)
    print('y_pred', y_pred)
    print('y_val', y_val)

    if num_classes not in (2, 3):
        # Only the binary and the three-class setups are scored.
        return {}

    # Indicator rows -> integer class index (column position in y_cols).
    y_val = np.argmax(y_val, axis=1)
    y_pred = np.argmax(y_pred, axis=1)

    print('y_pred', y_pred)
    print('y_val', y_val)

    print("Type of y_pred using model.predict:", type(y_pred))
    print("shape of the y_pred using model.predict:", y_pred.shape)

    # Passing the full label set keeps the confusion matrix and the report at
    # a fixed size even when a fold happens to miss a class; without it the
    # 4-way unpack below and the target_names check would raise.
    class_ids = list(range(int(num_classes)))
    conf_mat = mt.confusion_matrix(y_val, y_pred, labels=class_ids)
    acc = accuracy_score(y_val, y_pred)

    if num_classes == 3:
        print("confusion matrix ", conf_mat)

        print("classification report", mt.classification_report(y_val, y_pred, labels=class_ids, target_names=y_cols, digits = 3))

        report = mt.classification_report(y_val, y_pred, labels=class_ids, target_names=y_cols, output_dict=True)
        report_df = pd.DataFrame(report).T

        print("classification report in dataframe - match accuracy with model.evaluate ")
        print(report_df)

        # The first rows of the report are the per-class rows, in class_ids order.
        per_class = report_df.head(len(class_ids))
        avg_precision = per_class['precision'].mean()
        avg_recall = per_class['recall'].mean()
        avg_f1_score = per_class['f1-score'].mean()

        print(f"Average Precision (first 3 classes): {avg_precision:.3f}")
        print(f"Average Recall (first 3 classes): {avg_recall:.3f}")
        print(f"Average F1-Score (first 3 classes): {avg_f1_score:.3f}")

        return {
            'acc': acc,
            'conf_mat': conf_mat,
            'sens (recall)': avg_recall,
            'f1': avg_f1_score,
            'prec': avg_precision
        }

    # Binary case: conf_mat is always 2x2 because labels were given explicitly.
    tn, fp, fn, tp = conf_mat.ravel()
    precision = precision_score(y_val, y_pred)
    recall = recall_score(y_val, y_pred)
    f1 = f1_score(y_val, y_pred)

    return {
        'acc': acc,
        'conf_mat': conf_mat,
        'sens (recall)': recall,
        'f1': f1,
        'prec': precision,
        'tn': tn,
        'tp': tp,
        'fn': fn,
        'fp': fp
    }

In [7]:
# Disabled helper: the definition below sits inside a bare string literal, so
# it is never executed and this cell does not define nan_to_0.
'''
# Replacing nan values to 0
def nan_to_0(data):

    df1 = data.copy()

    for idx, row in df1.iterrows():
        arr = row['psi_matrix']
        matrix = np.nan_to_num(arr, copy = True, nan = 0.0)
        df1.at[idx, 'psi_matrix'] = matrix

    print(df1)

    return df1
'''

"\n# Replacing nan values to 0\ndef nan_to_0(data):\n\n    df1 = data.copy()\n\n    for idx, row in df1.iterrows():\n        arr = row['psi_matrix']\n        matrix = np.nan_to_num(arr, copy = True, nan = 0.0)\n        df1.at[idx, 'psi_matrix'] = matrix\n\n    print(df1)\n\n    return df1\n"

In [8]:
# Disabled helper: the definition below sits inside a bare string literal, so
# it is never executed and this cell does not define computeMinMax.
'''
def computeMinMax(X):
    min_matrix = X.min(axis = 0)
    max_matrix = X.max(axis = 0)
    return (min_matrix, max_matrix)
'''

'\ndef computeMinMax(X):\n    min_matrix = X.min(axis = 0)\n    max_matrix = X.max(axis = 0)\n    return (min_matrix, max_matrix)\n'

In [9]:
# Disabled helper: the definition below sits inside a bare string literal, so
# it is never executed and this cell does not define normalize_instance.
'''
def normalize_instance(X, minn, maxx):
    normalised_X = np.zeros(shape=(X.shape[0], X.shape[1]))

    for idx, x in np.ndenumerate(X):
        if minn[idx] == maxx[idx]:
            normalised_X[idx] = x
        else:
            normalised_X[idx] = (x - minn[idx])/(maxx[idx] - minn[idx])
    return normalised_X
'''

'\ndef normalize_instance(X, minn, maxx):\n    normalised_X = np.zeros(shape=(X.shape[0], X.shape[1]))\n\n    for idx, x in np.ndenumerate(X):\n        if minn[idx] == maxx[idx]:\n            normalised_X[idx] = x\n        else:\n            normalised_X[idx] = (x - minn[idx])/(maxx[idx] - minn[idx])\n    return normalised_X\n'

In [10]:
# Disabled helper: the definition below sits inside a bare string literal, so
# it is never executed and this cell does not define normalize. It also relies
# on computeMinMax and normalize_instance, which are disabled the same way.
'''
def normalize(X_train, X_val):
    # Assuming X_train is your DataFrame with matrices in a single column
    matrices = X_train  # Get the values from the 'matrices' column
    # Convert the matrices to a 2D NumPy array
    X_train_2d = np.stack(matrices)

    # Assuming X_train is your DataFrame with matrices in a single column
    matrices = X_val  # Get the values from the 'matrices' column
    # Convert the matrices to a 2D NumPy array
    X_val_2d = np.stack(matrices)

    min_matrix, max_matrix = computeMinMax(X_train_2d)

    print("shape of min matrix", min_matrix.shape)
    print("shape of max matrix", max_matrix.shape)

    normalized_instances = []
    for instance in X_train_2d:
        normalized_instance = normalize_instance(instance, min_matrix, max_matrix)
        normalized_instances.append(normalized_instance)

    # Convert the list of normalized instances to a NumPy array
    X_normalized_trained_2d = np.array(normalized_instances)

    normalized_instances = []
    for instance in X_val_2d:
        normalized_instance = normalize_instance(instance, min_matrix, max_matrix)
        normalized_instances.append(normalized_instance)

    # Convert the list of normalized instances to a NumPy array
    X_normalized_val_2d = np.array(normalized_instances)

    return (X_normalized_trained_2d, X_normalized_val_2d)
'''

'\ndef normalize(X_train, X_val):\n    # Assuming X_train is your DataFrame with matrices in a single column\n    matrices = X_train  # Get the values from the \'matrices\' column\n    # Convert the matrices to a 2D NumPy array\n    X_train_2d = np.stack(matrices)\n\n    # Assuming X_train is your DataFrame with matrices in a single column\n    matrices = X_val  # Get the values from the \'matrices\' column\n    # Convert the matrices to a 2D NumPy array\n    X_val_2d = np.stack(matrices)\n\n    min_matrix, max_matrix = computeMinMax(X_train_2d)\n\n    print("shape of min matrix", min_matrix.shape)\n    print("shape of max matrix", max_matrix.shape)\n\n    normalized_instances = []\n    for instance in X_train_2d:\n        normalized_instance = normalize_instance(instance, min_matrix, max_matrix)\n        normalized_instances.append(normalized_instance)\n\n    # Convert the list of normalized instances to a NumPy array\n    X_normalized_trained_2d = np.array(normalized_instances)\n\n  

In [11]:
def set_threshold(df):
    """
    Derive a single threshold from the 'psi_matrix' column of `df`.

    For every row the matrix values are binned into a 5-bin histogram and the
    upper edge of the most populated bin (the first one on ties) becomes that
    row's threshold. Each per-row value is printed.

    Returns:
        The mean of the per-row thresholds.
    """
    per_row_cutoffs = []

    for row_label, record in df.iterrows():
        values = record['psi_matrix'].ravel()
        counts, edges = np.histogram(values, bins=5)

        # Right-hand edge of the fullest bin.
        cutoff = edges[np.argmax(counts) + 1]

        print("Threshold value for row", row_label, ":", cutoff)
        per_row_cutoffs.append(cutoff)

    return np.mean(per_row_cutoffs)

In [12]:
# Disabled earlier version of binarize_matrices: it sits inside a bare string
# literal and is never executed. Note that its inner helper stores the
# thresholded result in `binarised_matrix` but returns `binarized_matrix`,
# i.e. the plain copy of the input.
'''
def binarize_matrices(df, threshold):

    def binarize_matrix(matrix):
        binarized_matrix = np.copy(matrix)
        # Apply thresholding to the matrix
        binarised_matrix = np.where(matrix >= threshold, 1, 0)
        return binarized_matrix

    # Make a copy of the original DataFrame
    final_df = df.copy()

    # Iterate over each row and update the 'psi_matrix' column
    for index, row in final_df.iterrows():
        matrix = row['psi_matrix']
        binarized_matrix = binarize_matrix(matrix)
        # Update the matrix attribute in the copied DataFrame
        final_df.at[index, 'psi_matrix'] = binarized_matrix

    return final_df
'''

"\ndef binarize_matrices(df, threshold):\n\n    def binarize_matrix(matrix):\n        binarized_matrix = np.copy(matrix)\n        # Apply thresholding to the matrix\n        binarised_matrix = np.where(matrix >= threshold, 1, 0)\n        return binarized_matrix\n\n    # Make a copy of the original DataFrame\n    final_df = df.copy()\n\n    # Iterate over each row and update the 'psi_matrix' column\n    for index, row in final_df.iterrows():\n        matrix = row['psi_matrix']\n        binarized_matrix = binarize_matrix(matrix)\n        # Update the matrix attribute in the copied DataFrame\n        final_df.at[index, 'psi_matrix'] = binarized_matrix\n\n    return final_df\n"

In [13]:
def binarize_matrices(df, threshold):
    """
    Return a copy of `df` whose 'psi_matrix' entries are thresholded to 0/1.

    An element becomes 1 when it is greater than or equal to `threshold` and
    0 otherwise. The input DataFrame is not modified.
    """
    binarized_df = df.copy()

    for row_label, record in binarized_df.iterrows():
        at_or_above = record['psi_matrix'] >= threshold
        # Boolean mask -> integer 0/1 matrix, written back into the copy.
        binarized_df.at[row_label, 'psi_matrix'] = np.asarray(at_or_above).astype(int)

    return binarized_df

In [14]:
def remove_loop(df):
    """
    Return a copy of `df` with an identity matrix subtracted from every
    'psi_matrix' entry, which clears a unit diagonal (self-loops).

    The identity is sized from each matrix, so any 2-D matrix is accepted
    rather than only 132 x 132 ones. Rows whose 'psi_matrix' value is not a
    NumPy array are left unchanged and reported with a printed message. The
    input DataFrame is not modified.

    Raises:
        ValueError: If a 'psi_matrix' array is not two-dimensional.
    """
    final_df = df.copy()

    for i, row in final_df.iterrows():
        matrix = row['psi_matrix']
        if isinstance(matrix, np.ndarray):
            if matrix.ndim != 2:
                raise ValueError(
                    f"psi_matrix in row {i} must be 2-D, got shape {matrix.shape}"
                )
            # Identity matching this matrix's own shape instead of a fixed size.
            identity_matrix = np.eye(matrix.shape[0], matrix.shape[1])
            final_df.at[i, 'psi_matrix'] = matrix - identity_matrix
        else:
            # The check is on the 'psi_matrix' column; message text kept as is.
            print(f"Skipping row {i}: First column doesn't contain a NumPy array.")

    return final_df

In [15]:
def extract_features(df):
    """
    Compute global graph features for every 'psi_matrix' in `df`.

    Each matrix is turned into a NetworkX graph and summarised by its average
    clustering coefficient, average node degree, global efficiency,
    characteristic path length and degree assortativity. The path length is
    recorded as 0 when networkx raises NetworkXError for the graph.

    Returns:
        pandas.DataFrame: 'subject', the five feature columns and the
        'autism', 'adhd' and 'healthy' label columns copied from `df`. The
        frame is printed first and then any NaN in it is replaced by 0.
    """
    feature_order = (
        'clustering_coefficient',
        'average_node_degree',
        'global_efficiency',
        'characteristic_path_length',
        'assortativity',
    )
    collected = {name: [] for name in feature_order}

    for _, record in df.iterrows():
        graph = nx.from_numpy_array(record['psi_matrix'])

        clustering = nx.average_clustering(graph)
        mean_degree = sum(dict(graph.degree()).values()) / len(graph)
        efficiency = nx.global_efficiency(graph)
        try:
            path_length = nx.average_shortest_path_length(graph)
        except nx.NetworkXError:
            path_length = 0
        mixing = nx.degree_assortativity_coefficient(graph)

        row_values = (clustering, mean_degree, efficiency, path_length, mixing)
        for name, value in zip(feature_order, row_values):
            collected[name].append(value)

    # Assemble the columns in the output order: id, features, labels.
    columns = {'subject': df['subject']}
    columns.update(collected)
    for label in ('autism', 'adhd', 'healthy'):
        columns[label] = df[label]
    topological_features_df = pd.DataFrame(columns)

    # The printout shows the values before NaN replacement.
    print(topological_features_df)
    topological_features_df.fillna(0, inplace=True)

    return topological_features_df

In [16]:
def make_dataset(choice):
    """
    Load the PSI dataset and restrict it to the classes of one experiment.

    Args:
        choice (str): 'A' keeps ADHD and autism rows, 'B' autism and healthy
            rows, 'C' ADHD and healthy rows, 'D' keeps every row.

    Returns:
        tuple: (data, y_cols), the selected rows and the list of label column
        names for the experiment. For any other choice a message is printed
        and (empty DataFrame, []) is returned.
    """
    # NOTE(review): unpickling can run arbitrary code; only load trusted files.
    data = pd.read_pickle(r"/content/drive/MyDrive/Colab Notebooks/ROIxTimeseries/psi_data.pkl")

    label_sets = (
        ('A', ('adhd', 'autism')),
        ('B', ('autism', 'healthy')),
        ('C', ('adhd', 'healthy')),
        ('D', ('adhd', 'autism', 'healthy')),
    )
    selected = next((labels for key, labels in label_sets if choice == key), None)
    if selected is None:
        print("Invalid choice. Please enter 'A', 'B', 'C', or 'D'.")
        return pd.DataFrame(), []

    y_cols = list(selected)
    if len(y_cols) == 2:
        # Pairwise experiments keep only subjects flagged with either label.
        first, second = y_cols
        data = data[(data[first] == 1) | (data[second] == 1)]

    print(data)

    print(y_cols)
    return data, y_cols

In [17]:
def driver(choice, thresh=0.8):
    """
    Run the repeated K-fold evaluation for one experiment setup.

    The dataset for `choice` is loaded, its matrices are binarized at
    `thresh`, self-loops are removed and global graph features are extracted.
    For each of ten fixed seeds a shuffled KFold (plain, not stratified) with
    `n_splits` folds is run; every fold trains a fresh model from get_model()
    and scores it with eval_model().

    Args:
        choice (str): Experiment key 'A', 'B', 'C' or 'D' (case-insensitive).
        thresh (float): Binarization threshold applied to the PSI matrices.

    Returns:
        list of pandas.DataFrame: One frame per seed. Each frame is
        cumulative: it holds the fold results of that seed and of all earlier
        seeds, so the last frame contains every row.
    """
    choice = choice.upper()

    df, y_cols = make_dataset(choice)

    df = binarize_matrices(df, thresh)
    df_loop = remove_loop(df)
    f_df = extract_features(df_loop)

    X = f_df.drop(columns=['subject','autism','adhd','healthy'])
    print(X.isna().sum())

    y = f_df[y_cols].values
    print("type of label columns", type(y))

    # One label column per class.
    num_classes = y.shape[1]
    print("No. of classes", num_classes)

    # Column order of the result frames; the binary case adds the four
    # confusion-matrix cells reported by eval_model().
    result_columns = ['seed','fold','acc','conf_mat', 'sens (recall)','f1','prec']
    if num_classes == 2:
        result_columns += ['tn', 'tp', 'fn', 'fp']

    # Manually set random seeds
    random_seeds = np.array([93, 98, 40, 19, 52, 74, 31, 66, 56, 22])
    print("Random seeds for outer loops:", random_seeds)

    all_results = []
    # Rows are gathered in a list and turned into a frame once per seed,
    # instead of concatenating onto an empty DataFrame fold by fold.
    fold_rows = []

    for outer_loop, random_seed in enumerate(random_seeds):
        print(f"Outer loop iteration: {outer_loop + 1}, Random seed: {random_seed}")

        kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_seed)

        for i, (train_index, val_index) in enumerate(kf.split(X, y)):

            print("FOLD : ", i+1)

            X_train, X_val = X.iloc[train_index], X.iloc[val_index]
            y_train, y_val = y[train_index], y[val_index]

            compiled_m = get_model(num_classes, choice)
            trained_m = compile_fit(compiled_m, X_train, y_train)

            scores = eval_model(num_classes, trained_m, X_val, y_val, y_cols)
            scores['seed']=random_seed
            scores['fold']=i+1
            print("Scores", scores)
            fold_rows.append(scores)

        # Cumulative snapshot: every row collected up to and including this seed.
        result_df = pd.DataFrame(fold_rows, columns=result_columns)
        all_results.append(result_df)
        print(f"Outer loop {outer_loop + 1} result_df:")
        print(result_df)
        print("\n")

    return all_results

In [18]:
# Define a list of choices
choices = ['A', 'B', 'C', 'D']
# choices = ['A', 'B', 'C']

# Maps each choice to what driver() returns for it: a list with one
# cumulative DataFrame per outer-loop seed (not a single DataFrame).
result_dfs = {}

# Loop through each choice
for choice in choices:
    # Run the full evaluation; despite its name, result_df is a list of DataFrames
    result_df = driver(choice)

    # Store that list in the dictionary with the choice as the key
    result_dfs[choice] = result_df

    # # Combine all results for this choice into a single DataFrame
    # combined_df = pd.DataFrame()
    # for i, df in enumerate(result_df):
    #     df['outer_loop'] = i + 1  # Add a column to identify the outer loop
    #     combined_df = pd.concat([combined_df, df], ignore_index=True)

    # driver() never resets its result frame between seeds, so the last
    # DataFrame in the list already holds the rows of every seed and fold.
    combined_df = result_df[-1]

    # Save the combined results to a CSV file
    filename = f"choice_{choice}_results.csv"
    combined_df.to_csv(f'/content/drive/MyDrive/Colab Notebooks/PSI/(PF_Fix)PSI_Globalfeatures/0.8/RF/{filename}', index=False)
    print(f"Saved combined results for choice {choice} to {filename}")

        subject                                         psi_matrix  adhd  \
0      subject1  [[1.0, 0.8016814987579317, 0.839641852247262, ...     0   
1      subject2  [[1.0, 0.9101210641534417, 0.576123749895304, ...     0   
2      subject3  [[1.0, 0.7530150385517623, 0.5067414702685676,...     0   
3      subject4  [[1.0, 0.817812140965262, 0.520061068095793, 0...     0   
4      subject5  [[1.0, 0.7477157295145737, 0.8259250654415865,...     0   
..          ...                                                ...   ...   
105  subject106  [[1.0, 0.7190486619620364, 0.3595353948075287,...     1   
106  subject107  [[1.0, 0.892350228080446, 0.6115053964108019, ...     1   
107  subject108  [[1.0, 0.9383744300499544, 0.7480842785117325,...     1   
108  subject109  [[1.0, 0.9300675513188588, 0.9240553408215981,...     1   
109  subject110  [[1.0, 0.7946738858174632, 0.5573150422007396,...     1   

     autism  healthy  
0         1        0  
1         1        0  
2         1       

  return float((xy * (M - ab)).sum() / np.sqrt(vara * varb))


        subject  clustering_coefficient  average_node_degree  \
0      subject1                0.467993            14.212121   
1      subject2                0.075180             0.757576   
2      subject3                0.127020             1.272727   
3      subject4                0.045455             0.424242   
4      subject5                0.051263             0.530303   
..          ...                     ...                  ...   
105  subject106                0.052525             0.500000   
106  subject107                0.326143             5.545455   
107  subject108                0.393865            10.560606   
108  subject109                0.917899           103.106061   
109  subject110                0.139502             1.272727   

     global_efficiency  characteristic_path_length  assortativity  autism  \
0             0.259424                           0      -0.062731       1   
1             0.014816                           0       0.598851       1   


  result_df = pd.concat([result_df,scores], ignore_index=True)


y_pred [[0 1]
 [0 1]
 [1 0]
 [0 1]
 [0 1]
 [1 0]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [1 0]
 [0 1]
 [1 0]
 [1 0]
 [1 0]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [1 0]]
y_val [[0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]]
y_pred [1 1 0 1 1 0 1 1 1 1 0 1 0 0 0 1 1 1 1 1 1 0]
y_val [1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0]
Type of y_pred using model.predict: <class 'numpy.ndarray'>
shape of the y_pred using model.predict: (22,)
Scores {'acc': 0.5454545454545454, 'conf_mat': array([[3, 6],
       [4, 9]]), 'sens (recall)': 0.6923076923076923, 'f1': 0.6428571428571429, 'prec': 0.6, 'tn': 3, 'tp': 9, 'fn': 4, 'fp': 6, 'seed': 98, 'fold': 3}
FOLD :  4
y_pred [[0 1]
 [0 1]
 [1 0]
 [0 1]
 [0 1]
 [1 0]
 [1 0]
 [1 0]
 [0 1]
 [0 1]
 [0 1]
 [1 0]
 [0 1]
 [0 1]
 [1 0]
 [0 1]
 [1 0]
 [0 1]
 [1 0]
 [1 0]
 [0 1]
 [0 1]]
y_val [[0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0

  return float((xy * (M - ab)).sum() / np.sqrt(vara * varb))


        subject  clustering_coefficient  average_node_degree  \
0      subject1                0.467993            14.212121   
1      subject2                0.075180             0.757576   
2      subject3                0.127020             1.272727   
3      subject4                0.045455             0.424242   
4      subject5                0.051263             0.530303   
..          ...                     ...                  ...   
160  subject161                0.241469             3.181818   
161  subject162                0.171162             1.272727   
162  subject163                0.402180             8.939394   
163  subject164                0.419386            15.000000   
164  subject165                0.735862            49.924242   

     global_efficiency  characteristic_path_length  assortativity  autism  \
0             0.259424                           0      -0.062731       1   
1             0.014816                           0       0.598851       1   


  result_df = pd.concat([result_df,scores], ignore_index=True)


Scores {'acc': 0.9090909090909091, 'conf_mat': array([[ 8,  1],
       [ 1, 12]]), 'sens (recall)': 0.9230769230769231, 'f1': 0.9230769230769231, 'prec': 0.9230769230769231, 'tn': 8, 'tp': 12, 'fn': 1, 'fp': 1, 'seed': 98, 'fold': 2}
FOLD :  3
y_pred [[0 1]
 [0 1]
 [1 0]
 [1 0]
 [1 0]
 [0 1]
 [0 1]
 [1 0]
 [0 1]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]]
y_val [[1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]]
y_pred [1 1 0 0 0 1 1 0 1 0 0 0 0 1 1 1 1 0 0 0 0 0]
y_val [0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1]
Type of y_pred using model.predict: <class 'numpy.ndarray'>
shape of the y_pred using model.predict: (22,)
Scores {'acc': 0.5454545454545454, 'conf_mat': array([[8, 5],
       [5, 4]]), 'sens (recall)': 0.4444444444444444, 'f1': 0.4444444444444444, 'prec': 0.4444444444444444, 'tn': 8, 'tp': 4, 'fn': 5, 'fp': 5, 'seed': 98, 'fo

  return float((xy * (M - ab)).sum() / np.sqrt(vara * varb))


        subject  clustering_coefficient  average_node_degree  \
55    subject56                0.200745             1.636364   
56    subject57                0.046717             0.409091   
57    subject58                0.096212             0.560606   
58    subject59                0.356686             8.878788   
59    subject60                0.017677             0.393939   
..          ...                     ...                  ...   
160  subject161                0.241469             3.181818   
161  subject162                0.171162             1.272727   
162  subject163                0.402180             8.939394   
163  subject164                0.419386            15.000000   
164  subject165                0.735862            49.924242   

     global_efficiency  characteristic_path_length  assortativity  autism  \
55            0.035104                           0       0.247947       0   
56            0.003624                           0       0.658228       0   


  result_df = pd.concat([result_df,scores], ignore_index=True)


 (22,)
Scores {'acc': 0.6363636363636364, 'conf_mat': array([[7, 6],
       [2, 7]]), 'sens (recall)': 0.7777777777777778, 'f1': 0.6363636363636364, 'prec': 0.5384615384615384, 'tn': 7, 'tp': 7, 'fn': 2, 'fp': 6, 'seed': 98, 'fold': 1}
FOLD :  2
y_pred [[1 0]
 [0 1]
 [1 0]
 [1 0]
 [1 0]
 [0 1]
 [1 0]
 [1 0]
 [0 1]
 [0 1]
 [0 1]
 [1 0]
 [0 1]
 [1 0]
 [0 1]
 [0 1]
 [0 1]
 [1 0]
 [0 1]
 [1 0]
 [0 1]
 [0 1]]
y_val [[1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]]
y_pred [0 1 0 0 0 1 0 0 1 1 1 0 1 0 1 1 1 0 1 0 1 1]
y_val [0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1]
Type of y_pred using model.predict: <class 'numpy.ndarray'>
shape of the y_pred using model.predict: (22,)
Scores {'acc': 0.6818181818181818, 'conf_mat': array([[6, 3],
       [4, 9]]), 'sens (recall)': 0.6923076923076923, 'f1': 0.72, 'prec': 0.75, 'tn': 6, 'tp': 9, 'fn': 4, 'fp': 3, 'seed': 98, 'fold': 2}
FOLD :  3
y_pred [

  return float((xy * (M - ab)).sum() / np.sqrt(vara * varb))


        subject  clustering_coefficient  average_node_degree  \
0      subject1                0.467993            14.212121   
1      subject2                0.075180             0.757576   
2      subject3                0.127020             1.272727   
3      subject4                0.045455             0.424242   
4      subject5                0.051263             0.530303   
..          ...                     ...                  ...   
160  subject161                0.241469             3.181818   
161  subject162                0.171162             1.272727   
162  subject163                0.402180             8.939394   
163  subject164                0.419386            15.000000   
164  subject165                0.735862            49.924242   

     global_efficiency  characteristic_path_length  assortativity  autism  \
0             0.259424                           0      -0.062731       1   
1             0.014816                           0       0.598851       1   


  result_df = pd.concat([result_df,scores], ignore_index=True)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [1 0 0]
 [0 1 0]
 [0 1 0]
 [1 0 0]
 [0 0 1]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [0 0 1]
 [0 1 0]
 [0 1 0]
 [1 0 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [1 0 0]
 [1 0 0]
 [0 0 1]]
y_val [[0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [0 1 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]
 [0 0 1]]
y_pred [1 0 2 1 2 2 0 0 2 2 0 0 1 1 1 0 1 1 0 2 0 0 0 2 1 1 0 1 1 1 0 0 2]
y_val [1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2]
Type of y_pred using model.predict: <class 'numpy.ndarray'>
shape of the y_pred using model.predict: (33,)
confusion matrix  [[7 7 3]
 [3 2 4]
 [3 3 1]]
classification report               precision    recall  f1-score   support

        adhd      0.538     0.412     0.467        17
      au

### CHANGES MADE:

1. Binarization method corrected
2. characteristic path length set to 0 in case of disconnected graph
3. assortativity set to zero where it was NaN

In [None]:
# # Define a list of choices
# # choices = ['A', 'B', 'C', 'D']
# choices = ['D']

# # Create an empty dictionary to store the result dataframes
# result_dfs = {}

# # Loop through each choice
# for choice in choices:
#     # Call the driver() function with the current choice
#     result_df = driver(choice)

#     # Store the result dataframe in the dictionary with the choice as the key
#     result_dfs[choice] = result_df

        subject                                         psi_matrix  adhd  \
0      subject1  [[1.0, 0.8016814987579317, 0.839641852247262, ...     0   
1      subject2  [[1.0, 0.9101210641534417, 0.576123749895304, ...     0   
2      subject3  [[1.0, 0.7530150385517623, 0.5067414702685676,...     0   
3      subject4  [[1.0, 0.817812140965262, 0.520061068095793, 0...     0   
4      subject5  [[1.0, 0.7477157295145737, 0.8259250654415865,...     0   
..          ...                                                ...   ...   
160  subject161  [[1.0, 0.8966881181531998, 0.7033025053563576,...     0   
161  subject162  [[1.0, 0.4594896330132013, 0.08037697590578775...     0   
162  subject163  [[1.0, 0.9030386042889056, 0.7605824095393477,...     0   
163  subject164  [[1.0, 0.8822326646328201, 0.6885390808656763,...     0   
164  subject165  [[1.0, 0.9726522672232811, 0.8350244047495731,...     0   

     autism  healthy  
0         1        0  
1         1        0  
2         1       

  return float((xy * (M - ab)).sum() / np.sqrt(vara * varb))


        subject  clustering_coefficient  average_node_degree  \
0      subject1                0.979236           125.666667   
1      subject2                0.803892            83.863636   
2      subject3                0.915260           111.863636   
3      subject4                0.860111            98.878788   
4      subject5                0.942771           117.348485   
..          ...                     ...                  ...   
160  subject161                0.959946           117.303030   
161  subject162                0.821577            82.333333   
162  subject163                0.952761           116.878788   
163  subject164                0.964880           118.742424   
164  subject165                0.996221           129.439394   

     global_efficiency  characteristic_path_length  assortativity  autism  \
0             0.979644                    1.040712      -0.040772       1   
1             0.820071                    1.359935       0.260843       1   


In [None]:
# print(result_dfs['A'])
# result_dfs['A'].to_csv('results/PSI_RF-globalfeatures/adhd-autism.csv', mode = 'w', index=False)

In [None]:
# print(result_dfs['B'])
# result_dfs['B'].to_csv('results/PSI_RF-globalfeatures/autism-healthy.csv', mode = 'w', index=False)

In [None]:
# print(result_dfs['C'])
# result_dfs['C'].to_csv('results/PSI_RF-globalfeatures/adhd-healthy.csv', mode = 'w', index=False)

In [None]:
# print(result_dfs['D'])
# result_dfs['D'].to_csv('results/PSI_RF-globalfeatures/adhd-autism-healthy.csv', mode = 'w', index=False)