In [1]:
# Mount Google Drive at /content/drive; the data file and the result CSVs used below live under that path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


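For each subject, binarize the PSI matrix at every threshold from 0.65 to 0.85 (step 0.01, roughly 20 thresholds), extract graph features at each threshold, average them over the thresholds, and classify the averaged features with an SVM.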

In [2]:
import numpy as np
import networkx as nx

from sklearn import svm
from sklearn.svm import SVC
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, f1_score, recall_score, confusion_matrix
from sklearn.metrics import roc_curve, auc
from sklearn import metrics as mt
from sklearn.preprocessing import MinMaxScaler

import os
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from matplotlib import pyplot as plt

import csv

In [3]:
# Number of cross-validation folds; read by driver() when it builds the KFold splitter.
n_splits = 5

In [4]:
def get_model(num_classes, choice):
    """
    Build an unfitted SVM classifier whose regularisation depends on the task.

    Args:
        num_classes (int): Number of target classes. Kept for interface
            compatibility; it does not influence the model that is built.
        choice (str): Task identifier, one of 'A', 'B', 'C' or 'D'. Each
            identifier selects its own C value; the kernel is RBF for all.

    Returns:
        SVC: An unfitted SVC with a one-vs-rest decision function shape.

    Raises:
        ValueError: If ``choice`` is not one of the four identifiers.
    """
    # (identifier, C, kernel) presets, one per supported task
    presets = (
        ('A', 1.5, 'rbf'),
        ('B', 18, 'rbf'),
        ('C', 3, 'rbf'),
        ('D', 7, 'rbf'),
    )

    for label, c_val, ker in presets:
        if choice == label:
            return SVC(C=c_val, kernel=ker, decision_function_shape='ovr')

    raise ValueError("Invalid choice. Must be 'A', 'B', 'C', or 'D'.")

In [5]:
def compile_fit(model, X_train, y_train):
    """
    Fit ``model`` on the training data and hand the same object back.

    Args:
        model: Estimator exposing ``fit(X, y)``.
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The ``model`` object that was passed in, after ``fit`` was called on it.
    """
    model.fit(X_train, y_train)

    return model

In [6]:
def eval_model(num_classes, model, X_val, y_val, y_cols):
    """
    Score a fitted classifier on one validation fold.

    Args:
        num_classes (int): 2 for a binary task, 3 for the three-class task.
            Any other value produces an empty result.
        model: Fitted estimator exposing ``predict``.
        X_val: Validation features.
        y_val: True labels for ``X_val``. Assumed to be the integer class
            indices 0..num_classes-1 (as produced by ``y_convert``), with
            index ``k`` corresponding to ``y_cols[k]``.
        y_cols (list[str]): Class names; used as row names of the
            three-class classification report.

    Returns:
        dict: For 2 or 3 classes, the keys 'acc', 'conf_mat',
        'sens (recall)', 'f1' and 'prec'. In the three-class case recall, F1
        and precision are unweighted means over the classes. The binary case
        additionally carries 'tn', 'tp', 'fn' and 'fp'. Empty dict otherwise.
    """
    y_pred = model.predict(X_val)
    print('y_pred', y_pred)
    print('y_val', y_val)

    if num_classes not in (2, 3):
        return {}

    print("Type of y_pred using model.predict:", type(y_pred))
    print("shape of the y_pred using model.predict:", y_pred.shape)

    # Pin the label set so the confusion matrix is always
    # num_classes x num_classes, even if a fold happens to miss a class
    # in both y_val and y_pred.
    labels = list(range(num_classes))
    conf_mat = confusion_matrix(y_val, y_pred, labels=labels)
    acc = accuracy_score(y_val, y_pred)

    if num_classes == 3:
        target_names = list(y_cols)

        print("confusion matrix ", conf_mat)
        print("classification report",
              mt.classification_report(y_val, y_pred, labels=labels,
                                       target_names=target_names, digits = 3))

        report = mt.classification_report(y_val, y_pred, labels=labels,
                                          target_names=target_names,
                                          output_dict=True)
        report_df = pd.DataFrame(report).T

        print("classification report in dataframe - match accuracy with model.evaluate ")
        print(report_df)

        # Select the per-class rows by name rather than by position so the
        # summary rows (accuracy / macro avg / weighted avg) can never leak in.
        per_class = report_df.loc[target_names]
        avg_precision = per_class['precision'].mean()
        avg_recall = per_class['recall'].mean()
        avg_f1_score = per_class['f1-score'].mean()

        print(f"Average Precision (first 3 classes): {avg_precision:.3f}")
        print(f"Average Recall (first 3 classes): {avg_recall:.3f}")
        print(f"Average F1-Score (first 3 classes): {avg_f1_score:.3f}")

        return {
            'acc': acc,
            'conf_mat': conf_mat,
            'sens (recall)': avg_recall,
            'f1': avg_f1_score,
            'prec': avg_precision
        }

    # Binary task: with labels fixed to [0, 1] the matrix is 2x2, so the
    # four-way unpacking below cannot fail on a degenerate fold.
    tn, fp, fn, tp = conf_mat.ravel()
    precision = precision_score(y_val, y_pred)
    recall = recall_score(y_val, y_pred)
    f1 = f1_score(y_val, y_pred)

    return {
        'acc': acc,
        'conf_mat': conf_mat,
        'sens (recall)': recall,
        'f1': f1,
        'prec': precision,
        'tn': tn,
        'tp': tp,
        'fn': fn,
        'fp': fp
    }

In [7]:
# Disabled helper: the definition is wrapped in a string literal, so it is never executed
# and the cell only echoes the text. The body would replace NaN entries of each row's
# 'psi_matrix' with 0.0 (its inner comment says "to 1", which does not match nan = 0.0).
'''
# Replacing nan values to 1
def nan_to_0(data):

    df1 = data.copy()

    for idx, row in df1.iterrows():
      arr = row['psi_matrix']
      matrix = np.nan_to_num(arr, copy = True, nan = 0.0)
      df1.at[idx, 'psi_matrix'] = matrix

    print(df1)

    return df1
'''

"\n# Replacing nan values to 1\ndef nan_to_0(data):\n\n    df1 = data.copy()\n\n    for idx, row in df1.iterrows():\n      arr = row['psi_matrix']\n      matrix = np.nan_to_num(arr, copy = True, nan = 0.0)\n      df1.at[idx, 'psi_matrix'] = matrix\n\n    print(df1)\n\n    return df1\n"

In [8]:
# Disabled helper (string literal, never executed): would return the element-wise
# minimum and maximum of X taken along axis 0.
'''
def computeMinMax(X):
  min_matrix = X.min(axis = 0)
  max_matrix = X.max(axis = 0)
  return (min_matrix, max_matrix)
'''

'\ndef computeMinMax(X):\n  min_matrix = X.min(axis = 0)\n  max_matrix = X.max(axis = 0)\n  return (min_matrix, max_matrix)\n'

In [9]:
# Disabled helper (string literal, never executed): element-wise min-max scaling of one
# matrix, leaving entries untouched where min == max.
# NOTE(review): the `return` is indented inside the for-loop, so if this were re-enabled
# it would return after processing only the first element.
'''
def normalize_instance(X, minn, maxx):
  normalised_X = np.zeros(shape=(X.shape[0], X.shape[1]))

  for idx, x in np.ndenumerate(X):
    if minn[idx] == maxx[idx]:
      normalised_X[idx] = x
    else:
      normalised_X[idx] = (x - minn[idx])/(maxx[idx] - minn[idx])
    return normalised_X
'''

'\ndef normalize_instance(X, minn, maxx):\n  normalised_X = np.zeros(shape=(X.shape[0], X.shape[1]))\n\n  for idx, x in np.ndenumerate(X):\n    if minn[idx] == maxx[idx]:\n      normalised_X[idx] = x\n    else:\n      normalised_X[idx] = (x - minn[idx])/(maxx[idx] - minn[idx])\n    return normalised_X\n'

In [10]:
# Disabled helper (string literal, never executed): would stack the training and validation
# matrices, take the min/max from the training stack only, and scale every instance of both
# sets with them. It relies on computeMinMax and normalize_instance, which are also disabled.
'''
def normalize(X_train, X_val):
    # Assuming X_train is your DataFrame with matrices in a single column
    matrices = X_train  # Get the values from the 'matrices' column
    # Convert the matrices to a 2D NumPy array
    X_train_2d = np.stack(matrices)

    # Assuming X_train is your DataFrame with matrices in a single column
    matrices = X_val  # Get the values from the 'matrices' column
    # Convert the matrices to a 2D NumPy array
    X_val_2d = np.stack(matrices)

    min_matrix, max_matrix = computeMinMax(X_train_2d)

    print("shape of min matrix", min_matrix.shape)
    print("shape of max matrix", max_matrix.shape)

    normalized_instances = []
    for instance in X_train_2d:
        normalized_instance = normalize_instance(instance, min_matrix, max_matrix)
        normalized_instances.append(normalized_instance)

    # Convert the list of normalized instances to a NumPy array
    X_normalized_trained_2d = np.array(normalized_instances)

    normalized_instances = []
    for instance in X_val_2d:
        normalized_instance = normalize_instance(instance, min_matrix, max_matrix)
        normalized_instances.append(normalized_instance)

    # Convert the list of normalized instances to a NumPy array
    X_normalized_val_2d = np.array(normalized_instances)

    return (X_normalized_trained_2d, X_normalized_val_2d)
'''

'\ndef normalize(X_train, X_val):\n    # Assuming X_train is your DataFrame with matrices in a single column\n    matrices = X_train  # Get the values from the \'matrices\' column\n    # Convert the matrices to a 2D NumPy array\n    X_train_2d = np.stack(matrices)\n\n    # Assuming X_train is your DataFrame with matrices in a single column\n    matrices = X_val  # Get the values from the \'matrices\' column\n    # Convert the matrices to a 2D NumPy array\n    X_val_2d = np.stack(matrices)\n\n    min_matrix, max_matrix = computeMinMax(X_train_2d)\n\n    print("shape of min matrix", min_matrix.shape)\n    print("shape of max matrix", max_matrix.shape)\n\n    normalized_instances = []\n    for instance in X_train_2d:\n        normalized_instance = normalize_instance(instance, min_matrix, max_matrix)\n        normalized_instances.append(normalized_instance)\n\n    # Convert the list of normalized instances to a NumPy array\n    X_normalized_trained_2d = np.array(normalized_instances)\n\n  

In [11]:
# Disabled helper (string literal, never executed): for each row it would histogram the
# flattened 'psi_matrix' into 5 bins, take the upper edge of the most populated bin as that
# row's threshold, and return the mean threshold over all rows.
'''
def set_threshold(df):
    thresholds = []  # Initialize an empty list to store thresholds

    # Iterate over each row in the DataFrame
    for index, row in df.iterrows():
        # Extract the matrix from the attribute of the current row
        matrix = row['psi_matrix']

        # Flatten the matrix into a 1D array
        flat_matrix = matrix.flatten()

        # Create a histogram of the values in the array
        hist, bins = np.histogram(flat_matrix, bins=5)  # Adjust the number of bins as needed

        # Find the bin with the highest count
        max_count_index = np.argmax(hist)

        # Determine the corresponding value (bin edge) as the threshold
        threshold_value = bins[max_count_index + 1]

        # Print the threshold value for the current row (optional)
        print("Threshold value for row", index, ":", threshold_value)

        # Append the threshold value to the list
        thresholds.append(threshold_value)

    # Calculate the mean of the thresholds
    mean_threshold = np.mean(thresholds)

    return mean_threshold
'''

'\ndef set_threshold(df):\n    thresholds = []  # Initialize an empty list to store thresholds\n\n    # Iterate over each row in the DataFrame\n    for index, row in df.iterrows():\n        # Extract the matrix from the attribute of the current row\n        matrix = row[\'psi_matrix\']\n\n        # Flatten the matrix into a 1D array\n        flat_matrix = matrix.flatten()\n\n        # Create a histogram of the values in the array\n        hist, bins = np.histogram(flat_matrix, bins=5)  # Adjust the number of bins as needed\n\n        # Find the bin with the highest count\n        max_count_index = np.argmax(hist)\n\n        # Determine the corresponding value (bin edge) as the threshold\n        threshold_value = bins[max_count_index + 1]\n\n        # Print the threshold value for the current row (optional)\n        print("Threshold value for row", index, ":", threshold_value)\n\n        # Append the threshold value to the list\n        thresholds.append(threshold_value)\n\n    # Calcu

In [12]:
# Disabled helper (string literal, never executed): would return a copy of the frame in which
# every 'psi_matrix' entry >= threshold becomes 1 and every other entry becomes 0.
'''
def binarize_matrices(df, threshold):
    # Make a copy of the original DataFrame
    final_df = df.copy()

    # Iterate over each row and update the 'psi_matrix' column
    for index, row in final_df.iterrows():
        matrix = row['psi_matrix']
        newmatrix = np.where(matrix>=threshold, 1, 0)
        # Update the matrix attribute in the copied DataFrame
        final_df.at[index, 'psi_matrix'] = newmatrix

    return final_df
'''

"\ndef binarize_matrices(df, threshold):\n    # Make a copy of the original DataFrame\n    final_df = df.copy()\n\n    # Iterate over each row and update the 'psi_matrix' column\n    for index, row in final_df.iterrows():\n        matrix = row['psi_matrix']\n        newmatrix = np.where(matrix>=threshold, 1, 0)\n        # Update the matrix attribute in the copied DataFrame\n        final_df.at[index, 'psi_matrix'] = newmatrix\n\n    return final_df\n"

In [13]:
# Disabled helper (string literal, never executed): would subtract a fixed 132x132 identity
# matrix from every 'psi_matrix' that is a NumPy array and report rows that are skipped.
'''
def remove_loop(df):

  final_df = df.copy()

  # Define the identity matrix
  identity_matrix = np.eye(132)

  # Iterate over each row
  for i, row in final_df.iterrows():
      # Check if the first column contains a NumPy array
      if isinstance(row['psi_matrix'], np.ndarray):
          # Subtract the identity matrix from the NumPy array
          final_df.at[i, 'psi_matrix'] = row['psi_matrix'] - identity_matrix
      else:
          # Skip this row if the first column doesn't contain a NumPy array
          print(f"Skipping row {i}: First column doesn't contain a NumPy array.")

  return final_df
'''

'\ndef remove_loop(df):\n\n  final_df = df.copy()\n\n  # Define the identity matrix\n  identity_matrix = np.eye(132)\n\n  # Iterate over each row\n  for i, row in final_df.iterrows():\n      # Check if the first column contains a NumPy array\n      if isinstance(row[\'psi_matrix\'], np.ndarray):\n          # Subtract the identity matrix from the NumPy array\n          final_df.at[i, \'psi_matrix\'] = row[\'psi_matrix\'] - identity_matrix\n      else:\n          # Skip this row if the first column doesn\'t contain a NumPy array\n          print(f"Skipping row {i}: First column doesn\'t contain a NumPy array.")\n\n  return final_df\n'

In [14]:
# Disabled helper (string literal, never executed): would compute the five graph features from
# a single adjacency matrix per row, with no threshold sweep or averaging.
# driver() calls features_mean instead.
'''
def extract_features(df):
    # Initialize lists to store the calculated features
    clustering_coefficients = []
    average_node_degrees = []
    global_efficiencies = []
    characteristic_path_lengths = []
    assortativity = []

    # Iterate over each row in the input DataFrame
    for index, row in df.iterrows():
        # Step 1: Extract the thresholded adjacency matrix
        adjacency_matrix = row['psi_matrix']

        # Step 2: Convert the adjacency matrix to a NetworkX graph
        G = nx.from_numpy_array(adjacency_matrix)

        # Step 3: Calculate the topological features
        clustering_coefficient = nx.average_clustering(G)
        average_node_degree = sum(dict(G.degree()).values()) / len(G)
        global_efficiency = nx.global_efficiency(G)
        try:
            characteristic_path_length = nx.average_shortest_path_length(G)
        except nx.NetworkXError:
            characteristic_path_length = 0

        assort = nx.degree_assortativity_coefficient(G)

        # Step 4: Append the calculated features to the lists
        clustering_coefficients.append(clustering_coefficient)
        average_node_degrees.append(average_node_degree)
        global_efficiencies.append(global_efficiency)
        characteristic_path_lengths.append(characteristic_path_length)
        assortativity.append(assort)

    # Create a new DataFrame with the calculated features and original columns
    topological_features_df = pd.DataFrame({
        'subject': df['subject'],
        'clustering_coefficient': clustering_coefficients,
        'average_node_degree': average_node_degrees,
        'global_efficiency': global_efficiencies,
        'characteristic_path_length': characteristic_path_lengths,
        'assortativity': assortativity,
        'autism': df['autism'],
        'adhd': df['adhd'],
        'healthy': df['healthy']
    })

    # Print the DataFrame with calculated features
    print(topological_features_df)
    topological_features_df.fillna(0, inplace=True)

    return topological_features_df
'''

"\ndef extract_features(df):\n    # Initialize lists to store the calculated features\n    clustering_coefficients = []\n    average_node_degrees = []\n    global_efficiencies = []\n    characteristic_path_lengths = []\n    assortativity = []\n\n    # Iterate over each row in the input DataFrame\n    for index, row in df.iterrows():\n        # Step 1: Extract the thresholded adjacency matrix\n        adjacency_matrix = row['psi_matrix']\n\n        # Step 2: Convert the adjacency matrix to a NetworkX graph\n        G = nx.from_numpy_array(adjacency_matrix)\n\n        # Step 3: Calculate the topological features\n        clustering_coefficient = nx.average_clustering(G)\n        average_node_degree = sum(dict(G.degree()).values()) / len(G)\n        global_efficiency = nx.global_efficiency(G)\n        try:\n            characteristic_path_length = nx.average_shortest_path_length(G)\n        except nx.NetworkXError:\n            characteristic_path_length = 0\n\n        assort = nx.degree_

In [15]:
def y_convert(y_one_hot):
    """
    Collapse one-hot encoded rows into integer class indices.

    Args:
    y_one_hot (np.ndarray): 2-D array with one row per sample and one
        column per class.

    Returns:
    np.ndarray: For every row, the column index holding the largest value.
    """
    class_indices = np.argmax(y_one_hot, axis=1)
    return class_indices

In [16]:
def _graph_features(matrix, threshold):
    """Return the five topological features of ``matrix`` binarized at ``threshold``."""
    # Entries >= threshold become edges; NaN compares False and yields no edge.
    adjacency = (matrix >= threshold).astype(float)

    # Remove self-loops by zeroing the diagonal directly. Subtracting an
    # identity matrix would leave -1 (a spurious self-loop edge) wherever a
    # diagonal entry was NaN or below the threshold.
    np.fill_diagonal(adjacency, 0)

    G = nx.from_numpy_array(adjacency)

    try:
        characteristic_path_length = nx.average_shortest_path_length(G)
    except nx.NetworkXError:
        # Raised for disconnected graphs; recorded as 0 for that threshold.
        characteristic_path_length = 0

    return {
        'clustering_coefficient': nx.average_clustering(G),
        'average_node_degree': sum(dict(G.degree()).values()) / len(G),
        'global_efficiency': nx.global_efficiency(G),
        'characteristic_path_length': characteristic_path_length,
        'assortativity': nx.degree_assortativity_coefficient(G),
    }


def features_mean(df, thresholds=None):
    """
    Compute threshold-averaged graph features for every subject.

    Each row's 'psi_matrix' is binarized at every threshold, turned into an
    undirected graph without self-loops, and described by five features.
    The features are then averaged over the thresholds.

    Args:
        df (pd.DataFrame): Must provide the columns 'subject', 'psi_matrix',
            'autism', 'adhd' and 'healthy'. It is not modified.
        thresholds (iterable of float, optional): Binarization thresholds.
            Defaults to ``np.arange(0.65, 0.86, 0.01)``, i.e. 0.65 to 0.85
            in steps of 0.01.

    Returns:
        pd.DataFrame: Indexed like ``df`` with the columns 'subject', the five
        averaged features, and the label columns 'autism', 'adhd', 'healthy'.
        NaN values are replaced by 0.
    """
    if thresholds is None:
        thresholds = np.arange(0.65, 0.86, 0.01)

    feature_names = (
        'clustering_coefficient',
        'average_node_degree',
        'global_efficiency',
        'characteristic_path_length',
        'assortativity',
    )
    averaged = {name: [] for name in feature_names}

    for _, row in df.iterrows():
        matrix = np.asarray(row['psi_matrix'])

        per_threshold = {name: [] for name in feature_names}
        for threshold in thresholds:
            for name, value in _graph_features(matrix, threshold).items():
                per_threshold[name].append(value)

        # NOTE(review): a plain mean is kept on purpose to preserve existing
        # results; a single NaN (e.g. undefined assortativity at one
        # threshold) makes the subject's mean NaN, which becomes 0 below.
        for name in feature_names:
            averaged[name].append(np.mean(per_threshold[name]))

    topological_features_df = pd.DataFrame({
        'subject': df['subject'],
        'clustering_coefficient': averaged['clustering_coefficient'],
        'average_node_degree': averaged['average_node_degree'],
        'global_efficiency': averaged['global_efficiency'],
        'characteristic_path_length': averaged['characteristic_path_length'],
        'assortativity': averaged['assortativity'],
        'autism': df['autism'],
        'adhd': df['adhd'],
        'healthy': df['healthy'],
    })

    # Show the raw averages (NaN still visible) before they are zero-filled.
    print(topological_features_df)

    return topological_features_df.fillna(0)

In [17]:
def make_dataset(choice, data_path='/content/drive/MyDrive/Colab Notebooks/ROIxTimeseries/psi_data.pkl'):
    """
    Load the PSI data and keep the subjects relevant to one classification task.

    Args:
        choice (str): 'A' (ADHD vs autism), 'B' (autism vs healthy),
            'C' (ADHD vs healthy) or 'D' (all three classes, no filtering).
        data_path (str): Pickle file holding the DataFrame. Defaults to the
            notebook's Drive location.

    Returns:
        tuple: ``(data, y_cols)`` where ``data`` is the (possibly filtered)
        DataFrame and ``y_cols`` lists the label columns for the task. For an
        unknown ``choice`` a message is printed and ``(pd.DataFrame(), [])``
        is returned.
    """
    label_columns = {
        'A': ['adhd', 'autism'],
        'B': ['autism', 'healthy'],
        'C': ['adhd', 'healthy'],
        'D': ['adhd', 'autism', 'healthy'],
    }

    # Validate before touching the file so a bad choice does not pay for
    # (or fail on) loading the pickle.
    y_cols = label_columns.get(choice) if isinstance(choice, str) else None
    if y_cols is None:
        print("Invalid choice. Please enter 'A', 'B', 'C', or 'D'.")
        return pd.DataFrame(), []

    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    data = pd.read_pickle(data_path)

    if choice != 'D':
        # Keep subjects flagged with at least one of the two task labels.
        data = data[(data[y_cols] == 1).any(axis=1)]

    print(data)

    print(y_cols)
    return data, y_cols

In [18]:
def driver(choice):
    """
    Run repeated K-fold cross-validation of the SVM for one task.

    The dataset for ``choice`` is loaded, graph features are extracted, and
    for each of ten fixed seeds an ``n_splits``-fold split is evaluated.
    Features are min-max scaled with statistics from the training fold only.

    Args:
        choice (str): Task identifier ('A', 'B', 'C' or 'D'); case-insensitive.

    Returns:
        list[pd.DataFrame]: One DataFrame per seed. Each one is cumulative:
        it holds the per-fold metrics of that seed and of all earlier seeds,
        so the last element contains every seed/fold row.
    """
    choice = choice.upper()

    df, y_cols = make_dataset(choice)

    f_df = features_mean(df)

    X = f_df.drop(columns=['subject','autism','adhd','healthy'])
    print(X.isna().sum())
    print("type of matrices", type(X))
    y = f_df[y_cols].values
    print("type of label columns", type(y))

    # Class index = position of the label column in y_cols
    y_argmax = y_convert(y)
    print("type of label columns y_argmax", type(y_argmax))

    # Get the number of classes
    num_classes = y.shape[1]
    print("No. of classes", num_classes)

    # Column order of the result table; None lets pandas infer it.
    if num_classes == 2:
        result_columns = ['seed','fold','acc','conf_mat', 'sens (recall)','f1','prec', 'tn', 'tp', 'fn', 'fp']
    elif num_classes == 3:
        result_columns = ['seed','fold','acc','conf_mat', 'sens (recall)','f1','prec']
    else:
        result_columns = None

    # Manually set random seeds
    random_seeds = np.array([93, 98, 40, 19, 52, 74, 31, 66, 56, 22])
    print("Random seeds for outer loops:", random_seeds)

    # Per-fold metric dicts are collected in a list and turned into a frame
    # once per seed; growing a DataFrame with concat inside the loop is
    # quadratic and triggers pandas' empty-frame concat FutureWarning.
    fold_records = []
    all_results = []

    for outer_loop, random_seed in enumerate(random_seeds):
        print(f"Outer loop iteration: {outer_loop + 1}, Random seed: {random_seed}")

        # NOTE(review): KFold ignores the labels passed to split(); if
        # class-balanced folds are wanted, StratifiedKFold is the splitter
        # that uses them. Left unchanged to keep results comparable.
        kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_seed)

        for i, (train_index, val_index) in enumerate(kf.split(X, y_argmax)):

            print("FOLD : ", i+1)

            X_train, X_val = X.iloc[train_index], X.iloc[val_index]
            y_train, y_val = y_argmax[train_index], y_argmax[val_index]

            # Fit the scaler on the training fold only to avoid leakage
            mmx = MinMaxScaler()
            X_train = mmx.fit_transform(X_train)
            X_val = mmx.transform(X_val)

            compiled_m = get_model(num_classes, choice)

            trained_m = compile_fit(compiled_m, X_train, np.array(y_train))

            scores = eval_model(num_classes, trained_m, X_val, y_val, y_cols)
            scores['seed']=random_seed
            scores['fold']=i+1
            print("Scores", scores)
            fold_records.append(scores)

        # Snapshot of everything evaluated so far (this seed and earlier ones)
        result_df = pd.DataFrame(fold_records, columns=result_columns)

        all_results.append(result_df)
        print(f"Outer loop {outer_loop + 1} result_df:")
        print(result_df)
        print("\n")

    return all_results

In [19]:
# Define a list of choices
choices = ['A', 'B', 'C', 'D']

# Folder on Drive that receives one CSV per choice
output_dir = '/content/drive/MyDrive/Colab Notebooks/PSI_20features_Globalfeatures/minmax/SVM'
# Create it up front so a missing folder cannot discard a finished run
os.makedirs(output_dir, exist_ok=True)

# Create an empty dictionary to store the per-choice results
result_dfs = {}

# Loop through each choice
for choice in choices:
    # driver() returns a list with one DataFrame per random seed
    result_df = driver(choice)

    # Store the list in the dictionary with the choice as the key
    result_dfs[choice] = result_df

    # Each DataFrame in the list is cumulative, so the last one already holds
    # the rows of every seed and fold; concatenating the list would duplicate rows.
    combined_df = result_df[-1]

    # Save the combined results to a CSV file
    filename = f"choice_{choice}_results.csv"
    combined_df.to_csv(f'{output_dir}/{filename}', index=False)
    print(f"Saved combined results for choice {choice} to {filename}")

        subject                                         psi_matrix  adhd  \
0      subject1  [[1.0, 0.8016814987579317, 0.839641852247262, ...     0   
1      subject2  [[1.0, 0.9101210641534417, 0.576123749895304, ...     0   
2      subject3  [[1.0, 0.7530150385517623, 0.5067414702685676,...     0   
3      subject4  [[1.0, 0.817812140965262, 0.520061068095793, 0...     0   
4      subject5  [[1.0, 0.7477157295145737, 0.8259250654415865,...     0   
..          ...                                                ...   ...   
105  subject106  [[1.0, 0.7190486619620364, 0.3595353948075287,...     1   
106  subject107  [[1.0, 0.892350228080446, 0.6115053964108019, ...     1   
107  subject108  [[1.0, 0.9383744300499544, 0.7480842785117325,...     1   
108  subject109  [[1.0, 0.9300675513188588, 0.9240553408215981,...     1   
109  subject110  [[1.0, 0.7946738858174632, 0.5573150422007396,...     1   

     autism  healthy  
0         1        0  
1         1        0  
2         1       

  return float((xy * (M - ab)).sum() / np.sqrt(vara * varb))
  a = a / a.sum()


        subject  clustering_coefficient  average_node_degree  \
0      subject1                0.569813            31.042569   
1      subject2                0.203045             3.563492   
2      subject3                0.297265             5.054834   
3      subject4                0.177045             1.911977   
4      subject5                0.210130             3.884560   
..          ...                     ...                  ...   
105  subject106                0.148429             2.415584   
106  subject107                0.484921            20.016595   
107  subject108                0.498252            23.272006   
108  subject109                0.932566           110.243867   
109  subject110                0.240849             5.045455   

     global_efficiency  characteristic_path_length  assortativity  autism  \
0             0.403141                         0.0      -0.053484       1   
1             0.083907                         0.0       0.365881       1   


  result_df = pd.concat([result_df,scores], ignore_index=True)


y_pred [0 0 0 0 1 1 1 1 0 1 0 0 0 0 1 1 1 1 1 0 0 0]
y_val [1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0]
Type of y_pred using model.predict: <class 'numpy.ndarray'>
shape of the y_pred using model.predict: (22,)
Scores {'acc': 0.4090909090909091, 'conf_mat': array([[4, 5],
       [8, 5]]), 'sens (recall)': 0.38461538461538464, 'f1': 0.4347826086956522, 'prec': 0.5, 'tn': 4, 'tp': 5, 'fn': 8, 'fp': 5, 'seed': 98, 'fold': 3}
FOLD :  4
y_pred [1 1 0 1 1 0 0 0 1 1 1 1 1 1 0 1 0 1 0 0 1 0]
y_val [1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0]
Type of y_pred using model.predict: <class 'numpy.ndarray'>
shape of the y_pred using model.predict: (22,)
Scores {'acc': 0.5909090909090909, 'conf_mat': array([[5, 5],
       [4, 8]]), 'sens (recall)': 0.6666666666666666, 'f1': 0.64, 'prec': 0.6153846153846154, 'tn': 5, 'tp': 8, 'fn': 4, 'fp': 5, 'seed': 98, 'fold': 4}
FOLD :  5
y_pred [1 0 1 0 1 1 1 1 1 1 1 1 1 1 0 0 1 0 1 1 1 1]
y_val [1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Type of y_pred using 

  return float((xy * (M - ab)).sum() / np.sqrt(vara * varb))


        subject  clustering_coefficient  average_node_degree  \
0      subject1                0.569813            31.042569   
1      subject2                0.203045             3.563492   
2      subject3                0.297265             5.054834   
3      subject4                0.177045             1.911977   
4      subject5                0.210130             3.884560   
..          ...                     ...                  ...   
160  subject161                0.370960            11.876623   
161  subject162                0.233604             4.710678   
162  subject163                0.503595            21.367965   
163  subject164                0.540719            28.873737   
164  subject165                0.794633            72.487734   

     global_efficiency  characteristic_path_length  assortativity  autism  \
0             0.403141                         0.0      -0.053484       1   
1             0.083907                         0.0       0.365881       1   


  result_df = pd.concat([result_df,scores], ignore_index=True)


Scores {'acc': 0.6363636363636364, 'conf_mat': array([[9, 3],
       [5, 5]]), 'sens (recall)': 0.5, 'f1': 0.5555555555555556, 'prec': 0.625, 'tn': 9, 'tp': 5, 'fn': 5, 'fp': 3, 'seed': 98, 'fold': 4}
FOLD :  5
y_pred [0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 0 1 0 0 0 1 1]
y_val [0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
Type of y_pred using model.predict: <class 'numpy.ndarray'>
shape of the y_pred using model.predict: (22,)
Scores {'acc': 0.5454545454545454, 'conf_mat': array([[7, 1],
       [9, 5]]), 'sens (recall)': 0.35714285714285715, 'f1': 0.5, 'prec': 0.8333333333333334, 'tn': 7, 'tp': 5, 'fn': 9, 'fp': 1, 'seed': 98, 'fold': 5}
Outer loop 2 result_df:
  seed fold       acc           conf_mat  sens (recall)        f1      prec  \
0   93    1  0.636364   [[7, 3], [5, 7]]       0.583333  0.636364  0.700000   
1   93    2  0.454545   [[6, 5], [7, 4]]       0.363636  0.400000  0.444444   
2   93    3  0.500000  [[7, 1], [10, 4]]       0.285714  0.421053  0.800000   
3   93    4  0.363636 

  return float((xy * (M - ab)).sum() / np.sqrt(vara * varb))
  a = a / a.sum()


        subject  clustering_coefficient  average_node_degree  \
55    subject56                0.325442             5.897547   
56    subject57                0.149920             1.618326   
57    subject58                0.154187             1.645743   
58    subject59                0.461547            19.377345   
59    subject60                0.143685             2.141414   
..          ...                     ...                  ...   
160  subject161                0.370960            11.876623   
161  subject162                0.233604             4.710678   
162  subject163                0.503595            21.367965   
163  subject164                0.540719            28.873737   
164  subject165                0.794633            72.487734   

     global_efficiency  characteristic_path_length  assortativity  autism  \
55            0.177905                         0.0       0.247016       0   
56            0.052483                         0.0       0.423212       0   


  result_df = pd.concat([result_df,scores], ignore_index=True)


Scores {'acc': 0.5454545454545454, 'conf_mat': array([[7, 2],
       [8, 5]]), 'sens (recall)': 0.38461538461538464, 'f1': 0.5, 'prec': 0.7142857142857143, 'tn': 7, 'tp': 5, 'fn': 8, 'fp': 2, 'seed': 98, 'fold': 2}
FOLD :  3
y_pred [1 1 1 1 1 0 1 1 0 1 1 1 0 1 0 1 1 0 0 0 0 1]
y_val [0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1]
Type of y_pred using model.predict: <class 'numpy.ndarray'>
shape of the y_pred using model.predict: (22,)
Scores {'acc': 0.3181818181818182, 'conf_mat': array([[ 3, 10],
       [ 5,  4]]), 'sens (recall)': 0.4444444444444444, 'f1': 0.34782608695652173, 'prec': 0.2857142857142857, 'tn': 3, 'tp': 4, 'fn': 5, 'fp': 10, 'seed': 98, 'fold': 3}
FOLD :  4
y_pred [0 0 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 1 1]
y_val [0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1]
Type of y_pred using model.predict: <class 'numpy.ndarray'>
shape of the y_pred using model.predict: (22,)
Scores {'acc': 0.7272727272727273, 'conf_mat': array([[9, 3],
       [3, 7]]), 'sens (recall)': 0.7, 'f1':

  return float((xy * (M - ab)).sum() / np.sqrt(vara * varb))
  a = a / a.sum()


        subject  clustering_coefficient  average_node_degree  \
0      subject1                0.569813            31.042569   
1      subject2                0.203045             3.563492   
2      subject3                0.297265             5.054834   
3      subject4                0.177045             1.911977   
4      subject5                0.210130             3.884560   
..          ...                     ...                  ...   
160  subject161                0.370960            11.876623   
161  subject162                0.233604             4.710678   
162  subject163                0.503595            21.367965   
163  subject164                0.540719            28.873737   
164  subject165                0.794633            72.487734   

     global_efficiency  characteristic_path_length  assortativity  autism  \
0             0.403141                         0.0      -0.053484       1   
1             0.083907                         0.0       0.365881       1   


  result_df = pd.concat([result_df,scores], ignore_index=True)


classification report               precision    recall  f1-score   support

        adhd      0.000     0.000     0.000         9
      autism      0.167     0.154     0.160        13
     healthy      0.176     0.273     0.214        11

    accuracy                          0.152        33
   macro avg      0.114     0.142     0.125        33
weighted avg      0.124     0.152     0.134        33

classification report in dataframe - match accuracy with model.evaluate 
              precision    recall  f1-score    support
adhd           0.000000  0.000000  0.000000   9.000000
autism         0.166667  0.153846  0.160000  13.000000
healthy        0.176471  0.272727  0.214286  11.000000
accuracy       0.151515  0.151515  0.151515   0.151515
macro avg      0.114379  0.142191  0.124762  33.000000
weighted avg   0.124480  0.151515  0.134459  33.000000
Average Precision (first 3 classes): 0.114
Average Recall (first 3 classes): 0.142
Average F1-Score (first 3 classes): 0.125
Scores {'acc':

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


classification report               precision    recall  f1-score   support

        adhd      0.000     0.000     0.000        15
      autism      0.350     0.778     0.483         9
     healthy      0.308     0.444     0.364         9

    accuracy                          0.333        33
   macro avg      0.219     0.407     0.282        33
weighted avg      0.179     0.333     0.231        33

classification report in dataframe - match accuracy with model.evaluate 
              precision    recall  f1-score    support
adhd           0.000000  0.000000  0.000000  15.000000
autism         0.350000  0.777778  0.482759   9.000000
healthy        0.307692  0.444444  0.363636   9.000000
accuracy       0.333333  0.333333  0.333333   0.333333
macro avg      0.219231  0.407407  0.282132  33.000000
weighted avg   0.179371  0.333333  0.230835  33.000000
Average Precision (first 3 classes): 0.219
Average Recall (first 3 classes): 0.407
Average F1-Score (first 3 classes): 0.282
Scores {'acc':

In [None]:
# Disabled cell: every line below is commented out, so running this cell does nothing.
# If re-enabled, it would call driver(choice) once per entry in `choices` and keep
# each returned dataframe in `result_dfs`, keyed by the choice letter.
# NOTE(review): `driver` is defined outside this cell; presumably the choice letters
# are the same 'A'-'D' values accepted by get_model -- confirm before re-enabling.
# NOTE(review): the export cells further down index result_dfs['A'] .. ['D'], but the
# active list here is only ['A']; the four-letter list is the one commented out twice.
# # Define a list of choices
# # choices = ['A', 'B', 'C', 'D']
# choices = ['A']

# # Create an empty dictionary to store the result dataframes
# result_dfs = {}

# # Loop through each choice
# for choice in choices:
#     # Call the driver() function with the current choice
#     result_df = driver(choice)

#     # Store the result dataframe in the dictionary with the choice as the key
#     result_dfs[choice] = result_df

        subject                                         psi_matrix  adhd  \
0      subject1  [[1.0, 0.8016814987579317, 0.839641852247262, ...     0   
1      subject2  [[1.0, 0.9101210641534417, 0.576123749895304, ...     0   
2      subject3  [[1.0, 0.7530150385517623, 0.5067414702685676,...     0   
3      subject4  [[1.0, 0.817812140965262, 0.520061068095793, 0...     0   
4      subject5  [[1.0, 0.7477157295145737, 0.8259250654415865,...     0   
..          ...                                                ...   ...   
105  subject106  [[1.0, 0.7190486619620364, 0.3595353948075287,...     1   
106  subject107  [[1.0, 0.892350228080446, 0.6115053964108019, ...     1   
107  subject108  [[1.0, 0.9383744300499544, 0.7480842785117325,...     1   
108  subject109  [[1.0, 0.9300675513188588, 0.9240553408215981,...     1   
109  subject110  [[1.0, 0.7946738858174632, 0.5573150422007396,...     1   

     autism  healthy  
0         1        0  
1         1        0  
2         1       

  return float((xy * (M - ab)).sum() / np.sqrt(vara * varb))
  a = a / a.sum()


        subject  clustering_coefficient  average_node_degree  \
0      subject1                0.569813            31.042569   
1      subject2                0.203045             3.563492   
2      subject3                0.297265             5.054834   
3      subject4                0.177045             1.911977   
4      subject5                0.210130             3.884560   
..          ...                     ...                  ...   
105  subject106                0.148429             2.415584   
106  subject107                0.484921            20.016595   
107  subject108                0.498252            23.272006   
108  subject109                0.932566           110.243867   
109  subject110                0.240849             5.045455   

     global_efficiency  characteristic_path_length  assortativity  autism  \
0             0.403141                         0.0      -0.053484       1   
1             0.083907                         0.0       0.365881       1   


In [None]:
# Disabled cell: would print the dataframe stored under choice 'A' and write it to
# adhd-autism.csv (mode='w' overwrites an existing file; index=False omits the index).
# NOTE(review): the output recorded under this cell ends in
# "OSError: Cannot save file into a non-existent directory" and names a
# '..._RF-globalfeatures' directory, not the '..._SVM-globalfeatures' path used here,
# so that output appears to come from an earlier version of this cell -- confirm.
# Before re-enabling, make sure the target directory exists, e.g.
# os.makedirs('results/PSI(20 features)_SVM-globalfeatures', exist_ok=True).
# print(result_dfs['A'])
# result_dfs['A'].to_csv('results/PSI(20 features)_SVM-globalfeatures/adhd-autism.csv', mode = 'w', index=False)

  fold       acc           conf_mat  sens (recall)        f1      prec  tn tp  \
0    1  0.545455  [[10, 2], [8, 2]]       0.200000  0.285714  0.500000  10  2   
1    2  0.363636  [[3, 4], [10, 5]]       0.333333  0.416667  0.555556   3  5   
2    3  0.545455   [[6, 3], [7, 6]]       0.461538  0.545455  0.666667   6  6   
3    4  0.590909   [[6, 9], [0, 7]]       1.000000  0.608696  0.437500   6  7   
4    5  0.590909   [[8, 4], [5, 5]]       0.500000  0.526316  0.555556   8  5   

   fn fp  
0   8  2  
1  10  4  
2   7  3  
3   0  9  
4   5  4  


OSError: Cannot save file into a non-existent directory: 'results/PSI(20 features)_RF-globalfeatures'

In [None]:
# Disabled cell: would print the dataframe stored under choice 'B' and write it to
# autism-healthy.csv (mode='w' overwrites an existing file; index=False omits the index).
# NOTE(review): the path is relative and its directory must already exist, since
# DataFrame.to_csv does not create missing directories; create it before re-enabling.
# print(result_dfs['B'])
# result_dfs['B'].to_csv('results/PSI(20 features)_SVM-globalfeatures/autism-healthy.csv', mode = 'w', index=False)

In [None]:
# Disabled cell: would print the dataframe stored under choice 'C' and write it to
# adhd-healthy.csv (mode='w' overwrites an existing file; index=False omits the index).
# NOTE(review): the path is relative and its directory must already exist, since
# DataFrame.to_csv does not create missing directories; create it before re-enabling.
# print(result_dfs['C'])
# result_dfs['C'].to_csv('results/PSI(20 features)_SVM-globalfeatures/adhd-healthy.csv', mode = 'w', index=False)

In [None]:
# Disabled cell: would print the dataframe stored under choice 'D' and write it to
# adhd-autism-healthy.csv (mode='w' overwrites an existing file; index=False omits the index).
# NOTE(review): the path is relative and its directory must already exist, since
# DataFrame.to_csv does not create missing directories; create it before re-enabling.
# print(result_dfs['D'])
# result_dfs['D'].to_csv('results/PSI(20 features)_SVM-globalfeatures/adhd-autism-healthy.csv', mode = 'w', index=False)