In [1]:
# Mount Google Drive; the pickle read in make_dataset() and the CSVs written by the
# final cell both live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


PSI pipeline: binarize each matrix at 20 thresholds, average the binarized matrices, extract graph features, then classify with an SVM

In [2]:
import numpy as np
import networkx as nx

from sklearn import svm
from sklearn.svm import SVC
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, f1_score, recall_score, confusion_matrix
from sklearn.metrics import roc_curve, auc
from sklearn import metrics as mt
from sklearn.preprocessing import MinMaxScaler

import os
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from matplotlib import pyplot as plt

import csv

In [3]:
# Number of cross-validation folds; driver() reads this module-level name when it builds KFold.
n_splits = 5

In [4]:
def get_model(num_classes, choice):
    """
    Build an unfitted SVM whose regularisation strength depends on the task.

    Args:
        num_classes (int): Number of target classes. Accepted so call sites
            stay uniform; it does not influence the model that is built.
        choice (str): Task key, one of 'A', 'B', 'C' or 'D'.

    Returns:
        SVC: Unfitted classifier with a one-vs-rest decision function.

    Raises:
        ValueError: If `choice` is not one of the four known keys.
    """
    # (task key, C, kernel) — every task currently uses a linear kernel.
    presets = (
        ('A', 12, 'linear'),
        ('B', 3, 'linear'),
        ('C', 4, 'linear'),
        ('D', 5, 'linear'),
    )

    selected = next(
        ((c_val, ker) for key, c_val, ker in presets if choice == key),
        None,
    )
    if selected is None:
        raise ValueError("Invalid choice. Must be 'A', 'B', 'C', or 'D'.")

    c_val, ker = selected
    return SVC(C=c_val, kernel=ker, decision_function_shape='ovr')

In [5]:
def compile_fit(model, X_train, y_train):
    """
    Fit `model` on the training split and hand the same object back.

    Args:
        model: Estimator exposing ``fit(X, y)``; driver() passes the SVC built
            by get_model().
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The `model` object that was passed in, after ``fit`` has been called on it.
    """
    model.fit(X_train, y_train)
    return model

In [6]:
def eval_model(num_classes, model, X_val, y_val, y_cols):
  """
  Score a fitted classifier on one validation fold.

  Args:
      num_classes (int): 2 or 3. Any other value yields an empty dict.
      model: Fitted estimator exposing ``predict``.
      X_val: Validation features, passed straight to ``model.predict``.
      y_val: Integer class labels (0 .. num_classes-1) for the validation rows.
      y_cols (list[str]): Class names; position i names label i.

  Returns:
      dict: Keys 'acc', 'conf_mat', 'sens (recall)', 'f1', 'prec', plus
      'tn', 'tp', 'fn', 'fp' in the two-class case. For three classes the
      precision/recall/F1 values are unweighted means over the classes.
      Empty when `num_classes` is neither 2 nor 3.
  """
  y_pred = model.predict(X_val)
  print('y_pred', y_pred)
  print('y_val', y_val)

  if num_classes not in (2, 3):
    return {}

  print("Type of y_pred using model.predict:", type(y_pred))
  print("shape of the y_pred using model.predict:", y_pred.shape)

  # Pin the label set so the confusion matrix always has num_classes rows/columns,
  # even when a fold happens to contain (or predict) only a subset of the classes.
  labels = list(range(num_classes))
  conf_mat = confusion_matrix(y_val, y_pred, labels=labels)
  acc = accuracy_score(y_val, y_pred)

  if num_classes == 3:
    print("confusion matrix ", conf_mat)

    target_names = list(y_cols)
    # zero_division=0 keeps the value sklearn already falls back to for undefined
    # precision/recall but without emitting a warning per fold.
    report_args = dict(labels=labels, target_names=target_names, zero_division=0)

    print("classification report", mt.classification_report(y_val, y_pred, digits=3, **report_args))

    report = mt.classification_report(y_val, y_pred, output_dict=True, **report_args)
    report_df = pd.DataFrame(report).T

    print("classification report in dataframe - match accuracy with model.evaluate ")
    print(report_df)

    # Select the per-class rows by name rather than by position so the summary
    # rows (accuracy / macro avg / weighted avg) can never leak into the means.
    per_class = report_df.loc[target_names]
    avg_precision = per_class['precision'].mean()
    avg_recall = per_class['recall'].mean()
    avg_f1_score = per_class['f1-score'].mean()

    print(f"Average Precision (first 3 classes): {avg_precision:.3f}")
    print(f"Average Recall (first 3 classes): {avg_recall:.3f}")
    print(f"Average F1-Score (first 3 classes): {avg_f1_score:.3f}")

    return {
        'acc': acc,
        'conf_mat': conf_mat,
        'sens (recall)': avg_recall,
        'f1': avg_f1_score,
        'prec': avg_precision
    }

  # Two classes. sklearn's binary scorers default to pos_label=1, so the
  # "positive" class below is the one named by y_cols[1].
  tn, fp, fn, tp = conf_mat.ravel()
  precision = precision_score(y_val, y_pred, zero_division=0)
  recall = recall_score(y_val, y_pred, zero_division=0)
  f1 = f1_score(y_val, y_pred, zero_division=0)

  return {
      'acc': acc,
      'conf_mat': conf_mat,
      'sens (recall)': recall,
      'f1': f1,
      'prec': precision,
      'tn': tn,
      'tp': tp,
      'fn': fn,
      'fp': fp
  }

In [7]:
# Disabled helper: the definition below is wrapped in a string literal, so nan_to_0 is
# never defined and the cell merely evaluates to that string.
# NOTE(review): the comment inside the string says NaNs are replaced with 1, but the
# call passes nan = 0.0 (which also matches the function name).
'''
# Replacing nan values to 1
def nan_to_0(data):

    df1 = data.copy()

    for idx, row in df1.iterrows():
      arr = row['psi_matrix']
      matrix = np.nan_to_num(arr, copy = True, nan = 0.0)
      df1.at[idx, 'psi_matrix'] = matrix

    print(df1)

    return df1
'''

"\n# Replacing nan values to 1\ndef nan_to_0(data):\n\n    df1 = data.copy()\n\n    for idx, row in df1.iterrows():\n      arr = row['psi_matrix']\n      matrix = np.nan_to_num(arr, copy = True, nan = 0.0)\n      df1.at[idx, 'psi_matrix'] = matrix\n\n    print(df1)\n\n    return df1\n"

In [8]:
# Disabled helper (string literal, never defined): would return the minimum and maximum
# of X taken along axis 0, as a (min, max) tuple.
'''
def computeMinMax(X):
  min_matrix = X.min(axis = 0)
  max_matrix = X.max(axis = 0)
  return (min_matrix, max_matrix)
'''

'\ndef computeMinMax(X):\n  min_matrix = X.min(axis = 0)\n  max_matrix = X.max(axis = 0)\n  return (min_matrix, max_matrix)\n'

In [9]:
# Disabled helper (string literal, never defined): element-wise min-max scaling of one
# 2-D instance, leaving an element unchanged where min equals max.
# NOTE(review): the `return` is indented inside the `for` body, so if this were
# re-enabled it would return after scaling only the first element.
'''
def normalize_instance(X, minn, maxx):
  normalised_X = np.zeros(shape=(X.shape[0], X.shape[1]))

  for idx, x in np.ndenumerate(X):
    if minn[idx] == maxx[idx]:
      normalised_X[idx] = x
    else:
      normalised_X[idx] = (x - minn[idx])/(maxx[idx] - minn[idx])
    return normalised_X
'''

'\ndef normalize_instance(X, minn, maxx):\n  normalised_X = np.zeros(shape=(X.shape[0], X.shape[1]))\n\n  for idx, x in np.ndenumerate(X):\n    if minn[idx] == maxx[idx]:\n      normalised_X[idx] = x\n    else:\n      normalised_X[idx] = (x - minn[idx])/(maxx[idx] - minn[idx])\n    return normalised_X\n'

In [10]:
# Disabled helper (string literal, never defined): stacks the train and validation
# matrices, takes min/max from the training stack only, and scales both sets with them.
# It calls computeMinMax and normalize_instance, which are likewise disabled; the live
# pipeline in driver() scales features with MinMaxScaler instead.
'''
def normalize(X_train, X_val):
    # Assuming X_train is your DataFrame with matrices in a single column
    matrices = X_train  # Get the values from the 'matrices' column
    # Convert the matrices to a 2D NumPy array
    X_train_2d = np.stack(matrices)

    # Assuming X_train is your DataFrame with matrices in a single column
    matrices = X_val  # Get the values from the 'matrices' column
    # Convert the matrices to a 2D NumPy array
    X_val_2d = np.stack(matrices)

    min_matrix, max_matrix = computeMinMax(X_train_2d)

    print("shape of min matrix", min_matrix.shape)
    print("shape of max matrix", max_matrix.shape)

    normalized_instances = []
    for instance in X_train_2d:
        normalized_instance = normalize_instance(instance, min_matrix, max_matrix)
        normalized_instances.append(normalized_instance)

    # Convert the list of normalized instances to a NumPy array
    X_normalized_trained_2d = np.array(normalized_instances)

    normalized_instances = []
    for instance in X_val_2d:
        normalized_instance = normalize_instance(instance, min_matrix, max_matrix)
        normalized_instances.append(normalized_instance)

    # Convert the list of normalized instances to a NumPy array
    X_normalized_val_2d = np.array(normalized_instances)

    return (X_normalized_trained_2d, X_normalized_val_2d)
'''

'\ndef normalize(X_train, X_val):\n    # Assuming X_train is your DataFrame with matrices in a single column\n    matrices = X_train  # Get the values from the \'matrices\' column\n    # Convert the matrices to a 2D NumPy array\n    X_train_2d = np.stack(matrices)\n\n    # Assuming X_train is your DataFrame with matrices in a single column\n    matrices = X_val  # Get the values from the \'matrices\' column\n    # Convert the matrices to a 2D NumPy array\n    X_val_2d = np.stack(matrices)\n\n    min_matrix, max_matrix = computeMinMax(X_train_2d)\n\n    print("shape of min matrix", min_matrix.shape)\n    print("shape of max matrix", max_matrix.shape)\n\n    normalized_instances = []\n    for instance in X_train_2d:\n        normalized_instance = normalize_instance(instance, min_matrix, max_matrix)\n        normalized_instances.append(normalized_instance)\n\n    # Convert the list of normalized instances to a NumPy array\n    X_normalized_trained_2d = np.array(normalized_instances)\n\n  

In [11]:
def binarize_matrices(df, thresholds=None):
    """
    Replace every 'psi_matrix' with the mean of its thresholded (0/1) versions.

    Each matrix is binarized once per threshold (entry >= threshold -> 1, else 0)
    and the binary matrices are averaged, so every output entry is the fraction
    of thresholds that the original entry reaches.

    Args:
        df (pd.DataFrame): Frame with a 'psi_matrix' column holding one array per row.
        thresholds (array-like of float, optional): Cut-offs to average over.
            Defaults to np.arange(0.65, 0.85, 0.01), i.e. 0.65, 0.66, ... up to
            but not including 0.85.

    Returns:
        pd.DataFrame: Copy of `df` whose 'psi_matrix' column holds the averaged
        float matrices. `df` itself is left unchanged.

    Raises:
        ValueError: If `thresholds` is given but empty.
    """
    # The grid is identical for every row, so build it once up front.
    if thresholds is None:
        thresholds = np.arange(0.65, 0.85, 0.01)
    thresholds = np.asarray(thresholds, dtype=float).ravel()
    if thresholds.size == 0:
        raise ValueError("thresholds must contain at least one value")

    final_df = df.copy()

    if len(final_df):
        # Collect results in an object array and assign the column in one go;
        # this keeps whole matrices as cell values regardless of the index labels.
        averaged = np.empty(len(final_df), dtype=object)
        for position, matrix in enumerate(final_df['psi_matrix']):
            matrix = np.asarray(matrix)
            # Cast each boolean mask to float before averaging.
            averaged[position] = np.mean(
                [(matrix >= threshold).astype(float) for threshold in thresholds],
                axis=0,
            )
        final_df['psi_matrix'] = averaged

    # Print the updated DataFrame
    print("\nUpdated DataFrame:")
    print(final_df)

    return final_df

In [12]:
def y_convert(y_one_hot):
    """
    Collapse one-hot rows into integer class indices.

    Args:
    y_one_hot (np.ndarray): 2-D array with one row per sample and one column per class.

    Returns:
    np.ndarray: For each row, the column index holding the largest value.
    """
    encoded = np.asarray(y_one_hot)
    return encoded.argmax(axis=1)

In [13]:
# Disabled helper (string literal, never defined): for each row it histograms the
# flattened 'psi_matrix' into 5 bins, takes the upper edge of the fullest bin as that
# row's threshold, and returns the mean of those per-row thresholds.
'''
def set_threshold(df):
    thresholds = []  # Initialize an empty list to store thresholds

    # Iterate over each row in the DataFrame
    for index, row in df.iterrows():
        # Extract the matrix from the attribute of the current row
        matrix = row['psi_matrix']

        # Flatten the matrix into a 1D array
        flat_matrix = matrix.flatten()

        # Create a histogram of the values in the array
        hist, bins = np.histogram(flat_matrix, bins=5)  # Adjust the number of bins as needed

        # Find the bin with the highest count
        max_count_index = np.argmax(hist)

        # Determine the corresponding value (bin edge) as the threshold
        threshold_value = bins[max_count_index + 1]

        # Print the threshold value for the current row (optional)
        print("Threshold value for row", index, ":", threshold_value)

        # Append the threshold value to the list
        thresholds.append(threshold_value)

    # Calculate the mean of the thresholds
    mean_threshold = np.mean(thresholds)

    return mean_threshold
'''

'\ndef set_threshold(df):\n    thresholds = []  # Initialize an empty list to store thresholds\n\n    # Iterate over each row in the DataFrame\n    for index, row in df.iterrows():\n        # Extract the matrix from the attribute of the current row\n        matrix = row[\'psi_matrix\']\n\n        # Flatten the matrix into a 1D array\n        flat_matrix = matrix.flatten()\n\n        # Create a histogram of the values in the array\n        hist, bins = np.histogram(flat_matrix, bins=5)  # Adjust the number of bins as needed\n\n        # Find the bin with the highest count\n        max_count_index = np.argmax(hist)\n\n        # Determine the corresponding value (bin edge) as the threshold\n        threshold_value = bins[max_count_index + 1]\n\n        # Print the threshold value for the current row (optional)\n        print("Threshold value for row", index, ":", threshold_value)\n\n        # Append the threshold value to the list\n        thresholds.append(threshold_value)\n\n    # Calcu

In [14]:
# Disabled single-threshold variant (string literal, never defined).
# NOTE(review): it has the same name as the live binarize_matrices(df) defined in an
# earlier cell; re-enabling it as-is would shadow that function and break the
# one-argument call in driver().
'''
def binarize_matrices(df, threshold):
    # Make a copy of the original DataFrame
    final_df = df.copy()

    # Iterate over each row and update the 'psi_matrix' column
    for index, row in final_df.iterrows():
        matrix = row['psi_matrix']
        newmatrix = np.where(matrix>=threshold, 1, 0)
        # Update the matrix attribute in the copied DataFrame
        final_df.at[index, 'psi_matrix'] = newmatrix

    return final_df
'''

"\ndef binarize_matrices(df, threshold):\n    # Make a copy of the original DataFrame\n    final_df = df.copy()\n\n    # Iterate over each row and update the 'psi_matrix' column\n    for index, row in final_df.iterrows():\n        matrix = row['psi_matrix']\n        newmatrix = np.where(matrix>=threshold, 1, 0)\n        # Update the matrix attribute in the copied DataFrame\n        final_df.at[index, 'psi_matrix'] = newmatrix\n\n    return final_df\n"

In [15]:
def remove_loop(df):
  """
  Remove self-loops by zeroing the diagonal of every 'psi_matrix'.

  The matrix size is taken from each matrix itself, so any 2-D array works
  (not only 132 x 132). The diagonal is set to 0 outright instead of having an
  identity matrix subtracted, so a diagonal entry that is not exactly 1 can no
  longer turn into a negative or fractional self-loop weight.

  Args:
      df (pd.DataFrame): Frame with a 'psi_matrix' column.

  Returns:
      pd.DataFrame: Copy of `df` with float matrices whose diagonal is 0. Rows
      whose 'psi_matrix' is not a 2-D NumPy array are reported and left as is.
  """
  final_df = df.copy()
  if len(final_df) == 0:
      return final_df

  cleaned = np.empty(len(final_df), dtype=object)

  for position, (label, matrix) in enumerate(final_df['psi_matrix'].items()):
      if not isinstance(matrix, np.ndarray):
          print(f"Skipping row {label}: First column doesn't contain a NumPy array.")
          cleaned[position] = matrix
      elif matrix.ndim != 2:
          print(f"Skipping row {label}: psi_matrix is not a 2-D array.")
          cleaned[position] = matrix
      else:
          # astype() returns a new array, so the caller's matrix is never modified.
          without_loops = matrix.astype(float)
          np.fill_diagonal(without_loops, 0.0)
          cleaned[position] = without_loops

  final_df['psi_matrix'] = cleaned
  return final_df

In [16]:
def _graph_topology(adjacency_matrix):
    """Return (clustering, mean degree, global efficiency, path length, assortativity) for one matrix."""
    graph = nx.from_numpy_array(adjacency_matrix)

    # NOTE(review): none of the calls below pass `weight=`; with networkx defaults the
    # fractional edge values of the averaged matrix are presumably ignored, so any
    # non-zero entry counts as a plain edge — confirm this is intended.
    clustering = nx.average_clustering(graph)
    mean_degree = sum(degree for _, degree in graph.degree()) / len(graph)
    efficiency = nx.global_efficiency(graph)
    try:
        path_length = nx.average_shortest_path_length(graph)
    except nx.NetworkXError:
        # Path length is recorded as 0 whenever networkx refuses to compute it.
        path_length = 0
    assortativity = nx.degree_assortativity_coefficient(graph)

    return clustering, mean_degree, efficiency, path_length, assortativity


def extract_features(df):
    """
    Turn each row's 'psi_matrix' into five global graph measures.

    Args:
        df (pd.DataFrame): Frame with 'subject', 'psi_matrix', 'autism', 'adhd'
            and 'healthy' columns.

    Returns:
        pd.DataFrame: One row per input row with 'subject', the five measures
        ('clustering_coefficient', 'average_node_degree', 'global_efficiency',
        'characteristic_path_length', 'assortativity') and the three label
        columns. Missing values are replaced by 0 before returning.
    """
    measure_names = (
        'clustering_coefficient',
        'average_node_degree',
        'global_efficiency',
        'characteristic_path_length',
        'assortativity',
    )

    per_row = [_graph_topology(row['psi_matrix']) for _, row in df.iterrows()]

    # Transpose the per-row tuples into one list per measure.
    measure_columns = {
        name: [values[position] for values in per_row]
        for position, name in enumerate(measure_names)
    }

    topological_features_df = pd.DataFrame({
        'subject': df['subject'],
        **measure_columns,
        'autism': df['autism'],
        'adhd': df['adhd'],
        'healthy': df['healthy'],
    })

    # The frame is printed before missing values are filled, so NaNs stay visible in the log.
    print(topological_features_df)

    return topological_features_df.fillna(0)

In [17]:
def make_dataset(choice, path='/content/drive/MyDrive/Colab Notebooks/ROIxTimeseries/psi_data.pkl'):
    """
    Load the PSI dataframe and keep the rows/label columns for one task.

    Args:
        choice (str): 'A' (adhd vs autism), 'B' (autism vs healthy),
            'C' (adhd vs healthy) or 'D' (all three classes, no row filtering).
        path (str): Pickle file to load. Defaults to the notebook's Drive location.

    Returns:
        tuple: (data, y_cols) where `data` is the filtered DataFrame and `y_cols`
        the label column names in class order. For an unknown `choice` a message
        is printed and (empty DataFrame, []) is returned without reading the file.
    """
    label_columns = {
        'A': ['adhd', 'autism'],
        'B': ['autism', 'healthy'],
        'C': ['adhd', 'healthy'],
        'D': ['adhd', 'autism', 'healthy'],
    }

    # Validate first so a bad key never triggers the file read.
    y_cols = label_columns.get(choice) if isinstance(choice, str) else None
    if y_cols is None:
        print("Invalid choice. Please enter 'A', 'B', 'C', or 'D'.")
        return pd.DataFrame(), []

    # NOTE(security): unpickling can execute arbitrary code — only load trusted files.
    data = pd.read_pickle(path)

    if choice != 'D':
        # Keep subjects that carry at least one of the task's two labels.
        data = data[(data[y_cols] == 1).any(axis=1)]

    print(data)

    # df1 = nan_to_0(data)

    print(y_cols)
    return data, y_cols

In [18]:
def driver(choice, stratified=False):
    """
    Run repeated k-fold cross-validation of the SVM for one task.

    Pipeline: load the task's subjects, average the thresholded matrices, drop
    self-loops, extract graph features, then for each of ten fixed seeds run an
    `n_splits`-fold split, min-max scale on the training fold only, fit and score.

    Args:
        choice (str): Task key 'A', 'B', 'C' or 'D' (case-insensitive).
        stratified (bool): When True use StratifiedKFold so every fold keeps the
            class proportions. The default False keeps the plain KFold split
            (KFold ignores the labels passed to ``split``).

    Returns:
        list[pd.DataFrame]: One frame per seed. Each frame is cumulative: it holds
        the per-fold scores of that seed and of all seeds before it, so the last
        element contains every result.

    Raises:
        ValueError: If `choice` is not a known task key.
    """
    choice = choice.upper()

    df, y_cols = make_dataset(choice)
    if not y_cols:
        # make_dataset signals an unknown key with an empty label list; stop here
        # instead of failing later on a missing column.
        raise ValueError(f"Invalid choice {choice!r}. Must be 'A', 'B', 'C', or 'D'.")

    df = binarize_matrices(df)
    df_loop = remove_loop(df)
    f_df = extract_features(df_loop)

    X = f_df.drop(columns=['subject','autism','adhd','healthy'])
    print(X.isna().sum())
    print("type of matrices", type(X))
    y = f_df[y_cols].values
    print("type of label columns", type(y))

    y_argmax = y_convert(y)
    print("type of label columns y_argmax", type(y_argmax))

    # Get the number of classes
    num_classes = y.shape[1]
    print("No. of classes", num_classes)

    # Column order of the result tables; the confusion-matrix cells only exist
    # for the two-class tasks.
    result_columns = ['seed','fold','acc','conf_mat', 'sens (recall)','f1','prec']
    if num_classes == 2:
        result_columns += ['tn', 'tp', 'fn', 'fp']

    # Manually set random seeds
    random_seeds = np.array([93, 98, 40, 19, 52, 74, 31, 66, 56, 22])
    print("Random seeds for outer loops:", random_seeds)

    splitter_cls = StratifiedKFold if stratified else KFold

    # Per-fold score dicts are collected in a list and turned into a DataFrame once
    # per seed, instead of concatenating onto a DataFrame inside the fold loop.
    fold_records = []
    all_results = []

    for outer_loop, random_seed in enumerate(random_seeds):
        print(f"Outer loop iteration: {outer_loop + 1}, Random seed: {random_seed}")

        kf = splitter_cls(n_splits=n_splits, shuffle=True, random_state=random_seed)

        for i, (train_index, val_index) in enumerate(kf.split(X, y_argmax)):

            print("FOLD : ", i+1)

            y_train, y_val = y_argmax[train_index], y_argmax[val_index]

            # Fit the scaler on the training fold only so no validation
            # statistics leak into training.
            mmx = MinMaxScaler()
            X_train = mmx.fit_transform(X.iloc[train_index])
            X_val = mmx.transform(X.iloc[val_index])

            compiled_m = get_model(num_classes, choice)
            trained_m = compile_fit(compiled_m, X_train, np.array(y_train))

            scores = eval_model(num_classes, trained_m, X_val, y_val, y_cols)
            scores['seed']=random_seed
            scores['fold']=i+1
            print("Scores", scores)
            fold_records.append(scores)

        result_df = pd.DataFrame(fold_records, columns=result_columns)
        all_results.append(result_df)
        print(f"Outer loop {outer_loop + 1} result_df:")
        print(result_df)
        print("\n")

    return all_results

In [19]:
# Tasks to run; make_dataset() maps A/B/C to the three pairwise problems and D to the
# three-class problem.
choices = ['A', 'B', 'C', 'D']
# choices = ['A', 'B', 'C']

# Maps each choice to whatever driver() returned for it.
result_dfs = {}

# Run the full cross-validation once per task and save its results.
for choice in choices:
    # NOTE: despite the name, result_df is the list driver() returns
    # (one DataFrame per outer seed loop).
    result_df = driver(choice)

    # Keep the full list in memory, keyed by task.
    result_dfs[choice] = result_df

    # driver() appends its running results table after every seed, so the last
    # element already contains the rows of all seeds. Only that one is written out;
    # concatenating every element would duplicate the earlier seeds' rows.
    combined_df = result_df[-1]

    # Save the combined results to a CSV file on the mounted Drive.
    filename = f"choice_{choice}_results.csv"
    combined_df.to_csv(f'/content/drive/MyDrive/Colab Notebooks/PSI_20thresh_Globalfeatures/minmax/SVM/{filename}', index=False)
    print(f"Saved combined results for choice {choice} to {filename}")

        subject                                         psi_matrix  adhd  \
0      subject1  [[1.0, 0.8016814987579317, 0.839641852247262, ...     0   
1      subject2  [[1.0, 0.9101210641534417, 0.576123749895304, ...     0   
2      subject3  [[1.0, 0.7530150385517623, 0.5067414702685676,...     0   
3      subject4  [[1.0, 0.817812140965262, 0.520061068095793, 0...     0   
4      subject5  [[1.0, 0.7477157295145737, 0.8259250654415865,...     0   
..          ...                                                ...   ...   
105  subject106  [[1.0, 0.7190486619620364, 0.3595353948075287,...     1   
106  subject107  [[1.0, 0.892350228080446, 0.6115053964108019, ...     1   
107  subject108  [[1.0, 0.9383744300499544, 0.7480842785117325,...     1   
108  subject109  [[1.0, 0.9300675513188588, 0.9240553408215981,...     1   
109  subject110  [[1.0, 0.7946738858174632, 0.5573150422007396,...     1   

     autism  healthy  
0         1        0  
1         1        0  
2         1       

  result_df = pd.concat([result_df,scores], ignore_index=True)


y_pred [0 1 1 1 1 0 0 1 1 1 0 1 1 0 0 1 0 0 1 0 0 1]
y_val [1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
Type of y_pred using model.predict: <class 'numpy.ndarray'>
shape of the y_pred using model.predict: (22,)
Scores {'acc': 0.5909090909090909, 'conf_mat': array([[7, 6],
       [3, 6]]), 'sens (recall)': 0.6666666666666666, 'f1': 0.5714285714285715, 'prec': 0.5, 'tn': 7, 'tp': 6, 'fn': 3, 'fp': 6, 'seed': 98, 'fold': 2}
FOLD :  3
y_pred [0 0 1 1 0 1 1 1 0 1 1 1 0 1 1 1 0 1 1 0 0 1]
y_val [1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0]
Type of y_pred using model.predict: <class 'numpy.ndarray'>
shape of the y_pred using model.predict: (22,)
Scores {'acc': 0.5, 'conf_mat': array([[3, 6],
       [5, 8]]), 'sens (recall)': 0.6153846153846154, 'f1': 0.5925925925925927, 'prec': 0.5714285714285714, 'tn': 3, 'tp': 8, 'fn': 5, 'fp': 6, 'seed': 98, 'fold': 3}
FOLD :  4
y_pred [1 1 0 1 1 0 1 0 1 1 1 1 1 1 1 0 0 1 1 0 1 1]
y_val [1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0]
Type of y_pred using mo

  result_df = pd.concat([result_df,scores], ignore_index=True)
  _warn_prf(average, modifier, msg_start, len(result))


y_pred [1 0 0 1 0 0 0 0 0 1 1 0 0 1 1 0 1 1 1 0 1 1]
y_val [0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1]
Type of y_pred using model.predict: <class 'numpy.ndarray'>
shape of the y_pred using model.predict: (22,)
Scores {'acc': 0.6818181818181818, 'conf_mat': array([[8, 4],
       [3, 7]]), 'sens (recall)': 0.7, 'f1': 0.6666666666666666, 'prec': 0.6363636363636364, 'tn': 8, 'tp': 7, 'fn': 3, 'fp': 4, 'seed': 98, 'fold': 4}
FOLD :  5
y_pred [0 0 0 1 1 0 1 1 0 0 1 0 0 0 0 0 1 1 0 0 1 1]
y_val [0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
Type of y_pred using model.predict: <class 'numpy.ndarray'>
shape of the y_pred using model.predict: (22,)
Scores {'acc': 0.4090909090909091, 'conf_mat': array([[4, 4],
       [9, 5]]), 'sens (recall)': 0.35714285714285715, 'f1': 0.43478260869565216, 'prec': 0.5555555555555556, 'tn': 4, 'tp': 5, 'fn': 9, 'fp': 4, 'seed': 98, 'fold': 5}
Outer loop 2 result_df:
  seed fold       acc           conf_mat  sens (recall)        f1      prec  \
0   93    1  0.590

  _warn_prf(average, modifier, msg_start, len(result))


Scores {'acc': 0.5, 'conf_mat': array([[8, 2],
       [9, 3]]), 'sens (recall)': 0.25, 'f1': 0.35294117647058826, 'prec': 0.6, 'tn': 8, 'tp': 3, 'fn': 9, 'fp': 2, 'seed': 19, 'fold': 2}
FOLD :  3
y_pred [1 0 1 1 1 1 1 0 0 1 0 0 0 1 0 0 0 1 0 1 1 1]
y_val [0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1]
Type of y_pred using model.predict: <class 'numpy.ndarray'>
shape of the y_pred using model.predict: (22,)
Scores {'acc': 0.45454545454545453, 'conf_mat': array([[5, 7],
       [5, 5]]), 'sens (recall)': 0.5, 'f1': 0.45454545454545453, 'prec': 0.4166666666666667, 'tn': 5, 'tp': 5, 'fn': 5, 'fp': 7, 'seed': 19, 'fold': 3}
FOLD :  4
y_pred [1 0 1 0 1 0 0 0 0 0 1 0 1 1 1 1 1 1 0 1 1 1]
y_val [0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1]
Type of y_pred using model.predict: <class 'numpy.ndarray'>
shape of the y_pred using model.predict: (22,)
Scores {'acc': 0.7272727272727273, 'conf_mat': array([[7, 4],
       [2, 9]]), 'sens (recall)': 0.8181818181818182, 'f1': 0.7500000000000001, 'prec': 0.69

  _warn_prf(average, modifier, msg_start, len(result))


   seed fold       acc            conf_mat  sens (recall)        f1      prec  \
0    93    1  0.590909    [[9, 1], [8, 4]]       0.333333  0.470588  0.800000   
1    93    2  0.545455    [[7, 4], [6, 5]]       0.454545  0.500000  0.555556   
2    93    3  0.363636   [[8, 0], [14, 0]]       0.000000  0.000000  0.000000   
3    93    4  0.272727   [[5, 10], [6, 1]]       0.142857  0.111111  0.090909   
4    93    5  0.500000    [[6, 5], [6, 5]]       0.454545  0.476190  0.500000   
5    98    1  0.409091   [[0, 13], [0, 9]]       1.000000  0.580645  0.409091   
6    98    2  0.454545   [[9, 0], [12, 1]]       0.076923  0.142857  1.000000   
7    98    3  0.409091    [[4, 9], [4, 5]]       0.555556  0.434783  0.357143   
8    98    4  0.681818    [[8, 4], [3, 7]]       0.700000  0.666667  0.636364   
9    98    5  0.409091    [[4, 4], [9, 5]]       0.357143  0.434783  0.555556   
10   40    1  0.363636    [[4, 8], [6, 4]]       0.400000  0.363636  0.333333   
11   40    2  0.500000    [[

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Scores {'acc': 0.45454545454545453, 'conf_mat': array([[6, 4],
       [8, 4]]), 'sens (recall)': 0.3333333333333333, 'f1': 0.4, 'prec': 0.5, 'tn': 6, 'tp': 4, 'fn': 8, 'fp': 4, 'seed': 66, 'fold': 5}
Outer loop 8 result_df:
   seed fold       acc            conf_mat  sens (recall)        f1      prec  \
0    93    1  0.590909    [[9, 1], [8, 4]]       0.333333  0.470588  0.800000   
1    93    2  0.545455    [[7, 4], [6, 5]]       0.454545  0.500000  0.555556   
2    93    3  0.363636   [[8, 0], [14, 0]]       0.000000  0.000000  0.000000   
3    93    4  0.272727   [[5, 10], [6, 1]]       0.142857  0.111111  0.090909   
4    93    5  0.500000    [[6, 5], [6, 5]]       0.454545  0.476190  0.500000   
5    98    1  0.409091   [[0, 13], [0, 9]]       1.000000  0.580645  0.409091   
6    98    2  0.454545   [[9, 0], [12, 1]]       0.076923  0.142857  1.000000   
7    98    3  0.409091    [[4, 9], [4, 5]]       0.555556  0.434783  0.357143   
8    98    4  0.681818    [[8, 4], [3, 7]]     

  result_df = pd.concat([result_df,scores], ignore_index=True)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Scores {'acc': 0.45454545454545453, 'conf_mat': array([[4, 9],
       [3, 6]]), 'sens (recall)': 0.6666666666666666, 'f1': 0.5, 'prec': 0.4, 'tn': 4, 'tp': 6, 'fn': 3, 'fp': 9, 'seed': 98, 'fold': 3}
FOLD :  4
y_pred [1 0 1 1 0 0 0 0 1 0 0 1 0 1 1 0 1 0 0 0 1 1]
y_val [0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1]
Type of y_pred using model.predict: <class 'numpy.ndarray'>
shape of the y_pred using model.predict: (22,)
Scores {'acc': 0.5454545454545454, 'conf_mat': array([[7, 5],
       [5, 5]]), 'sens (recall)': 0.5, 'f1': 0.5, 'prec': 0.5, 'tn': 7, 'tp': 5, 'fn': 5, 'fp': 5, 'seed': 98, 'fold': 4}
FOLD :  5
y_pred [0 1 0 1 0 0 1 1 0 0 1 1 0 0 0 1 1 0 0 0 1 1]
y_val [0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
Type of y_pred using model.predict: <class 'numpy.ndarray'>
shape of the y_pred using model.predict: (22,)
Scores {'acc': 0.45454545454545453, 'conf_mat': array([[4, 4],
       [8, 6]]), 'sens (recall)': 0.42857142857142855, 'f1': 0.5, 'prec': 0.6, 'tn': 4, 'tp': 6, 'fn': 8, 'fp

  result_df = pd.concat([result_df,scores], ignore_index=True)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


classification report in dataframe - match accuracy with model.evaluate 
              precision    recall  f1-score    support
adhd           0.444444  0.444444  0.444444   9.000000
autism         0.625000  0.384615  0.476190  13.000000
healthy        0.312500  0.454545  0.370370  11.000000
accuracy       0.424242  0.424242  0.424242   0.424242
macro avg      0.460648  0.427868  0.430335  33.000000
weighted avg   0.471591  0.424242  0.432259  33.000000
Average Precision (first 3 classes): 0.461
Average Recall (first 3 classes): 0.428
Average F1-Score (first 3 classes): 0.430
Scores {'acc': 0.42424242424242425, 'conf_mat': array([[4, 1, 4],
       [1, 5, 7],
       [4, 2, 5]]), 'sens (recall)': 0.4278684278684279, 'f1': 0.43033509700176364, 'prec': 0.46064814814814814, 'seed': 93, 'fold': 5}
Outer loop 1 result_df:
  seed fold       acc                           conf_mat  sens (recall)  \
0   93    1  0.454545  [[4, 4, 2], [3, 2, 6], [1, 2, 9]]       0.443939   
1   93    2  0.212121  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


classification report               precision    recall  f1-score   support

        adhd      0.111     0.100     0.105        10
      autism      0.417     0.417     0.417        12
     healthy      0.250     0.273     0.261        11

    accuracy                          0.273        33
   macro avg      0.259     0.263     0.261        33
weighted avg      0.269     0.273     0.270        33

classification report in dataframe - match accuracy with model.evaluate 
              precision    recall  f1-score    support
adhd           0.111111  0.100000  0.105263  10.000000
autism         0.416667  0.416667  0.416667  12.000000
healthy        0.250000  0.272727  0.260870  11.000000
accuracy       0.272727  0.272727  0.272727   0.272727
macro avg      0.259259  0.263131  0.260933  33.000000
weighted avg   0.268519  0.272727  0.270370  33.000000
Average Precision (first 3 classes): 0.259
Average Recall (first 3 classes): 0.263
Average F1-Score (first 3 classes): 0.261
Scores {'acc':

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


classification report in dataframe - match accuracy with model.evaluate 
              precision    recall  f1-score    support
adhd           0.000000  0.000000  0.000000  15.000000
autism         0.161290  0.833333  0.270270   6.000000
healthy        0.500000  0.083333  0.142857  12.000000
accuracy       0.181818  0.181818  0.181818   0.181818
macro avg      0.220430  0.305556  0.137709  33.000000
weighted avg   0.211144  0.181818  0.101088  33.000000
Average Precision (first 3 classes): 0.220
Average Recall (first 3 classes): 0.306
Average F1-Score (first 3 classes): 0.138
Scores {'acc': 0.18181818181818182, 'conf_mat': array([[ 0, 15,  0],
       [ 0,  5,  1],
       [ 0, 11,  1]]), 'sens (recall)': 0.3055555555555556, 'f1': 0.1377091377091377, 'prec': 0.22043010752688172, 'seed': 40, 'fold': 4}
FOLD :  5
y_pred [2 2 0 2 2 0 0 2 0 0 0 2 2 2 2 2 2 2 0 0 2 0 2 2 0 2 2 2 2 2 2 0 2]
y_val [1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2]
Type of y_pred using model.pre

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


classification report               precision    recall  f1-score   support

        adhd      0.269     0.875     0.412         8
      autism      0.571     0.364     0.444        11
     healthy      0.000     0.000     0.000        14

    accuracy                          0.333        33
   macro avg      0.280     0.413     0.285        33
weighted avg      0.256     0.333     0.248        33

classification report in dataframe - match accuracy with model.evaluate 
              precision    recall  f1-score    support
adhd           0.269231  0.875000  0.411765   8.000000
autism         0.571429  0.363636  0.444444  11.000000
healthy        0.000000  0.000000  0.000000  14.000000
accuracy       0.333333  0.333333  0.333333   0.333333
macro avg      0.280220  0.412879  0.285403  33.000000
weighted avg   0.255744  0.333333  0.247970  33.000000
Average Precision (first 3 classes): 0.280
Average Recall (first 3 classes): 0.413
Average F1-Score (first 3 classes): 0.285
Scores {'acc':

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


classification report               precision    recall  f1-score   support

        adhd      0.625     0.500     0.556        10
      autism      0.400     0.143     0.211        14
     healthy      0.300     0.667     0.414         9

    accuracy                          0.394        33
   macro avg      0.442     0.437     0.393        33
weighted avg      0.441     0.394     0.371        33

classification report in dataframe - match accuracy with model.evaluate 
              precision    recall  f1-score    support
adhd           0.625000  0.500000  0.555556  10.000000
autism         0.400000  0.142857  0.210526  14.000000
healthy        0.300000  0.666667  0.413793   9.000000
accuracy       0.393939  0.393939  0.393939   0.393939
macro avg      0.441667  0.436508  0.393292  33.000000
weighted avg   0.440909  0.393939  0.370517  33.000000
Average Precision (first 3 classes): 0.442
Average Recall (first 3 classes): 0.437
Average F1-Score (first 3 classes): 0.393
Scores {'acc':

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


   seed fold       acc                             conf_mat  sens (recall)  \
0    93    1  0.454545    [[4, 4, 2], [3, 2, 6], [1, 2, 9]]       0.443939   
1    93    2  0.212121    [[2, 9, 4], [2, 2, 5], [2, 4, 3]]       0.229630   
2    93    3  0.484848    [[3, 1, 6], [1, 7, 2], [3, 4, 6]]       0.487179   
3    93    4  0.393939    [[6, 1, 4], [8, 2, 2], [4, 1, 5]]       0.404040   
4    93    5  0.424242    [[4, 1, 4], [1, 5, 7], [4, 2, 5]]       0.427868   
5    98    1  0.242424   [[0, 12, 5], [0, 4, 5], [0, 3, 4]]       0.338624   
6    98    2  0.303030    [[3, 1, 4], [5, 4, 3], [5, 5, 3]]       0.313034   
7    98    3  0.363636    [[4, 1, 4], [6, 0, 8], [1, 1, 8]]       0.414815   
8    98    4  0.303030   [[2, 5, 4], [0, 6, 2], [2, 10, 2]]       0.358225   
9    98    5  0.272727    [[1, 5, 4], [2, 5, 5], [6, 2, 3]]       0.263131   
10   40    1  0.393939    [[4, 0, 3], [5, 3, 4], [6, 2, 6]]       0.416667   
11   40    2  0.272727   [[3, 2, 6], [0, 0, 13], [2, 1, 6]]     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


classification report in dataframe - match accuracy with model.evaluate 
              precision    recall  f1-score    support
adhd           0.285714  0.571429  0.380952   7.000000
autism         0.210526  0.363636  0.266667  11.000000
healthy        0.000000  0.000000  0.000000  15.000000
accuracy       0.242424  0.242424  0.242424   0.242424
macro avg      0.165414  0.311688  0.215873  33.000000
weighted avg   0.130781  0.242424  0.169697  33.000000
Average Precision (first 3 classes): 0.165
Average Recall (first 3 classes): 0.312
Average F1-Score (first 3 classes): 0.216
Scores {'acc': 0.24242424242424243, 'conf_mat': array([[ 4,  3,  0],
       [ 7,  4,  0],
       [ 3, 12,  0]]), 'sens (recall)': 0.3116883116883117, 'f1': 0.21587301587301586, 'prec': 0.16541353383458646, 'seed': 56, 'fold': 4}
FOLD :  5
y_pred [2 2 2 0 2 0 1 2 0 2 2 2 2 2 0 2 0 0 1 0 0 2 2 1 2 2 2 0 2 2 2 1 2]
y_val [1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2]
Type of y_pred using model.pr

In [None]:
# Disabled driver cell: every statement below is commented out, so running this
# cell defines nothing. The export cells further down read `result_dfs[...]`,
# so this loop has to be re-enabled (and run) before any of them is uncommented.
# NOTE(review): with `choices = ['A']` only `result_dfs['A']` would be filled;
# the cells that read keys 'B', 'C' and 'D' need the full list restored first.
# NOTE(review): `driver` is not defined in this part of the notebook - confirm
# it is defined in an earlier cell before re-enabling.

# # Define a list of choices
# # choices = ['A', 'B', 'C', 'D']
# choices = ['A']

# # Create an empty dictionary to store the result dataframes
# result_dfs = {}

# # Loop through each choice
# for choice in choices:
#     # Call the driver() function with the current choice
#     result_df = driver(choice)

#     # Store the result dataframe in the dictionary with the choice as the key
#     result_dfs[choice] = result_df

        subject                                         psi_matrix  adhd  \
0      subject1  [[1.0, 0.8016814987579317, 0.839641852247262, ...     0   
1      subject2  [[1.0, 0.9101210641534417, 0.576123749895304, ...     0   
2      subject3  [[1.0, 0.7530150385517623, 0.5067414702685676,...     0   
3      subject4  [[1.0, 0.817812140965262, 0.520061068095793, 0...     0   
4      subject5  [[1.0, 0.7477157295145737, 0.8259250654415865,...     0   
..          ...                                                ...   ...   
105  subject106  [[1.0, 0.7190486619620364, 0.3595353948075287,...     1   
106  subject107  [[1.0, 0.892350228080446, 0.6115053964108019, ...     1   
107  subject108  [[1.0, 0.9383744300499544, 0.7480842785117325,...     1   
108  subject109  [[1.0, 0.9300675513188588, 0.9240553408215981,...     1   
109  subject110  [[1.0, 0.7946738858174632, 0.5573150422007396,...     1   

     autism  healthy  
0         1        0  
1         1        0  
2         1       

In [None]:
# Disabled export of the choice 'A' results (target file: adhd-autism.csv).
# The recorded output of this cell ends with
#   OSError: Cannot save file into a non-existent directory
# i.e. `to_csv` was called before the target folder existed. When re-enabling
# this cell, create the folder first:
# os.makedirs('results/PSI(20 thresh)_SVM-globalfeatures', exist_ok=True)
# NOTE(review): the path is relative to the current working directory, while
# Drive is mounted at /content/drive - confirm this is the intended location.
# print(result_dfs['A'])
# result_dfs['A'].to_csv('results/PSI(20 thresh)_SVM-globalfeatures/adhd-autism.csv', mode = 'w', index=False)

  fold       acc           conf_mat  sens (recall)        f1      prec tn  tp  \
0    1  0.454545   [[5, 7], [5, 5]]       0.500000  0.454545  0.416667  5   5   
1    2  0.545455  [[2, 5], [5, 10]]       0.666667  0.666667  0.666667  2  10   
2    3  0.636364   [[7, 2], [6, 7]]       0.538462  0.636364  0.777778  7   7   
3    4  0.545455   [[6, 9], [1, 6]]       0.857143  0.545455  0.400000  6   6   
4    5  0.454545   [[4, 8], [4, 6]]       0.600000  0.500000  0.428571  4   6   

  fn fp  
0  5  7  
1  5  5  
2  6  2  
3  1  9  
4  4  8  


OSError: Cannot save file into a non-existent directory: 'results/PSI(20 thresh)_SVM-globalfeatures'

In [None]:
# Disabled export of the choice 'B' results (target file: autism-healthy.csv).
# Prerequisites before re-enabling: `result_dfs` must contain key 'B', and the
# folder 'results/PSI(20 thresh)_SVM-globalfeatures' must already exist, e.g.
# os.makedirs('results/PSI(20 thresh)_SVM-globalfeatures', exist_ok=True)
# print(result_dfs['B'])
# result_dfs['B'].to_csv('results/PSI(20 thresh)_SVM-globalfeatures/autism-healthy.csv', mode = 'w', index=False)

In [None]:
# Disabled export of the choice 'C' results (target file: adhd-healthy.csv).
# Prerequisites before re-enabling: `result_dfs` must contain key 'C', and the
# folder 'results/PSI(20 thresh)_SVM-globalfeatures' must already exist, e.g.
# os.makedirs('results/PSI(20 thresh)_SVM-globalfeatures', exist_ok=True)
# print(result_dfs['C'])
# result_dfs['C'].to_csv('results/PSI(20 thresh)_SVM-globalfeatures/adhd-healthy.csv', mode = 'w', index=False)

In [None]:
# Disabled export of the choice 'D' results (target file:
# adhd-autism-healthy.csv).
# Prerequisites before re-enabling: `result_dfs` must contain key 'D', and the
# folder 'results/PSI(20 thresh)_SVM-globalfeatures' must already exist, e.g.
# os.makedirs('results/PSI(20 thresh)_SVM-globalfeatures', exist_ok=True)
# print(result_dfs['D'])
# result_dfs['D'].to_csv('results/PSI(20 thresh)_SVM-globalfeatures/adhd-autism-healthy.csv', mode = 'w', index=False)