In [33]:

import numpy as np
import pandas as pd
import json
import sklearn
import matplotlib.pyplot as plt
import tensorflow as tf

json file names and their threshold list:

results 1:[25, 50, 75, 90, 95, 99]

results 2: [10, 25, 40, 50, 60, 75, 90, 95]

results 3: [5, 15, 25, 35, 50, 70, 80, 90, 95]

cos = cosine similarity distance

l2 = L2/Euclidean distance

In [34]:
# Output settings for exporting results.
# NOTE(review): the file name says "cos_3" while INPUT_JSON below points at an
# "l2_1" results file, and this path lives under /home/users whereas the input
# paths live under /home/dsi - confirm both before enabling SAVE.
SAVE = False
OUTPUT_FILE = "/home/users/orrbavly/GNN_project/percentiles/outputs/cos_3_all_results.csv"

In [35]:
# Load data
# INPUT_JSON: percentile-results JSON file analysed by this notebook.
# data_type: selects the sample-name suffix scheme used when labels are built
# further down ('ovarian' -> _H / _OC, 'colon' -> _low / _high).
INPUT_JSON = "/home/dsi/orrbavly/GNN_project/embeddings/colon_percentiles/percentiles_results_l2_1_all.json"
data_type = 'colon'

In [36]:
def load_results(file_path):
    """Read a JSON results file and return its decoded content.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The object encoded in the file (whatever ``json.load`` produces).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON text is UTF-8 by specification; being explicit avoids depending on
    # the locale encoding of the machine the notebook runs on.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

In [37]:
# Decoded content of INPUT_JSON; the rest of the notebook iterates it as
# {sample_name: percentile_dict}.
all_results = load_results(INPUT_JSON)

# Prepare Data

In [38]:
from sklearn.model_selection import train_test_split
import numpy as np
import sklearn.preprocessing

def prepare_data(percentiles_data, labels_dict, vector_indices=None, average_vectors=False,
                 test_size=0.2, random_state=None, stratify=False):
    """Build a standardized train/test split from per-sample percentile vectors.

    Each sample becomes one feature row, either the element-wise mean of its
    vectors (``average_vectors=True``) or the concatenation of its (optionally
    selected) vectors, right-padded with zeros to a common length.

    Args:
        percentiles_data: Mapping ``sample_name -> {key: vector}``.
        labels_dict: Mapping ``sample_name -> label``; must contain every
            sample of ``percentiles_data``.
        vector_indices: Optional positions (in dict order) of the vectors to
            concatenate. Positions beyond the number of available vectors are
            skipped. Ignored when ``average_vectors`` is True.
        average_vectors: Average the vectors of a sample instead of
            concatenating them.
        test_size: Forwarded to ``train_test_split``.
        random_state: Seed forwarded to ``train_test_split``; ``None`` keeps
            the split random on every call.
        stratify: When True, preserve the class proportions in both splits.

    Returns:
        ``(X_train, X_test, y_train, y_test)``; the feature matrices are
        standardized with statistics fitted on the training part only.

    Raises:
        ValueError: If there are no samples, or a sample ends up with no
            vectors to concatenate.
        KeyError: If a sample has no entry in ``labels_dict``.
    """
    if not percentiles_data:
        raise ValueError("percentiles_data is empty - there is nothing to split")

    sample_names = list(percentiles_data)

    if average_vectors:
        rows = [
            np.mean(np.array(list(percentiles_data[name].values())), axis=0)
            for name in sample_names
        ]
    else:
        flattened_rows = []
        for name in sample_names:
            vectors = list(percentiles_data[name].values())
            if vector_indices is not None:
                vectors = [vectors[i] for i in vector_indices if i < len(vectors)]
            if not vectors:
                raise ValueError(f"No percentile vectors available for sample {name!r}")
            flattened_rows.append(np.concatenate(vectors))

        # Pad to the longest row that is actually used. Measuring the
        # unfiltered vectors would append all-zero columns whenever
        # vector_indices selects a subset.
        max_length = max(len(row) for row in flattened_rows)
        rows = [
            np.pad(row, (0, max_length - len(row)), 'constant')
            for row in flattened_rows
        ]

    data = np.array(rows)
    labels = np.array([labels_dict[name] for name in sample_names])

    X_train, X_test, y_train, y_test = train_test_split(
        data,
        labels,
        test_size=test_size,
        random_state=random_state,
        stratify=labels if stratify else None,
    )

    # Fit the scaler on the training rows only so that no test-set statistics
    # leak into training.
    scaler = sklearn.preprocessing.StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test


In [39]:
def filter_samples(data, criteria):
    """Return a copy of ``data`` without the samples whose name contains ``criteria``.

    The comparison is a case-insensitive substring match. Previously only the
    sample name was lower-cased, so a criteria containing an upper-case letter
    could never match and nothing was removed.

    Args:
        data: Mapping ``sample_name -> value``.
        criteria: Substring identifying the samples to exclude.

    Returns:
        A new dict holding the entries whose key does not contain ``criteria``.
    """
    needle = criteria.lower()
    return {key: value for key, value in data.items() if needle not in key.lower()}


In [40]:
# Run this cell if you want to exclude sample groups from df.
# Pass `criteria` in lower case: it is matched as a substring of the lower-cased sample name.
# all_results = filter_samples(all_results, criteria="nh")


In [41]:
# Build the binary label of every sample from its name suffix.
# For each supported dataset: (suffix of class 0, suffix of class 1).
LABEL_SUFFIXES = {
    'ovarian': ("_H", "_OC"),
    'colon': ("_low", "_high"),
}

# Fail loudly on an unsupported data_type instead of silently producing an
# empty labels_dict.
if data_type not in LABEL_SUFFIXES:
    raise ValueError(
        f"Unknown data_type {data_type!r}; expected one of {sorted(LABEL_SUFFIXES)}"
    )
negative_suffix, positive_suffix = LABEL_SUFFIXES[data_type]

labels_dict = {}
for sample_name in all_results:
    if sample_name.endswith(negative_suffix):
        labels_dict[sample_name] = 0
    elif sample_name.endswith(positive_suffix):
        labels_dict[sample_name] = 1
    else:
        # Name the offending sample so a mislabeled entry can be found quickly.
        raise ValueError(f"Error - invalid sample type: {sample_name!r}")

# ML algorithms

In [42]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

## KNN

In [43]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

def train_and_evaluate_knn(X_train, X_test, y_train, y_test, neighbors=5):
    """Fit a k-nearest-neighbours classifier and report its test-set metrics.

    Args:
        X_train, X_test: Feature matrices used for fitting / evaluation.
        y_train, y_test: Labels matching the feature matrices.
        neighbors: Number of neighbours that vote on each prediction.

    Returns:
        ``(fitted_model, summary)`` where ``summary`` is a dict with the keys
        ``'model'``, ``'accuracy'`` and ``'classification_report'``.
    """
    knn = KNeighborsClassifier(n_neighbors=neighbors)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    # zero_division=0 reports the same numbers as the default (undefined
    # scores are shown as 0) but does not emit an UndefinedMetricWarning when
    # a class is never predicted.
    class_report = classification_report(y_test, y_pred, zero_division=0)

    print("KNN Results")
    print(f"Accuracy: {accuracy}")
    print("Confusion Matrix:")
    print(conf_matrix)
    print("Classification Report:")
    print(class_report)

    return knn, {'model': 'KNN', 'accuracy': accuracy, 'classification_report': class_report}


## Logistic Regression:

In [44]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

def train_and_evaluate_logistic_regression(X_train, X_test, y_train, y_test):
    """Fit a logistic-regression classifier and report its test-set metrics.

    Args:
        X_train, X_test: Feature matrices used for fitting / evaluation.
        y_train, y_test: Labels matching the feature matrices.

    Returns:
        ``(fitted_model, summary)`` where ``summary`` is a dict with the keys
        ``'model'``, ``'accuracy'`` and ``'classification_report'``.
    """
    logistic_regression = LogisticRegression()
    logistic_regression.fit(X_train, y_train)
    y_pred = logistic_regression.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    # zero_division=0 reports the same numbers as the default (undefined
    # scores are shown as 0) but does not emit an UndefinedMetricWarning when
    # a class is never predicted.
    class_report = classification_report(y_test, y_pred, zero_division=0)

    print("Logistic Regression Results")
    print(f"Accuracy: {accuracy}")
    print("Confusion Matrix:")
    print(conf_matrix)
    print("Classification Report:")
    print(class_report)

    return logistic_regression, {'model': 'Logistic Regression', 'accuracy': accuracy, 'classification_report': class_report}


## SVM

In [45]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

def train_and_evaluate_svm(X_train, X_test, y_train, y_test):
    """Fit a support-vector classifier (default settings) and report its test-set metrics.

    Args:
        X_train, X_test: Feature matrices used for fitting / evaluation.
        y_train, y_test: Labels matching the feature matrices.

    Returns:
        ``(fitted_model, summary)`` where ``summary`` is a dict with the keys
        ``'model'``, ``'accuracy'`` and ``'classification_report'``.
    """
    svm = SVC()
    svm.fit(X_train, y_train)
    y_pred = svm.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    # zero_division=0 reports the same numbers as the default (undefined
    # scores are shown as 0) but does not emit an UndefinedMetricWarning when
    # a class is never predicted.
    class_report = classification_report(y_test, y_pred, zero_division=0)

    print("SVM Results")
    print(f"Accuracy: {accuracy}")
    print("Confusion Matrix:")
    print(conf_matrix)
    print("Classification Report:")
    print(class_report)

    return svm, {'model': 'SVM', 'accuracy': accuracy, 'classification_report': class_report}


## Decision Tree

In [46]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

def train_and_evaluate_decision_tree(X_train, X_test, y_train, y_test, random_state=None):
    """Fit a decision-tree classifier and report its test-set metrics.

    Args:
        X_train, X_test: Feature matrices used for fitting / evaluation.
        y_train, y_test: Labels matching the feature matrices.
        random_state: Optional seed for the tree; ``None`` (default) keeps the
            previous unseeded behaviour.

    Returns:
        ``(fitted_model, summary)`` where ``summary`` is a dict with the keys
        ``'model'``, ``'accuracy'`` and ``'classification_report'``.
    """
    decision_tree = DecisionTreeClassifier(random_state=random_state)
    decision_tree.fit(X_train, y_train)
    y_pred = decision_tree.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    # zero_division=0 reports the same numbers as the default (undefined
    # scores are shown as 0) but does not emit an UndefinedMetricWarning when
    # a class is never predicted.
    class_report = classification_report(y_test, y_pred, zero_division=0)

    print("Decision Tree Results")
    print(f"Accuracy: {accuracy}")
    print("Confusion Matrix:")
    print(conf_matrix)
    print("Classification Report:")
    print(class_report)

    return decision_tree, {'model': 'Decision Tree', 'accuracy': accuracy, 'classification_report': class_report}


## Random Forest

In [47]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

def train_and_evaluate_random_forest(X_train, X_test, y_train, y_test, random_state=None):
    """Fit a class-balanced random forest and report its test-set metrics.

    Args:
        X_train, X_test: Feature matrices used for fitting / evaluation.
        y_train, y_test: Labels matching the feature matrices.
        random_state: Optional seed for the forest; ``None`` (default) keeps
            the previous unseeded behaviour.

    Returns:
        ``(fitted_model, summary)`` where ``summary`` is a dict with the keys
        ``'model'``, ``'accuracy'`` and ``'classification_report'``.
    """
    random_forest = RandomForestClassifier(class_weight='balanced', random_state=random_state)
    random_forest.fit(X_train, y_train)
    y_pred = random_forest.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    # zero_division=0 reports the same numbers as the default (undefined
    # scores are shown as 0) but does not emit an UndefinedMetricWarning when
    # a class is never predicted.
    class_report = classification_report(y_test, y_pred, zero_division=0)

    print("Random Forest Results")
    print(f"Accuracy: {accuracy}")
    print("Confusion Matrix:")
    print(conf_matrix)
    print("Classification Report:")
    print(class_report)

    return random_forest, {'model': 'Random Forest', 'accuracy': accuracy, 'classification_report': class_report}


## MLP

In [48]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

def train_and_evaluate_mlp(X_train, X_test, y_train, y_test, random_state=None):
    """Fit a one-hidden-layer scikit-learn MLP and report its test-set metrics.

    Args:
        X_train, X_test: Feature matrices used for fitting / evaluation.
        y_train, y_test: Labels matching the feature matrices.
        random_state: Optional seed for weight initialisation and batching;
            ``None`` (default) keeps the previous unseeded behaviour.

    Returns:
        ``(fitted_model, summary)`` where ``summary`` is a dict with the keys
        ``'model'``, ``'accuracy'`` and ``'classification_report'``.
    """
    neural_network = MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, random_state=random_state)
    neural_network.fit(X_train, y_train)
    y_pred = neural_network.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    # zero_division=0 reports the same numbers as the default (undefined
    # scores are shown as 0) but does not emit an UndefinedMetricWarning when
    # a class is never predicted.
    class_report = classification_report(y_test, y_pred, zero_division=0)

    print("MLP Results")
    print(f"Accuracy: {accuracy}")
    print("Confusion Matrix:")
    print(conf_matrix)
    print("Classification Report:")
    print(class_report)

    return neural_network, {'model': 'MLP', 'accuracy': accuracy, 'classification_report': class_report}


## XGBoost

In [49]:
import xgboost as xgb
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

def train_and_evaluate_xgboost(X_train, X_test, y_train, y_test, random_state=None):
    """Fit an XGBoost classifier and report its test-set metrics.

    Args:
        X_train, X_test: Feature matrices used for fitting / evaluation.
        y_train, y_test: Labels matching the feature matrices.
        random_state: Optional seed forwarded to ``XGBClassifier``; ``None``
            (default) keeps the previous behaviour.

    Returns:
        ``(fitted_model, summary)`` where ``summary`` is a dict with the keys
        ``'model'``, ``'accuracy'`` and ``'classification_report'``.
    """
    model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=random_state)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    # zero_division=0 reports the same numbers as the default (undefined
    # scores are shown as 0) but does not emit an UndefinedMetricWarning when
    # a class is never predicted.
    class_report = classification_report(y_test, y_pred, zero_division=0)

    print("XGBoost Results:")
    print(f"Accuracy: {accuracy}")
    print("Confusion Matrix:")
    print(conf_matrix)
    print("Classification Report:")
    print(class_report)

    return model, {'model': 'XGBoost', 'accuracy': accuracy, 'classification_report': class_report}


## Neural Network (Keras)

In [50]:
from tensorflow import keras
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

def train_and_evaluate_nn(X_train, X_test, y_train, y_test):
    """Train a small dense Keras network (32-16-1) and report its test-set metrics.

    The network is trained for 50 epochs with batch size 16, holding out 20%
    of the training rows for validation. Predictions are thresholded at 0.5.

    Args:
        X_train, X_test: Feature matrices used for fitting / evaluation.
        y_train, y_test: Binary labels matching the feature matrices.

    Returns:
        ``(fitted_model, summary)`` where ``summary`` is a dict with the keys
        ``'model'``, ``'accuracy'`` and ``'classification_report'``.
    """
    model = keras.Sequential([
        # Declare the input with an explicit Input object; passing
        # input_shape to the first Dense layer triggers a Keras warning.
        keras.Input(shape=(X_train.shape[1],)),
        keras.layers.Dense(32, activation='relu'),
        keras.layers.Dense(16, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit(X_train, y_train, epochs=50, batch_size=16, validation_split=0.2, verbose=0)

    # predict() returns an (n_samples, 1) column; flatten it so the metrics
    # receive 1-D labels of the same shape as y_test.
    y_pred = (model.predict(X_test) > 0.5).astype("int32").ravel()

    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    # zero_division=0 reports the same numbers as the default (undefined
    # scores are shown as 0) but does not emit an UndefinedMetricWarning when
    # a class is never predicted.
    class_report = classification_report(y_test, y_pred, zero_division=0)

    print("Neural Network Results:")
    print(f"Accuracy: {accuracy}")
    print("Confusion Matrix:")
    print(conf_matrix)
    print("Classification Report:")
    print(class_report)

    return model, {'model': 'Neural Network', 'accuracy': accuracy, 'classification_report': class_report}


# Various K values

In [51]:
# Shared accumulators filled in place by run_evaluation (when save_results is
# True). Re-running an evaluation cell appends duplicate entries; re-run this
# cell first to start from empty lists.
results = []  # Holds all the different k values (and their models) results.
models = []  # One {'k_value', 'model_name', 'model'} entry per fitted model.


In [52]:
def run_evaluation(X_train, X_test, y_train, y_test, k_value, results, models, save_results=True):
    """Train and evaluate every classifier of the notebook on one train/test split.

    Each trainer prints its own metrics. When ``save_results`` is True, the
    summary dict and the fitted model are appended to the caller's ``results``
    and ``models`` lists, both tagged with ``k_value``.

    Args:
        X_train, X_test, y_train, y_test: The split handed to every trainer.
        k_value: Tag identifying the feature set being evaluated.
        results: List receiving one summary dict per model (mutated in place).
        models: List receiving one fitted-model record per model (mutated in place).
        save_results: Set to False to only print the metrics.
    """
    # Display name -> trainer, in the order they are run.
    trainers = {
        'KNN': train_and_evaluate_knn,
        'Logistic Regression': train_and_evaluate_logistic_regression,
        'SVM': train_and_evaluate_svm,
        'Decision Tree': train_and_evaluate_decision_tree,
        'Random Forest': train_and_evaluate_random_forest,
        'MLP': train_and_evaluate_mlp,
        'XGBoost': train_and_evaluate_xgboost,
        'Neural Network': train_and_evaluate_nn,
    }

    for display_name, trainer in trainers.items():
        fitted, summary = trainer(X_train, X_test, y_train, y_test)
        if not save_results:
            continue

        summary_entry = {'k_value': k_value}
        summary_entry.update(summary)
        results.append(summary_entry)

        model_entry = {'k_value': k_value, 'model_name': display_name, 'model': fitted}
        models.append(model_entry)


## all k

In [53]:
# Features: all percentile vectors of a sample concatenated (and zero-padded).
# NOTE: the split is not seeded here, so the outputs recorded below change
# from run to run.
X_train, X_test, y_train, y_test = prepare_data(all_results, labels_dict)

In [54]:
# Inspect the labels and the size of each side of the split.
print(y_train, len(y_train))
print(y_test, len(y_test))

[0 0 1 0 0 1 0 1 1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0
 0 1 0 0 1 0 0 0 1 0 1 1 1 0 1 0 1 0 0 1 0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1
 0 0 1 1 1 1 0 1 1 1 0 0 1 1 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0] 111
[1 1 1 1 1 1 0 0 1 0 0 0 0 1 0 0 0 1 0 1 1 1 0 1 0 1 0 0] 28


In [55]:
from collections import Counter

# Count occurrences of 0 and 1 over the whole dataset (before splitting)
value_counts = Counter(labels_dict.values())

# Print the number of 0s and 1s
print(f"Number of 0s: {value_counts[0]}")
print(f"Number of 1s: {value_counts[1]}")

Number of 0s: 90
Number of 1s: 49


In [56]:
# Evaluate every classifier on the concatenated features; summaries and fitted
# models are appended to `results` / `models` under the tag "all_k".
run_evaluation(X_train, X_test, y_train, y_test, k_value="all_k", results=results, models=models)


KNN Results
Accuracy: 0.5
Confusion Matrix:
[[12  2]
 [12  2]]
Classification Report:
              precision    recall  f1-score   support

           0       0.50      0.86      0.63        14
           1       0.50      0.14      0.22        14

    accuracy                           0.50        28
   macro avg       0.50      0.50      0.43        28
weighted avg       0.50      0.50      0.43        28

Logistic Regression Results
Accuracy: 0.4642857142857143
Confusion Matrix:
[[13  1]
 [14  0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.48      0.93      0.63        14
           1       0.00      0.00      0.00        14

    accuracy                           0.46        28
   macro avg       0.24      0.46      0.32        28
weighted avg       0.24      0.46      0.32        28

SVM Results
Accuracy: 0.5
Confusion Matrix:
[[14  0]
 [14  0]]
Classification Report:
              precision    recall  f1-score   support

  

Random Forest Results
Accuracy: 0.5
Confusion Matrix:
[[12  2]
 [12  2]]
Classification Report:
              precision    recall  f1-score   support

           0       0.50      0.86      0.63        14
           1       0.50      0.14      0.22        14

    accuracy                           0.50        28
   macro avg       0.50      0.50      0.43        28
weighted avg       0.50      0.50      0.43        28

MLP Results
Accuracy: 0.5714285714285714
Confusion Matrix:
[[12  2]
 [10  4]]
Classification Report:
              precision    recall  f1-score   support

           0       0.55      0.86      0.67        14
           1       0.67      0.29      0.40        14

    accuracy                           0.57        28
   macro avg       0.61      0.57      0.53        28
weighted avg       0.61      0.57      0.53        28

XGBoost Results:
Accuracy: 0.5
Confusion Matrix:
[[12  2]
 [12  2]]
Classification Report:
              precision    recall  f1-score   support

   

In [57]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, confusion_matrix

# SVM hyper-parameter search.
# Scaling is part of the pipeline, so it is re-fitted on the training folds of
# every CV split. X_train as returned by prepare_data is already standardized,
# so this step standardizes it a second time.
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svm', SVC()),
])

# Candidate values, addressed as <step name>__<parameter>.
param_grid = dict(
    svm__C=[0.1, 1, 10, 100],
    svm__kernel=['linear', 'poly', 'rbf', 'sigmoid'],
    svm__gamma=[0.001, 0.01, 0.1, 1],
)

# Exhaustive 5-fold search, ranked by accuracy.
grid_search = GridSearchCV(estimator=pipe, param_grid=param_grid, scoring='accuracy', cv=5)
grid_search.fit(X_train, y_train)

# Winning configuration and its mean cross-validated accuracy.
print("Best Parameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)

# Score the refitted best pipeline on the held-out test set.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

print("Test Accuracy:", accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))


Best Parameters: {'svm__C': 0.1, 'svm__gamma': 1, 'svm__kernel': 'sigmoid'}
Best Score: 0.6936758893280632
Test Accuracy: 0.5
[[14  0]
 [14  0]]


In [58]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, confusion_matrix

# Create a pipeline with scaling and Random Forest.
# The forest is seeded so that the selected parameters and scores printed
# below are reproducible for a given train/test split.
pipe = Pipeline([
    ('scaler', StandardScaler()),  # Feature scaling
    ('rf', RandomForestClassifier(random_state=42))  # Random Forest classifier
])

# Define the parameter grid (keys are <step name>__<parameter>)
param_grid = {
    'rf__n_estimators': [50, 100, 200],
    'rf__max_depth': [None, 10, 20, 30],
    'rf__min_samples_split': [2, 5, 10],
    'rf__min_samples_leaf': [1, 2, 4],
    'rf__max_features': ['sqrt', 'log2'],
    'rf__bootstrap': [True, False]
}

# Initialize GridSearchCV with the pipeline and parameter grid
# (5-fold CV, ranked by accuracy, using all available cores)
grid_search = GridSearchCV(pipe, param_grid, cv=5, scoring='accuracy', n_jobs=-1)

# Fit the model
grid_search.fit(X_train, y_train)

# Get the best parameters and best (mean cross-validated) score
print("Best Parameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)

# Predict on the held-out test set using the refitted best model
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

print("Test Accuracy:", accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))


Best Parameters: {'rf__bootstrap': True, 'rf__max_depth': None, 'rf__max_features': 'sqrt', 'rf__min_samples_leaf': 2, 'rf__min_samples_split': 5, 'rf__n_estimators': 50}
Best Score: 0.6486166007905138
Test Accuracy: 0.4642857142857143
[[11  3]
 [12  2]]


In [59]:
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, confusion_matrix
import warnings

# Create a pipeline with scaling and MLP.
# The MLP is seeded so that the selected parameters and scores printed below
# are reproducible for a given train/test split.
pipe = Pipeline([
    ('scaler', StandardScaler()),  # Feature scaling
    ('mlp', MLPClassifier(max_iter=1000, random_state=42))  # MLP classifier
])

# Define the parameter grid (keys are <step name>__<parameter>)
param_grid = {
    'mlp__hidden_layer_sizes': [(50,), (100,), (50, 50), (100, 50)],
    'mlp__activation': ['tanh', 'relu'],
    'mlp__solver': ['adam', 'sgd'],
    'mlp__alpha': [0.0001, 0.001, 0.01],
    'mlp__learning_rate': ['constant', 'adaptive']
}

# Initialize GridSearchCV with the pipeline and parameter grid
# (5-fold CV, ranked by accuracy, using all available cores)
grid_search = GridSearchCV(pipe, param_grid, cv=5, scoring='accuracy', n_jobs=-1)

# Fit the model. Warnings are silenced only for the duration of the search:
# a global warnings.filterwarnings("ignore") would keep hiding every warning
# in all cells executed afterwards.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid_search.fit(X_train, y_train)

# Get the best parameters and best (mean cross-validated) score
print("Best Parameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)

# Predict on the held-out test set using the refitted best model
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

print("Test Accuracy:", accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))




Best Parameters: {'mlp__activation': 'relu', 'mlp__alpha': 0.01, 'mlp__hidden_layer_sizes': (50,), 'mlp__learning_rate': 'constant', 'mlp__solver': 'sgd'}
Best Score: 0.7027667984189723
Test Accuracy: 0.4642857142857143
[[13  1]
 [14  0]]


## Average K

In [60]:
# Features: element-wise mean of each sample's percentile vectors.
# This call draws its own (unseeded) split, independent of the one used above.
X_train_avg, X_test_avg, y_train_avg, y_test_avg = prepare_data(all_results, labels_dict, average_vectors=True)

In [61]:
# Evaluate every classifier on the averaged features; summaries and fitted
# models are appended to `results` / `models` under the tag "avg_k".
run_evaluation(X_train_avg, X_test_avg, y_train_avg, y_test_avg, k_value="avg_k", results=results, models=models)


KNN Results
Accuracy: 0.5714285714285714
Confusion Matrix:
[[15  2]
 [10  1]]
Classification Report:
              precision    recall  f1-score   support

           0       0.60      0.88      0.71        17
           1       0.33      0.09      0.14        11

    accuracy                           0.57        28
   macro avg       0.47      0.49      0.43        28
weighted avg       0.50      0.57      0.49        28

Logistic Regression Results
Accuracy: 0.6071428571428571
Confusion Matrix:
[[17  0]
 [11  0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.61      1.00      0.76        17
           1       0.00      0.00      0.00        11

    accuracy                           0.61        28
   macro avg       0.30      0.50      0.38        28
weighted avg       0.37      0.61      0.46        28

SVM Results
Accuracy: 0.6071428571428571
Confusion Matrix:
[[17  0]
 [11  0]]
Classification Report:
              precision    

# Centrality 

## pagerank

In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [19]:
# PageRank feature table. run_feature_selection reads its 'graph_name' column
# to derive labels and uses every other column as a feature.
feature_df = pd.read_csv("/home/dsi/orrbavly/GNN_project/outputs/centrality/pagerank_top10.csv")

In [20]:
def run_feature_selection(feature_df):
    """Label the feature table, split it 80/20 and evaluate every classifier on it.

    Side effect: a 'label' column is added to (or overwritten on) the
    ``feature_df`` passed in. Metrics are only printed; nothing is stored
    because ``save_results`` is False.

    Args:
        feature_df: DataFrame with a 'graph_name' column; all remaining
            columns are used as features.
    """
    # Label is 1 when the graph name contains 'OC', otherwise 0.
    feature_df['label'] = [1 if 'OC' in name else 0 for name in feature_df['graph_name']]

    # Target vector and feature matrix (everything except name and label).
    target = feature_df['label']
    predictors = feature_df.drop(columns=['graph_name', 'label'])

    # Fixed seed so the same rows land in the test set on every run.
    split = train_test_split(predictors, target, test_size=0.2, random_state=42)
    X_train, X_test, y_train, y_test = split

    run_evaluation(X_train, X_test, y_train, y_test, k_value="page_rank", results=[], models=[], save_results=False)


In [21]:
# Evaluate every classifier on the PageRank features (printed only).
# NOTE: in the recorded output below this run stops with a
# ResourceExhaustedError raised while fitting the Keras model.
run_feature_selection(feature_df)

KNN Results
Accuracy: 0.6071428571428571
Confusion Matrix:
[[14  3]
 [ 8  3]]
Classification Report:
              precision    recall  f1-score   support

           0       0.64      0.82      0.72        17
           1       0.50      0.27      0.35        11

    accuracy                           0.61        28
   macro avg       0.57      0.55      0.54        28
weighted avg       0.58      0.61      0.57        28

Logistic Regression Results
Accuracy: 0.6785714285714286
Confusion Matrix:
[[15  2]
 [ 7  4]]
Classification Report:
              precision    recall  f1-score   support

           0       0.68      0.88      0.77        17
           1       0.67      0.36      0.47        11

    accuracy                           0.68        28
   macro avg       0.67      0.62      0.62        28
weighted avg       0.68      0.68      0.65        28

SVM Results
Accuracy: 0.6071428571428571
Confusion Matrix:
[[17  0]
 [11  0]]
Classification Report:
              precision    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


MLP Results
Accuracy: 0.75
Confusion Matrix:
[[17  0]
 [ 7  4]]
Classification Report:
              precision    recall  f1-score   support

           0       0.71      1.00      0.83        17
           1       1.00      0.36      0.53        11

    accuracy                           0.75        28
   macro avg       0.85      0.68      0.68        28
weighted avg       0.82      0.75      0.71        28



2024-09-29 14:37:44.924467: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 16.00MiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-09-29 14:37:44.938951: W tensorflow/core/framework/op_kernel.cc:1839] OP_REQUIRES failed at xla_ops.cc:580 : RESOURCE_EXHAUSTED: Out of memory while trying to allocate 16779264 bytes.
2024-09-29 14:37:44.939017: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 16779264 bytes.
	 [[{{node StatefulPartitionedCall}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.



ResourceExhaustedError: Graph execution error:

Detected at node StatefulPartitionedCall defined at (most recent call last):
  File "/home/dsi/orrbavly/miniconda3/envs/dl/lib/python3.9/runpy.py", line 197, in _run_module_as_main

  File "/home/dsi/orrbavly/miniconda3/envs/dl/lib/python3.9/runpy.py", line 87, in _run_code

  File "/home/dsi/orrbavly/.local/lib/python3.9/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/home/dsi/orrbavly/.local/lib/python3.9/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/home/dsi/orrbavly/.local/lib/python3.9/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/home/dsi/orrbavly/.local/lib/python3.9/site-packages/tornado/platform/asyncio.py", line 205, in start

  File "/home/dsi/orrbavly/miniconda3/envs/dl/lib/python3.9/asyncio/base_events.py", line 601, in run_forever

  File "/home/dsi/orrbavly/miniconda3/envs/dl/lib/python3.9/asyncio/base_events.py", line 1905, in _run_once

  File "/home/dsi/orrbavly/miniconda3/envs/dl/lib/python3.9/asyncio/events.py", line 80, in _run

  File "/home/dsi/orrbavly/.local/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 545, in dispatch_queue

  File "/home/dsi/orrbavly/.local/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 534, in process_one

  File "/home/dsi/orrbavly/.local/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell

  File "/home/dsi/orrbavly/.local/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 362, in execute_request

  File "/home/dsi/orrbavly/.local/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 778, in execute_request

  File "/home/dsi/orrbavly/.local/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 449, in do_execute

  File "/home/dsi/orrbavly/.local/lib/python3.9/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/home/dsi/orrbavly/.local/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3048, in run_cell

  File "/home/dsi/orrbavly/.local/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3103, in _run_cell

  File "/home/dsi/orrbavly/.local/lib/python3.9/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner

  File "/home/dsi/orrbavly/.local/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3308, in run_cell_async

  File "/home/dsi/orrbavly/.local/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3490, in run_ast_nodes

  File "/home/dsi/orrbavly/.local/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3550, in run_code

  File "/tmp/ipykernel_3640228/2691040062.py", line 1, in <module>

  File "/tmp/ipykernel_3640228/960368646.py", line 12, in run_feature_selection

  File "/tmp/ipykernel_3640228/2767086632.py", line 14, in run_evaluation

  File "/tmp/ipykernel_3640228/2783800479.py", line 13, in train_and_evaluate_nn

  File "/home/dsi/orrbavly/miniconda3/envs/dl/lib/python3.9/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/home/dsi/orrbavly/miniconda3/envs/dl/lib/python3.9/site-packages/keras/src/backend/tensorflow/trainer.py", line 320, in fit

  File "/home/dsi/orrbavly/miniconda3/envs/dl/lib/python3.9/site-packages/keras/src/backend/tensorflow/trainer.py", line 121, in one_step_on_iterator

Out of memory while trying to allocate 16779264 bytes.
	 [[{{node StatefulPartitionedCall}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_one_step_on_iterator_5572]

# General EDA