# Gaussian Kernel Graph

<p align = "justify">In this approach, each spectrum is a node, and every node is connected to every other node, making the graph fully connected. To calculate the relationship weight between two nodes, we first calculate the Euclidean distance between the two spectra, treating each spectrum's absorbance values as a vector. We can do this because the intervals between intensity data points are identical across all spectra. We then pass the Euclidean distance $d_{ij}$ through the Gaussian kernel function, $w_{ij} = \exp\left(-d_{ij}^2 / (2\sigma^2)\right)$, to get a similarity measure between each pair of spectra. This similarity is then used as the relationship weight.</p>

## Importing and Preprocessing Data for Neo4j

Import relevant libraries and import raw data

In [59]:
import sys
sys.path.append('..')  # Adds the parent directory to the path so Python can find the `Cleaning_and_Evaluation` package
from Cleaning_and_Evaluation import *
import seaborn as sns
import pandas as pd
from scipy.signal import find_peaks
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform
import numpy as np
import os

Here we import the neo4j directory and password variables that have been set in our environment

In [60]:
# Platform identifier; later cells branch on it to choose the neo4j-admin / restart command.
os_name = os.name

# Read the Neo4j settings from the environment. `os.getenv` works the same on every
# platform and returns None (instead of raising KeyError) when a variable is missing,
# so the manual-override cell that follows can still supply the values.
password = os.getenv('NEO4J_Password')
neo4j_directory = os.getenv('NEO4J_Directory')

if password is None or neo4j_directory is None:
    print("NEO4J_Password and/or NEO4J_Directory are not set in the environment; "
          "set `password` and `neo4j_directory` manually in the next cell.")

Alternatively you can manually input the neo4j directory and password here

In [61]:
# password = "your_neo4j_password"
# neo4j_directory = "your_neo4j_dbms_directory"

In [62]:
# Load the raw spectrum table; the path is relative to the notebook's working directory.
df = pd.read_csv("../data/exosomes.raw_spectrum_400-1800.csv")

In [63]:
# Keyword arguments forwarded verbatim to `spectra_cleaning`.
# The boolean flags switch individual steps on/off; the remaining entries are
# presumably the tuning parameters of those steps — confirm against the helper's signature.
cleaning_params = {
    'despike': False,
    'baseline_correct': True,
    'smoothing': True,
    'scaling': False,
    'despike_ma': 10,
    'despike_threshold': 7,
    'lam': 10**9,
    'p': 0.05,
    'window_size': 35,
    'poly_order': 3
}
# NOTE(review): the return value is discarded, so this call only affects `df` if
# `spectra_cleaning` modifies its argument in place — TODO confirm; otherwise the
# result should be assigned.
spectra_cleaning(df, **cleaning_params)

# Display the frame as it stands after the cleaning call.
df

Unnamed: 0,SpecID,Seq,WaveNumber,Absorbance,SurID,Status
0,201210-1-00,293,400.22778,31.645788,201210-1,Normal
1,201210-1-00,294,400.91116,31.890799,201210-1,Normal
2,201210-1-00,295,401.59454,32.060592,201210-1,Normal
3,201210-1-00,296,402.27789,32.161890,201210-1,Normal
4,201210-1-00,297,402.96127,32.201418,201210-1,Normal
...,...,...,...,...,...,...
6239200,210526-3-09,2337,1797.03870,-17.773341,210526-3,Hyperglycemia
6239201,210526-3-09,2338,1797.72200,-16.954783,210526-3,Hyperglycemia
6239202,210526-3-09,2339,1798.40550,-16.058237,210526-3,Hyperglycemia
6239203,210526-3-09,2340,1799.08890,-15.079158,210526-3,Hyperglycemia


In [64]:
# Reshape the long table for the 'Absorbance' column. Later cells rely on the result
# being indexed by SpecID and carrying 'Status' and 'SurID' columns.
pivot_df = prepare_wavelength_df(df, 'Absorbance')

Calculate the Euclidean distance between each pair of spectra

In [65]:
from scipy.spatial.distance import pdist, squareform

# Drop the 'Status' and 'SurID' columns so only the remaining (feature) columns
# enter the distance computation
X = pivot_df.drop(columns=['Status', 'SurID'])

# Calculate pairwise Euclidean distances (condensed form: one entry per unordered row pair)
distances = pdist(X.values, metric='euclidean')

# Convert the condensed distances to a square matrix form
distance_matrix = squareform(distances)

Pass this through the Gaussian Kernel

In [66]:
def gaussian_kernel(distances, sigma):
    """Map distances to similarities with a Gaussian (RBF) kernel.

    Computes ``exp(-d**2 / (2 * sigma**2))`` element-wise.

    Args:
        distances: Scalar, sequence or array of distances. Plain Python
            sequences are accepted and converted to a float array.
        sigma: Kernel bandwidth; must be non-zero.

    Returns:
        numpy.ndarray (or NumPy scalar for scalar input) with the same shape
        as ``distances``; a distance of 0 maps to 1.0.

    Raises:
        ValueError: If ``sigma`` is zero, which would divide by zero.
    """
    if np.any(np.equal(sigma, 0)):
        raise ValueError("sigma must be non-zero")
    # asarray lets lists/tuples work; it is a no-op copy-wise for float ndarrays.
    distances = np.asarray(distances, dtype=float)
    return np.exp(-distances**2 / (2 * sigma**2))

# Sigma is the bandwidth parameter: larger values make the similarity decay more
# slowly with distance.
sigma = 4000 # Adjust sigma as needed
kernel_matrix = gaussian_kernel(distance_matrix, sigma)

# Convert the kernel matrix to a DataFrame labelled by SpecID on both axes
kernel_df = pd.DataFrame(kernel_matrix, index=pivot_df.index, columns=pivot_df.index)

In [67]:
# Preview the first rows of the similarity matrix
kernel_df.head()

SpecID,201210-1-00,201210-1-01,201210-1-02,201210-1-03,201210-1-04,201210-1-05,201210-1-06,201210-1-07,201210-1-09,201210-1-10,...,210526-3-40,210526-3-41,210526-3-42,210526-3-43,210526-3-44,210526-3-45,210526-3-46,210526-3-47,210526-3-48,210526-3-49
SpecID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
201210-1-00,1.0,0.958482,0.181013,0.017597,0.768449,0.635039,0.608312,0.752215,0.778809,0.890076,...,0.931217,0.927949,0.920557,0.933716,0.936992,0.935256,0.936214,0.934178,0.936999,0.935435
201210-1-01,0.958482,1.0,0.221635,0.022608,0.827849,0.703053,0.663901,0.78129,0.798276,0.880948,...,0.887116,0.885214,0.875394,0.880437,0.880964,0.877912,0.878703,0.872434,0.875986,0.874292
201210-1-02,0.181013,0.221635,1.0,0.085583,0.170337,0.155416,0.138406,0.158948,0.166674,0.163002,...,0.136024,0.135639,0.134443,0.133412,0.133163,0.131216,0.133461,0.137171,0.132234,0.13151
201210-1-03,0.017597,0.022608,0.085583,1.0,0.022224,0.02146,0.022717,0.022639,0.023122,0.018627,...,0.014369,0.014554,0.013833,0.013232,0.013098,0.012683,0.012765,0.013201,0.012693,0.012534
201210-1-04,0.768449,0.827849,0.170337,0.022224,1.0,0.929182,0.800917,0.827279,0.81636,0.835327,...,0.755688,0.75642,0.743123,0.734776,0.72864,0.723284,0.724097,0.708914,0.711472,0.717978


In [68]:
# Renaming and preprocessing for Neo4j
nodes_df = pivot_df['Status'].to_frame()
nodes_df = nodes_df.rename_axis("SpecID:ID")
nodes_df = nodes_df.rename(columns={"Status": ":LABEL"})

In [69]:
# Saving into Neo4j import folder
nodes_df.to_csv(f"{neo4j_directory}/import/nodes.csv")

In [70]:
# Renaming and preprocessing for Neo4j
relationship_df = kernel_df.reset_index()
relationship_df = relationship_df.melt(id_vars='SpecID', var_name=':END_ID', value_name='Weight:float')
relationship_df = relationship_df.rename(columns={'SpecID': ':START_ID'})
relationship_df[':TYPE'] = 'LINK'
relationship_df = relationship_df[relationship_df[":START_ID"] != relationship_df[":END_ID"]]
relationship_df = relationship_df[relationship_df[":START_ID"] < relationship_df[":END_ID"]]

In [71]:
# Saving into Neo4j import folder
relationship_df.to_csv(f"{neo4j_directory}/import/relationships.csv")

## Building Graph and Running Graph Algorithms

In [72]:
from neo4j import GraphDatabase
import shutil
import subprocess

Then run the admin import command.

In [73]:
os_name = os.name

# Run `neo4j-admin database import full` against the CSVs written to the import folder.
# Both branches capture stdout/stderr as text so the messages print readably
# (without text=True the captured output is bytes and prints as a b'...' repr).
if os_name == 'nt':
    # Windows: run through the shell from the DBMS `bin` directory.
    working_dir = f'{neo4j_directory}/bin'
    command = 'neo4j-admin database import full --nodes=import/nodes.csv --relationships=import/relationships.csv --overwrite-destination neo4j'
    result = subprocess.run(command, shell=True, cwd=working_dir, capture_output=True, text=True)
else:
    # Other platforms: call the executable directly from the DBMS root directory.
    working_dir = f'{neo4j_directory}'
    command = [
        './bin/neo4j-admin', 'database', 'import', 'full',
        '--nodes=import/nodes.csv',
        '--relationships=import/relationships.csv',
        '--overwrite-destination', 'neo4j'
    ]
    result = subprocess.run(command, cwd=working_dir, capture_output=True, text=True)


# Check if the command was successful
if result.returncode == 0:
    print("Import successful")
    print(result.stdout)
else:
    print("Error during import:")
    print(result.stderr)

Import successful
Neo4j version: 5.18.0
Importing the contents of these files into C:\Users\stang\.Neo4jDesktop\relate-data\dbmss\dbms-d0a05d27-d2ec-404f-a7c3-e7f4b6a97351\data\databases\neo4j:
Nodes:
  C:\Users\stang\.Neo4jDesktop\relate-data\dbmss\dbms-d0a05d27-d2ec-404f-a7c3-e7f4b6a97351\import\nodes.csv

Relationships:
  C:\Users\stang\.Neo4jDesktop\relate-data\dbmss\dbms-d0a05d27-d2ec-404f-a7c3-e7f4b6a97351\import\relationships.csv


Available resources:
  Total machine memory: 31.86GiB
  Free machine memory: 15.68GiB
  Max heap memory : 910.5MiB
  Max worker threads: 8
  Configured max memory: 13.38GiB
  High parallel IO: true

Cypher type normalization is enabled (disable with --normalize-types=false):
  Property type of 'Weight' normalized from 'float' --> 'double' in C:\Users\stang\.Neo4jDesktop\relate-data\dbmss\dbms-d0a05d27-d2ec-404f-a7c3-e7f4b6a97351\import\relationships.csv

Import starting 2024-04-21 13:01:31.174+0100
  Estimated number of nodes: 3.05 k
  Estimated numbe

In [134]:
# On non-Windows platforms, restart the Neo4j server from the DBMS root directory.
# Nothing is done on Windows ('nt').
if os_name != 'nt':
    command = [
        './bin/neo4j', 'restart'
    ]

    # text=True decodes the captured output so it prints as readable text, not a bytes repr.
    result = subprocess.run(command, cwd=neo4j_directory, capture_output=True, text=True)

    # Check if the command was successful
    if result.returncode == 0:
        print("Restart successful")
        print(result.stdout)
    else:
        print("Error during restart:")
        print(result.stderr)

Restart successful


Run and connect to the Neo4j Database

In [74]:
from neo4j import GraphDatabase

uri = "neo4j://localhost:7687"
username = "neo4j"              # Neo4J username
# `password` is defined at the top of the notebook

# Create a driver instance (shared by every session opened in the cells below)
driver = GraphDatabase.driver(uri, auth=(username, password))

# Ensure you close the driver connection when your program ends
def close_driver():
    """Close the module-level Neo4j ``driver``."""
    driver.close()

**First create the gds Graph Projection**

In [75]:
def project_graph(tx):
    """Create the in-memory GDS projection named 'myGraph'.

    The projection contains the node labels 'Normal', 'Hyperglycemia' and
    'Hypoglycemia' and the LINK relationships, treated as undirected and
    carrying the 'Weight' property.

    Args:
        tx: Transaction-like object exposing ``run(query)``.

    Returns:
        None. The result of ``tx.run`` is not inspected.
    """
    cypher = """
    CALL gds.graph.project(
      'myGraph', 
      ['Normal', 'Hyperglycemia', 'Hypoglycemia'],
      {
        LINK: {
          orientation: 'UNDIRECTED',
          properties: 'Weight'
        }
      }
    )
    """
    tx.run(cypher)

# Use a session to execute the graph projection
# NOTE(review): presumably this fails if a projection named 'myGraph' is still present
# from an earlier run; the final cells of the notebook drop it — confirm.
with driver.session() as session:
    session.execute_write(project_graph)

Define the graph algorithms.

In [76]:
def run_pagerank_centrality(tx):
    """Stream weighted PageRank scores for the 'myGraph' projection.

    Args:
        tx: Transaction-like object exposing ``run(query)``; the records it
            yields must support lookup by ``"name"`` and ``"score"``.

    Returns:
        list[tuple]: ``(name, score)`` pairs in the order returned by the query
        (ordered by score descending, then name ascending, in the Cypher).
    """
    cypher = """
    CALL gds.pageRank.stream('myGraph', {
        relationshipWeightProperty: 'Weight'
    })
    YIELD nodeId, score
    RETURN gds.util.asNode(nodeId).SpecID AS name, score
    ORDER BY score DESC, name ASC
    """
    scored = []
    for row in tx.run(cypher):
        scored.append((row["name"], row["score"]))
    return scored

In [77]:
def run_degree_centrality(tx):
    """Stream weighted degree centrality for the 'myGraph' projection.

    Args:
        tx: Transaction-like object exposing ``run(query)``; the records it
            yields must support lookup by ``"name"`` and ``"score"``.

    Returns:
        list[tuple]: ``(name, score)`` pairs in the order returned by the query
        (ordered by score descending, then name ascending, in the Cypher).
    """
    cypher = """
    CALL gds.degree.stream('myGraph', {
        relationshipWeightProperty: 'Weight'
    })
    YIELD nodeId, score
    RETURN gds.util.asNode(nodeId).SpecID AS name, score
    ORDER BY score DESC, name ASC
    """
    records = tx.run(cypher)
    pairs = []
    for rec in records:
        pairs.append((rec["name"], rec["score"]))
    return pairs

In [78]:
def run_eigenvector_centrality(tx):
    """Stream weighted eigenvector centrality for the 'myGraph' projection.

    Args:
        tx: Transaction-like object exposing ``run(query)``; the records it
            yields must support lookup by ``"name"`` and ``"score"``.

    Returns:
        list[tuple]: ``(name, score)`` pairs in the order returned by the query
        (ordered by score descending, then name ascending, in the Cypher).
    """
    cypher = """
    CALL gds.eigenvector.stream('myGraph', {
        relationshipWeightProperty: 'Weight'
    })
    YIELD nodeId, score
    RETURN gds.util.asNode(nodeId).SpecID AS name, score
    ORDER BY score DESC, name ASC
    """
    output = []
    for entry in tx.run(cypher):
        output.append((entry["name"], entry["score"]))
    return output

In [79]:
def run_articlerank_centrality(tx):
    """Stream weighted ArticleRank scores for the 'myGraph' projection.

    Args:
        tx: Transaction-like object exposing ``run(query)``; the records it
            yields must support lookup by ``"name"`` and ``"score"``.

    Returns:
        list[tuple]: ``(name, score)`` pairs in the order returned by the query
        (ordered by score descending, then name ascending, in the Cypher).
    """
    cypher = """
    CALL gds.articleRank.stream('myGraph', {
        relationshipWeightProperty: 'Weight'
    })
    YIELD nodeId, score
    RETURN gds.util.asNode(nodeId).SpecID AS name, score
    ORDER BY score DESC, name ASC
    """
    ranked = []
    for item in tx.run(cypher):
        ranked.append((item["name"], item["score"]))
    return ranked

In [80]:
def run_label_propagation_algorithm(tx):
    """Stream weighted label-propagation communities for the 'myGraph' projection.

    Args:
        tx: Transaction-like object exposing ``run(query)``; the records it
            yields must support lookup by ``"name"`` and ``"Community"``.

    Returns:
        list[tuple]: ``(name, community_id)`` pairs in the order returned by the
        query (ordered by community, then name, in the Cypher).
    """
    cypher = """
    CALL gds.labelPropagation.stream('myGraph', { relationshipWeightProperty: 'Weight' })
    YIELD nodeId, communityId AS Community
    RETURN gds.util.asNode(nodeId).SpecID AS name, Community
    ORDER BY Community, name
    """
    assignments = []
    for row in tx.run(cypher):
        assignments.append((row["name"], row["Community"]))
    return assignments

In [81]:
def run_leiden_algorithm(tx):
    """Stream weighted Leiden communities for the 'myGraph' projection.

    Args:
        tx: Transaction-like object exposing ``run(query)``; the records it
            yields must support lookup by ``"name"`` and ``"Community"``.

    Returns:
        list[tuple]: ``(name, community_id)`` pairs in the order returned by the
        query (ordered by community, then name, in the Cypher).
    """
    cypher = """
    CALL gds.leiden.stream('myGraph', { relationshipWeightProperty: 'Weight' })
    YIELD nodeId, communityId AS Community
    RETURN gds.util.asNode(nodeId).SpecID AS name, Community
    ORDER BY Community, name
    """
    members = []
    for rec in tx.run(cypher):
        members.append((rec["name"], rec["Community"]))
    return members

In [82]:
def run_louvain_algorithm(tx):
    """Stream weighted Louvain communities for the 'myGraph' projection.

    Args:
        tx: Transaction-like object exposing ``run(query)``; the records it
            yields must support lookup by ``"name"`` and ``"Community"``.

    Returns:
        list[tuple]: ``(name, community_id)`` pairs in the order returned by the
        query (ordered by community, then name, in the Cypher).
    """
    cypher = """
    CALL gds.louvain.stream('myGraph', { relationshipWeightProperty: 'Weight' })
    YIELD nodeId, communityId AS Community
    RETURN gds.util.asNode(nodeId).SpecID AS name, Community
    ORDER BY Community, name
    """
    grouped = []
    for entry in tx.run(cypher):
        grouped.append((entry["name"], entry["Community"]))
    return grouped

In [83]:
def run_node2vec_algorithm(tx):
    """Stream weighted node2vec embeddings for the 'myGraph' projection.

    Args:
        tx: Transaction-like object exposing ``run(query)``; the records it
            yields must support lookup by ``"name"`` and ``"embedding"``.

    Returns:
        list[tuple]: ``(name, embedding)`` pairs in the order returned by the query.
    """
    cypher = """
    CALL gds.node2vec.stream('myGraph', { relationshipWeightProperty: 'Weight' })
    YIELD nodeId, embedding
    RETURN gds.util.asNode(nodeId).SpecID AS name, embedding
    """
    vectors = []
    for row in tx.run(cypher):
        vectors.append((row["name"], row["embedding"]))
    return vectors

In [84]:
def run_fastRP_algorithm(tx):
    """Stream weighted FastRP embeddings for the 'myGraph' projection.

    The Cypher fixes ``randomSeed`` to 1234 and ``embeddingDimension`` to 128.

    Args:
        tx: Transaction-like object exposing ``run(query)``; the records it
            yields must support lookup by ``"name"`` and ``"embedding"``.

    Returns:
        list[tuple]: ``(name, embedding)`` pairs in the order returned by the query.
    """
    cypher = """
    CALL gds.fastRP.stream('myGraph',
        { relationshipWeightProperty: 'Weight',
         randomSeed:1234,
         embeddingDimension: 128
        }
    )
    YIELD nodeId, embedding
    RETURN gds.util.asNode(nodeId).SpecID AS name, embedding
    """
    vectors = []
    for rec in tx.run(cypher):
        vectors.append((rec["name"], rec["embedding"]))
    return vectors

Execute the algorithms and store the results in a Dataframe.

In [85]:
# Use a session to execute the queries and retrieve the results
with driver.session() as session:
    pagerank_results = session.execute_read(run_pagerank_centrality)
    degree_results = session.execute_read(run_degree_centrality)
    eigenvector_results = session.execute_read(run_eigenvector_centrality)
    articlerank_results = session.execute_read(run_articlerank_centrality)
    label_propagation_results = session.execute_read(run_label_propagation_algorithm)
    leiden_results = session.execute_read(run_leiden_algorithm)
    louvain_results = session.execute_read(run_louvain_algorithm)

In [86]:
# Wrap each list of (name, value) tuples in a two-column DataFrame keyed by 'name'.
pagerank_df = pd.DataFrame(pagerank_results, columns=['name', 'PageRank'])
degree_df = pd.DataFrame(degree_results, columns=['name', 'DegreeCentrality'])
eigenvector_df = pd.DataFrame(eigenvector_results, columns=['name', 'EigenvectorCentrality'])
articlerank_df = pd.DataFrame(articlerank_results, columns=['name', 'ArticleRank'])
# NOTE(review): label_propagation_df is built here but is not part of the merge
# in the next cell — confirm whether that omission is intentional.
label_propagation_df = pd.DataFrame(label_propagation_results, columns=['name', 'LabelPropagation'])
leiden_df = pd.DataFrame(leiden_results, columns=['name', 'Leiden'])
louvain_df = pd.DataFrame(louvain_results, columns=['name', 'Louvain'])

In [87]:
# Left-join the remaining metric tables onto the PageRank table by spectrum name.
# The loop variable is deliberately not named `df`: that name holds the spectra
# DataFrame loaded at the top of the notebook, and reusing it as a loop variable
# silently overwrote it, so earlier cells could no longer be re-run.
# NOTE(review): label_propagation_df is not merged here — confirm this is intentional.
merged_df = pagerank_df
for metric_df in [degree_df, eigenvector_df, articlerank_df, leiden_df, louvain_df]:
    merged_df = pd.merge(merged_df, metric_df, on=['name'], how='left')

In [88]:
# Rename the key column to 'SpecID' so it matches the node table used in the later join.
# NOTE(review): from here on `df` refers to the graph-metric table, not the
# spectra DataFrame loaded at the top of the notebook.
df = merged_df.rename(columns={'name' : 'SpecID'})
df.head()

Unnamed: 0,SpecID,PageRank,DegreeCentrality,EigenvectorCentrality,ArticleRank,Leiden,Louvain
0,210504-1-33,7.952879,1.327693e-08,0.0,0.15,20,2527
1,210505-1-12,7.188303,1.298646e-08,0.0,0.15,20,2527
2,210510-2-43,6.588763,0.0793673,0.0,0.150003,21,2658
3,210505-1-11,6.138366,0.01445029,0.0,0.150001,23,2525
4,210505-1-10,5.408174,0.0143981,0.0,0.150001,23,2525


In [89]:
# Run FastRP in its own read transaction; the result is a list of (SpecID, embedding) tuples.
with driver.session() as session:
    fastRP_results = session.execute_read(run_fastRP_algorithm)

fastRP_df = pd.DataFrame(fastRP_results, columns=['SpecID', 'embeddings'])

# Expand the embeddings list into separate columns (one column per embedding component)
embeddings_df = pd.DataFrame(fastRP_df['embeddings'].tolist(), index=fastRP_df.index)

# Optionally, rename the new columns
embeddings_df.columns = [f'embedding_{i}' for i in range(embeddings_df.shape[1])]

# Join the new embeddings columns to the original DataFrame
fastRP_df = pd.concat([fastRP_df.drop(['embeddings'], axis=1), embeddings_df], axis=1)
# NOTE(review): this writes two directories up ('../../data'), whereas the raw data
# was read from '../data' — confirm the output location is intended.
fastRP_df.to_csv('../../data/fastRP_embeddings.csv', index=False)
fastRP_df.head()

Unnamed: 0,SpecID,embedding_0,embedding_1,embedding_2,embedding_3,embedding_4,embedding_5,embedding_6,embedding_7,embedding_8,...,embedding_118,embedding_119,embedding_120,embedding_121,embedding_122,embedding_123,embedding_124,embedding_125,embedding_126,embedding_127
0,201210-1-00,-0.126856,0.041533,0.312416,0.223328,0.12833,-0.112877,0.034097,-0.075095,0.027202,...,-0.198157,-0.007818,-0.104161,0.107659,0.304749,-0.055208,0.043022,-0.101856,-0.005557,-0.018172
1,201210-1-01,-0.125399,0.040796,0.311529,0.220783,0.128676,-0.112619,0.034209,-0.076966,0.028316,...,-0.198135,-0.008172,-0.102161,0.108725,0.305113,-0.055215,0.042228,-0.103425,-0.004603,-0.019147
2,201210-1-02,-0.123358,0.039899,0.31053,0.2175,0.129119,-0.112073,0.034152,-0.079315,0.029759,...,-0.198125,-0.008845,-0.099716,0.110159,0.305353,-0.055236,0.041107,-0.105426,-0.003343,-0.02021
3,201210-1-03,-0.110808,0.030174,0.303462,0.195779,0.132131,-0.110179,0.03303,-0.095497,0.039937,...,-0.198113,-0.009282,-0.084771,0.119222,0.307137,-0.058461,0.033479,-0.119889,0.002414,-0.026324
4,201210-1-04,-0.121557,0.038382,0.309043,0.213698,0.129492,-0.112088,0.034283,-0.082122,0.031278,...,-0.1981,-0.008898,-0.097029,0.111685,0.306225,-0.055691,0.040199,-0.107878,-0.002353,-0.021484


Now that we have all our graph metrics, we need to join back with our nodes dataframe in order to get the status column back and perform some quick cleaning

In [90]:
# Node table written earlier in the notebook (columns "SpecID:ID" and ":LABEL").
status_df = pd.read_csv(f"{neo4j_directory}/import/nodes.csv")


def _attach_status(features_df):
    """Inner-join per-spectrum features with the node labels.

    Adds 'SurID' (the SpecID without its last three characters), indexes the
    result by 'SpecID' and exposes the ':LABEL' column as 'Status'.
    """
    return (
        pd.merge(features_df, status_df, left_on="SpecID", right_on="SpecID:ID", how="inner")
        .drop(columns=["SpecID:ID"])
        .assign(SurID=lambda frame: frame['SpecID'].str[:-3])
        .set_index("SpecID")
        .rename(columns={":LABEL": "Status"})
    )


graph_df = _attach_status(df)
fast_graph_df = _attach_status(fastRP_df)

## Machine Learning Using Graph Metrics KFold

## Centrality Metrics

In [91]:
# Preview the centrality/community feature table
graph_df.head()

Unnamed: 0_level_0,PageRank,DegreeCentrality,EigenvectorCentrality,ArticleRank,Leiden,Louvain,Status,SurID
SpecID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
210504-1-33,7.952879,1.327693e-08,0.0,0.15,20,2527,Hypoglycemia,210504-1
210505-1-12,7.188303,1.298646e-08,0.0,0.15,20,2527,Hypoglycemia,210505-1
210510-2-43,6.588763,0.0793673,0.0,0.150003,21,2658,Hyperglycemia,210510-2
210505-1-11,6.138366,0.01445029,0.0,0.150001,23,2525,Hypoglycemia,210505-1
210505-1-10,5.408174,0.0143981,0.0,0.150001,23,2525,Hypoglycemia,210505-1


In [92]:
from sklearn.ensemble import ExtraTreesClassifier
# Fixed random_state so repeated runs build the same classifier.
et = ExtraTreesClassifier(random_state=1234)
# NOTE(review): groupkfold=False presumably means folds are not grouped by SurID, so
# spectra from the same surface may appear in both train and test folds — confirm
# against `evaluate_model`.
evaluate_model(graph_df, et, groupkfold=False)

ExtraTreesClassifier Cross-Validation Accuracy: 0.4956 +/- 0.0238
ExtraTreesClassifier Cross-Validation Precision: 0.4958 +/- 0.0249
ExtraTreesClassifier Cross-Validation Recall: 0.4951 +/- 0.0250
ExtraTreesClassifier Cross-Validation F1-Score: 0.4943 +/- 0.0247


## FastRP

In [93]:
# Preview the FastRP embedding feature table
fast_graph_df.head()

Unnamed: 0_level_0,embedding_0,embedding_1,embedding_2,embedding_3,embedding_4,embedding_5,embedding_6,embedding_7,embedding_8,embedding_9,...,embedding_120,embedding_121,embedding_122,embedding_123,embedding_124,embedding_125,embedding_126,embedding_127,Status,SurID
SpecID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
201210-1-00,-0.126856,0.041533,0.312416,0.223328,0.12833,-0.112877,0.034097,-0.075095,0.027202,0.133713,...,-0.104161,0.107659,0.304749,-0.055208,0.043022,-0.101856,-0.005557,-0.018172,Normal,201210-1
201210-1-01,-0.125399,0.040796,0.311529,0.220783,0.128676,-0.112619,0.034209,-0.076966,0.028316,0.13372,...,-0.102161,0.108725,0.305113,-0.055215,0.042228,-0.103425,-0.004603,-0.019147,Normal,201210-1
201210-1-02,-0.123358,0.039899,0.31053,0.2175,0.129119,-0.112073,0.034152,-0.079315,0.029759,0.133778,...,-0.099716,0.110159,0.305353,-0.055236,0.041107,-0.105426,-0.003343,-0.02021,Normal,201210-1
201210-1-03,-0.110808,0.030174,0.303462,0.195779,0.132131,-0.110179,0.03303,-0.095497,0.039937,0.130465,...,-0.084771,0.119222,0.307137,-0.058461,0.033479,-0.119889,0.002414,-0.026324,Normal,201210-1
201210-1-04,-0.121557,0.038382,0.309043,0.213698,0.129492,-0.112088,0.034283,-0.082122,0.031278,0.133294,...,-0.097029,0.111685,0.306225,-0.055691,0.040199,-0.107878,-0.002353,-0.021484,Normal,201210-1


In [94]:
# Evaluate the same classifier on the FastRP embedding features only
evaluate_model(fast_graph_df, et, groupkfold=False)

ExtraTreesClassifier Cross-Validation Accuracy: 0.7218 +/- 0.0201
ExtraTreesClassifier Cross-Validation Precision: 0.7215 +/- 0.0202
ExtraTreesClassifier Cross-Validation Recall: 0.7214 +/- 0.0198
ExtraTreesClassifier Cross-Validation F1-Score: 0.7201 +/- 0.0201


## Fast RP and Centrality

In [95]:
# Combine centrality metrics and FastRP embeddings per spectrum; 'SpecID' is the
# index name of both frames, while 'Status' and 'SurID' are shared columns.
joined_graph = pd.merge(graph_df, fast_graph_df, on=['SpecID', 'Status', 'SurID'], how='inner')

In [96]:
# Preview the combined feature table
joined_graph.head()

Unnamed: 0_level_0,PageRank,DegreeCentrality,EigenvectorCentrality,ArticleRank,Leiden,Louvain,Status,SurID,embedding_0,embedding_1,...,embedding_118,embedding_119,embedding_120,embedding_121,embedding_122,embedding_123,embedding_124,embedding_125,embedding_126,embedding_127
SpecID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
210504-1-33,7.952879,1.327693e-08,0.0,0.15,20,2527,Hypoglycemia,210504-1,-0.152157,0.150608,...,0.009914,7.6e-05,-0.006305,0.008521,0.008568,-0.162242,0.159623,-0.312864,-0.003948,0.165256
210505-1-12,7.188303,1.298646e-08,0.0,0.15,20,2527,Hypoglycemia,210505-1,-0.151641,0.151149,...,0.012107,4.9e-05,-0.005957,0.006741,0.006479,-0.163728,0.160848,-0.313039,-0.005478,0.166675
210510-2-43,6.588763,0.0793673,0.0,0.150003,21,2658,Hyperglycemia,210510-2,-0.285738,-0.211382,...,0.146538,-0.026759,-0.142884,0.040625,-0.002492,0.112703,0.259311,-0.041402,-0.226843,0.360099
210505-1-11,6.138366,0.01445029,0.0,0.150001,23,2525,Hypoglycemia,210505-1,-0.022356,-0.000904,...,-0.169912,0.171522,0.171413,0.00061,-0.172159,-0.148379,-3.6e-05,-0.001895,1.1e-05,-0.000579
210505-1-10,5.408174,0.0143981,0.0,0.150001,23,2525,Hypoglycemia,210505-1,-0.022368,-0.000582,...,-0.170507,0.171514,0.171458,0.000388,-0.171884,-0.148636,-3e-05,-0.001241,1.2e-05,-0.000354


In [97]:
# Evaluate the same classifier on the combined centrality + FastRP features
evaluate_model(joined_graph, et, groupkfold=False)

ExtraTreesClassifier Cross-Validation Accuracy: 0.7228 +/- 0.0212
ExtraTreesClassifier Cross-Validation Precision: 0.7232 +/- 0.0199
ExtraTreesClassifier Cross-Validation Recall: 0.7223 +/- 0.0204
ExtraTreesClassifier Cross-Validation F1-Score: 0.7212 +/- 0.0201


In [98]:
def delete_projection(tx):
    """Drop the in-memory GDS projection named 'myGraph'.

    Args:
        tx: Transaction-like object exposing ``run(query)``.

    Returns:
        None. The result of ``tx.run`` is not inspected.
    """
    cypher = """
    CALL gds.graph.drop('myGraph')
    """
    tx.run(cypher)

# Use a session to drop the graph projection now that all algorithms have been run
with driver.session() as session:
    session.execute_write(delete_projection)

In [99]:
# Release the Neo4j driver connection; no database cells can run after this
# without re-creating the driver.
close_driver()