# Gaussian Kernel Graph

<p align = "justify">In this approach, each spectrum is a node and every node is connected to every other node, making the graph fully connected. To calculate the relationship weight between each pair of nodes, we first calculate the Euclidean distance between each pair of spectra, using each spectrum's absorbance values as a vector. We can do this because the intervals between intensity data points are the same across all spectra. We then pass the Euclidean distance $d$ through the Gaussian kernel function, $k(d) = \exp\left(-\frac{d^2}{2\sigma^2}\right)$, to get a similarity measure between each pair of spectra. This similarity is then used as the relationship weight.</p>

## Importing and Preprocessing Data for Neo4j

Import relevant libraries and import raw data

In [1]:
import sys
sys.path.append('..')  # Adds the parent directory to the path so Python can find the `Cleaning_and_Evaluation` package
from Cleaning_and_Evaluation import *
import seaborn as sns
import pandas as pd
from scipy.signal import find_peaks
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform
import numpy as np
import os

Here we import the neo4j directory and password variables that have been set in our environment

In [2]:
# Kept as a notebook-level name because later cells branch on it when invoking Neo4j.
os_name = os.name

# Read the settings the same way on every platform. A missing variable yields None
# instead of raising KeyError, so the manual-override cell that follows can still
# be used to supply the values.
password = os.getenv('NEO4J_Password')
neo4j_directory = os.getenv('NEO4J_Directory')

_missing_settings = [
    name
    for name, value in (('NEO4J_Password', password), ('NEO4J_Directory', neo4j_directory))
    if not value
]
if _missing_settings:
    # Surface the problem here rather than as a confusing "None/import/..." path later on.
    print(
        f"Warning: environment variable(s) not set: {', '.join(_missing_settings)}. "
        "Set `password` / `neo4j_directory` manually in the next cell before continuing."
    )

Alternatively you can manually input the neo4j directory and password here

In [3]:
# Optional manual override: uncomment both lines and fill in your own values if the
# environment variables are not set. Avoid committing real credentials with the notebook.
# password = "your_neo4j_password"
# neo4j_directory = "your_neo4j_dbms_directory"

In [4]:
# Load the raw spectra; the path is relative to the notebook's working directory.
df = pd.read_csv("../data/exosomes.raw_spectrum_400-1800.csv")

In [5]:
# Keyword options forwarded to spectra_cleaning (not defined in this notebook;
# presumably provided by the Cleaning_and_Evaluation star import).
cleaning_params = {
    # Switches for the individual cleaning stages.
    'despike': True,
    'baseline_correct': True,
    'smoothing': True,
    'scaling': False,
    # Tuning values for the stages above; their exact meaning is defined by spectra_cleaning.
    'despike_ma': 20,
    'despike_threshold': 7,
    'lam': 10**8,
    'p': 0.01,
    'window_size': 51,
    'poly_order': 3
}
# NOTE(review): the return value is discarded, so this only has an effect if
# spectra_cleaning modifies `df` in place — confirm against its implementation.
spectra_cleaning(df, **cleaning_params)

# Display the frame after the cleaning call.
df

Unnamed: 0,SpecID,Seq,WaveNumber,Absorbance,SurID,Status
0,201210-1-00,293,400.22778,41.863303,201210-1,Normal
1,201210-1-00,294,400.91116,41.803843,201210-1,Normal
2,201210-1-00,295,401.59454,41.741884,201210-1,Normal
3,201210-1-00,296,402.27789,41.677722,201210-1,Normal
4,201210-1-00,297,402.96127,41.611654,201210-1,Normal
...,...,...,...,...,...,...
6239200,210526-3-09,2337,1797.03870,12.378163,210526-3,Hyperglycemia
6239201,210526-3-09,2338,1797.72200,13.269937,210526-3,Hyperglycemia
6239202,210526-3-09,2339,1798.40550,14.199285,210526-3,Hyperglycemia
6239203,210526-3-09,2340,1799.08890,15.166531,210526-3,Hyperglycemia


In [6]:
# Reshape the long-format spectra with prepare_wavelength_df. Later cells rely on the
# result being indexed by SpecID and carrying 'Status' and 'SurID' columns.
pivot_df = prepare_wavelength_df(df, 'Absorbance')

Calculate the Euclidean distance between each pair of spectra

In [7]:
# pdist and squareform are already imported in the notebook's first cell.

# Drop the 'Status' and 'SurID' columns: they are labels/identifiers, not part of
# the vector each spectrum is compared on.
X = pivot_df.drop(columns=['Status', 'SurID'])

# Pairwise Euclidean distances between rows, in condensed (upper-triangle) form
distances = pdist(X.values, metric='euclidean')

# Expand the condensed distances into the full symmetric square matrix (zero diagonal)
distance_matrix = squareform(distances)

Pass this through the Gaussian Kernel

In [8]:
def gaussian_kernel(distances, sigma):
    """Convert distances into Gaussian (RBF) similarities.

    Applies ``exp(-d**2 / (2 * sigma**2))`` element-wise, so a distance of 0 maps
    to 1.0 and larger distances decay towards 0.

    Parameters
    ----------
    distances : array-like
        Non-negative distances (scalar, list or NumPy array of any shape).
    sigma : float
        Kernel bandwidth, in the same units as ``distances``. Must be > 0.

    Returns
    -------
    numpy.ndarray or numpy.floating
        Similarities with the same shape as ``distances``.

    Raises
    ------
    ValueError
        If ``sigma`` is not strictly positive (this also rejects NaN).
    """
    # A zero bandwidth would divide by zero and silently yield NaN on the diagonal.
    if not np.all(np.asarray(sigma) > 0):
        raise ValueError(f"sigma must be strictly positive, got {sigma!r}")

    # Accept plain Python sequences as well as arrays.
    distances = np.asarray(distances)
    return np.exp(-distances**2 / (2 * sigma**2))

# Sigma is the bandwidth parameter, in the same units as the Euclidean distances:
# a distance equal to sigma maps to a similarity of exp(-0.5).
sigma = 4000 # Adjust sigma as needed
kernel_matrix = gaussian_kernel(distance_matrix, sigma)

# Convert the kernel matrix to a DataFrame whose rows and columns are both labelled by pivot_df's index
kernel_df = pd.DataFrame(kernel_matrix, index=pivot_df.index, columns=pivot_df.index)

In [9]:
# Preview the first rows of the pairwise similarity matrix.
kernel_df.head()

SpecID,201210-1-00,201210-1-01,201210-1-02,201210-1-03,201210-1-04,201210-1-05,201210-1-06,201210-1-07,201210-1-09,201210-1-10,...,210526-3-40,210526-3-41,210526-3-42,210526-3-43,210526-3-44,210526-3-45,210526-3-46,210526-3-47,210526-3-48,210526-3-49
SpecID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
201210-1-00,1.0,0.960222,0.185942,0.016573,0.840565,0.733965,0.732632,0.835303,0.871049,0.918058,...,0.938829,0.937744,0.928191,0.936565,0.939272,0.935973,0.934493,0.931532,0.934876,0.932618
201210-1-01,0.960222,1.0,0.232076,0.022232,0.889082,0.796927,0.780446,0.853169,0.87805,0.900857,...,0.886819,0.888595,0.875283,0.877737,0.876936,0.871103,0.869375,0.862815,0.867528,0.864865
201210-1-02,0.185942,0.232076,1.0,0.093867,0.186239,0.179427,0.161619,0.174763,0.184361,0.170892,...,0.13414,0.135923,0.132599,0.131456,0.130767,0.128275,0.129061,0.132162,0.128803,0.127268
201210-1-03,0.016573,0.022232,0.093867,1.0,0.019366,0.020385,0.020779,0.020482,0.021064,0.016993,...,0.011996,0.012505,0.011572,0.01116,0.010909,0.010474,0.010403,0.010595,0.01052,0.010335
201210-1-04,0.840565,0.889082,0.186239,0.019366,1.0,0.944775,0.83883,0.85148,0.847944,0.857373,...,0.795307,0.79721,0.780925,0.780141,0.771758,0.765216,0.764966,0.751632,0.756838,0.760434


In [10]:
# Renaming and preprocessing for Neo4j
nodes_df = pivot_df['Status'].to_frame()
nodes_df = nodes_df.rename_axis("SpecID:ID")
nodes_df = nodes_df.rename(columns={"Status": ":LABEL"})

In [11]:
# Saving into Neo4j import folder
nodes_df.to_csv(f"{neo4j_directory}/import/nodes.csv")

In [12]:
# Renaming and preprocessing for Neo4j
relationship_df = kernel_df.reset_index()
relationship_df = relationship_df.melt(id_vars='SpecID', var_name=':END_ID', value_name='Weight:float')
relationship_df = relationship_df.rename(columns={'SpecID': ':START_ID'})
relationship_df[':TYPE'] = 'LINK'
relationship_df = relationship_df[relationship_df[":START_ID"] != relationship_df[":END_ID"]]
relationship_df = relationship_df[relationship_df[":START_ID"] < relationship_df[":END_ID"]]

In [13]:
# Saving into Neo4j import folder
relationship_df.to_csv(f"{neo4j_directory}/import/relationships.csv")

## Building Graph and Running Graph Algorithms

In [14]:
from neo4j import GraphDatabase
import shutil
import subprocess

Then run the admin import command.

In [15]:
os_name = os.name

# Run the offline bulk import. Both branches capture stdout/stderr as text so the
# messages printed below are readable strings on every platform.
if os_name == 'nt':
    # On Windows the command is run through the shell from the DBMS bin directory.
    working_dir = f'{neo4j_directory}/bin'
    command = 'neo4j-admin database import full --nodes=import/nodes.csv --relationships=import/relationships.csv --overwrite-destination neo4j'
    result = subprocess.run(command, shell=True, cwd=working_dir, capture_output=True, text=True)
else:
    # Elsewhere the executable is invoked directly from the DBMS root directory.
    working_dir = f'{neo4j_directory}'
    command = [
        './bin/neo4j-admin', 'database', 'import', 'full',
        '--nodes=import/nodes.csv',
        '--relationships=import/relationships.csv',
        '--overwrite-destination', 'neo4j'
    ]
    result = subprocess.run(command, cwd=working_dir, capture_output=True, text=True)


# Check if the command was successful
if result.returncode == 0:
    print("Import successful")
    print(result.stdout)
else:
    print("Error during import:")
    print(result.stderr)
    # Part of the diagnostic output may have gone to stdout; show it as well.
    if result.stdout:
        print(result.stdout)

Import successful
Neo4j version: 5.18.0
Importing the contents of these files into C:\Users\stang\.Neo4jDesktop\relate-data\dbmss\dbms-d0a05d27-d2ec-404f-a7c3-e7f4b6a97351\data\databases\neo4j:
Nodes:
  C:\Users\stang\.Neo4jDesktop\relate-data\dbmss\dbms-d0a05d27-d2ec-404f-a7c3-e7f4b6a97351\import\nodes.csv

Relationships:
  C:\Users\stang\.Neo4jDesktop\relate-data\dbmss\dbms-d0a05d27-d2ec-404f-a7c3-e7f4b6a97351\import\relationships.csv


Available resources:
  Total machine memory: 31.86GiB
  Free machine memory: 15.56GiB
  Max heap memory : 910.5MiB
  Max worker threads: 8
  Configured max memory: 13.28GiB
  High parallel IO: true

Cypher type normalization is enabled (disable with --normalize-types=false):
  Property type of 'Weight' normalized from 'float' --> 'double' in C:\Users\stang\.Neo4jDesktop\relate-data\dbmss\dbms-d0a05d27-d2ec-404f-a7c3-e7f4b6a97351\import\relationships.csv

Import starting 2024-04-21 13:12:55.757+0100
  Estimated number of nodes: 3.05 k
  Estimated numbe

In [134]:
# Restart the Neo4j server after the offline import. This is only done on
# non-Windows systems; `os_name` is set in an earlier cell.
if os_name != 'nt':
    command = [
        './bin/neo4j', 'restart'
    ]

    # text=True decodes the captured output so it prints as a string rather than bytes.
    result = subprocess.run(command, cwd=neo4j_directory, capture_output=True, text=True)

    # Check if the command was successful
    if result.returncode == 0:
        print("Restart successful")
        print(result.stdout)
    else:
        print("Error during restart:")
        print(result.stderr)

Restart successful


Run and connect to the Neo4j Database

In [16]:
from neo4j import GraphDatabase

uri = "neo4j://localhost:7687"
username = "neo4j"              # Neo4j username
# The password is defined at the top of the notebook

# Create a driver instance; it is shared by every session opened in the cells below
driver = GraphDatabase.driver(uri, auth=(username, password))

# Ensure you close the driver connection when your program ends
def close_driver():
    """Close the notebook-level Neo4j ``driver``."""
    driver.close()

**First create the gds Graph Projection**

In [17]:
def project_graph(tx):
    """Create the GDS in-memory projection named 'myGraph'.

    The projection contains the nodes labelled Normal, Hyperglycemia or Hypoglycemia
    and the LINK relationships, treated as undirected and carrying the 'Weight'
    property.

    Any existing projection with the same name is dropped first, so the cell can be
    re-run without failing because the graph already exists.

    Parameters
    ----------
    tx
        A transaction object exposing ``run(query)``.
    """
    # The second argument (failIfMissing=false) makes the drop a no-op when no
    # projection called 'myGraph' exists yet.
    tx.run("CALL gds.graph.drop('myGraph', false)").consume()

    query = """
    CALL gds.graph.project(
      'myGraph', 
      ['Normal', 'Hyperglycemia', 'Hypoglycemia'],
      {
        LINK: {
          orientation: 'UNDIRECTED',
          properties: 'Weight'
        }
      }
    )
    """
    # Consume the result so any server-side error is raised here.
    tx.run(query).consume()

# Use a session to execute the graph projection inside a managed write transaction
with driver.session() as session:
    session.execute_write(project_graph)

Define the graph algorithms.

In [18]:
def run_pagerank_centrality(tx):
    """Stream weighted PageRank scores for 'myGraph'.

    Returns a list of ``(SpecID, score)`` tuples in the order produced by the
    query (score descending, then name ascending).
    """
    cypher = """
    CALL gds.pageRank.stream('myGraph', {
        relationshipWeightProperty: 'Weight'
    })
    YIELD nodeId, score
    RETURN gds.util.asNode(nodeId).SpecID AS name, score
    ORDER BY score DESC, name ASC
    """
    scored = []
    for row in tx.run(cypher):
        scored.append((row["name"], row["score"]))
    return scored

In [19]:
def run_degree_centrality(tx):
    """Stream weighted degree centrality for 'myGraph'.

    Returns a list of ``(SpecID, score)`` tuples in the order produced by the
    query (score descending, then name ascending).
    """
    cypher = """
    CALL gds.degree.stream('myGraph', {
        relationshipWeightProperty: 'Weight'
    })
    YIELD nodeId, score
    RETURN gds.util.asNode(nodeId).SpecID AS name, score
    ORDER BY score DESC, name ASC
    """
    scored = []
    for row in tx.run(cypher):
        scored.append((row["name"], row["score"]))
    return scored

In [20]:
def run_eigenvector_centrality(tx):
    """Stream weighted eigenvector centrality for 'myGraph'.

    Returns a list of ``(SpecID, score)`` tuples in the order produced by the
    query (score descending, then name ascending).
    """
    cypher = """
    CALL gds.eigenvector.stream('myGraph', {
        relationshipWeightProperty: 'Weight'
    })
    YIELD nodeId, score
    RETURN gds.util.asNode(nodeId).SpecID AS name, score
    ORDER BY score DESC, name ASC
    """
    return list((row["name"], row["score"]) for row in tx.run(cypher))

In [21]:
def run_articlerank_centrality(tx):
    """Stream weighted ArticleRank scores for 'myGraph'.

    Returns a list of ``(SpecID, score)`` tuples in the order produced by the
    query (score descending, then name ascending).
    """
    cypher = """
    CALL gds.articleRank.stream('myGraph', {
        relationshipWeightProperty: 'Weight'
    })
    YIELD nodeId, score
    RETURN gds.util.asNode(nodeId).SpecID AS name, score
    ORDER BY score DESC, name ASC
    """
    return list((row["name"], row["score"]) for row in tx.run(cypher))

In [22]:
def run_label_propagation_algorithm(tx):
    """Stream weighted Label Propagation communities for 'myGraph'.

    Returns a list of ``(SpecID, community id)`` tuples in the order produced by
    the query (by community, then name).
    """
    cypher = """
    CALL gds.labelPropagation.stream('myGraph', { relationshipWeightProperty: 'Weight' })
    YIELD nodeId, communityId AS Community
    RETURN gds.util.asNode(nodeId).SpecID AS name, Community
    ORDER BY Community, name
    """
    assignments = []
    for row in tx.run(cypher):
        assignments.append((row["name"], row["Community"]))
    return assignments

In [23]:
def run_leiden_algorithm(tx):
    """Stream weighted Leiden communities for 'myGraph'.

    Returns a list of ``(SpecID, community id)`` tuples in the order produced by
    the query (by community, then name).
    """
    cypher = """
    CALL gds.leiden.stream('myGraph', { relationshipWeightProperty: 'Weight' })
    YIELD nodeId, communityId AS Community
    RETURN gds.util.asNode(nodeId).SpecID AS name, Community
    ORDER BY Community, name
    """
    assignments = []
    for row in tx.run(cypher):
        assignments.append((row["name"], row["Community"]))
    return assignments

In [24]:
def run_louvain_algorithm(tx):
    """Stream weighted Louvain communities for 'myGraph'.

    Returns a list of ``(SpecID, community id)`` tuples in the order produced by
    the query (by community, then name).
    """
    cypher = """
    CALL gds.louvain.stream('myGraph', { relationshipWeightProperty: 'Weight' })
    YIELD nodeId, communityId AS Community
    RETURN gds.util.asNode(nodeId).SpecID AS name, Community
    ORDER BY Community, name
    """
    return list((row["name"], row["Community"]) for row in tx.run(cypher))

In [25]:
def run_node2vec_algorithm(tx):
    """Stream weighted node2vec embeddings for 'myGraph'.

    Returns a list of ``(SpecID, embedding)`` tuples, one per node, in the order
    the query yields them.
    """
    cypher = """
    CALL gds.node2vec.stream('myGraph', { relationshipWeightProperty: 'Weight' })
    YIELD nodeId, embedding
    RETURN gds.util.asNode(nodeId).SpecID AS name, embedding
    """
    vectors = []
    for row in tx.run(cypher):
        vectors.append((row["name"], row["embedding"]))
    return vectors

In [26]:
def run_fastRP_algorithm(tx):
    """Stream weighted FastRP embeddings for 'myGraph'.

    The query requests 128-dimensional embeddings with a fixed random seed (1234).
    Returns a list of ``(SpecID, embedding)`` tuples, one per node, in the order
    the query yields them.
    """
    cypher = """
    CALL gds.fastRP.stream('myGraph',
        { relationshipWeightProperty: 'Weight',
         randomSeed:1234,
         embeddingDimension: 128
        }
    )
    YIELD nodeId, embedding
    RETURN gds.util.asNode(nodeId).SpecID AS name, embedding
    """
    vectors = []
    for row in tx.run(cypher):
        vectors.append((row["name"], row["embedding"]))
    return vectors

Execute the algorithms and store the results in a Dataframe.

In [27]:
# Use a session to execute the queries and retrieve the results.
# Each call runs one algorithm in its own managed read transaction and returns a list of tuples.
# run_node2vec_algorithm is defined above but is not executed in this cell.
with driver.session() as session:
    pagerank_results = session.execute_read(run_pagerank_centrality)
    degree_results = session.execute_read(run_degree_centrality)
    eigenvector_results = session.execute_read(run_eigenvector_centrality)
    articlerank_results = session.execute_read(run_articlerank_centrality)
    label_propagation_results = session.execute_read(run_label_propagation_algorithm)
    leiden_results = session.execute_read(run_leiden_algorithm)
    louvain_results = session.execute_read(run_louvain_algorithm)

In [28]:
# One two-column frame per algorithm: 'name' holds the SpecID returned by the query,
# the second column holds that algorithm's score or community id.
pagerank_df = pd.DataFrame(pagerank_results, columns=['name', 'PageRank'])
degree_df = pd.DataFrame(degree_results, columns=['name', 'DegreeCentrality'])
eigenvector_df = pd.DataFrame(eigenvector_results, columns=['name', 'EigenvectorCentrality'])
articlerank_df = pd.DataFrame(articlerank_results, columns=['name', 'ArticleRank'])
label_propagation_df = pd.DataFrame(label_propagation_results, columns=['name', 'LabelPropagation'])
leiden_df = pd.DataFrame(leiden_results, columns=['name', 'Leiden'])
louvain_df = pd.DataFrame(louvain_results, columns=['name', 'Louvain'])

In [29]:
# Left-join every metric onto the PageRank frame, keyed on the spectrum name.
# The loop variable is deliberately not called `df`, so iterating does not rebind
# the notebook-level `df` as a side effect.
# NOTE(review): label_propagation_df is built in the previous cell but is not part
# of this merge — confirm the omission is intentional.
merged_df = pagerank_df
for metric_df in [degree_df, eigenvector_df, articlerank_df, leiden_df, louvain_df]:
    merged_df = pd.merge(merged_df, metric_df, on=['name'], how='left')

In [30]:
# Rebinds `df` (which held the spectra loaded at the top of the notebook) to the merged
# graph metrics, with 'name' renamed to 'SpecID' for the join with the node table below.
df = merged_df.rename(columns={'name' : 'SpecID'})
df.head()

Unnamed: 0,SpecID,PageRank,DegreeCentrality,EigenvectorCentrality,ArticleRank,Leiden,Louvain
0,210504-1-29,8.063099,2.118526e-10,0.0,0.15,127,2394
1,210505-1-11,6.324768,0.02060467,0.0,0.150001,70,2526
2,210510-2-43,5.51072,0.1276318,0.0,0.150005,25,2648
3,210505-1-10,5.496797,0.02045747,0.0,0.150001,70,2526
4,210504-1-33,5.463643,9.491556e-08,0.0,0.15,67,2398


In [31]:
# Fetch the FastRP embeddings in a managed read transaction.
with driver.session() as session:
    fastRP_results = session.execute_read(run_fastRP_algorithm)

fastRP_df = pd.DataFrame(fastRP_results, columns=['SpecID', 'embeddings'])

# Spread each embedding list over one column per dimension, named embedding_0, embedding_1, ...
embeddings_df = pd.DataFrame(list(fastRP_df['embeddings']), index=fastRP_df.index)
embeddings_df.columns = ['embedding_' + str(i) for i in range(len(embeddings_df.columns))]

# Keep SpecID and place the expanded embedding columns next to it
specid_only_df = fastRP_df.drop(columns=['embeddings'])
fastRP_df = pd.concat([specid_only_df, embeddings_df], axis=1)

# NOTE(review): this writes under '../../data', whereas the raw spectra are read from
# '../data' at the top of the notebook — confirm the target directory is intended.
fastRP_df.to_csv('../../data/fastRP_embeddings.csv', index=False)
fastRP_df.head()

Unnamed: 0,SpecID,embedding_0,embedding_1,embedding_2,embedding_3,embedding_4,embedding_5,embedding_6,embedding_7,embedding_8,...,embedding_118,embedding_119,embedding_120,embedding_121,embedding_122,embedding_123,embedding_124,embedding_125,embedding_126,embedding_127
0,201210-1-00,-0.126921,0.029483,0.313777,0.217399,0.141151,-0.116146,0.036769,-0.075381,0.028979,...,-0.197569,-0.007299,-0.104828,0.108536,0.31062,-0.044107,0.028253,-0.101956,-0.004545,-0.022226
1,201210-1-01,-0.124933,0.028509,0.312783,0.214692,0.141758,-0.115651,0.036883,-0.077285,0.030259,...,-0.197404,-0.007553,-0.102692,0.10956,0.310848,-0.044121,0.027004,-0.1036,-0.003526,-0.023261
2,201210-1-02,-0.12103,0.026722,0.311113,0.209621,0.14296,-0.114374,0.036854,-0.08079,0.032723,...,-0.197173,-0.008203,-0.098837,0.111467,0.311111,-0.044168,0.024581,-0.106579,-0.001555,-0.024959
3,201210-1-03,-0.100998,0.012337,0.301661,0.183001,0.148147,-0.110117,0.034071,-0.099429,0.046803,...,-0.195584,-0.005931,-0.080722,0.12095,0.312045,-0.049881,0.012532,-0.125008,0.004476,-0.031517
4,201210-1-04,-0.122028,0.026827,0.311272,0.210577,0.142558,-0.115046,0.036947,-0.080157,0.032158,...,-0.197155,-0.007748,-0.099646,0.111094,0.311228,-0.044407,0.02524,-0.106179,-0.002234,-0.024691


Now that we have all our graph metrics, we need to join back with our nodes dataframe in order to get the status column back and perform some quick cleaning

In [32]:
# Read back the node table written earlier; it maps "SpecID:ID" to ":LABEL".
status_df = pd.read_csv(f"{neo4j_directory}/import/nodes.csv")

# Centrality/community metrics joined with the node labels.
# SurID is derived by removing the last three characters of SpecID
# (e.g. '201210-1-00' -> '201210-1').
graph_df = (
    pd.merge(df, status_df, left_on="SpecID", right_on="SpecID:ID", how="inner")
    .drop(columns=["SpecID:ID"])
    .assign(SurID=lambda frame: frame['SpecID'].str[:-3])
    .set_index("SpecID")
    .rename(columns={":LABEL":"Status"})
)

# FastRP embeddings joined with the node labels, prepared the same way.
fast_graph_df = (
    pd.merge(fastRP_df, status_df, left_on="SpecID", right_on="SpecID:ID", how="inner")
    .drop(columns=["SpecID:ID"])
    .assign(SurID=lambda frame: frame['SpecID'].str[:-3])
    .set_index("SpecID")
    .rename(columns={":LABEL":"Status"})
)

## Machine Learning Using Graph Metrics GroupKFold

## Centrality Metrics

In [33]:
# Preview the centrality and community features together with Status and SurID.
graph_df.head()

Unnamed: 0_level_0,PageRank,DegreeCentrality,EigenvectorCentrality,ArticleRank,Leiden,Louvain,Status,SurID
SpecID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
210504-1-29,8.063099,2.118526e-10,0.0,0.15,127,2394,Hypoglycemia,210504-1
210505-1-11,6.324768,0.02060467,0.0,0.150001,70,2526,Hypoglycemia,210505-1
210510-2-43,5.51072,0.1276318,0.0,0.150005,25,2648,Hyperglycemia,210510-2
210505-1-10,5.496797,0.02045747,0.0,0.150001,70,2526,Hypoglycemia,210505-1
210504-1-33,5.463643,9.491556e-08,0.0,0.15,67,2398,Hypoglycemia,210504-1


In [34]:
from sklearn.ensemble import ExtraTreesClassifier
# A fixed random_state makes the classifier's randomness reproducible between runs.
et = ExtraTreesClassifier(random_state=1234)
# evaluate_model is not defined in this notebook (presumably provided by the
# Cleaning_and_Evaluation star import); the output below shows that it reports
# cross-validated accuracy, precision, recall and F1-score.
evaluate_model(graph_df, et)

ExtraTreesClassifier Cross-Validation Accuracy: 0.3573 +/- 0.0585
ExtraTreesClassifier Cross-Validation Precision: 0.3579 +/- 0.0540
ExtraTreesClassifier Cross-Validation Recall: 0.3584 +/- 0.0801
ExtraTreesClassifier Cross-Validation F1-Score: 0.3411 +/- 0.0625


## FastRP

In [35]:
# Preview the FastRP embedding features together with Status and SurID.
fast_graph_df.head()

Unnamed: 0_level_0,embedding_0,embedding_1,embedding_2,embedding_3,embedding_4,embedding_5,embedding_6,embedding_7,embedding_8,embedding_9,...,embedding_120,embedding_121,embedding_122,embedding_123,embedding_124,embedding_125,embedding_126,embedding_127,Status,SurID
SpecID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
201210-1-00,-0.126921,0.029483,0.313777,0.217399,0.141151,-0.116146,0.036769,-0.075381,0.028979,0.141409,...,-0.104828,0.108536,0.31062,-0.044107,0.028253,-0.101956,-0.004545,-0.022226,Normal,201210-1
201210-1-01,-0.124933,0.028509,0.312783,0.214692,0.141758,-0.115651,0.036883,-0.077285,0.030259,0.141053,...,-0.102692,0.10956,0.310848,-0.044121,0.027004,-0.1036,-0.003526,-0.023261,Normal,201210-1
201210-1-02,-0.12103,0.026722,0.311113,0.209621,0.14296,-0.114374,0.036854,-0.08079,0.032723,0.140436,...,-0.098837,0.111467,0.311111,-0.044168,0.024581,-0.106579,-0.001555,-0.024959,Normal,201210-1
201210-1-03,-0.100998,0.012337,0.301661,0.183001,0.148147,-0.110117,0.034071,-0.099429,0.046803,0.131117,...,-0.080722,0.12095,0.312045,-0.049881,0.012532,-0.125008,0.004476,-0.031517,Normal,201210-1
201210-1-04,-0.122028,0.026827,0.311272,0.210577,0.142558,-0.115046,0.036947,-0.080157,0.032158,0.140224,...,-0.099646,0.111094,0.311228,-0.044407,0.02524,-0.106179,-0.002234,-0.024691,Normal,201210-1


In [36]:
# Evaluate the same ExtraTrees classifier object on the FastRP embedding features only.
evaluate_model(fast_graph_df, et)

ExtraTreesClassifier Cross-Validation Accuracy: 0.4403 +/- 0.0876
ExtraTreesClassifier Cross-Validation Precision: 0.4411 +/- 0.0893
ExtraTreesClassifier Cross-Validation Recall: 0.4510 +/- 0.1147
ExtraTreesClassifier Cross-Validation F1-Score: 0.4257 +/- 0.0980


## FastRP and Centrality

In [37]:
# Combine both feature sets. 'SpecID' is the index name of both frames, while 'Status'
# and 'SurID' are ordinary columns; the inner join keeps only spectra present in both.
joined_graph = pd.merge(graph_df, fast_graph_df, on=['SpecID', 'Status', 'SurID'], how='inner')

In [38]:
# Preview the combined centrality and embedding features.
joined_graph.head()

Unnamed: 0_level_0,PageRank,DegreeCentrality,EigenvectorCentrality,ArticleRank,Leiden,Louvain,Status,SurID,embedding_0,embedding_1,...,embedding_118,embedding_119,embedding_120,embedding_121,embedding_122,embedding_123,embedding_124,embedding_125,embedding_126,embedding_127
SpecID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
210504-1-29,8.063099,2.118526e-10,0.0,0.15,127,2394,Hypoglycemia,210504-1,0.021751,-0.026518,...,-0.136387,-0.235179,-0.145412,-0.02277,-0.001581,-0.024655,-0.054248,-0.004574,0.02615,-0.092724
210505-1-11,6.324768,0.02060467,0.0,0.150001,70,2526,Hypoglycemia,210505-1,-0.022296,-0.001723,...,-0.168302,0.171687,0.171312,0.001063,-0.172891,-0.14789,-0.000173,-0.003619,8.1e-05,-0.001271
210510-2-43,5.51072,0.1276318,0.0,0.150005,25,2648,Hyperglycemia,210510-2,-0.308912,-0.22757,...,0.173873,-0.038644,-0.146218,0.054992,-0.019909,0.107205,0.274905,-0.034321,-0.240103,0.348369
210505-1-10,5.496797,0.02045747,0.0,0.150001,70,2526,Hypoglycemia,210505-1,-0.022321,-0.001082,...,-0.16949,0.171627,0.17138,0.000765,-0.172349,-0.148217,-0.000154,-0.002431,8.1e-05,-0.000787
210504-1-33,5.463643,9.491556e-08,0.0,0.15,67,2398,Hypoglycemia,210504-1,-0.15326,0.150964,...,0.010562,-0.000109,-0.003021,0.004249,0.005183,-0.163664,0.160483,-0.313015,-0.001889,0.164664


In [39]:
# Evaluate the same ExtraTrees classifier object on the combined feature set.
evaluate_model(joined_graph, et)

ExtraTreesClassifier Cross-Validation Accuracy: 0.4443 +/- 0.0785
ExtraTreesClassifier Cross-Validation Precision: 0.4485 +/- 0.0780
ExtraTreesClassifier Cross-Validation Recall: 0.4534 +/- 0.1052
ExtraTreesClassifier Cross-Validation F1-Score: 0.4310 +/- 0.0884


In [40]:
def delete_projection(tx):
    """Drop the GDS in-memory projection named 'myGraph'.

    Parameters
    ----------
    tx
        A transaction object exposing ``run(query)``.
    """
    drop_statement = """
    CALL gds.graph.drop('myGraph')
    """
    tx.run(drop_statement)

# Use a session to drop the graph projection inside a managed write transaction
with driver.session() as session:
    session.execute_write(delete_projection)

In [41]:
# Close the driver now that all queries have finished.
close_driver()