In [1]:
import torch
from community import community_louvain
from torch_geometric.data import Data, InMemoryDataset
import random
import networkx as nx
import math
import pandas as pd
from torch_geometric.utils import from_networkx
import numpy as np


In [2]:
with open("../data/HMIN_edgelist.csv", 'r') as data:
    G = nx.parse_edgelist(data, delimiter=',', create_using=nx.Graph(), nodetype=int)
# autism df
autism_df = pd.read_csv('../data/labeled_genes.csv')
autism_df = autism_df.drop_duplicates(subset='entrez_id', keep="last")
autism_nodes = autism_df['entrez_id'].to_list()

G = G.subgraph(autism_nodes)



In [3]:
autism_df['label'][autism_df['confidence']==0.75] = 2
autism_df['label'][autism_df['confidence'] == 0.5] = 3
# y label
y = torch.tensor(autism_df['label'].to_list())


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  autism_df['label'][autism_df['confidence'] == 0.5] = 3


In [4]:
# feature: node degree
degrees = torch.tensor([val for (node, val) in G.degree()], dtype=torch.float)
# closeness
closeness = torch.tensor([val for (node, val) in nx.closeness_centrality(G).items()], dtype=torch.float)
#  Betweenness
betweenness =  torch.tensor([val for (node, val) in nx.betweenness_centrality(G).items()], dtype=torch.float)
# feature: eigenvector_centrality
ec = torch.tensor([val for (node, val) in nx.eigenvector_centrality(G).items()], dtype=torch.float)
# feature: page rank
pr = torch.tensor([val for (node, val) in nx.pagerank(G, alpha=0.9).items()], dtype=torch.float)

x = torch.stack((degrees, closeness, betweenness, pr, ec)).t()


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score


In [6]:
X = x.numpy()
y = y.numpy()

In [7]:
X_train,X_test,y_train,y_test=train_test_split(X, y, test_size=0.2, random_state=0)


Random Forest

In [8]:
model_rf = RandomForestClassifier(n_estimators=100, random_state=0)
model_rf.fit(X_train,y_train)

RandomForestClassifier(random_state=0)

In [9]:
y_pred = model_rf.predict(X_test)

In [10]:
accuracy_score(y_test, y_pred)

0.5286103542234333

In [11]:
from sklearn.metrics import classification_report
print(classification_report(y_pred, y_test))

              precision    recall  f1-score   support

           0       0.81      0.59      0.68       303
           1       0.00      0.00      0.00         1
           2       0.00      0.00      0.00         3
           3       0.17      0.27      0.21        60

    accuracy                           0.53       367
   macro avg       0.25      0.21      0.22       367
weighted avg       0.70      0.53      0.60       367



In [12]:
all_accuracies = cross_val_score(estimator=model_rf, X=X_train, y=y_train, cv=5, n_jobs=-1)
print(all_accuracies.mean())

0.5572055443337744


SVM


In [13]:
from sklearn.svm import SVC

In [15]:
model_SVM = SVC(kernel='rbf')
model_SVM.fit(X_train,y_train)

SVC()

In [16]:
y_pred = model_SVM.predict(X_test)

In [17]:
accuracy_score(y_test, y_pred)

0.5885558583106267

In [18]:
from sklearn.metrics import classification_report
print(classification_report(y_pred, y_test))

              precision    recall  f1-score   support

           0       0.99      0.60      0.74       363
           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00         0
           3       0.00      0.00      0.00         4

    accuracy                           0.59       367
   macro avg       0.25      0.15      0.19       367
weighted avg       0.98      0.59      0.73       367



  _warn_prf(average, modifier, msg_start, len(result))


In [19]:
all_accuracies = cross_val_score(estimator=model_SVM, X=X_train, y=y_train, cv=5, n_jobs=-1)
print(all_accuracies.mean())

0.6035383436651112
