In [27]:
from src.mock_graph import get_mock_wheel_graph, get_mock_fully_connected_graph, get_mock_random_graph
from src.graph_from_data import get_facebook_dataframe_graph, get_deezer_dataframe_graph, get_facebook_circles_data
from src.fastgraph import FastGraph, FastGraphSettings
from src.graph import Graph
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
import numpy as np
import numpy as np
import pandas as pd
import numpy as np
import networkx as nx


In [29]:
# Facebook graph loaded through the project helper.
# NOTE(review): `cut=-1` presumably means "do not truncate the data" — confirm in src.graph_from_data.
fb_graph = get_facebook_dataframe_graph(cut=-1)

# FastGraph configuration for the Facebook run; both render flags are off.
# NOTE(review): the precise meaning of each field is defined by FastGraphSettings — confirm there.
fb_settings = FastGraphSettings(
    render=False,
    render_isomorphic_graphs=False,
    render_auto_walk_delay_seconds=0.0001,
    render_x_isomorphisms_per_column=6,
    subgraph_size=5,
    letters_per_sentence=100,
    sentences_to_generate=1000,
    spacebar_probability=0.1,
    end_of_sentence_symbol='\n',
)

In [30]:
# Build the FastGraph object for the Facebook graph. In the recorded run this
# step displayed a 1000-step progress bar and took a little over four minutes.
fb_fastgraph = FastGraph(fb_graph, fb_settings, "facebook_dataframe_graph")

  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [04:15<00:00,  3.91it/s]


In [31]:
# Fit the model held by the FastGraph object; the cells below read
# `embeddings_dict` and query nearest neighbours afterwards.
# NOTE(review): presumably this trains the node-embedding model — confirm in src.fastgraph.
fb_fastgraph.fit_model()

In [47]:
# Neighbour lookup for node "2". The recorded output is a list of
# [node_id, score] pairs in ascending score order, starting with the query
# node itself at 0.0.
# NOTE(review): the score is presumably a distance in embedding space — confirm in src.fastgraph.
# NOTE(review): this cell carries execution count 47 although it sits between
# cells 31 and 32, and it prints a very long list; consider re-running in
# order and displaying only the first few entries.
fb_fastgraph.get_nearest_neighbors("2")

[['2', 0.0],
 ['65', 0.0037481575],
 ['250', 0.0037578107],
 ['169', 0.0037771377],
 ['229', 0.0037823424],
 ['342', 0.0038213362],
 ['332', 0.0038268624],
 ['347', 0.0038406416],
 ['146', 0.0038473562],
 ['88', 0.0038545572],
 ['121', 0.0038604063],
 ['222', 0.0038667014],
 ['283', 0.00387672],
 ['130', 0.0038828305],
 ['303', 0.0038894108],
 ['285', 0.0039020653],
 ['50', 0.0039059608],
 ['77', 0.003910312],
 ['100', 0.003910835],
 ['16', 0.0039116982],
 ['122', 0.003914057],
 ['178', 0.003931409],
 ['269', 0.003933113],
 ['158', 0.0039457725],
 ['54', 0.0039556473],
 ['21', 0.0039556627],
 ['63', 0.003958236],
 ['236', 0.00395875],
 ['38', 0.0039629964],
 ['87', 0.0039640795],
 ['260', 0.0039659073],
 ['106', 0.0039662947],
 ['235', 0.0039663217],
 ['48', 0.00396697],
 ['10', 0.003969231],
 ['126', 0.003971785],
 ['248', 0.0039727655],
 ['297', 0.003974018],
 ['19', 0.003977089],
 ['165', 0.0039775306],
 ['79', 0.0039806613],
 ['119', 0.0039840112],
 ['53', 0.003985511],
 ['203', 0.

In [32]:
def _numeric_node_id(item):
    """Sort key: the dictionary key of a (key, value) pair, converted to int."""
    return int(item[0])


# Re-order the embeddings by the integer value of their keys.
embeddings_dict = dict(sorted(fb_fastgraph.embeddings_dict.items(), key=_numeric_node_id))

In [33]:
# Load the circle records once. The original code called the loader twice
# (once for the ids, once for the labels), which repeats the work and would
# silently misalign ids and labels if two loads ever differed.
# Each record is indexed as record[0] (node id) and record[1] (label).
circles_data = list(get_facebook_circles_data())
indices = np.array([int(record[0]) for record in circles_data])
labels = np.array([record[1] for record in circles_data])

# Keep only the nodes that have an embedding, so that X and y stay row-aligned.
# Embedding keys are looked up as strings, hence the str() conversion.
X = []
y = []
for idx, label in zip(indices, labels):
    key = str(idx)
    if key in embeddings_dict:
        X.append(embeddings_dict[key])
        y.append(label)

In [44]:
# Range check: smallest embedding component across all rows and columns of X.
column_minima = pd.DataFrame(X).min()
min(column_minima)

-0.21491500735282898

In [43]:
# Range check: largest embedding component across all rows and columns of X.
column_maxima = pd.DataFrame(X).max()
max(column_maxima)

0.23014037311077118

In [35]:
# Hold out 33% of the labelled nodes for evaluation; the fixed random_state
# keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Only construct the classifier here. The original line also called .fit(),
# but the following cell fits the same estimator on the same data, so the
# model was trained twice for no benefit.
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases —
# confirm against the version this project pins.
clf = LogisticRegression(random_state=0, multi_class='multinomial')

In [36]:
# Fit the logistic-regression classifier on the training split.
clf.fit(X_train,y_train)

In [37]:
# Predict labels for the held-out split and report plain accuracy.
# The recorded run scored about 0.36.
y_pred = clf.predict(X_test)
accuracy_score(y_test, y_pred)

0.358695652173913

## Cora Dataset

In [3]:
# Citation edge list: tab-separated, no header row, columns named explicitly.
citations = pd.read_csv("data/cora.cites", sep="\t", header=None, names=["target", "source"])

# Paper table: an id column, 1433 term columns, then the subject label.
term_columns = [f"term_{idx}" for idx in range(1433)]
column_names = ["paper_id", *term_columns, "subject"]
papers = pd.read_csv("data/cora.content", sep="\t", header=None, names=column_names)

In [4]:
# Build a networkx graph from the citation edge list and wrap it in the
# project's Graph type, with the connectivity check switched off.
cora_graph_nx = nx.from_pandas_edgelist(citations)
cora_graph = Graph(cora_graph_nx, verify_connected=False)

# Subject labels of the papers whose id is a node of the graph, in `papers` row order.
is_graph_node = papers["paper_id"].isin(list(cora_graph_nx.nodes))
subjects = list(papers.loc[is_graph_node, "subject"])

# FastGraph configuration for the Cora run (same values as the Facebook run above).
# NOTE(review): the precise meaning of each field is defined by FastGraphSettings — confirm there.
cora_settings = FastGraphSettings(
    render=False,
    render_isomorphic_graphs=False,
    render_auto_walk_delay_seconds=0.0001,
    render_x_isomorphisms_per_column=6,
    subgraph_size=5,
    letters_per_sentence=100,
    sentences_to_generate=1000,
    spacebar_probability=0.1,
    end_of_sentence_symbol='\n',
)

In [None]:
# Build the FastGraph object for the Cora graph. In the recorded run this
# step displayed a 1000-step progress bar and took just under two minutes.
cora_fastgraph = FastGraph(cora_graph, cora_settings, "cora_dataframe_graph")

  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [01:47<00:00,  9.32it/s]


In [6]:
# Fit the model held by the Cora FastGraph object; the cells below read
# `embeddings_dict` and query nearest neighbours afterwards.
# NOTE(review): presumably this trains the node-embedding model — confirm in src.fastgraph.
cora_fastgraph.fit_model()

In [8]:
# Tabulate the Cora embeddings: dictionary keys become the row index and the
# recorded output shows 100 numbered columns (0..99).
# NOTE(review): every value visible in the recorded output is 0.0 — the Cora
# embeddings look degenerate and should be investigated before further use.
# NOTE(review): `X` previously held the Facebook feature list; it is overwritten here.
X = pd.DataFrame.from_dict(cora_fastgraph.embeddings_dict, orient = 'index')
X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
1033,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
35,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
103482,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
103515,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1050679,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1140231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
853114,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
853155,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
853115,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
# Compact preview of X: drop the row labels at positions 5..2702 and the
# column labels at positions 5..94, keeping only the outer rows and columns.
rows_to_hide = X.index[5:2703]
cols_to_hide = X.columns[5:95]
X_cropped = X.drop(rows_to_hide).drop(cols_to_hide, axis=1)
X_cropped

Unnamed: 0,0,1,2,3,4,95,96,97,98,99
1033,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
35,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
103482,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
103515,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1050679,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1140231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
853114,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
853155,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
853115,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
853118,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
# Neighbour lookup for node "315266".
# NOTE(review): every score in the recorded output is 0.0 and the query node
# is not listed first, which is consistent with the all-zero embedding table
# displayed above — the ranking carries no information until that is fixed.
cora_fastgraph.get_nearest_neighbors("315266")

[['1033', 0.0],
 ['35', 0.0],
 ['103482', 0.0],
 ['103515', 0.0],
 ['1050679', 0.0],
 ['1103960', 0.0],
 ['1103985', 0.0],
 ['1109199', 0.0],
 ['1112911', 0.0],
 ['1113438', 0.0],
 ['1113831', 0.0],
 ['1114331', 0.0],
 ['1117476', 0.0],
 ['1119505', 0.0],
 ['1119708', 0.0],
 ['1120431', 0.0],
 ['1123756', 0.0],
 ['1125386', 0.0],
 ['1127430', 0.0],
 ['1127913', 0.0],
 ['1128204', 0.0],
 ['1128227', 0.0],
 ['1128314', 0.0],
 ['1128453', 0.0],
 ['1128945', 0.0],
 ['1128959', 0.0],
 ['1128985', 0.0],
 ['1129018', 0.0],
 ['1129027', 0.0],
 ['1129573', 0.0],
 ['1129683', 0.0],
 ['1129778', 0.0],
 ['1130847', 0.0],
 ['1130856', 0.0],
 ['1131116', 0.0],
 ['1131360', 0.0],
 ['1131557', 0.0],
 ['1131752', 0.0],
 ['1133196', 0.0],
 ['1133338', 0.0],
 ['1136814', 0.0],
 ['1137466', 0.0],
 ['1152421', 0.0],
 ['1152508', 0.0],
 ['1153065', 0.0],
 ['1153280', 0.0],
 ['1153577', 0.0],
 ['1153853', 0.0],
 ['1153943', 0.0],
 ['1154176', 0.0],
 ['1154459', 0.0],
 ['116552', 0.0],
 ['12576', 0.0],
 ['128