In [93]:
import numpy as np
import pandas as pd
import json
import random

In [94]:
# Seed both random number generators (stdlib and numpy's global one) with the
# same value so the randomly generated data below is reproducible.
seed = 0
random.seed(seed)
np.random.seed(seed)

# Dataset locations are read from a JSON file one directory above the notebook.
with open('../dataset_path.json') as file:
    paths = json.load(file)

cora_paths = paths['cora']
raw_path = cora_paths['raw']
processed_path = cora_paths['processed']

# Both raw files are tab-separated and have no header row.
citation_network = pd.read_csv(
    raw_path + '/cora_network.cites',
    sep='\t',
    header=None,
)
# Only columns 0 and 1434 are loaded; every other column of the content file
# is skipped.
# NOTE(review): presumably column 0 is the node id and 1434 the class label -
# confirm against the Cora file layout.
node_features = pd.read_csv(
    raw_path + '/cora_features.content',
    sep='\t',
    header=None,
    usecols=[0, 1434],
)


In [95]:
def generate_features(embedding_dim = 128):
    """Return one random feature vector as a Python list.

    The values are drawn with ``np.random.randn``, i.e. from the standard
    normal distribution using numpy's global random state, so results are
    reproducible only if that state has been seeded beforehand.

    Parameters
    ----------
    embedding_dim : int, default 128
        Number of values in the returned vector.

    Returns
    -------
    list
        ``embedding_dim`` numpy float scalars.
    """
    samples = np.random.randn(embedding_dim)
    # Iterating the array keeps the elements as numpy scalars, exactly like
    # calling list() on it.
    return [value for value in samples]

# Draw one random feature vector per node (one per row of node_features).
# Vectors are generated in row order, so with a fixed seed the values are the
# same on every fresh run of the notebook.
features = [generate_features() for _ in range(len(node_features))]

# Select the two loaded columns by position and name them explicitly, then
# assign the feature column by its final name. This keeps the cell safe to
# re-run: the previous version indexed column 0 and appended a 'features'
# column before renaming, which raised on a second execution because the
# columns had already been renamed.
node_features = node_features.iloc[:, :2].copy()
node_features.columns = ['node', 'class']
node_features['random_feature'] = features

node_features

Unnamed: 0,node,class,random_feature
0,31336,Neural_Networks,"[1.764052345967664, 0.4001572083672233, 0.9787379841057392, 2.240893199201458, 1.8675579901499675, -0.977277879876411, 0.9500884175255894, -0.1513572082976979, -0.10321885179355784, 0.41059850193837233, 0.144043571160878, 1.454273506962975, 0.7610377251469934, 0.12167501649282841, 0.44386323274542566, 0.33367432737426683, 1.4940790731576061, -0.20515826376580087, 0.31306770165090136, -0.8540957393017248, -2.5529898158340787, 0.6536185954403606, 0.8644361988595057, -0.7421650204064419, 2.2697546239876076, -1.4543656745987648, 0.04575851730144607, -0.1871838500258336, 1.5327792143584575, 1.469358769900285, 0.1549474256969163, 0.37816251960217356, -0.8877857476301128, -1.980796468223927, -0.3479121493261526, 0.15634896910398005, 1.2302906807277207, 1.2023798487844113, -0.3873268174079523, -0.30230275057533557, -1.0485529650670926, -1.4200179371789752, -1.7062701906250126, 1.9507753952317897, -0.5096521817516535, -0.4380743016111864, -1.2527953600499262, 0.7774903558319101, -1.6138978475579515, -0.2127402802139687, -0.8954665611936756, 0.386902497859262, -0.510805137568873, -1.180632184122412, -0.028182228338654868, 0.42833187053041766, 0.06651722238316789, 0.3024718977397814, -0.6343220936809636, -0.3627411659871381, -0.672460447775951, -0.3595531615405413, -0.813146282044454, -1.7262826023316769, 0.17742614225375283, -0.4017809362082619, -1.6301983469660446, 0.4627822555257742, -0.9072983643832422, 0.05194539579613895, 0.7290905621775369, 0.12898291075741067, 1.1394006845433007, -1.2348258203536526, 0.402341641177549, -0.6848100909403132, -0.8707971491818818, -0.5788496647644155, -0.31155253212737266, 0.05616534222974544, -1.1651498407833565, 0.9008264869541871, 0.46566243973045984, -1.5362436862772237, 1.4882521937955997, 1.8958891760305832, 1.1787795711596507, -0.17992483581235091, -1.0707526215105425, 1.0544517269311366, -0.40317694697317963, 1.2224450703824274, 0.2082749780768603, 0.9766390364837128, 0.3563663971744019, 
0.7065731681919482, 0.010500020720820478, 1.7858704939058352, 0.12691209270361992, 0.40198936344470165, ...]"
1,1061127,Rule_Learning,"[0.6722947570124355, 0.40746183624111043, -0.7699160744453164, 0.5392491912918173, -0.6743326606573761, 0.03183055827435118, -0.635846078378881, 0.6764332949464997, 0.5765908166149409, -0.20829875557799488, 0.3960067126616453, -1.0930615087305058, -1.4912575927056055, 0.4393917012645369, 0.16667349537252904, 0.6350314368921064, 2.383144774863942, 0.9444794869904138, -0.9128222254441586, 1.117016288095853, -1.3159074105115212, -0.461584604814709, -0.06824160532463124, 1.7133427216493666, -0.7447548220484399, -0.8264385386590144, -0.0984525244254323, -0.6634782863621074, 1.126635922106507, -1.0799315083634233, -1.1474686524111024, -0.43782004474443403, -0.4980324506923049, 1.9295320538169858, 0.9494208069257608, 0.0875512413851909, -1.225435518830168, 0.8443629764015471, -1.0002153473895647, -1.5447710967776116, 1.1880297923523018, 0.3169426119248496, 0.920858823780819, 0.3187276529430212, 0.8568306119026912, -0.6510255933001469, -1.0342428417844647, 0.681594518281627, -0.8034096641738411, -0.6895497777502005, -0.45553250351734315, 0.01747915902505673, -0.35399391125348395, -1.3749512934180188, -0.6436184028328905, -2.2234031522244266, 0.6252314510271875, -1.6020576556067476, -1.1043833394284506, 0.052165079260974405, -0.7395629963913133, 1.5430145954067358, -1.2928569097234486, 0.26705086934918293, -0.0392828182274956, -1.1680934977411974, 0.5232766605317537, -0.1715463312222481, 0.7717905512136674, 0.8235041539637314, 2.16323594928069, 1.336527949436392, -0.3691818379424436, -0.2393791775759264, 1.0996595958871132, 0.6552637307225978, 0.640131526097592, -1.6169560443108344, -0.024326124398935636, -0.7380309092056887, 0.27992459904323824, -0.09815038964295794, 0.9101789080925919, 0.31721821519130206, 0.7863279621089762, -0.46641909673594306, -0.9444462559182504, -0.41004969320254847, -0.017020413861440594, 0.3791517355550818, 2.259308950690852, -0.04225715166064269, -0.955945000492777, -0.34598177569938643, -0.4635959746460942, 
0.4814814737734622, -1.5407970144446248, 0.06326199420033171, 0.1565065379653756, 0.23218103620027578, ...]"
2,1106406,Reinforcement_Learning,"[-0.7255973784635843, -1.3833639553950554, -1.582938397335082, 0.6103793791072052, -1.188859257784029, -0.5068163542986875, -0.5963140384505081, -0.05256729626954629, -1.936279805846507, 0.18877859679382855, 0.5238910238342056, 0.08842208704466141, -0.3108861716984717, 0.09740016626878341, 0.3990463456401302, -2.77259275642665, 1.9559123082506942, 0.39009332268792646, -0.65240858238702, -0.3909533751876011, 0.49374177734918845, -0.11610393903436653, -2.0306844677814944, 2.0644928613593194, -0.11054065723247261, 1.0201727117157997, -0.6920498477843912, 1.5363770542457977, 0.28634368889227957, 0.6088438344754508, -1.0452533661469547, 1.2111452896827009, 0.6898181645347884, 1.3018462295649984, -0.6280875596415789, -0.4810271184607877, 2.303916697683942, -1.0600158227215473, -0.13594970067832082, 1.1368913626026953, 0.0977249677148556, 0.5829536797532936, -0.3994490292628752, 0.37005588784751875, -1.3065268517353166, 1.658130679618188, -0.11816404512856976, -0.6801782039968504, 0.6663830820319143, -0.4607197873885533, -1.3342584714027534, -1.3467175057975553, 0.6937731526901325, -0.1595734381462669, -0.13370155966843916, 1.0777438059762627, -1.1268258087567435, -0.7306777528648248, -0.38487980918127546, 0.094351589317074, -0.042171451290578935, -0.2868871923899076, -0.0616264020956474, -0.10730527629117469, -0.7196043885517929, -0.8129929885540773, 0.2745163577239395, -0.8909150829955279, -1.1573552591908536, -0.3122922511256933, -0.1576670161638159, 2.2567234972982093, -0.7047002758562337, 0.9432607249694948, 0.7471883342046318, -1.188944955203736, 0.7732529774025997, -1.1838806401933177, -2.659172237996741, 0.6063195243593807, -1.7558905834377194, 0.45093446180591484, -0.6840108977372166, 1.6595507961898721, 1.068509399316009, -0.45338580385138766, -0.6878376110286823, -1.2140774030941206, -0.4409226322925914, -0.2803554951845091, -0.3646935443916854, 0.15670385527236397, 0.5785214977288784, 0.349654456993174, -0.764143923906443, 
-1.4377914738015785, 1.3645318481024713, -0.6894491845499376, -0.6522935999350191, -0.5211893123011109, ...]"
3,13195,Reinforcement_Learning,"[0.0386305518401881, -1.6567151023219537, -0.9855107376841507, -1.4718350074635869, 1.6481349322075596, 0.16422775548733395, 0.5672902778526694, -0.2226751005151545, -0.35343174875719907, -1.6164741886510325, -0.2918373627478628, -0.7614922118116233, 0.8579239242923363, 1.1411018666575734, 1.4665787155741776, 0.852551939461232, -0.5986539369229861, -1.1158969859603944, 0.7666631816450861, 0.3562928174722889, -1.7685384506770307, 0.35548179274376907, 0.8145198224878664, 0.05892558918162996, -0.18505367100934153, -0.8076484876163557, -1.4465346995633879, 0.8002979493400275, -0.3091144447717088, -0.23346666154369272, 1.7327211869191332, 0.6845011068591904, 0.3708250012811021, 0.14206180518723566, 1.5199948607657727, 1.7195893074161945, 0.9295051114795281, 0.5822245913979243, -2.0946030712061448, 0.12372191423350658, -0.130106954193704, 0.09395322938556872, 0.9430460873225178, -2.7396771671895563, -0.5693120534701851, 0.26990435494076137, -0.4668455460527625, -1.4169061131262595, 0.8689634868967954, 0.27687190584612803, -0.9711045704444846, 0.3148172045158238, 0.8215857120497958, 0.005292646299360854, 0.8005648034309968, 0.07826017516166135, -0.39522898265435435, -1.159420516399913, -0.08593076697161273, 0.19429293804577166, 0.8758327615873309, -0.11510746848722672, 0.4574156062209908, -0.9646120137337284, -0.7826291558275251, -0.11038929902688775, -1.0546284639850139, 0.8202478373246812, 0.4631303293186071, 0.2790957643924534, 0.33890412521594454, 2.0210435614847975, -0.46886418796679563, -2.201441285500558, 0.1993001968964652, -0.050603540961665895, -0.5175190425104033, -0.9788298593587699, -0.43918952180214793, 0.18133842921782128, -0.5028167006425383, 2.4124536795437486, -0.960504381633148, -0.7931173627076716, -2.2886200400145285, 0.251484415021537, -2.01640662779976, -0.5394546333745014, -0.27567053456055696, -0.7097279658468882, 1.738872677454511, 0.9943943913154989, 1.3191368763015756, -0.8824188185499185, 1.1285940645145685, 
0.4960009463439622, 0.7714059486768455, 1.0294388287827672, -0.9087632459590531, -0.4243176209779015, ...]"
4,37879,Probabilistic_Methods,"[-1.3322116545945017, -1.9686246897860202, -0.6600563201340829, 0.175818953296028, 0.4986902749098275, 1.0479721559680528, 0.2842796708072146, 1.7426687806556311, -0.22260568094832048, -0.9130792180417964, -1.6812182154944335, -0.8889713580954499, 0.242117960985123, -0.8887202573536308, 0.9367424635352571, 1.412327706037443, -2.369586905226603, 0.8640523004976479, -2.2396040586617367, 0.4014990550902875, 1.2248705641936597, 0.06485610634357618, -1.2796891732042395, -0.5854312042777726, -0.2616454457109007, -0.18224478378994294, -0.20289684076666706, -0.1098827793093138, 0.2134800489101689, -1.2085736537332212, -0.2420198298702195, 1.5182611703557054, -0.38464542314251776, -0.4438360931551978, 1.0781973037142378, -2.5591846663440965, 1.1813786012882859, -0.6319037580051673, 0.16392857245258663, 0.09632135592119682, 0.9424681192203938, -0.2675947462353477, -0.6780257815644504, 1.2978457906510987, -2.36417381714118, 0.02033418170524325, -1.3479254226291204, -0.761573388256559, 2.011256681463137, -0.044595426455857026, 0.19506969715138117, -1.7815628557055914, -0.7290446587946957, 0.19655740072878491, 0.3547576931132181, 0.6168865543932788, 0.008627898917576322, 0.5270042084546597, 0.453781912635684, -1.8297404110045314, 0.03700572191014953, 0.7679024077327037, 0.5898798207345195, -0.3638588099707899, -0.8056265075393678, -1.1183119243216322, -0.13105401154141233, 1.133079879559722, -1.951804101481602, -0.659891729729498, -1.139802455426774, 0.7849575212405001, -0.5543096265713009, -0.4706376581547914, -0.216949569936649, 0.4453932508947973, -0.39238899814963674, -3.0461430547999266, 0.5433118913875197, 0.43904295767204254, -0.21954102833121325, -1.0840366206719345, 0.3517801106813583, 0.37923553353558676, -0.4700328827008748, -0.21673147057553863, -0.9301565025243212, -0.17858909208732915, -1.550429345083481, 0.4173188210318355, -0.9443684908242939, 0.23810314783231212, -1.4059629162678993, -0.5900576458695397, -0.11048940506592783, 
-1.6606998118692633, 0.11514787314009216, -0.37914756287992274, -1.7423561978092306, -1.3032427541123157, ...]"
...,...,...,...
2703,1128975,Genetic_Algorithms,"[-0.5466030556227064, -1.5665766435374233, 0.4808763749066516, 0.8029467215264098, 0.6188451856130641, 0.027320676862317324, 1.6371157028813803, 2.106746077272545, -0.04889585946856758, 0.7917250136002473, 1.2144721933386038, 0.8481328721609137, 0.9901463220199921, 1.8158559620303911, 1.329986659733082, -1.392340046369394, -0.5439598763548307, -1.020074814949687, -0.4314953821816773, 1.4728015311190386, 0.3519145688669261, 0.5213327765073229, -0.6107959908404405, 0.5634473350031702, -0.09019197524651484, 0.2511254319198708, 0.8686015140301813, -0.5183041080170027, 0.07145875751426328, -0.8003917539100469, -0.8207617865865992, 0.5288632176323551, 1.2224597327974607, 1.3013968054213105, -0.2766153884091313, -0.22397126574702872, 1.547977150155915, -0.143765113843822, -1.4045402307613182, -0.24441881933239917, 0.02494008455992783, 0.04096078166276878, 1.1405865751744462, 0.8192985434127914, 0.05908460147856891, -0.22506401886528402, 0.08399922223944681, 0.7892563317257311, 1.1101393499762324, 2.884568658288131, -0.6747365006960476, 0.9438778283606948, 0.28627539938448315, 0.20518544495668245, -0.5369795700055161, -0.0027656437989723834, 0.31903811068375815, -0.6724078008600763, 0.2213511230653963, 0.11715049115438754, 0.023802502603528487, 0.49877403439319945, -0.38935805627185294, 0.36429824643371034, 0.8676266402909287, 0.583919917676943, 2.3887842972708766, 2.591530525583183, 0.0831252964548785, 0.5510268598862136, -1.0808628886232499, -0.7669690810855779, 2.052002646931058, -0.12036645200684756, 0.7425422194439041, 1.223615537395141, -0.49105036658275303, -1.5949377850337603, -1.5918061702204953, -0.8820389178252963, -1.9105128056490612, -2.388891816114587, 1.6271387486413647, -0.5763847725443313, -0.7894867062982641, -0.9953983558181849, 1.022159237220876, 0.7595819526247353, -1.3475812323762477, 0.862595703444562, -1.3024617674246017, 0.8393233283380684, -1.0449774944309806, 0.2569942935332738, -0.4154104216552788, 
0.2504695929513906, 0.6747599738523804, 2.4665726295406074, 1.539281153473057, 0.2382188131387752, ...]"
2704,1128977,Genetic_Algorithms,"[0.7443029027135303, -1.655633103389829, 2.450133423613235, -0.240565607000398, 1.7657208209814965, 0.5075690152379632, 0.9823027683881556, 0.8368117397493308, -0.036118909611946776, -0.6196705598324392, 0.2520669161669669, 1.0148432653111976, -1.024033744628297, -0.028564639601196824, -1.7318814737790449, 0.49663655744472646, 0.3580173488095067, 0.7593406367595791, 0.8155651696689785, -0.2620672776623939, 0.12392004105015156, 0.2620278468096268, -1.777878972138166, 0.12850976262073383, -0.9137707644081124, -0.5147659312728603, -0.5210836214533827, -0.04705235873180076, -0.052539635326239814, -0.03656576177966395, -0.42190240866484846, 0.09164169264328313, 1.4414325197221949, -0.5500240919206815, -0.6615813248033909, -1.128529095933801, 1.6161039276464322, 0.1701652588347768, 0.11296461853034685, 0.9271353498329433, 1.111266203391471, 1.1350340773686183, -0.1516223055317854, 0.15099625318759347, -0.1851707226155516, -1.488010451057454, 0.19148809043604445, -0.8324307795370842, -0.18675833902697092, 1.5110140214161996, 0.1641455176461006, 0.5995109228160024, -0.3525026490232511, -0.8656193670846394, -1.1874388118814865, 1.3462458545941518, 0.08355668176079423, 0.05221780618353542, 0.5939478455699988, 1.1375049510451416, 0.36167629856028577, -1.3030715101547354, 0.7124011546960684, 0.6125669477680871, -1.1660945302383583, 0.25870650675554663, -1.2836484862231619, -0.12893903657136277, -0.9961789260795851, -1.7971603380022596, -0.5895738648587755, -0.7671913768725417, -0.8749516741610538, -0.8140888317873202, -0.42267463812323625, 1.2953898749392716, 0.9471397319616779, 0.9545461649158298, 1.8770557563212618, -0.5588431971337284, -1.9204345238656229, 0.4817297325997082, -0.5517983352531116, 0.2418437461887171, 0.6904694843842938, 0.8504145562044054, -1.5009289410936495, 1.229307266796567, 1.119938985332341, 0.12173177885532224, -0.30129659349995186, 0.31510467364520744, 0.29270234571195364, 1.8795834442556592, -1.1709287757230318, 
0.5816647061772171, -0.16686966918968638, 0.8064252504173717, -0.3389535933265582, 0.32734939906685234, ...]"
2705,1128978,Genetic_Algorithms,"[-0.8311807191626971, -0.5304267007381969, -0.3781289856720653, -0.27435218082837076, -0.217967530297643, 1.728047680741844, 1.5703639703628556, 1.9017674012687178, 1.4303655299533806, 0.5532300762362089, -0.13292326819991326, -0.45774501607584633, -1.8684824993708948, -1.0127686452742104, -1.6167032410591555, -0.40445014681233, -0.46738327203678126, 1.9070013753841442, -0.45875105473023786, -2.197951333664901, 1.4543563685465963, 0.36079696808423706, 0.8496779671370629, 0.8627229363760024, -2.4126207976567335, -0.6210831734222291, -0.4916320800147689, -0.4410783327036827, 0.7046392698924192, 0.925659080089725, -1.1175755856723901, 1.0382948440147057, -0.00186044243192742, 0.11526027488318952, -0.08574188525861812, -0.3377459174586223, -0.5125815368370207, -0.7773187587945143, -1.5376335044130678, 1.0089124912424818, -0.74238543485485, -2.4268537022491086, -0.6922196581426031, -0.7877056374007053, 0.22925810841218244, 0.06362568172988513, 2.629040828496908, -1.5804681873394935, -0.7983601170608119, 0.13238213081147782, 0.11253663096287386, -0.18324853753182457, 0.588376396138304, -0.6825471108082042, 0.33997363279973836, -0.5961543324392411, -0.4935592068696998, -1.0437511524243417, -0.6117148526087463, -0.19908800209176378, -0.6396790136673789, 1.11109191302435, 1.588977335350728, 1.2330441043132887, -1.1481854802242961, 0.019209275707315145, -2.6339329694291185, -1.3128918466284503, -0.5356549514946604, -0.9835153708522503, 0.9089897561963921, -1.264493845333021, 0.20848063740269843, -1.3188664190042956, 1.6167901061004863, 0.47987977369600965, -0.5262108989794607, 0.05605655847415178, -2.059870289783242, 0.831077030482517, -1.087182075718012, -1.3057151363076476, 0.31939392275875284, 0.21578785167003178, -0.4799550603537397, 1.5837170156027667, 0.6249471624388582, -1.9025927422215594, 0.12423211429757418, 0.9998858037931359, -0.24122438336026525, -1.1501707494400764, 0.06302164667795682, 0.55663388170807, -1.0474208543526256, 
0.030657488634201972, -0.7039603370742666, 0.20034606633863228, 1.7843838726536618, 1.7356735903847906, ...]"
2706,117328,Case_Based,"[-1.4958200671817017, 2.7931842713559947, 0.20693214235188626, -0.3578974156259146, -1.6334632934751305, -0.35829790105638204, 0.5677255601319219, 1.012512569570406, 0.9972435490050972, 0.20595368307593784, 0.99733557455342, 0.22908120181917727, 0.9110251624550965, -0.8692607664516926, 0.054627226030741424, -2.35607121708807, 0.4076813793787575, -1.060148720103877, 0.6592849577708783, -0.5132785256500876, -0.4738326199608544, -0.12950048788018825, -0.6200621336147292, -0.3700474806220932, 0.7788843754993107, -0.040668950554384624, 0.07800835269415203, -0.7343583394360832, -0.6772431204741154, -1.4058637198843649, -0.13099883637018617, 0.32899636461586024, -0.39288529002602013, 0.07344640313323673, -0.16376141141852982, -0.5622811323568552, -0.18893009974633954, 1.5637893977403776, 0.2141637950840849, -1.219290846047037, 0.8443705045382093, 0.34574799695398056, 0.327883957214037, 1.0958689572001012, 1.1941816998555597, -0.8081294181863213, -0.4762322797242614, 0.5537830373941405, 0.2562067835181098, 0.8306450203400932, 1.9660934239015782, 2.0606034278226852, 0.9961227708949668, -1.6728599130416462, 1.2744313653537807, 0.38160954486245774, -0.8082586801239391, -0.8880905186039055, 1.1264685341833631, -0.5480637426294589, 0.5228635115001092, -1.2173334371941213, -0.4572054320176137, 0.5398135194261062, 1.4189738180138902, 0.6051820493827768, -0.2839402684976785, 0.8605224519048453, -0.48061339524706154, -0.7065833540275783, 1.4148131857350459, 0.053033940800122505, 0.7457312343118607, 0.4369127677957362, -1.0635414921786095, 0.16078261143796294, 0.8057115809405644, -0.2996487079316929, -0.4429333134211381, 0.38320791124786296, 1.3412056817089453, 1.8957469094818094, 0.5707430293030198, 0.5400225165220738, -1.2265034658835894, 0.6091395407769565, 0.868064508130333, -1.0617290494344935, -0.4947629099487789, -0.6942208656201396, 0.5737489414910728, -0.4419873109070664, 1.3662238005505591, 0.11351136157511929, 0.004915746627481987, 
-0.6894992061388844, 0.28854084051993023, 1.0352603685069854, 0.10323863933025104, 0.3672720990431366, ...]"


In [91]:
# Row count of the citation table loaded from cora_network.cites.
citation_network.shape[0]

5429