In [1]:
from graph_miner import NetworkRepositoryGraphRepository

In [2]:
repository = NetworkRepositoryGraphRepository()

In [3]:
repository.retrieve_all()

HBox(children=(IntProgress(value=0, description='Retrieving graphs for NetworkRepositoryGraphRepository', max=…

HBox(children=(IntProgress(value=0, description='Downloading to networkrepositor...vgDeg10-1M-L5.zip', layout=…

ValueError: In the edge file was found the node 999949 which is not present in the given node file.

In [1]:
import pandas as pd
import numpy as np
from downloaders import BaseDownloader
from ensmallen_graph import EnsmallenGraph
from downloaders.extractors.zip_extraction import ZipExtractor
from tqdm.auto import tqdm
import os
import shutil
import compress_json

In [2]:
url = "http://nrvis.com/download/data/{}/{}.zip"

In [3]:
path = "bindings/python/ensmallen_graph/network_repository/{}.json"

In [4]:
graphs = pd.read_html("http://networkrepository.com/networks.php")[0]
graphs["url"] = [
    url.format(row["Type"], row["Graph Name"])
    for _, row in graphs.iterrows()
]
graphs.drop(
    columns=["Download", "Size"],
    inplace=True
)

In [6]:
downloader = BaseDownloader(
    target_directory="graphs",
    process_number=1,
    verbose=2,
    crash_early=True
)

In [None]:
EnsmallenGraph.from_unsorted_csv()

In [7]:
already_skipped = set()

In [43]:
metadata = compress_json.local_load("metadata.json")

In [44]:
to_add_manually = {
    'imdb',
    'ip-trace',
    "web-ClueWeb09",
    "web-ClueWeb09-50m"
}

In [69]:
to_skip = {
    "ca-cit-HepPh",
    "ca-cit-HepTh",
    "cit-HepPh",
    "cit-HepTh",
    'SMS-A',
    'aves-sparrow-social',
    'aves-wildbird-network',
    'comm-linux-kernel-reply',
    'contacts-prox-high-school-2013-attr',
    'edit-enwikibooks',
    'fb-wosn-friends',
    'ia-chess',
    'ia-digg-reply',
    'ia-enron-email-all',
    'ia-enron-email-dynamic',
    'ia-enron-employees',
    'ia-escorts-dynamic',
    'ia-facebook-wall-wosn-dir',
    'ia-frwikinews-user-edits',
    'insecta-ant-colony1',
    'insecta-ant-colony2',
    'soc-epinions-trust-dir',
    'soc-flickr-growth',
    "soc-twitter",
    'rec-movielens-user-tag-10m'
}

In [72]:
for _, row in tqdm(graphs.iterrows(), total=len(graphs)):
    url = row.url
    graph_name = row["Graph Name"]
    if graph_name in metadata or graph_name in to_skip or graph_name in already_skipped or graph_name in to_add_manually:
        continue
    report = downloader.download(url)
    if "extraction_success" not in report.columns:
        already_skipped.add(graph_name)
        continue
    extraction = report.extraction_destination[0]
    graph_name = extraction.split(os.sep)[-1]
    edge_path_pattern = "{}/{}".format(
        report.extraction_destination[0],
        graph_name
    )
    edge_path = None
    for ext in ("edges", "mtx", "txt"):
        edge_path = "{}.{}".format(edge_path_pattern, ext)
        if os.path.exists(edge_path):
            break
    if not os.path.exists(edge_path):
        already_skipped.add(graph_name)
        continue
    if edge_path is None:
        raise ValueError("File not found in list {}", os.listdir(extraction))
    comment = None
    if ext == "mtx":
        edge_rows_to_skip = 1
    else:
        edge_rows_to_skip = 0
        
    with open(edge_path, "r") as f:
        first_line = f.readline()
        
    for comment_symbol in ("%", "#"):
        if first_line.startswith(comment_symbol):
            comment = comment_symbol

    comment_lines = 0
    with open(edge_path, "r") as f:
        for _ in range(1000):
            middle_line = f.readline()
            if comment:
                if middle_line.startswith(comment):
                    comment_lines+=1
                
    for separator in ("\t", " ", ",", ";"):
        if separator in middle_line:
            edge_separator = separator
            
    data = pd.read_csv(
        edge_path,
        sep=edge_separator,
        skiprows=edge_rows_to_skip + comment_lines,
        header=None,
        nrows=20000
    )
    
    if len(data.columns) == 1:
        already_skipped.add(graph_name)
        continue
    
    if len(data) < 10:
        already_skipped.add(graph_name)
        continue
    
    has_index = False
    if len(data.columns) > 2 and (data[0].values == np.arange(len(data))).all():
        # This file has a damn index!
        sources_column_number = 1
        destinations_column_number = 2
        weights_column_number = 3
        has_index = True
    else:
        sources_column_number = 0
        destinations_column_number = 1
        weights_column_number = 2
        
    if len(data.columns) > weights_column_number + 1:
        if graph_name.startswith("ia-") or graph_name.startswith('insecta-ant') or graph_name.startswith("mammalia-"):
            already_skipped.add(graph_name)
            continue
        if graph_name.startswith("rec-"):
            to_add_manually.add(graph_name)
            continue
        already_skipped.add(graph_name)
        continue
        #raise ValueError("This graph has a lot of columns, check what is happening!")
        
    has_weights = False
    has_nan_weights = False
    if len(data.columns) > weights_column_number:
        has_weights = True
        if data[weights_column_number].isna().all():
            has_weights = False
        elif data[weights_column_number].isna().any():
            has_nan_weights=True
    
    if has_weights and (data[weights_column_number] <= 0).any():
        # We do not support graphs with negative weights.
        already_skipped.add(graph_name)
        continue
    
    graph = EnsmallenGraph.from_unsorted_csv(
        edge_path=edge_path,
        directed=False,
        edge_separator=edge_separator,
        sources_column_number=sources_column_number,
        destinations_column_number=destinations_column_number,
        edge_rows_to_skip=edge_rows_to_skip,
        edge_max_rows_number = 20000,
        **(dict(weights_column_number=weights_column_number) if has_weights else {}),
        **(dict(default_weight=1) if has_nan_weights else {}),
        **(dict(edge_file_comment_symbol=comment) if comment is not None else {})
    )
    metadata[graph_name] = {
        "urls": [url],
        "arguments": {
            "edge_path": "{graph_name}/{graph_name}.{ext}".format(
                graph_name=graph_name,
                ext=ext
            ),
            "edge_separator": edge_separator,
            "sources_column_number": sources_column_number,
            "destinations_column_number": destinations_column_number,
            **({
                "weights_column_number": 2,
            } if len(data.columns) > 2 else {}),
            **({
                "edge_rows_to_skip": edge_rows_to_skip,
            } if edge_rows_to_skip else {}),
            **({
                "edge_file_comment_symbol": comment
            } if comment is not None else {}),
            "edge_header": False,
            "has_nan_weights": has_nan_weights
        }
    }
    #os.remove(report.destination[0])
    #shutil.rmtree(report.extraction_destination[0])

HBox(children=(IntProgress(value=0, max=5152), HTML(value='')))

HBox(children=(IntProgress(value=0, description='Downloading to graphs/road-belgium-osm.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/road-chesapeake.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/road-euroroad.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/road-germany-osm.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/road-great-britain-osm.zip', layout=Lay…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/road-italy-osm.zip', layout=Layout(flex…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/road-luxembourg-osm.zip', layout=Layout…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/road-minnesota.zip', layout=Layout(flex…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/road-netherlands-osm.zip', layout=Layou…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/road-road-usa.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/road-roadNet-CA.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/road-roadNet-PA.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/road-usroads-48.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/road-usroads.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/rt-higgs.zip', layout=Layout(flex='2'),…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/rt-pol.zip', layout=Layout(flex='2'), m…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/rt-retweet-crawl.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/rt-retweet.zip', layout=Layout(flex='2'…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/rt-twitter-copen.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/rt-islam.zip', layout=Layout(flex='2'),…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/sc-TSOPF-RS-b2383-c1.zip', layout=Layou…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/sc-delaunay-n23.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/sc-ldoor.zip', layout=Layout(flex='2'),…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/sc-msdoor.zip', layout=Layout(flex='2')…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/sc-nasasrb.zip', layout=Layout(flex='2'…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/sc-pkustk11.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/sc-pkustk13.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/sc-pwtk.zip', layout=Layout(flex='2'), …

HBox(children=(IntProgress(value=0, description='Downloading to graphs/sc-rel9.zip', layout=Layout(flex='2'), …

HBox(children=(IntProgress(value=0, description='Downloading to graphs/sc-shipsec1.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/sc-shipsec5.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/fb-pages-artist.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/fb-pages-company.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/fb-pages-food.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/fb-pages-government.zip', layout=Layout…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/fb-pages-media.zip', layout=Layout(flex…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/fb-pages-politician.zip', layout=Layout…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/fb-pages-public-figure.zip', layout=Lay…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/fb-pages-sport.zip', layout=Layout(flex…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/fb-pages-tvshow.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-ANU-residence.zip', layout=Layout(f…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-BlogCatalog.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-FourSquare.zip', layout=Layout(flex…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-LiveMocha.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-academia.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-advogato.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-anybeat.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-brightkite.zip', layout=Layout(flex…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-buzznet.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-catster.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-delicious.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-digg.zip', layout=Layout(flex='2'),…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-dogster.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-dolphins.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-douban.zip', layout=Layout(flex='2'…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-epinions.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-firm-hi-tech.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-flickr-und.zip', layout=Layout(flex…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-flickr.zip', layout=Layout(flex='2'…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-flixster.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-gemsec-HR.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-gemsec-HU.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-gemsec-RO.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-google-plus.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-gowalla.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-gplus.zip', layout=Layout(flex='2')…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-hamsterster.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-highschool-moreno.zip', layout=Layo…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-karate.zip', layout=Layout(flex='2'…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-lastfm.zip', layout=Layout(flex='2'…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-linkedin.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-livej...l-user-groups.zip', layout=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-livejournal.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-livejournal07.zip', layout=Layout(f…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-ljournal-2008.zip', layout=Layout(f…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-loc-brightkite.zip', layout=Layout(…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-myspace.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-physicians.zip', layout=Layout(flex…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-pokec.zip', layout=Layout(flex='2')…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-slashdot-trust-all.zip', layout=Lay…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-slashdot-zoo.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-slashdot.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-student-coop.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-themarker.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-tribes.zip', layout=Layout(flex='2'…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-twitt...r-follows-mun.zip', layout=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-twitter-follows.zip', layout=Layout…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-twitter-higgs.zip', layout=Layout(f…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-wiki-Talk-dir.zip', layout=Layout(f…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-wiki-Vote.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-wiki-conflict.zip', layout=Layout(f…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-youtube-snap.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/soc-youtube.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-A-anon.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-American75.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Amherst41.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Auburn71.zip', layout=Layout(flex…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-B-anon.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-BC17.zip', layout=Layout(flex='2'…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-BU10.zip', layout=Layout(flex='2'…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Baylor93.zip', layout=Layout(flex…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Berkeley13.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Bingham82.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Bowdoin47.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Brandeis99.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Brown11.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Bucknell39.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-CMU.zip', layout=Layout(flex='2')…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Cal65.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Caltech36.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Carnegie49.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Colgate88.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Columbia2.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Cornell5.zip', layout=Layout(flex…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Dartmouth6.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Duke14.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Emory27.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-FSU53.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-GWU54.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Georgetown15.zip', layout=Layout(…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Hamilton46.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Harvard1.zip', layout=Layout(flex…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Haverford76.zip', layout=Layout(f…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Howard90.zip', layout=Layout(flex…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Indiana.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Indiana69.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-JMU79.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-JohnsHopkins55.zip', layout=Layou…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Lehigh96.zip', layout=Layout(flex…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-MIT.zip', layout=Layout(flex='2')…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-MIT8.zip', layout=Layout(flex='2'…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-MSU24.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-MU78.zip', layout=Layout(flex='2'…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Maine59.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Maryland58.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Mich67.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Michigan23.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Middlebury45.zip', layout=Layout(…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Mississippi66.zip', layout=Layout…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-NYU9.zip', layout=Layout(flex='2'…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Northeastern19.zip', layout=Layou…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Northwestern25.zip', layout=Layou…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-NotreDame57.zip', layout=Layout(f…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-OR.zip', layout=Layout(flex='2'),…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Oberlin44.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Oklahoma97.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Penn94.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Pepperdine86.zip', layout=Layout(…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Princeton12.zip', layout=Layout(f…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Reed98.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Rice31.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Rochester38.zip', layout=Layout(f…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Rutgers89.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Santa74.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Simmons81.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Smith60.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Stanford3.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Swarthmore42.zip', layout=Layout(…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Syracuse56.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Temple83.zip', layout=Layout(flex…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Tennessee95.zip', layout=Layout(f…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Texas80.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Texas84.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Trinity100.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Tufts18.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Tulane29.zip', layout=Layout(flex…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-UC33.zip', layout=Layout(flex='2'…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-UC61.zip', layout=Layout(flex='2'…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-UC64.zip', layout=Layout(flex='2'…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-UCF52.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-UCLA.zip', layout=Layout(flex='2'…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-UCLA26.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-UCSB37.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-UCSC68.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-UCSD34.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-UChicago30.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-UConn.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-UConn91.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-UF.zip', layout=Layout(flex='2'),…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-UF21.zip', layout=Layout(flex='2'…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-UGA50.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-UIllinois.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-UIllinois20.zip', layout=Layout(f…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-UMass92.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-UNC28.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-UPenn7.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-USC35.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-USF51.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-USFCA72.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-UVA16.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Vanderbilt48.zip', layout=Layout(…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Vassar85.zip', layout=Layout(flex…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Vermont70.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Villanova62.zip', layout=Layout(f…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Virginia63.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Wake73.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-WashU32.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Wellesley22.zip', layout=Layout(f…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Wesleyan43.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-William77.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Williams40.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Wisconsin87.zip', layout=Layout(f…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-Yale4.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-konect.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-nips-ego.zip', layout=Layout(flex…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-uci-uni.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/socfb-wosn-friends.zip', layout=Layout(…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/tech-RL-caida.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/tech-WHOIS.zip', layout=Layout(flex='2'…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/tech-as-caida2007.zip', layout=Layout(f…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/tech-internet-as.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/tech-ip.zip', layout=Layout(flex='2'), …

HBox(children=(IntProgress(value=0, description='Downloading to graphs/tech-p2p-gnutella.zip', layout=Layout(f…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/tech-pgp.zip', layout=Layout(flex='2'),…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/tech-routers-rf.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-enron-only.zip', layout=Layout(flex…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-fb-forum.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-fb-messages.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-infect-dublin.zip', layout=Layout(f…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-infect-hyper.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-reality.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-retweet-crawl.zip', layout=Layout(f…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-retweet.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-alwefaq.zip', layout=Layout(flex…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-assad.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-bahrain.zip', layout=Layout(flex…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-barackobama.zip', layout=Layout(…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-damascus.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-dash.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-gmanews.zip', layout=Layout(flex…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-gop.zip', layout=Layout(flex='2'…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-http.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-islam.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-israel.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-justinbieber.zip', layout=Layout…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-ksa.zip', layout=Layout(flex='2'…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-lebanon.zip', layout=Layout(flex…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-libya.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-lolgop.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-mittromney.zip', layout=Layout(f…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-obama.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-occupy.zip', layout=Layout(flex=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-occupywallstnyc.zip', layout=Lay…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-oman.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-onedirection.zip', layout=Layout…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-p2.zip', layout=Layout(flex='2')…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-qatif.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-saudi.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-tcot.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-tlot.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-uae.zip', layout=Layout(flex='2'…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-rt-vo...eonedirection.zip', layout=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/scc-twitter-copen.zip', layout=Layout(f…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/web-BerkStan-dir.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/web-EPA.zip', layout=Layout(flex='2'), …

HBox(children=(IntProgress(value=0, description='Downloading to graphs/web-arabic-2005.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/web-baidu...baike-related.zip', layout=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/web-baidu-baike.zip', layout=Layout(fle…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/web-edu.zip', layout=Layout(flex='2'), …

HBox(children=(IntProgress(value=0, description='Downloading to graphs/web-frwik...ws-user-edits.zip', layout=…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/web-google-dir.zip', layout=Layout(flex…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/web-google.zip', layout=Layout(flex='2'…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/web-hudong.zip', layout=Layout(flex='2'…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/web-indochina-2004.zip', layout=Layout(…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/web-it-2004.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/web-italycnr-2000.zip', layout=Layout(f…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/web-polblogs.zip', layout=Layout(flex='…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/web-sk-2005.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/web-spam.zip', layout=Layout(flex='2'),…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/web-uk-2005.zip', layout=Layout(flex='2…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/web-webbase-2001.zip', layout=Layout(fl…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/web-wiki-ch-internal.zip', layout=Layou…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/web-wikipedia-growth.zip', layout=Layou…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/web-wikipedia2009.zip', layout=Layout(f…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/web-wikipedia-link-de.zip', layout=Layo…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/web-wikipedia-link-fr.zip', layout=Layo…

HBox(children=(IntProgress(value=0, description='Downloading to graphs/web-wikipedia-link-it.zip', layout=Layo…




In [75]:
metadata

{'bio-CE-CX': {'urls': ['http://nrvis.com/download/data/bio/bio-CE-CX.zip'],
  'arguments': {'edge_path': 'bio-CE-CX/bio-CE-CX.edges',
   'edge_separator': ' ',
   'sources_column_number': 0,
   'destinations_column_number': 1,
   'weights_column_number': 2,
   'edge_header': False,
   'numeric_edge_node_ids': True,
   'has_nan_weights': False}},
 'bio-CE-GT': {'urls': ['http://nrvis.com/download/data/bio/bio-CE-GT.zip'],
  'arguments': {'edge_path': 'bio-CE-GT/bio-CE-GT.edges',
   'edge_separator': ' ',
   'sources_column_number': 0,
   'destinations_column_number': 1,
   'weights_column_number': 2,
   'edge_header': False,
   'numeric_edge_node_ids': True,
   'has_nan_weights': False}},
 'bio-CE-HT': {'urls': ['http://nrvis.com/download/data/bio/bio-CE-HT.zip'],
  'arguments': {'edge_path': 'bio-CE-HT/bio-CE-HT.edges',
   'edge_separator': ' ',
   'sources_column_number': 0,
   'destinations_column_number': 1,
   'weights_column_number': 2,
   'edge_header': False,
   'numeric_edge_n

In [53]:
middle_line, first_line, graph_name

('', '%%MatrixMarket matrix coordinate pattern general\n', 'Trec3')

In [74]:
compress_json.local_dump(metadata, "metadata2.json")

In [None]:
comment, edge_rows_to_skip, has_nan_weights

In [None]:
middle_line

In [None]:
os.listdir("graphs/ca-MathSciNet/")

In [None]:
pd.read_csv("graphs/ca-MathSciNet/ca-MathSciNet.mtx", sep=" ", comment="%", skiprows=2, header=None)