In [None]:
import igraph 
import csv
import unicodedata
import numpy as np

In [None]:
def read_csv_to_collection(path):
    """Read a semicolon-delimited UTF-8 CSV file and return its data rows.

    The first row of the file is treated as a header and is not returned.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file.

    Returns
    -------
    list of list of str
        One list of field strings per row after the header; empty when the
        file has no rows after the header (or no rows at all).
    """
    print(f"Reading file {path}, begin...")
    # newline='' is what the csv module asks for: without it, newlines that
    # are embedded in quoted fields are rewritten by universal-newline mode.
    with open(path, mode='r', encoding="utf8", newline='') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=';')
        next(csv_reader, None)  # skip the header row, tolerating an empty file
        collection = list(csv_reader)

    # Count what was actually collected so an empty file reports 0, not -1.
    print("Data lines read: " + str(len(collection)))
    return collection

## NBA 

In [None]:
def read_nba_vertices(collection, graph, add_names):
    """Add one vertex to ``graph`` for every row of ``collection``.

    Parameters
    ----------
    collection : iterable of sequences
        Vertex rows; ``row[1]`` is used as the vertex name when
        ``add_names`` is true.
    graph : graph object
        Receives the vertices through ``graph.add_vertex``; it is modified
        in place.
    add_names : bool
        When true, each vertex gets a ``name`` attribute holding an
        ASCII-only version of ``row[1]``; otherwise vertices are added
        without attributes.
    """
    print("Reading NBA vertices, begin...")
    for row in collection:
        if add_names:
            # NFKD splits accented letters into base letter + combining mark,
            # so the ASCII encode below keeps the base letter.
            decomposed = unicodedata.normalize('NFKD', row[1])
            # The Polish stroked L has no decomposition in either case
            # (U+0142 lowercase, U+0141 uppercase); map both by hand,
            # otherwise the encode step would silently drop the letter.
            folded = decomposed.replace('\u0142', 'l').replace('\u0141', 'L')
            ascii_name = folded.encode('ascii', 'ignore').decode('ascii')
            graph.add_vertex(name=ascii_name)
        else:
            graph.add_vertex()

    print("Vertices read: " + str(len(graph.vs)))

In [None]:
def read_nba_edges(collection, graph, add_years, add_friends, reference_year=2020):
    """Add the edges described by ``collection`` to ``graph``.

    Each row contributes the edge ``(int(row[1]) - 1, int(row[0]) - 1)``;
    the subtraction turns the 1-based numbers stored in the rows into
    0-based vertex indices.

    Parameters
    ----------
    collection : iterable of sequences
        Edge rows. ``row[2]`` must start with a four-digit year when
        ``add_years`` is true, and ``row[4]`` must be an integer string
        when ``add_friends`` is true.
    graph : graph object
        Modified in place through ``graph.add_edges`` and ``graph.es``.
    add_years : bool
        When true, the edge attribute ``"weight"`` is set to
        ``reference_year - year`` for every edge.
    add_friends : bool
        When true, the edge attribute ``"friends"`` is set to
        ``int(row[4])`` for every edge.
    reference_year : int, optional
        Year the weights are measured from; defaults to 2020.

    Notes
    -----
    Each attribute is stored independently of the other, and only when it
    was requested and its length matches the number of edges in the graph
    (so it is skipped if the graph already held edges before this call).
    """
    print("Reading NBA edges, begin...")
    edges = []
    weights = []
    friends = []
    for row in collection:
        edges.append((int(row[1]) - 1, int(row[0]) - 1))
        if add_years:
            weights.append(reference_year - int(row[2][0:4]))
        if add_friends:
            friends.append(int(row[4]))

    graph.add_edges(edges)
    edge_count = len(graph.es)
    # Handle the two attributes separately: asking for friends without years
    # must still store friends, and asking for years without friends must
    # not assign an empty "friends" list.
    if add_years and len(weights) == edge_count:
        print("Weights loaded correctly")
        graph.es["weight"] = weights
    if add_friends and len(friends) == edge_count:
        graph.es["friends"] = friends

    print("Edges read: " + str(len(graph.es)))

In [None]:
def get_nba_seasons(collection):
    """Return the third field (index 2) of every row in ``collection``.

    Parameters
    ----------
    collection : sequence of equal-length sequences
        Rows to extract from. In ``read_nba_edges`` the first four
        characters of this same field are parsed as a year.

    Returns
    -------
    list
        The index-2 values in row order; an empty list for empty input.
    """
    # An empty input becomes a 1-D array, on which [:, 2] raises IndexError.
    if len(collection) == 0:
        return []
    return np.array(collection)[:, 2].tolist()

In [None]:
def get_nba_first_club(path, graph=None):
    """Return the fourth field (index 3) of every NBA row.

    Parameters
    ----------
    path : str or sequence of rows
        Either the path of a CSV file, which is loaded with
        ``read_csv_to_collection``, or rows that are already loaded.
    graph : object, optional
        Not used; kept so existing two-argument calls keep working.

    Returns
    -------
    list
        The index-3 values in row order; an empty list for empty input.

    Notes
    -----
    The function name suggests index 3 holds a player's first club; that
    cannot be confirmed from this file alone -- TODO verify against the CSV.
    """
    # The original body read an undefined name `collection` instead of its
    # own argument, so it only ran if such a notebook global happened to exist.
    rows = read_csv_to_collection(path) if isinstance(path, str) else path
    if len(rows) == 0:
        return []
    return np.array(rows)[:, 3].tolist()

In [None]:
def read_nba_full(vertices_path, edges_path, add_names, add_years, add_friends):
    """Load the NBA vertex and edge CSV files into a new igraph graph.

    Parameters
    ----------
    vertices_path, edges_path : str
        CSV files passed to ``read_csv_to_collection``.
    add_names : bool
        Forwarded to ``read_nba_vertices``.
    add_years, add_friends : bool
        Forwarded to ``read_nba_edges``.

    Returns
    -------
    igraph.Graph
        The populated graph.
    """
    print("Reading full NBA graph, begin...")
    vertex_rows = read_csv_to_collection(vertices_path)
    edge_rows = read_csv_to_collection(edges_path)

    nba_graph = igraph.Graph()
    # Vertices go in first: the edges refer to them by index.
    read_nba_vertices(vertex_rows, nba_graph, add_names)
    read_nba_edges(edge_rows, nba_graph, add_years, add_friends)

    print("Loading succesfull")
    return nba_graph

## Filmweb

In [None]:
def get_filmweb_names(collection):
    """Return an ASCII-only version of the first field of every row.

    Parameters
    ----------
    collection : iterable of sequences
        Rows whose ``row[0]`` is a text string.

    Returns
    -------
    list of str
        One name per row, in row order. Accented letters are reduced to
        their base letter; characters with no ASCII counterpart are dropped.
    """
    names = []
    for row in collection:
        # NFKD splits accented letters into base letter + combining mark,
        # so the ASCII encode below keeps the base letter.
        decomposed = unicodedata.normalize('NFKD', row[0])
        # The Polish stroked L has no decomposition in either case
        # (U+0142 lowercase, U+0141 uppercase); map both by hand,
        # otherwise the encode step would silently drop the letter.
        folded = decomposed.replace('\u0142', 'l').replace('\u0141', 'L')
        names.append(folded.encode('ascii', 'ignore').decode('ascii'))

    return names

In [17]:
def read_filmweb_vertices(collection, graph, read_names):
    """Add one vertex per row of ``collection`` to ``graph``.

    Every vertex gets an ``id`` attribute taken from ``row[2]``. When
    ``read_names`` is true, the names produced by ``get_filmweb_names``
    are stored in the ``name`` vertex attribute, but only if there are
    exactly as many names as vertices in the graph.

    Parameters
    ----------
    collection : sequence of sequences
        Vertex rows.
    graph : graph object
        Modified in place.
    read_names : bool
        Whether to attach the ``name`` attribute.
    """
    print("Reading Filmweb vertices, begin...")
    for record in collection:
        graph.add_vertex(id=record[2])

    if read_names:
        labels = get_filmweb_names(collection)
        # The counts differ if the graph already held vertices before this
        # call; in that case the names are left unassigned.
        if len(labels) == len(graph.vs):
            graph.vs["name"] = labels

    print(f"Vertices read: {len(graph.vs)}")


In [None]:
def read_filmweb_edges(collection, graph, add_common_movies, add_years, reference_year=2020):
    """Add the edges described by ``collection`` to ``graph``.

    ``row[0]`` and ``row[1]`` are looked up in the ``id`` vertex attribute
    of ``graph`` to find the two endpoints of each edge. If an id occurs
    on several vertices, the first such vertex is used.

    Parameters
    ----------
    collection : iterable of sequences
        Edge rows. ``row[3]`` must be an integer string when
        ``add_common_movies`` is true; ``row[2]`` must be an integer
        string when only ``add_years`` is true.
    graph : graph object
        Modified in place through ``graph.add_edges`` and ``graph.es``.
    add_common_movies : bool
        When true, the edge attribute ``"weight"`` is ``int(row[3])``.
    add_years : bool
        When true (and ``add_common_movies`` is false), the edge attribute
        ``"weight"`` is ``reference_year - int(row[2])``.
    reference_year : int, optional
        Year the year-based weights are measured from; defaults to 2020.

    Raises
    ------
    ValueError
        If a row refers to an id that no vertex of ``graph`` carries.

    Notes
    -----
    If both flags are true, a message is printed and the common-movies
    weight is used. Weights are stored only when one was requested and
    their count matches the number of edges in the graph.
    """
    print("Reading Filmweb edges, begin...")
    if add_common_movies and add_years:
        print("Error: filmweb graph can only have one weight")

    rows = list(collection)
    # Build the id -> vertex index map once; calling list.index() twice per
    # row re-fetched and re-scanned the whole id list for every edge.
    index_by_id = {}
    if rows:
        for position, vertex_id in enumerate(graph.vs["id"]):
            # setdefault keeps the first occurrence, as list.index() did.
            index_by_id.setdefault(vertex_id, position)

    edges = []
    weights = []
    for row in rows:
        try:
            edges.append((index_by_id[row[0]], index_by_id[row[1]]))
        except KeyError as missing:
            # Same exception type list.index() raised for an unknown id.
            raise ValueError(
                "Edge refers to unknown vertex id " + repr(missing.args[0])
            ) from None
        if add_common_movies:
            weights.append(int(row[3]))
        elif add_years:
            weights.append(reference_year - int(row[2]))

    graph.add_edges(edges)
    # Only report and store weights when one was requested; an empty edge
    # list used to print "Weights loaded correctly" with no weights at all.
    if (add_common_movies or add_years) and len(weights) == len(graph.es):
        print("Weights loaded correctly")
        graph.es["weight"] = weights

    print("Edges read: " + str(len(graph.es)))

In [None]:
def get_filmweb_common_movies_count(collection):
    """Return the fourth field (index 3) of every row in ``collection``.

    Parameters
    ----------
    collection : sequence of equal-length sequences
        Rows to extract from. ``read_filmweb_edges`` parses this same
        field as the integer common-movies weight.

    Returns
    -------
    list
        The index-3 values in row order, as stored in the rows (no integer
        conversion is applied); an empty list for empty input.
    """
    # An empty input becomes a 1-D array, on which [:, 3] raises IndexError.
    if len(collection) == 0:
        return []
    return np.array(collection)[:, 3].tolist()

In [None]:
def get_filmweb_dates(collection):
    """Return the third field (index 2) of every row in ``collection``.

    Parameters
    ----------
    collection : sequence of equal-length sequences
        Rows to extract from. ``read_filmweb_edges`` parses this same
        field as an integer year.

    Returns
    -------
    list
        The index-2 values in row order, as stored in the rows (no integer
        conversion is applied); an empty list for empty input.
    """
    # An empty input becomes a 1-D array, on which [:, 2] raises IndexError.
    if len(collection) == 0:
        return []
    return np.array(collection)[:, 2].tolist()

In [None]:
def read_filmweb_full(vertices_path, edges_path, add_names, add_common_movies, add_years):
    """Load the Filmweb vertex and edge CSV files into a new igraph graph.

    Parameters
    ----------
    vertices_path, edges_path : str
        CSV files passed to ``read_csv_to_collection``.
    add_names : bool
        Forwarded to ``read_filmweb_vertices``.
    add_common_movies, add_years : bool
        Forwarded to ``read_filmweb_edges``.

    Returns
    -------
    igraph.Graph
        The populated graph.
    """
    print("Reading full Filmweb graph, begin...")
    vertex_rows = read_csv_to_collection(vertices_path)
    edge_rows = read_csv_to_collection(edges_path)

    filmweb_graph = igraph.Graph()
    # Vertices go in first: the edge loader resolves endpoints by vertex id.
    read_filmweb_vertices(vertex_rows, filmweb_graph, add_names)
    read_filmweb_edges(edge_rows, filmweb_graph, add_common_movies, add_years)

    print("Loading succesfull")
    return filmweb_graph