## 1. Load data and create network

In [41]:
import networkx as nx
import pandas as pd

In [50]:
df_movies = pd.read_csv('datasets/movies.csv')
df_ratings = pd.read_csv('datasets/ratings.csv')

display(df_movies.head())
display(df_ratings.head())

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


### Add year as new column

In [51]:
df_movies['year'] = df_movies['title'].str.extract(r'\((\d{4})\)$')
df_movies['title'] = df_movies['title'].str.replace(r'\((\d{4})\)$', '', regex=True).str.strip()
df_movies.head()

Unnamed: 0,movieId,title,genres,year
0,1,Toy Story,Adventure|Animation|Children|Comedy|Fantasy,1995
1,2,Jumanji,Adventure|Children|Fantasy,1995
2,3,Grumpier Old Men,Comedy|Romance,1995
3,4,Waiting to Exhale,Comedy|Drama|Romance,1995
4,5,Father of the Bride Part II,Comedy,1995


### Transform 'genres' -> one-hot encoding

In [52]:
genres = df_movies['genres'].str.split('|').explode()
unique_genres = genres.unique()

one_hot_encodings = []
for index, row in df_movies.iterrows():
    movie_genres = row['genres'].split('|')
    encoding = [1 if genre in movie_genres else 0 for genre in unique_genres]
    one_hot_encodings.append(encoding)
    
one_hot_df = pd.DataFrame(one_hot_encodings, columns=unique_genres)
df_movies = pd.concat([df_movies, one_hot_df], axis=1)
df_movies.drop(columns=['genres'], inplace=True)
df_movies.head()

Unnamed: 0,movieId,title,year,Adventure,Animation,Children,Comedy,Fantasy,Romance,Drama,...,Horror,Mystery,Sci-Fi,War,Musical,Documentary,IMAX,Western,Film-Noir,(no genres listed)
0,1,Toy Story,1995,1,1,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,Jumanji,1995,1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,3,Grumpier Old Men,1995,0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,4,Waiting to Exhale,1995,0,0,0,1,0,1,1,...,0,0,0,0,0,0,0,0,0,0
4,5,Father of the Bride Part II,1995,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Create bipartite (2-modal) network

In [80]:
G = nx.Graph()

for _, row in df_movies.iterrows():
    G.add_node(row['movieId'], bipartite=0, year=row['year'])
    
for _, row in df_ratings.iterrows():
    G.add_node(row['userId'], bipartite=1)
    
    movie_id = int(row['movieId'])
    G.add_edge(row['userId'], movie_id, rating=row['rating'])

In [81]:
def print_first_x_nodes(graph, num):
    x = num
    count = 0
    
    for u, data in graph.nodes(data=True):
        if count >= x:
            break
        print(f"Node: {u}, Year: {data['year']}")
        count += 1

        
def print_first_x_edges(graph, num):
    x = num
    count = 0
    
    for u, v, data in graph.edges(data=True):
        if count >= x:
            break
        print(f"Edge: ({u}, {v}), Rating: {data['rating']}")
        count += 1
        
        
print_first_x_nodes(G, 5) 
print_first_x_edges(G, 5)

Node: 1, Year: 1995
Node: 2, Year: 1995
Node: 3, Year: 1995
Node: 4, Year: 1995
Node: 5, Year: 1995
Edge: (1, 1), Rating: 4.0
Edge: (1, 3), Rating: 4.0
Edge: (1, 6), Rating: 4.0
Edge: (1, 47), Rating: 5.0
Edge: (1, 50), Rating: 3.0


## 2. Centrality measures

## 3. Projection to a single-modal network

## 4. Communities detection