In [12]:
import networkx as nx
import os
import matplotlib.pyplot as plt
from networkx.algorithms import bipartite
from collections import Counter
from networkx.linalg.graphmatrix import adjacency_matrix

from statistics import mean

import csv

# Loading data pathes and I/O functions from script
from scripts.io import load_movie_titles, load_raw_bipartite, save_projection, load_projection, save_edgelist, projection_path

In [2]:
title_dict, node_dict = load_movie_titles("movie-titles.txt")
G = load_raw_bipartite("full_bipartite.p")

# Split the graph into 2 sets: user and movie nodes
user_nodes, movie_nodes = nx.algorithms.bipartite.basic.sets(G)

Graph loaded.


In [8]:



user_nodes, movie_nodes = nx.algorithms.bipartite.basic.sets(G)

# Compute the degree centrality for each movie node
degree_centrality = nx.bipartite.degree_centrality(G, movie_nodes)

# Get the top 50 movie nodes based on degree centrality
top_50_movies = sorted(degree_centrality, key=degree_centrality.get, reverse=True)[:50]

# Print or use the top 50 movies
print(top_50_movies)



[50, 258, 100, 181, 294, 286, 288, 1, 300, 121, 174, 10405, 127, 56, 7, 98, 10655, 237, 117, 172, 222, 10013, 204, 313, 405, 79, 210, 151, 173, 69, 168, 748, 269, 257, 10450, 195, 423, 9, 276, 318, 22, 302, 96, 328, 15, 25, 118, 183, 10276, 216]


In [9]:
#simple projection
# Projecting on movies
simple_weights_movies_path = "simple_weights_movies.p"

if os.path.exists(projection_path+simple_weights_movies_path):
    simple_weights_movies = load_projection(simple_weights_movies_path)
else:
    simple_weights_movies = bipartite.weighted_projected_graph(G, movie_nodes, ratio=True)
    save_projection(simple_weights_movies, simple_weights_movies_path)


Projection loaded.


In [16]:


# Assuming you have already identified top 50 movie nodes in the bipartite network
top_50_movies_bipartite = top_50_movies  # Replace [...] with the actual list of top 50 movie nodes

# Load the simple projection
simple_weights_movies_path = "rating_allocation_movies.p"

if os.path.exists(projection_path + simple_weights_movies_path):
    simple_weights_movies = load_projection(simple_weights_movies_path)
else:
    simple_weights_movies = bipartite.weighted_projected_graph(G, movie_nodes, ratio=True)
    save_projection(simple_weights_movies, simple_weights_movies_path)

# Filter edges in the simple projection to include only top 50 movies from the bipartite
filtered_edges = [(node1, node2, simple_weights_movies[node1][node2]['weight'])
                  for node1, node2 in simple_weights_movies.edges
                  if node1 in top_50_movies_bipartite and node2 in top_50_movies_bipartite]

# Create an edge list with movie names and write to CSV
csv_file_path = "filtered_edges_allocation.csv"

with open(csv_file_path, 'w', newline='') as csvfile:
    fieldnames = ['Movie_Node_1', 'Movie_Node_2', 'Edge_Weight', 'Movie_Name_1', 'Movie_Name_2']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    
    # Write CSV header
    writer.writeheader()

    # Assuming title_dict is a dictionary mapping movie IDs to movie names
    for edge in filtered_edges:
        movie_name_1 = title_dict.get(edge[0], "Unknown Movie")
        movie_name_2 = title_dict.get(edge[1], "Unknown Movie")
        
        # Write to CSV
        writer.writerow({'Movie_Node_1': edge[0], 'Movie_Node_2': edge[1],
                         'Edge_Weight': edge[2], 'Movie_Name_1': movie_name_1,
                         'Movie_Name_2': movie_name_2})

print(f"Filtered edges saved to {csv_file_path}")


Projection loaded.
Filtered edges saved to filtered_edges_allocation.csv


In [18]:
import csv

# Assuming title_dict is a dictionary mapping movie IDs to movie names
movie_nodes_to_lookup = [1, 7, 9, 15, 22, 25, 50, 56, 69, 79, 96, 98, 100, 117, 118, 121, 127, 151, 168, 172,
                          173, 174, 181, 183, 195, 204, 210, 216, 222, 237, 257, 258, 269, 276, 286, 288,
                          294, 300, 302, 313, 318, 328, 405, 423,748]

movie_titles_to_lookup = [title_dict.get(node, "Unknown Movie") for node in movie_nodes_to_lookup]

# Create a list of tuples with node and corresponding name
result_data = list(zip(movie_nodes_to_lookup, movie_titles_to_lookup))

# Save to CSV
csv_file_path = "movie_nodes_and_names.csv"

with open(csv_file_path, 'w', newline='') as csvfile:
    fieldnames = ['Movie_Node', 'Movie_Name']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    
    # Write CSV header
    writer.writeheader()

    for node, title in result_data:
        # Write to CSV
        writer.writerow({'Movie_Node': node, 'Movie_Name': title})

print(f"Movie nodes and names saved to {csv_file_path}")


Movie nodes and names saved to movie_nodes_and_names.csv
