In [1]:
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import pickle

In [2]:
with open('graphs/course_graph.pickle', "rb") as file:
    course_graph = pickle.load(file)
    
print(course_graph)

with open('graphs/user_graph.pickle', "rb") as file:
    user_graph = pickle.load(file)
    
print(user_graph)

Graph with 527 nodes and 19056 edges
Graph with 1000 nodes and 323055 edges


### Analyse graphs

In [3]:
def get_sparsity(G):
    num_nodes = len(G.nodes())
    num_edges = len(G.edges())
    return 1.0 - (2.0 * num_edges) / (num_nodes * (num_nodes - 1))
    
print(get_sparsity(course_graph),get_sparsity(user_graph))

0.8625118144890729 0.3532432432432432


### Create bipartite graph

In [4]:
# Step 1: Create an empty bipartite graph
bipartite_graph = nx.Graph()

# Step 2: Add course nodes to the bipartite graph with attributes
for course_node, course_attrs in course_graph.nodes(data=True):
    bipartite_graph.add_node(course_node, bipartite=0, node_id=course_node, **course_attrs)

# Step 3: Add user nodes to the bipartite graph with attributes
for user_node, user_attrs in user_graph.nodes(data=True):
    bipartite_graph.add_node(user_node, bipartite=1, **user_attrs)

    # Extract the list of course IDs for the user
    course_ids = user_attrs.get('courses', [])
    ratings = user_attrs.get('ratings', [])
    positive_sentiments = user_attrs.get('positive_sentiments', [])
    neutral_sentiments = user_attrs.get('neutral_sentiments', [])
    negative_sentiments = user_attrs.get('negative_sentiments', [])

    # Create edges between user and corresponding courses if they exist
    for counter, course_id in enumerate(course_ids):
        for node in bipartite_graph.nodes():
            if bipartite_graph.nodes[node]['bipartite'] == 0 and bipartite_graph.nodes[node]['id'] == course_id:
                combined_weight = (ratings[counter] * positive_sentiments[counter]) - negative_sentiments[counter] * ratings[counter]
                bipartite_graph.add_edge(user_node, node, rating=ratings[counter], positive_sentiment=positive_sentiments[counter],
                                         neutral_sentiment=neutral_sentiments[counter], negative_sentiment=negative_sentiments[counter], weight=combined_weight)
                break


# Step 4: Add edges between courses if they exist
for source, target, edge_attrs in course_graph.edges(data=True):
    if source in bipartite_graph and target in bipartite_graph:
        bipartite_graph.add_edge(source, target, **edge_attrs)

# Step 5: Add edges between users if they exist
for source, target, edge_attrs in user_graph.edges(data=True):
    if source in bipartite_graph and target in bipartite_graph:
        bipartite_graph.add_edge(source, target, **edge_attrs)


In [5]:
print(bipartite_graph)

# Count the number of course nodes
course_nodes = [node for node in bipartite_graph.nodes() if bipartite_graph.nodes[node]['bipartite'] == 0]
course_edges = [edge for edge in bipartite_graph.edges() if bipartite_graph.nodes[edge[0]]['bipartite'] == 0]
print("Number of course nodes:", len(course_nodes))
print("Number of course edges:", len(course_edges))

# Count the number of user nodes
user_nodes = [node for node in bipartite_graph.nodes() if bipartite_graph.nodes[node]['bipartite'] == 1]
user_edges = [edge for edge in bipartite_graph.edges() if bipartite_graph.nodes[edge[0]]['bipartite'] == 1]
print("Number of user nodes:", len(user_nodes))
print("Number of user edges:", len(user_edges))

# Initialize the edge count
edge_count = 0

# Iterate over the edges of the bipartite graph
for edge in bipartite_graph.edges():
    # Check if the nodes belong to different bipartite sets
    if bipartite_graph.nodes[edge[0]]['bipartite'] != bipartite_graph.nodes[edge[1]]['bipartite']:
        edge_count += 1

# Print the number of edges between the two networks
print("Number of edges between the two networks:", edge_count // 2)

Graph with 1527 nodes and 347929 edges
Number of course nodes: 527
Number of course edges: 24874
Number of user nodes: 1000
Number of user edges: 323055
Number of edges between the two networks: 2909


In [6]:
# check if the number of between edges makes sense

count=0
for user_node, user_attrs in user_graph.nodes(data=True):
    count+=len(user_attrs.get('courses', []))

print(count)

6834


In [7]:
file_path = "graphs/bipartite_graph.pickle"

with open(file_path, "wb") as file:
    pickle.dump(bipartite_graph, file)

In [8]:
with open(file_path, "rb") as file:
    graph = pickle.load(file)
    
print(graph)

Graph with 1527 nodes and 347929 edges


### Check correctness of the graph

In [9]:
# Specify the node for which you want to find the neighborhood
node = 'Robert S'

# Get the neighborhood of the node
neighborhood = list(nx.neighbors(bipartite_graph, node))

# Print the attributes of each neighbor
print("Attributes of neighbors of node", node, ":")
for neighbor in neighborhood:
    if bipartite_graph.nodes[neighbor]['bipartite'] == 0:
        attributes = bipartite_graph.nodes[neighbor]
        print(f"Node {neighbor}:",bipartite_graph.nodes[neighbor]['id'],bipartite_graph.edges[(node, neighbor)]['rating'])
        
    print('-----------------------------')

Attributes of neighbors of node Robert S :
Node 18: python-operating-system 5
-----------------------------
Node 80: python-programming-introduction 5
-----------------------------
Node 190: nanotechnology 5
-----------------------------
Node 9: linear-algebra-machine-learning 4
-----------------------------
Node 40: big-data-introduction 3
-----------------------------
Node 45: probability-intro 5
-----------------------------
Node 1: programming-languages 5
-----------------------------
Node 266: smart-cities 5
-----------------------------
Node 383: duke-programming-web 5
-----------------------------
Node 63: python 5
-----------------------------
Node 232: grammar-punctuation 5
-----------------------------
Node 450: quantitative-methods 5
-----------------------------
Node 539: python-data-visualization 2
-----------------------------
Node 413: what-is-datascience 5
-----------------------------
Node 260: computer-networking 4
-----------------------------
Node 417: classificatio

In [10]:
reviews=pd.read_csv('data/Coursera_reviews.csv')