In [10]:
# ==============================================
# Network Construction + Network Measures
# ==============================================

# ---- 1. Import libraries ----
import pandas as pd
import numpy as np
import networkx as nx
from google.colab import files
import io

# ==============================================
# PART 1: Network Construction
# ==============================================

# ---- 2. Upload CSV files ----
print("Upload 'sample_users.csv' and 'sample_relationships.csv'")
uploaded = files.upload()

# ---- 3. Load datasets ----
user_df = pd.read_csv(io.BytesIO(uploaded['sample_users.csv']))
relationships_df = pd.read_csv(io.BytesIO(uploaded['sample_relationships.csv']))

# ---- 4. Build the graph ----
G = nx.Graph()
G.add_nodes_from(user_df['userId'])

# Add edges with or without weights
edges = relationships_df[['user1_id', 'user2_id']].values.tolist()
if 'weight' in relationships_df.columns:
    weights = relationships_df['weight'].values.tolist()
    for (u, v), w in zip(edges, weights):
        G.add_edge(u, v, weight=w)
else:
    G.add_edges_from(edges)

# ---- 5. Basic graph summary ----
print(f"Graph successfully created!")
print(f"Nodes: {G.number_of_nodes()} | Edges: {G.number_of_edges()}")

# ==============================================
# PART 2: Network Measures
# ==============================================

print("\nCalculating network measures...\n")

# ---- 1. Number of connected components ----
num_components = nx.number_connected_components(G)
print(f"Number of connected components: {num_components}")

# Get the largest connected component for path-based metrics
largest_cc = max(nx.connected_components(G), key=len)
largest_subgraph = G.subgraph(largest_cc)

# ---- 2. Average degree ----
avg_degree = sum(dict(G.degree()).values()) / G.number_of_nodes()
print(f"Average degree: {avg_degree:.2f}")

# ---- 3. Degree statistics ----
degrees = [deg for _, deg in G.degree()]
mean_degree = np.mean(degrees)
median_degree = np.median(degrees)
std_dev_degree = np.std(degrees)
print(f"Mean degree: {mean_degree:.2f}")
print(f"Median degree: {median_degree:.2f}")
print(f"Std. deviation of degree: {std_dev_degree:.2f}")

# ---- 4. Clustering coefficients ----
clustering_coeffs = list(nx.clustering(G).values())
mean_clustering = np.mean(clustering_coeffs)
median_clustering = np.median(clustering_coeffs)
std_clustering = np.std(clustering_coeffs)
print(f"Mean clustering coefficient: {mean_clustering:.4f}")
print(f"Median clustering coefficient: {median_clustering:.4f}")
print(f"Std. deviation of clustering: {std_clustering:.4f}")

# ---- 5. Transitivity ----
transitivity = nx.transitivity(G)
print(f"Transitivity: {transitivity:.4f}")

# ---- 6. Density ----
density = nx.density(G)
print(f"Graph density: {density:.4f}")

# ---- 7. Shortest path lengths ----
if nx.is_connected(G):
    path_lengths = dict(nx.all_pairs_shortest_path_length(G))
else:
    path_lengths = dict(nx.all_pairs_shortest_path_length(largest_subgraph))

# Extract all shortest path values into a flat list
all_paths = []
for source in path_lengths:
    all_paths.extend(path_lengths[source].values())

mean_path = np.mean(all_paths)
median_path = np.median(all_paths)
std_path = np.std(all_paths)
print(f"Mean shortest path length: {mean_path:.4f}")
print(f"Median shortest path length: {median_path:.4f}")
print(f"Std. deviation of shortest paths: {std_path:.4f}")

print("\n Network measures calculated successfully!")


Upload 'sample_users.csv' and 'sample_relationships.csv'


Saving sample_relationships.csv to sample_relationships.csv
Saving sample_users.csv to sample_users.csv
Graph successfully created!
Nodes: 5 | Edges: 6

Calculating network measures...

Number of connected components: 1
Average degree: 2.40
Mean degree: 2.40
Median degree: 2.00
Std. deviation of degree: 0.49
Mean clustering coefficient: 0.3333
Median clustering coefficient: 0.3333
Std. deviation of clustering: 0.3651
Transitivity: 0.3333
Graph density: 0.6000
Mean shortest path length: 1.1200
Median shortest path length: 1.0000
Std. deviation of shortest paths: 0.7111

 Network measures calculated successfully!
