In [None]:
# Marvel Hero Network Link Prediction with LPFormer
# ======================================================

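# This notebook loads the pre-written LPFormer scripts (lpformer.py, run_lpformer.py, analyze_results.py) together with the Marvel CSV data, trains the model on the hero network, and then analyzes and displays the predicted links.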

## Setup Environment

# First, install required dependencies
!pip install torch torch_geometric networkx matplotlib seaborn scikit-learn scipy tqdm

## Upload the Files
from google.colab import files
import os

# Files the rest of this notebook expects to find in the working directory.
required_files = [
    'lpformer.py',          # Main implementation
    'run_lpformer.py',      # Script to run the model
    'analyze_results.py',   # Script to analyze results
    'nodes.csv',            # Node data
    'edges.csv',            # Hero-comic edges
    'hero-network.csv'      # Hero-hero connections
]

missing_files = [f for f in required_files if not os.path.exists(f)]

if missing_files:
    print(f"Please upload the following files: {', '.join(missing_files)}")
    files.upload()
    # Re-check once the upload widget returns: the upload may have been
    # cancelled or may not have included every file that was asked for.
    missing_files = [f for f in required_files if not os.path.exists(f)]

if missing_files:
    # Stop here with a clear message instead of failing later, deep inside
    # the model or analysis scripts, with a less obvious error.
    raise FileNotFoundError(
        f"Required files are still missing: {', '.join(missing_files)}"
    )

print("All required files are present.")

## Execute the LPFormer Implementation

import sys
# Import the submodule explicitly: `import importlib` by itself does not
# guarantee that `importlib.util` has been loaded.
import importlib.util

# Make sure Python can find our modules. Guarded so that re-running this
# section does not keep appending duplicate entries to sys.path.
if '.' not in sys.path:
    sys.path.append('.')

# Run the model
print("\n=== RUNNING LPFORMER MODEL ===")
print("This will train the model and generate predictions...")

# Load run_lpformer.py from the working directory as a module object and
# execute its top-level code.
run_lpformer_spec = importlib.util.spec_from_file_location("run_lpformer", "run_lpformer.py")
run_lpformer_module = importlib.util.module_from_spec(run_lpformer_spec)
run_lpformer_spec.loader.exec_module(run_lpformer_module)

# run() is defined in run_lpformer.py; its result is unpacked into five names
# that stay available to the rest of the notebook.
model, edge_data, split_data, ppr_data, top_predictions = run_lpformer_module.run()

print("\n=== MODEL EXECUTION COMPLETE ===")

## Analyze the Results

# Imported here so this section does not depend on an earlier section having
# loaded the `importlib.util` submodule as a side effect.
import importlib.util

print("\n=== ANALYZING RESULTS ===")
print("This will generate visualizations and insights about the predictions...")

# Load analyze_results.py from the working directory as a module object and
# execute its top-level code.
analyze_results_spec = importlib.util.spec_from_file_location("analyze_results", "analyze_results.py")
analyze_results_module = importlib.util.module_from_spec(analyze_results_spec)
analyze_results_spec.loader.exec_module(analyze_results_module)

# analyze_predictions() is defined in analyze_results.py; its return value is
# kept as `predictions_df` for the display section below.
predictions_df = analyze_results_module.analyze_predictions()

print("\n=== ANALYSIS COMPLETE ===")

## Display the Results

import matplotlib.pyplot as plt
from IPython.display import display, HTML

# Render the first ten rows of the predictions table as an HTML table
print("\n=== TOP PREDICTED HERO CONNECTIONS ===")
top_rows_html = predictions_df.head(10).to_html()
display(HTML(top_rows_html))

# Image files to show, looked up in the working directory
visualization_files = [
    'hero_network_predictions.png',  # Network visualization
    'prediction_analysis.png',       # Prediction analysis charts
    'correlation_matrix.png',        # Correlation heatmap
    'community_analysis.png'         # Community analysis
]

print("\n=== VISUALIZATIONS ===")
for viz_file in visualization_files:
    # Report and skip any image that is not on disk
    if not os.path.exists(viz_file):
        print(f"Visualization file {viz_file} not found")
        continue

    # One figure per image, titled with its file name and drawn without axes
    plt.figure(figsize=(12, 10))
    plt.imshow(plt.imread(viz_file))
    plt.axis('off')
    plt.title(viz_file)
    plt.show()

## Conclusion

# Closing summary lines, printed one per line in this order
closing_messages = (
    "\n=== LINK PREDICTION COMPLETE ===",
    "The LPFormer model has been trained on the Marvel hero network.",
    "Top predictions have been generated and analyzed.",
    "Visualizations show the network structure and prediction patterns.",
)
for message in closing_messages:
    print(message)

All required files are present.

=== RUNNING LPFORMER MODEL ===
This will train the model and generate predictions...
Using device: cuda
Using device: cuda
Loading data...
Nodes: 19090
Comic-Hero edges: 96104
Hero-Hero edges: 574467
Preprocessing data...
Splitting edges...
Computing PPR matrix...


Computing PPR matrix: 100%|██████████| 6282/6282 [01:13<00:00, 85.94it/s]


Building LPFormer model with 12651 input features, 64 hidden dimensions, 4 attention heads, and 2 layers...
Training model...
