# In [None]:  (Jupyter notebook cell prompt left over from export)
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score

class GraphLinkPredictor:
    """Similarity-based link prediction on a small built-in NetworkX graph.

    The workflow (see ``execute``) prints and plots basic statistics, hides a
    fraction of the edges, and then checks how well three neighbourhood
    scores (common neighbours, Jaccard, Adamic/Adar) separate the hidden
    edges from sampled node pairs that are not connected.
    """

    # Column order of the matrix built by ``extract_edge_features``; the
    # evaluation loop labels columns assuming exactly this order.
    _FEATURE_METHODS = ('common_neighbors', 'jaccard', 'adamic_adar')

    def __init__(self, dataset='karate'):
        """
        Initialize the predictor with a specific dataset.

        Parameters:
            dataset (str): Name of the dataset ('karate' or 'les').

        Raises:
            ValueError: If ``dataset`` is not one of the supported names.
        """
        available_datasets = {'karate': nx.karate_club_graph, 'les': nx.les_miserables_graph}
        if dataset not in available_datasets:
            raise ValueError("Only 'karate' and 'les' datasets are supported.")
        self.graph = available_datasets[dataset]()

    def analyze_graph(self):
        """Print basic statistics and plot the graph and its degree histogram."""
        print("\n=== Graph Analysis ===")
        print(f"Number of nodes: {self.graph.number_of_nodes()}")
        print(f"Number of edges: {self.graph.number_of_edges()}")
        print(f"Average clustering coefficient: {nx.average_clustering(self.graph):.3f}")

        # Fixed seed so the layout is the same on every run.
        pos = nx.spring_layout(self.graph, seed=42)
        plt.figure(figsize=(12, 6))

        # Left panel: the graph structure.
        plt.subplot(121)
        nx.draw(self.graph, pos, with_labels=True, node_color='skyblue', node_size=600, font_size=9)
        plt.title("Graph Structure")

        # Right panel: the degree distribution.
        plt.subplot(122)
        degrees = [deg for _, deg in self.graph.degree()]
        sns.histplot(degrees, bins=10, color="lightblue", edgecolor="black")
        plt.title("Degree Distribution")
        plt.xlabel("Degree")
        plt.ylabel("Frequency")

        plt.tight_layout()
        plt.show()

    def prepare_training_data(self, test_size=0.2, seed=42):
        """Split the edges into train/test sets and sample negative test pairs.

        Sets ``training_graph`` (a copy of the graph with the test edges
        removed), ``train_edges``, ``test_edges`` and ``test_non_edges``.

        Parameters:
            test_size (float): Fraction of edges held out for testing.
            seed (int): Seed used for both the edge split and the negative
                sampling, so repeated runs give the same data.
        """
        print("\n=== Preparing Data ===")
        all_edges = list(self.graph.edges())
        train_edges, test_edges = train_test_split(
            all_edges, test_size=test_size, random_state=seed
        )

        self.training_graph = self.graph.copy()
        self.training_graph.remove_edges_from(test_edges)

        # Negative candidates must come from the ORIGINAL graph: in the
        # training graph the held-out test edges are also "missing", so
        # sampling there could label a true edge as a negative example.
        # Pairs are enumerated in node order (rather than via nx.non_edges)
        # so that the candidate order, and hence the seeded sample, is stable.
        nodes = list(self.graph.nodes())
        candidates = [
            (u, v)
            for i, u in enumerate(nodes)
            for v in nodes[i + 1:]
            if not self.graph.has_edge(u, v)
        ]
        rng = np.random.default_rng(seed)
        chosen = rng.permutation(len(candidates))[:len(test_edges)]
        test_non_edges = [candidates[i] for i in chosen]

        self.train_edges = train_edges
        self.test_edges = test_edges
        self.test_non_edges = test_non_edges

        print(f"Training edges: {len(train_edges)}")
        print(f"Testing edges: {len(test_edges)}")

    def calculate_score(self, u, v, method='common_neighbors'):
        """Score the node pair ``(u, v)`` on the training graph.

        Parameters:
            u, v: Nodes of ``training_graph``.
            method (str): 'common_neighbors', 'jaccard' or 'adamic_adar'.

        Returns:
            The number of common neighbours (int) or the Jaccard /
            Adamic-Adar value reported by NetworkX (float).

        Raises:
            ValueError: If ``method`` is not one of the supported names.
        """
        if method == 'common_neighbors':
            return len(list(nx.common_neighbors(self.training_graph, u, v)))
        elif method == 'jaccard':
            # The NetworkX helpers yield (u, v, score) triples; keep the score.
            return next(nx.jaccard_coefficient(self.training_graph, [(u, v)]))[2]
        elif method == 'adamic_adar':
            return next(nx.adamic_adar_index(self.training_graph, [(u, v)]))[2]
        else:
            raise ValueError("Unsupported method")

    def extract_edge_features(self, edges):
        """Build the score matrix for a list of node pairs.

        Parameters:
            edges: Iterable of ``(u, v)`` node pairs.

        Returns:
            numpy.ndarray of shape ``(len(edges), 3)`` whose columns are the
            common-neighbours, Jaccard and Adamic/Adar scores. An empty
            input yields shape ``(0, 3)`` so column indexing still works.
        """
        features = [
            [self.calculate_score(u, v, method) for method in self._FEATURE_METHODS]
            for u, v in edges
        ]
        return np.array(features, dtype=float).reshape(-1, len(self._FEATURE_METHODS))

    @staticmethod
    def _threshold_predictions(scores, percentile=70):
        """Label as 1 every score strictly above the given percentile of ``scores``."""
        scores = np.asarray(scores, dtype=float)
        if scores.size == 0:
            # No scores means no percentile to compute; nothing is predicted.
            return np.zeros(0, dtype=int)
        threshold = np.percentile(scores, percentile)
        return (scores > threshold).astype(int)

    def evaluate_predictions(self, percentile=70):
        """Evaluate the prediction methods on the test set.

        Each score column is turned into binary predictions by thresholding
        at ``percentile`` of that column, then compared with the true labels
        (1 for held-out edges, 0 for sampled non-edges).

        Parameters:
            percentile (float): Percentile of the scores used as cut-off.

        Returns:
            pandas.DataFrame with columns Method, Accuracy, Precision and
            Recall (also printed).
        """
        print("\n=== Evaluation ===")

        pairs = list(self.test_edges) + list(self.test_non_edges)
        X_test = self.extract_edge_features(pairs)
        y_test = np.hstack([np.ones(len(self.test_edges)), np.zeros(len(self.test_non_edges))])

        methods = ['Common Neighbors', 'Jaccard', 'Adamic/Adar']
        results = []

        for i, method in enumerate(methods):
            predictions = self._threshold_predictions(X_test[:, i], percentile)

            acc = accuracy_score(y_test, predictions)
            # zero_division=0: when no pair clears the threshold the metric is
            # undefined; report 0 explicitly instead of emitting a warning.
            prec = precision_score(y_test, predictions, zero_division=0)
            rec = recall_score(y_test, predictions, zero_division=0)

            results.append([method, acc, prec, rec])

        df_results = pd.DataFrame(results, columns=['Method', 'Accuracy', 'Precision', 'Recall'])
        print(df_results)
        return df_results

    def execute(self):
        """Execute the complete workflow: analyze, split, then evaluate."""
        self.analyze_graph()
        self.prepare_training_data()
        self.evaluate_predictions()

if __name__ == "__main__":
    # Interactive entry point: ask which built-in dataset to use, then run
    # the full analysis / split / evaluation workflow on it.
    print("Select dataset:\n1. Karate Club\n2. Les Miserables")
    # Strip surrounding whitespace so an answer such as "1 " is still accepted.
    user_choice = input("Enter your choice (1/2): ").strip()
    dataset_mapping = {'1': 'karate', '2': 'les'}

    selected = dataset_mapping.get(user_choice)
    if selected is None:
        print("Invalid choice!")
    else:
        predictor = GraphLinkPredictor(selected)
        predictor.execute()
