In [12]:
import pandas as pd 
# Load the ACM publication table and preview its first rows.
df = pd.read_csv(r'C:\Users\Zain\Downloads\ACM.csv')
print(df.head())

# Replace each ', '-separated author string with a list of author names.
df['authors'] = df['authors'].str.split(', ')

# Explode the 'authors' column so every author gets a row of their own, and
# keep only the author, venue and title columns (in that order).
df_exploded = df.explode('authors')[['authors', 'venue', 'title']]

print(df_exploded)

       id                                              title  \
0  304586  The WASA2 object-oriented workflow management ...   
1  304587  A user-centered interface for querying distrib...   
2  304589  World Wide Database-integrating the Web, CORBA...   
3  304590           XML-based information mediation with MIX   
4  304582  The CCUBE constraint object-oriented database ...   

                                             authors  \
0                    Gottfried Vossen, Mathias Weske   
1                  Isabel F. Cruz, Kimberly M. James   
2  Athman Bouguettaya, Boualem Benatallah, Lily H...   
3  Chaitan Baru, Amarnath Gupta, Bertram Lud&#228...   
4  Alexander Brodsky, Victor E. Segal, Jia Chen, ...   

                                            venue  year  
0  International Conference on Management of Data  1999  
1  International Conference on Management of Data  1999  
2  International Conference on Management of Data  1999  
3  International Conference on Management of D

In [24]:
import tkinter as tk
from tkinter import ttk
import networkx as nx
import numpy as np
import pandas as pd



def normalize(matrix):
    """Scale every row of ``matrix`` so that it sums to 1.

    Parameters
    ----------
    matrix : array-like, 2-D
        Non-normalized weights, e.g. a graph adjacency matrix.

    Returns
    -------
    numpy.ndarray
        A new float array with the same shape as ``matrix``. Rows whose sum
        is zero (for example, isolated graph nodes) are returned as all
        zeros instead of being divided by zero and filled with NaN.
    """
    matrix = np.asarray(matrix, dtype=float)
    row_sums = matrix.sum(axis=1, keepdims=True)
    # `where` skips the division for zero-sum rows; those positions keep the
    # zeros that `out` was initialised with.
    return np.divide(matrix, row_sums, out=np.zeros_like(matrix), where=row_sums != 0)

def random_walk_with_restart(adjacency_matrix, start_node, restart_prob, tolerance, max_iterations=1000):
    """Iterate a random walk that restarts at ``start_node``.

    Starting from a one-hot vector on ``start_node``, repeatedly applies

        scores <- (1 - restart_prob) * adjacency_matrix @ scores
                  + restart_prob * restart_vector

    until the Euclidean norm of the change between two consecutive iterates
    drops below ``tolerance`` or ``max_iterations`` updates have been made.

    Parameters
    ----------
    adjacency_matrix : numpy.ndarray, shape (n, n)
        Transition weights. The matrix is applied exactly as given (no
        transposition or normalization happens here).
        NOTE(review): callers in this file pass a row-normalized matrix; the
        textbook formulation uses a column-normalized one. Confirm which is
        intended.
    start_node : int
        Index of the node the walk starts from and restarts at.
    restart_prob : float
        Weight of the restart vector in every update.
    tolerance : float
        Convergence threshold on the norm of the change per update.
    max_iterations : int, optional
        Upper bound on the number of updates, so that inputs which never
        satisfy the convergence test (e.g. NaN entries) cannot loop forever.

    Returns
    -------
    numpy.ndarray, shape (n,)
        The most recent score vector. If the iteration limit is reached
        first, a ``RuntimeWarning`` is issued and the last iterate is
        returned.
    """
    import warnings

    num_nodes = adjacency_matrix.shape[0]

    # The restart distribution stays fixed on the start node; it must not be
    # replaced by the evolving scores, otherwise the walk never "restarts".
    restart_vector = np.zeros(num_nodes)
    restart_vector[start_node] = 1.0

    current_scores = restart_vector.copy()
    for _ in range(max_iterations):
        next_scores = (
            (1 - restart_prob) * np.dot(adjacency_matrix, current_scores)
            + restart_prob * restart_vector
        )
        converged = np.linalg.norm(next_scores - current_scores) < tolerance
        current_scores = next_scores
        if converged:
            return current_scores

    warnings.warn(
        f"random_walk_with_restart did not converge within {max_iterations} iterations",
        RuntimeWarning,
        stacklevel=2,
    )
    return current_scores

def compute_relevance_scores(graph_matrix, start_node, restart_prob=0.15, tolerance=0.1):
    """Score every node of a graph by its relevance to ``start_node``.

    The matrix is row-normalized with ``normalize`` and then handed to
    ``random_walk_with_restart``.

    Parameters
    ----------
    graph_matrix : numpy.ndarray, shape (n, n)
        Adjacency matrix of the graph.
    start_node : int
        Row/column index (in ``graph_matrix``) of the query node.
    restart_prob : float, optional
        Restart weight forwarded to the random walk.
    tolerance : float, optional
        Convergence threshold forwarded to the random walk.

    Returns
    -------
    numpy.ndarray, shape (n,)
        One score per node, in the same order as the rows of
        ``graph_matrix``.
    """
    norm_matrix = normalize(graph_matrix)
    return random_walk_with_restart(norm_matrix, start_node, restart_prob, tolerance)


# Load the publication table and turn each ', '-separated author string into
# a list of author names.
data = pd.read_csv(r'C:\Users\Zain\Downloads\ACM.csv')
data['authors'] = data['authors'].str.split(', ')

# One row per (author, paper) pair, restricted to the columns the graphs use.
df = data.explode('authors').reset_index(drop=True)
df = df[['authors', 'venue', 'title']]

# Column names used for the three node types.
conference_column = 'venue'
author_column = 'authors'
topic_column = 'title'

# Venue-author graph: venues are tagged bipartite=0, authors bipartite=1, and
# every (venue, author) pair found in a row becomes an edge of weight 1.
G_bipartite = nx.Graph()
G_bipartite.add_nodes_from(df[conference_column].unique(), bipartite=0)
G_bipartite.add_nodes_from(df[author_column].unique(), bipartite=1)
# Bulk insertion builds the same edges as adding them row by row.
G_bipartite.add_edges_from(zip(df[conference_column], df[author_column]), weight=1)

# Venue-author-title graph: same as above plus title nodes (bipartite=2) and
# an author-title edge of weight 1 for every row.
G_tripartite = nx.Graph()
G_tripartite.add_nodes_from(df[conference_column].unique(), bipartite=0)
G_tripartite.add_nodes_from(df[author_column].unique(), bipartite=1)
G_tripartite.add_nodes_from(df[topic_column].unique(), bipartite=2)
G_tripartite.add_edges_from(zip(df[conference_column], df[author_column]), weight=1)
G_tripartite.add_edges_from(zip(df[author_column], df[topic_column]), weight=1)

# Dense adjacency matrices. Rows/columns follow each graph's node order
# (venues first, then authors, then titles), NOT the row order of `df`;
# use list(G.nodes()) to translate between names and matrix indices.
graph_matrix_bipartite = nx.to_numpy_array(G_bipartite)
graph_matrix_tripartite = nx.to_numpy_array(G_tripartite)


class AuthorPageApp:
    """Small Tkinter form that shows an "author page" for a typed-in name.

    The page lists the authors ranked most relevant to the queried author by
    ``compute_relevance_scores`` on the venue-author graph, followed by the
    papers the queried author shares with each of them.

    The class reads the module-level ``df``, ``G_bipartite``,
    ``G_tripartite``, ``graph_matrix_bipartite`` and
    ``graph_matrix_tripartite`` objects and the ``*_column`` names.
    """

    # How many related authors are listed per query.
    MAX_RELATED_AUTHORS = 5

    def __init__(self, root):
        """Store the Tk root window, set its title and build the widgets."""
        self.root = root
        self.root.title("Author Page")

        self.create_widgets()

    def create_widgets(self):
        """Create the name entry, the query button and the read-only result box."""
        self.label_author = ttk.Label(self.root, text="Enter author's name:")
        self.entry_author = ttk.Entry(self.root, width=30)

        self.button_get_page = ttk.Button(self.root, text="Get Author Page", command=self.get_author_page)

        # Kept DISABLED so the user cannot type into it; it is enabled only
        # while its content is being replaced.
        self.result_text = tk.Text(self.root, height=10, width=80, state=tk.DISABLED)

        self.label_author.grid(row=0, column=0, padx=10, pady=5, sticky=tk.E)
        self.entry_author.grid(row=0, column=1, padx=10, pady=5, sticky=tk.W)
        self.button_get_page.grid(row=1, column=0, columnspan=2, pady=10)
        self.result_text.grid(row=2, column=0, columnspan=2, padx=10, pady=5)

    def get_author_page(self):
        """Button callback: render the page for the entered name or an error."""
        author_name = self.entry_author.get()

        try:
            page_text = self._build_author_page(author_name)
        except ValueError:
            page_text = f"Error: Author '{author_name}' not found"

        self._show_text(page_text)

    def _build_author_page(self, author_name):
        """Return the page text for ``author_name``; raise ValueError if unknown."""
        known_authors = set(df[author_column].dropna())
        if author_name not in known_authors:
            raise ValueError(f"unknown author: {author_name!r}")

        # Matrix rows follow the graph's node order, not the row order of
        # `df`, so names must be translated through the node list.
        bipartite_nodes = list(G_bipartite.nodes())
        start_node = bipartite_nodes.index(author_name)
        scores = compute_relevance_scores(graph_matrix_bipartite, start_node)

        # Walk the nodes from highest to lowest score and keep the first few
        # that are authors other than the queried one.
        related_authors = []
        for node_index in np.argsort(scores)[::-1]:
            if not scores[node_index] > 0:
                # Zero-score or NaN entries carry no relevance information.
                continue
            candidate = bipartite_nodes[node_index]
            if candidate == author_name or candidate not in known_authors:
                continue
            related_authors.append(candidate)
            if len(related_authors) == self.MAX_RELATED_AUTHORS:
                break

        parts = [
            f"Author: {author_name}\n\n",
            f"Unique Co-authors:\n{', '.join(related_authors)}\n\n",
        ]

        tripartite_nodes = list(G_tripartite.nodes())
        tripartite_index = {node: i for i, node in enumerate(tripartite_nodes)}
        author_links = graph_matrix_tripartite[tripartite_index[author_name]] > 0

        for coauthor in related_authors:
            coauthor_links = graph_matrix_tripartite[tripartite_index[coauthor]] > 0
            # Nodes adjacent to both authors; the title filter below discards
            # shared venues. Papers are matched by title text only.
            shared_nodes = [
                tripartite_nodes[i] for i in np.nonzero(author_links & coauthor_links)[0]
            ]
            shared_papers = df[
                (df[author_column] == coauthor) & df[topic_column].isin(shared_nodes)
            ].drop_duplicates(subset=[conference_column, topic_column])

            for _, paper in shared_papers.iterrows():
                parts.append(f"{coauthor} ({paper[conference_column]}): {paper[topic_column]}\n")

        return "".join(parts)

    def _show_text(self, text):
        """Replace the content of the read-only result box with ``text``."""
        self.result_text.config(state=tk.NORMAL)
        self.result_text.delete("1.0", tk.END)
        self.result_text.insert(tk.END, text)
        self.result_text.config(state=tk.DISABLED)


# Create the Tk root window and attach the author-page UI to it.
root = tk.Tk()
app = AuthorPageApp(root)

# Hand control to Tk's event loop; this call blocks until the window closes.
root.mainloop()
