In [20]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

# Load the dataset. Rows without a title are dropped up front because
# CountVectorizer cannot tokenize missing (NaN) values; when no titles are
# missing this is a no-op.
df = (
    pd.read_csv('1-27PM.csv')
    .dropna(subset=['title'])
    .reset_index(drop=True)
)

# Human-readable labels keyed by LDA topic id.
# NOTE(review): the labels were presumably chosen by inspecting the fitted
# topics, so they are only valid for this dataset and random_state - confirm
# before changing either.
topic_names = {
    0: "ioe news and bulletin",
    1: "network and security",  # stray leading space in the label removed
    2: "neural network",
}

# Vectorize the publication titles into a bag-of-words count matrix
vectorizer = CountVectorizer(stop_words='english')
X = vectorizer.fit_transform(df['title'])

# Fit LDA with exactly one component per label so every topic id produced
# below is guaranteed to have a name, then get the per-publication topic
# distribution (one row per title, one column per topic).
lda = LatentDirichletAllocation(n_components=len(topic_names), random_state=42)
topics = lda.fit_transform(X)

# Assign each publication its most probable topic
df['topic'] = topics.argmax(axis=1)

# Count publications per (author, topic) pair
grouped = df.groupby(['full_name', 'topic']).size().reset_index(name='count')

# For every author keep the row with the highest count. idxmax returns the
# first maximum, so ties resolve to the lowest topic id. The explicit copy
# makes `dominant` an independent frame, so the column assignment below does
# not trigger SettingWithCopyWarning.
dominant = grouped.loc[grouped.groupby('full_name')['count'].idxmax()].copy()
dominant['topic'] = dominant['topic'].map(topic_names)

# Print the results
print(dominant[['full_name', 'topic']])


                full_name                  topic
2             Aman Shakya         neural network
3         Anand Kumar Sah   network and security
6    Arun Kumar Timalsina         neural network
7          Babu R. Dawadi  ioe news and bulletin
12          Basanta Joshi         neural network
14       Daya Sagar Baral   network and security
18       Dibakar Raj Pant         neural network
20  Nanda Bikram Adhikari   network and security
24  Sanjeeb Prasad Panday         neural network
26        Sanjivan Satyal   network and security
28           Santosh Giri   network and security
32   Sharad Kumar Ghimire         neural network
33   Shashidhar Ram Joshi  ioe news and bulletin
38         Subarna Shakya         neural network
39           Suman Sharma  ioe news and bulletin
42      Surendra Shrestha   network and security


In [21]:
# Save the per-author dominant-topic table to disk. Default to_csv settings
# are used, so the DataFrame index is written as the first (unnamed) column.
dominant.to_csv(path_or_buf='dominant.csv')