In [None]:
import json
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.feature_extraction.text import TfidfVectorizer
import nltk
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
import string
from gensim import corpora, models
import gensim


# Read JSON

In [None]:
with open('fpmsdb.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
    
# print(data['features'][4115]['properties']['title'])

research_titles = []
for i in range(0, 4115):
    research_titles.append(data['features'][i]['properties']['title'])

print(research_titles)

In [4]:
# download necessary resources from NLTK
nltk.download('stopwords')
nltk.download('wordnet')

# define stop words and lemmatizer
stop = set(stopwords.words('english'))
exclude = set(string.punctuation)
lemma = WordNetLemmatizer()

# preprocess function for text data
def preprocess(text):
    stop_free = " ".join([word for word in text.lower().split() if word not in stop])
    punc_free = ''.join(char for char in stop_free if char not in exclude)
    normalized = " ".join(lemma.lemmatize(word) for word in punc_free.split())
    return normalized

# preprocess the titles
titles_preprocessed = [preprocess(title) for title in research_titles]

# create a dictionary and corpus
dictionary = corpora.Dictionary([doc.split() for doc in titles_preprocessed])
corpus = [dictionary.doc2bow(doc.split()) for doc in titles_preprocessed]

# build the LDA model
lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus,
                                            id2word=dictionary,
                                            num_topics=10,
                                            random_state=100,
                                            update_every=1,
                                            chunksize=100,
                                            passes=10,
                                            alpha='auto',
                                            per_word_topics=True)

# print the top 10 topics and their most significant words
topics = lda_model.show_topics(num_topics=10, num_words=10, formatted=False)
for topic in topics:
    print("Topic {}: {}".format(topic[0], ", ".join([word[0] for word in topic[1]])))
    
# categorize the research titles based on their highest probability topic
for i, title in enumerate(titles_preprocessed):
    bow = dictionary.doc2bow(preprocess(title).split())
    topic_probs = lda_model.get_document_topics(bow)
    topic_probs_sorted = sorted(topic_probs, key=lambda x: x[1], reverse=True)
    topic_num = topic_probs_sorted[0][0]
    print("{} - Topic {}: {}".format(i+1, topic_num, ", ".join([word[0] for word in topics[topic_num][1]])))


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\thesa\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\thesa\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Topic 0: pradesh, himachal, response, emerging, central, factor, madhya, technology, region, haryana
Topic 1: wheat, india, complex, note, pattern, variation, behavior, role, framework, eastern
Topic 2: agriculture, a, combustion, department, planning, woman, free, composition, knowledge, highway
Topic 3: detection, review, application, different, classification, specie, algorithm, use, condition, novel
Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
Topic 5: system, network, based, mimo, neural, communication, distribution, wireless, overview, coordination
Topic 6: adaptive, change, security, patient, climate, optimization, level, uav, lightweight, scheme
Topic 7: web, acid, prediction, iv, nanoparticles, content, body, dietary, vitro, formation
Topic 8: using, model, recognition, image, cnn, yield, l, numerical, project, flow
Topic 9: building, kathmandu, learning, valley, deep, technique, transfer, construction, within, gi
1 - Topic 

496 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
497 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
498 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
499 - Topic 8: using, model, recognition, image, cnn, yield, l, numerical, project, flow
500 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
501 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
502 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
503 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
504 - Topic 8: using, model, recognition, image, cnn, yield, l, numerical, project, flow
505 - Topic 5: system, network, based, mimo, neural, communication, distribution, wireless, ov

1012 - Topic 7: web, acid, prediction, iv, nanoparticles, content, body, dietary, vitro, formation
1013 - Topic 9: building, kathmandu, learning, valley, deep, technique, transfer, construction, within, gi
1014 - Topic 3: detection, review, application, different, classification, specie, algorithm, use, condition, novel
1015 - Topic 7: web, acid, prediction, iv, nanoparticles, content, body, dietary, vitro, formation
1016 - Topic 9: building, kathmandu, learning, valley, deep, technique, transfer, construction, within, gi
1017 - Topic 9: building, kathmandu, learning, valley, deep, technique, transfer, construction, within, gi
1018 - Topic 9: building, kathmandu, learning, valley, deep, technique, transfer, construction, within, gi
1019 - Topic 3: detection, review, application, different, classification, specie, algorithm, use, condition, novel
1020 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
1021 - Topic 4: study, nepal, effect,

1547 - Topic 6: adaptive, change, security, patient, climate, optimization, level, uav, lightweight, scheme
1548 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
1549 - Topic 5: system, network, based, mimo, neural, communication, distribution, wireless, overview, coordination
1550 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
1551 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
1552 - Topic 5: system, network, based, mimo, neural, communication, distribution, wireless, overview, coordination
1553 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
1554 - Topic 5: system, network, based, mimo, neural, communication, distribution, wireless, overview, coordination
1555 - Topic 5: system, network, based, mimo, neural, communication, distribution, wireless, overview, coordination
1556 - T

2020 - Topic 5: system, network, based, mimo, neural, communication, distribution, wireless, overview, coordination
2021 - Topic 1: wheat, india, complex, note, pattern, variation, behavior, role, framework, eastern
2022 - Topic 8: using, model, recognition, image, cnn, yield, l, numerical, project, flow
2023 - Topic 6: adaptive, change, security, patient, climate, optimization, level, uav, lightweight, scheme
2024 - Topic 8: using, model, recognition, image, cnn, yield, l, numerical, project, flow
2025 - Topic 9: building, kathmandu, learning, valley, deep, technique, transfer, construction, within, gi
2026 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
2027 - Topic 8: using, model, recognition, image, cnn, yield, l, numerical, project, flow
2028 - Topic 5: system, network, based, mimo, neural, communication, distribution, wireless, overview, coordination
2029 - Topic 5: system, network, based, mimo, neural, communication, distribut

2500 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
2501 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
2502 - Topic 9: building, kathmandu, learning, valley, deep, technique, transfer, construction, within, gi
2503 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
2504 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
2505 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
2506 - Topic 5: system, network, based, mimo, neural, communication, distribution, wireless, overview, coordination
2507 - Topic 1: wheat, india, complex, note, pattern, variation, behavior, role, framework, eastern
2508 - Topic 5: system, network, based, mimo, neural, communication, distribution, wireless, overview, coordination
2509 - Topic 5: system, network, bas

3028 - Topic 8: using, model, recognition, image, cnn, yield, l, numerical, project, flow
3029 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
3030 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
3031 - Topic 8: using, model, recognition, image, cnn, yield, l, numerical, project, flow
3032 - Topic 8: using, model, recognition, image, cnn, yield, l, numerical, project, flow
3033 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
3034 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
3035 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
3036 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
3037 - Topic 8: using, model, recognition, image, cnn, yield, l, numerical, project, flow
3038 - Top

3552 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
3553 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
3554 - Topic 7: web, acid, prediction, iv, nanoparticles, content, body, dietary, vitro, formation
3555 - Topic 8: using, model, recognition, image, cnn, yield, l, numerical, project, flow
3556 - Topic 0: pradesh, himachal, response, emerging, central, factor, madhya, technology, region, haryana
3557 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
3558 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
3559 - Topic 3: detection, review, application, different, classification, specie, algorithm, use, condition, novel
3560 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
3561 - Topic 4: study, nepal, effect, case, analysis, performa

3980 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
3981 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
3982 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
3983 - Topic 9: building, kathmandu, learning, valley, deep, technique, transfer, construction, within, gi
3984 - Topic 9: building, kathmandu, learning, valley, deep, technique, transfer, construction, within, gi
3985 - Topic 6: adaptive, change, security, patient, climate, optimization, level, uav, lightweight, scheme
3986 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
3987 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
3988 - Topic 4: study, nepal, effect, case, analysis, performance, design, energy, approach, development
3989 - Topic 4: study, nepal, effect, case, anal