In [None]:
from pymongo import MongoClient

# Open a client against the local MongoDB server and resolve the target
# database and collection (the names below are placeholders to be replaced).
client = MongoClient('mongodb://localhost:27017/')
db = client.get_database('your_database_name')
collection = db.get_collection('your_collection_name')

# Pull every document of the collection into an in-memory list so that later
# cells can iterate over the textbooks more than once.
textbooks = [document for document in collection.find()]


In [None]:
# A textbook document is expected to look roughly like this:
# {
#   "_id": ObjectId("..."),
#   "grade": 1,
#   "chapter": "Introduction to Science",
#   "content": "Text of the chapter..."
# }

# Collect the chapter text of every document that has a 'content' field.
# Documents without one are left out, so `texts` may be shorter than `textbooks`.
texts = [doc['content'] for doc in textbooks if 'content' in doc]


In [None]:
import re
import spacy
from nltk.corpus import stopwords

# English stop words from NLTK, kept in a set for fast membership checks
stop_words = {word for word in stopwords.words('english')}
# Small English spaCy pipeline used to tokenize and lemmatize the chapters
nlp = spacy.load('en_core_web_sm')

def preprocess(text):
    """Turn raw chapter text into a list of lemmas.

    Runs of whitespace are collapsed to single spaces, the text is lower-cased
    and parsed with the module-level spaCy pipeline ``nlp``. Tokens whose text
    is in ``stop_words`` or that are not purely alphabetic are discarded.

    Args:
        text: The raw text of one document.

    Returns:
        The lemmas of the remaining tokens, in document order.
    """
    collapsed = re.sub(r'\s+', ' ', text)
    lemmas = []
    for tok in nlp(collapsed.lower()):
        # Skip punctuation, numbers and other non-alphabetic tokens, as well
        # as stop words.
        if not tok.is_alpha or tok.text in stop_words:
            continue
        lemmas.append(tok.lemma_)
    return lemmas

# Run the preprocessing step over every text, keeping the same order as `texts`
preprocessed_texts = list(map(preprocess, texts))


In [None]:
import gensim
from gensim import corpora
from gensim.models.ldamodel import LdaModel

# Map every unique token to an integer id, then encode each document as a
# bag-of-words vector over that vocabulary.
dictionary = corpora.Dictionary(preprocessed_texts)
corpus = list(map(dictionary.doc2bow, preprocessed_texts))

# Fit a 10-topic LDA model; the fixed random_state keeps runs repeatable.
lda_model = LdaModel(
    corpus=corpus,
    id2word=dictionary,
    num_topics=10,
    random_state=100,
    passes=10,
    per_word_topics=True,
)

# Persist the trained model to disk so it can be reloaded later
lda_model.save('lda_model.model')

# Show each topic together with its ten highest-weighted words
topics = lda_model.print_topics(num_words=10)
for topic in topics:
    print(topic)


In [None]:
import pyLDAvis.gensim_models as gensimvis
import pyLDAvis

# Build the interactive pyLDAvis view of the fitted model and write it out as
# a standalone HTML file.
vis = gensimvis.prepare(topic_model=lda_model, corpus=corpus, dictionary=dictionary)
pyLDAvis.save_html(vis, 'lda_visualization.html')


In [None]:
# Store the dominant topics of each textbook back on its MongoDB document.
#
# `preprocessed_texts` was built only from documents that have a 'content'
# field, so it can be shorter than `textbooks`. Indexing it by a document's
# position in `textbooks` would pair documents with the wrong token list (or
# raise IndexError) as soon as one document lacks 'content'. Applying the same
# filter here keeps both sequences aligned one-to-one.
TOP_N_TOPICS = 3  # number of highest-probability topics stored per document

textbooks_with_content = [tb for tb in textbooks if 'content' in tb]
assert len(textbooks_with_content) == len(preprocessed_texts), (
    "textbooks with 'content' and preprocessed_texts are out of sync; "
    "re-run the preprocessing cells before writing topics"
)

for textbook, tokens in zip(textbooks_with_content, preprocessed_texts):
    bow = dictionary.doc2bow(tokens)
    # Use a dedicated name so the `topics` list printed by the training cell
    # is not overwritten.
    doc_topics = lda_model.get_document_topics(bow)
    # Rank the (topic_id, probability) pairs by descending probability and
    # keep the ids of the strongest ones.
    top_topics = sorted(doc_topics, key=lambda pair: pair[1], reverse=True)[:TOP_N_TOPICS]
    topic_ids = [topic_id for topic_id, _ in top_topics]

    # Update the document in MongoDB
    collection.update_one({'_id': textbook['_id']}, {'$set': {'topics': topic_ids}})
