In [None]:
# nlp_course_eval_analysis.py
# Full pipeline: load -> clean -> topic modelling -> sentiment -> gradio demo script export

# Recommended environment (requirements.txt)
# pandas
# scikit-learn
# joblib
# nltk
# gensim
# pyldavis
# gradio
# umap-learn
# hdbscan
# bertopic (optional, for embeddings-based topics)
# transformers (optional, for transformer sentiment)
# vaderSentiment

import os
import re
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation
import joblib
import nltk
nltk.download('vader_lexicon')

from collections import Counter

# Optional: for an embeddings based topic model (BERTopic)
# from bertopic import BERTopic

# For VADER sentiment (rule-based)
import nltk
try:
    from nltk.sentiment import SentimentIntensityAnalyzer
except:
    nltk.download('vader_lexicon')
    from nltk.sentiment import SentimentIntensityAnalyzer

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...


In [4]:


# -------------- Step 1: Load data ----------------
url = "https://raw.githubusercontent.com/course-files/NaturalLanguageProcessing/refs/heads/main/data/202511-ft_bi1_bi2_course_evaluation.csv"
df = pd.read_csv(url, encoding='utf-8', low_memory=False)

# Identify free-text columns (adjust if names are different)
text_cols = [c for c in df.columns if c.startswith('f_')]
print("Text columns:", text_cols)

# Combine free-text columns into one 'response' column per-student
df['response'] = df[text_cols].fillna('').astype(str).apply(lambda row: ' '.join(row.values), axis=1)

# Drop empty responses
df = df[df['response'].str.strip().str.len() > 5].copy()
print("Responses:", len(df))

Text columns: ['f_1_In_your_opinion_which_topics_(if_any)_should_be_added_to_the_Business_Intelligence_I_and_II_curriculum', 'f_2_In_your_opinion_which_topics_(if_any)_should_be_removed_from_the_Business_Intelligence_I_and_II_curriculum', 'f_3_Write_at_least_two_things_you_liked_about_the_teaching_and_learning_in_this_course', 'f_4_Write_at_least_one_recommendation_to_improve_the_teaching_and_learning_in_this_course_(for_future_classes)']
Responses: 129


In [5]:


# -------------- Step 2: Clean text ----------------
import string
def clean_text(s):
    s = str(s).lower()
    s = re.sub(r"http\S+"," ", s)
    s = re.sub(r"[^a-z0-9\s]", " ", s)
    s = re.sub(r"\s+", " ", s).strip()
    return s

df['text'] = df['response'].apply(clean_text)

In [6]:


# -------------- Step 3: Topic modelling (LDA) -------------
# Vectorize (unigrams + bigrams)
vectorizer = CountVectorizer(max_df=0.6, min_df=2, stop_words='english', ngram_range=(1,2))
X = vectorizer.fit_transform(df['text'])

n_topics = 6  # start with 6; tune by coherence / interpretability
lda = LatentDirichletAllocation(n_components=n_topics, random_state=42, max_iter=20)
lda.fit(X)

# Helper: get top words
def top_words(model, feature_names, n=10):
    topics = []
    for i, comp in enumerate(model.components_):
        top = comp.argsort()[:-n-1:-1]
        topics.append([feature_names[j] for j in top])
    return topics

feature_names = vectorizer.get_feature_names_out()
topics = top_words(lda, feature_names, n=12)
for i,t in enumerate(topics):
    print("Topic", i, ":", ', '.join(t))

# Assign dominant topic to each doc
doc_topic_dist = lda.transform(X)
df['dominant_topic'] = doc_topic_dist.argmax(axis=1)
df['topic_prob'] = doc_topic_dist.max(axis=1)


Topic 0 : practical, data, bi, business, real, lab, theory, like, tools, real world, topics, world
Topic 1 : labs, time, content, unit, topics, work, lab, opinion, quizzes, think, time series, series
Topic 2 : slides, liked, labs, lecture, content, number, learning, better, work, notes, number slides, bi
Topic 3 : data, practical, labs, unit, tools, matter, enjoyed, detailed, world, real world, engaging, opinion
Topic 4 : lab, assignments, content, work, group, practical, okay, like, topics, notes, module, labs
Topic 5 : topics, labs, think, end, topic, practical, understanding, real, business, class, course, understand


In [7]:

# -------------- Step 4: Sentiment per-response -------------
# Use VADER (rule-based) for short student comments
from nltk.sentiment import SentimentIntensityAnalyzer

sia = SentimentIntensityAnalyzer()

def vader_sentiment(text):
    s = sia.polarity_scores(text)['compound']
    return s

sia = SentimentIntensityAnalyzer()
def vader_sentiment(text):
    s = sia.polarity_scores(text)['compound']
    if s >= 0.05:
        return 'positive', s
    elif s <= -0.05:
        return 'negative', s
    else:
        return 'neutral', s

df[['vader_sentiment','vader_score']] = df['text'].apply(lambda t: pd.Series(vader_sentiment(t)))

# Optional fallback / check: small lexicon to catch domain words
POS = {'good','great','excellent','engaging','clear','helpful','interactive','practical','relevant','well','like','liked','enjoyed','enjoy'}
NEG = {'unclear','confusing','boring','late','delay','not','problem','difficult','lack','insufficient','poor','hard'}

def lex_sentiment(text):
    toks = text.split()
    pos = sum(1 for t in toks if t in POS)
    neg = sum(1 for t in toks if t in NEG)
    if pos>neg:
        return 'positive'
    elif neg>pos:
        return 'negative'
    else:
        return 'neutral'

df['lex_sentiment'] = df['text'].apply(lex_sentiment)

In [8]:


# -------------- Step 5: Aggregate per-topic sentiments -------------
topic_summary = []
for t in range(n_topics):
    docs = df[df['dominant_topic']==t]
    n = len(docs)
    if n==0:
        continue
    vader_counts = docs['vader_sentiment'].value_counts().to_dict()
    lex_counts = docs['lex_sentiment'].value_counts().to_dict()
    examples = docs['response'].head(5).tolist()
    topic_summary.append({
        'topic_id': t,
        'keywords': topics[t],
        'n_docs': n,
        'vader_counts': vader_counts,
        'lex_counts': lex_counts,
        'examples': examples
    })

summary_df = pd.DataFrame([{
    'topic_id': s['topic_id'],
    'keywords': ', '.join(s['keywords']),
    'n_docs': s['n_docs'],
    'vader_positive': s['vader_counts'].get('positive',0),
    'vader_negative': s['vader_counts'].get('negative',0),
    'vader_neutral': s['vader_counts'].get('neutral',0)
} for s in topic_summary])
print(summary_df)

# Save outputs and models for the demo app
os.makedirs('output', exist_ok=True)
joblib.dump(lda, 'output/lda_model.joblib')
joblib.dump(vectorizer, 'output/vectorizer.joblib')
df.to_csv('output/evaluations_with_topics.csv', index=False)

   topic_id                                           keywords  n_docs  \
0         0  practical, data, bi, business, real, lab, theo...      22   
1         1  labs, time, content, unit, topics, work, lab, ...      27   
2         2  slides, liked, labs, lecture, content, number,...      27   
3         3  data, practical, labs, unit, tools, matter, en...      16   
4         4  lab, assignments, content, work, group, practi...      23   
5         5  topics, labs, think, end, topic, practical, un...      14   

   vader_positive  vader_negative  vader_neutral  
0              17               1              4  
1              20               2              5  
2              19               4              4  
3              10               2              4  
4              13               1              9  
5              10               1              3  


In [9]:
# -------------- Step 6: Produce a small Gradio demo file -------------
gradio_code = f'''
# gradio_app.py
import re
import joblib
import gradio as gr

# Load models
lda = joblib.load("output/lda_model.joblib")
vectorizer = joblib.load("output/vectorizer.joblib")
topics = {topics}  # List of topic names

POS = {sorted(list(POS))}  # Positive lexicon
NEG = {sorted(list(NEG))}  # Negative lexicon

def clean_text(s):
    s = str(s).lower()
    s = re.sub(r"[^a-z0-9\\s]", " ", s)
    s = re.sub(r"\\s+", " ", s).strip()
    return s

def lex_sentiment(text):
    toks = text.split()
    score = sum(1 for t in toks if t in POS) - sum(1 for t in toks if t in NEG)
    return score

def predict(text):
    text = clean_text(text)
    bow = vectorizer.transform([text])
    topic_id = lda.transform(bow)[0].argmax()
    topic = topics[topic_id]  # Runtime lookup
    sentiment_score = lex_sentiment(text)
    return f"Topic: {{topic}}, Sentiment: {{sentiment_score}}"

iface = gr.Interface(fn=predict, inputs="text", outputs="text")
iface.launch()
'''

# Write to file
with open('output/gradio_app.py','w', encoding='utf-8') as f:
    f.write(gradio_code)

print("Gradio app code generated at 'output/gradio_app.py'")


Gradio app code generated at 'output/gradio_app.py'


In [10]:
import joblib

# Save the trained models
joblib.dump(lda, 'lda_model.joblib')
joblib.dump(vectorizer, 'vectorizer.joblib')
print("Models saved successfully!")


Models saved successfully!


In [None]:
topic_labels = {
    0: "Teaching Quality",
    1: "Course Content & Structure",
    2: "Workload & Assessments"
}

In [None]:
df = pd.read_csv('course_evaluations.csv')
df['clean'] = df['evaluation'].apply(preprocess)
df['topic'] = lda.transform(vectorizer.transform(df['clean'])).argmax(axis=1)
df['sentiment'] = df['evaluation'].apply(get_sentiment)
df.head()

In [None]:
df['topic'].value_counts()

In [None]:
df['sentiment'].value_counts()

In [None]:
import gradio as gr

def predict(text):
    clean = preprocess(text)
    topic = lda.transform(vectorizer.transform([clean])).argmax()
    sentiment = get_sentiment(text)
    return topic_labels[topic], sentiment

demo = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=3, placeholder="Type a student's evaluation..."),
    outputs=["text", "text"],
    title="Course Evaluation NLP Analyzer",
    description="Predicts topic and sentiment from a student's course evaluation."
)

demo.launch()

## Interpretation of Results & Recommendations
- **Teaching Quality** receives positive sentiments.
- **Course Content & Structure** shows mixed feedback.
- **Workload & Assessments** is the most negatively rated theme.

### Recommendations
- Standardize course structure to reduce confusion.
- Balance workload across the semester.
- Continue practical examples as they drive positive feedback.