# Course and Peer Reviews: Transformers Demo

This notebook loads `course_reviews.csv` and `peer_reviews.csv`, performs basic preprocessing, and demonstrates sentiment analysis, summarization, and question answering using Hugging Face Transformers pipelines.

In [None]:
import pandas as pd
from pathlib import Path
from transformers import pipeline
from src.utils import clean_text
import matplotlib.pyplot as plt
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import nltk
# Fetch the NLTK data used by the preprocessing cell: tokenizer models for
# word_tokenize and the stopword lists. Recent NLTK releases load the
# 'punkt_tab' resource instead of the pickled 'punkt' models, so both are
# requested to keep word_tokenize working across NLTK versions.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

In [None]:
# Load both review datasets from the local data/ directory.
data_dir = Path('data')
# Build the paths directly: a missing file then surfaces as a
# FileNotFoundError naming the path when read_csv opens it, rather than a
# bare StopIteration from next() on an empty glob.
course_path = data_dir / 'course_reviews.csv'
peer_path = data_dir / 'peer_reviews.csv'
course_df = pd.read_csv(course_path)
peer_df = pd.read_csv(peer_path)
print('Course reviews:', course_df.shape)
print('Peer reviews:', peer_df.shape)
# Last expression of the cell: renders a preview of the course reviews.
course_df.head()

In [None]:
# Print the column index of each dataset, then preview the peer reviews as
# the cell's final expression.
for frame in (course_df, peer_df):
    print(frame.columns)
peer_df.head()

In [None]:
# Built once when this cell runs instead of on every call: preprocess is
# applied per row, and re-reading the stopword corpus each time is wasted work.
_ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))


def preprocess(text):
    """Clean *text*, tokenize it and drop English stopwords.

    Args:
        text: A raw review/comment value. Missing values (``None``/NaN, as
            pandas produces for empty CSV fields) are accepted.

    Returns:
        The remaining tokens joined by single spaces, or ``''`` when *text*
        is a missing value.
    """
    # Empty CSV fields arrive as NaN rather than str; short-circuit them
    # instead of handing a non-string to the cleaner and tokenizer.
    if not isinstance(text, str) and pd.isna(text):
        return ''
    tokens = word_tokenize(clean_text(text))
    # Comparison is exact-match; NOTE(review): this only removes capitalised
    # stopwords if clean_text lowercases its input - confirm.
    return ' '.join(t for t in tokens if t not in _ENGLISH_STOPWORDS)

# Add a 'clean_text' column to each frame, derived from that dataset's
# free-text column via preprocess().
for frame, text_column in ((course_df, 'review_text'), (peer_df, 'comment')):
    frame['clean_text'] = frame[text_column].apply(preprocess)

In [None]:
# One histogram per dataset: the frame, the column to plot, and the title.
histogram_specs = (
    (course_df, 'rating', 'Course review ratings'),
    (peer_df, 'score', 'Peer review scores'),
)
for frame, column, heading in histogram_specs:
    frame[column].hist()
    plt.title(heading)
    plt.show()

In [None]:
# Sentiment classification of the first cleaned text from each dataset.
# No model is named, so transformers falls back to its default for the task.
sentiment = pipeline('sentiment-analysis')
# truncation=True clips each input to the model's maximum sequence length;
# without it a long review fails inside the model instead of being scored.
# NOTE(review): clean_text has had NLTK English stopwords removed, and that
# list includes negations such as "not" and "no", which may skew the label.
# Consider classifying the raw text columns instead.
print(sentiment(course_df['clean_text'].iloc[0], truncation=True))
print(sentiment(peer_df['clean_text'].iloc[0], truncation=True))

In [None]:
# Summarize the first cleaned text from each dataset.
# No model is named, so transformers falls back to its default for the task.
summarizer = pipeline('summarization')
# truncation=True clips each input to the model's maximum input length so a
# long review is summarized from its leading portion instead of raising.
course_summary = summarizer(course_df['clean_text'].iloc[0], truncation=True)
print('Summary course:', course_summary[0]['summary_text'])
peer_summary = summarizer(peer_df['clean_text'].iloc[0], truncation=True)
print('Summary peer:', peer_summary[0]['summary_text'])

In [None]:
# Ask a fixed question against the first cleaned course review and print the
# 'answer' field of the pipeline's result.
qa = pipeline('question-answering')
question = 'What is this review about?'
context = course_df['clean_text'].iloc[0]
result = qa(question=question, context=context)
print(result['answer'])