In [None]:
pip install -q transformers

In [None]:
import requests
import pandas as pd
import json

# Professors whose PlanetTerp reviews are downloaded for the analysis.
profs = ["Maksym Morawski", "Cliff Bakalian", "Ilchul Yoon", "Clyde Kruskal"]
all_reviews = []
url = "https://planetterp.com/api/v1/professor"
for prof in profs:
    # A timeout keeps a stalled connection from hanging the notebook indefinitely.
    response = requests.get(url, params={"name": prof, "reviews": "true"}, timeout=30)
    if not response.ok:
        # Report and skip instead of trying to parse an error response as review data.
        print(f"Skipping {prof}: HTTP {response.status_code}")
        continue
    data = response.json()
    # A payload without a "reviews" key contributes nothing.
    reviews = data.get("reviews", [])
    all_reviews.extend(reviews)

# One row per review, in download order.
df = pd.DataFrame(all_reviews)
print(df.tail())

# Use sentiment analysis of students' reviews
from transformers import pipeline

pipe = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-sentiment-latest")

# The tokenizer for this checkpoint declares no maximum length, so truncation=True
# on its own falls back to "no truncation" and long reviews fail inside the model
# with a tensor-size mismatch. Passing an explicit cap makes truncation effective.
# 512 is the usable sequence length for RoBERTa-base (514 position slots, 2 reserved).
MAX_REVIEW_TOKENS = 512

reviews = df['review'].tolist()
indexes_to_drop = []
sentiments = []

for idx, review in enumerate(reviews):
    try:
        result = pipe(review, truncation=True, max_length=MAX_REVIEW_TOKENS)[0]
        sentiments.append(result['label'])
    except Exception as e:
        # Best effort: a review that still cannot be scored (e.g. a non-text value)
        # is reported and excluded rather than aborting the whole run.
        print(f"Dropping review at index {idx} due to error: {e}")
        indexes_to_drop.append(idx)
        sentiments.append(None)

# Attach the labels positionally (one entry per row), then drop the rows that could
# not be scored. This does not rely on the index labels matching row positions.
df['sentiment'] = sentiments
df = df.dropna(subset=['sentiment']).reset_index(drop=True)

# Sentiment-to-star
sentiment_to_star = {
    "positive": 3,
    "neutral": 2,
    "negative": 1
}
df['sentiment_to_star'] = df['sentiment'].map(sentiment_to_star)
# Any label outside the mapping becomes NaN and is removed here.
df = df.dropna(subset=['sentiment_to_star'])


# Data cleaning and manipulation
# Keep only reviews that have both a rating and a recognised expected grade.
filtered_data = df.dropna(subset=['rating', 'expected_grade'])
valid_grades = {'A+', 'A', 'A-', 'B+', 'B', 'B-', 'C+', 'C', 'C-', 'D+', 'D', 'D-', 'F', 'XF', 'W', 'P'}
# .copy() gives an independent frame so the column assignments below do not write
# into a slice of `df` (which triggers pandas' SettingWithCopyWarning).
filtered_data = filtered_data[filtered_data['expected_grade'].isin(valid_grades)].copy()

# Grade-point value for each letter grade.
# NOTE(review): 'W' and 'P' are mapped to 0.0, the same as 'F'/'XF' - confirm this is intended.
grade_to_number = {'A+': 4.0, 'A': 4.0, 'A-': 3.7, 'B+': 3.3, 'B': 3.0, 'B-': 2.7, 'C+': 2.3, 'C': 2.0, 'C-': 1.7, 'D+': 1.3, 'D': 1.0, 'D-': 0.7, 'F': 0.0, 'XF': 0.0, 'W': 0.0, 'P': 0.0}
filtered_data['grade_number'] = filtered_data['expected_grade'].map(grade_to_number)

# Length of the review text in characters (non-string values are stringified first).
filtered_data['review_length'] = filtered_data['review'].apply(lambda x: len(str(x)))

# Convert the numeric rating to 'good', 'bad', or 'neutral' for classification
def categorize_rating(rating):
    """Bucket a numeric rating into a coarse class label.

    Returns 'good' when the rating is above 3, 'bad' when it is below 3,
    and 'neutral' for anything else (including exactly 3).
    """
    if rating > 3:
        return 'good'
    return 'bad' if rating < 3 else 'neutral'
# Label every review with its rating bucket, one element at a time.
filtered_data['rating_category'] = filtered_data['rating'].map(categorize_rating)


# Preview the first rows of the cleaned frame.
print(filtered_data.head())

         professor   course  \
594  Clyde Kruskal  CMSC451   
595  Clyde Kruskal  CMSC351   
596  Clyde Kruskal  CMSC351   
597  Clyde Kruskal  CMSC351   
598  Clyde Kruskal  CMSC351   

                                                review  rating expected_grade  \
594  I would recommend him for 451. He's very disor...       4              A   
595  Does not give syllabus for class. Does not tel...       2             A-   
596  I was scared shitless before starting this cla...       4             A-   
597  very hard professor, but u wont learn jacks**t...       2              B   
598  VERY VERY VERY MESSY CLASS. AVOID IF CAN. ITS ...       1              C   

                         created  
594  2024-12-23T19:44:19.894218Z  
595  2024-12-24T17:29:23.721410Z  
596  2025-02-06T19:10:50.448880Z  
597  2025-04-02T19:45:52.387020Z  
598  2025-04-11T14:29:47.994442Z  


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Dropping review at index 19 due to error: The expanded size of the tensor (729) must match the existing size (514) at non-singleton dimension 1.  Target sizes: [1, 729].  Tensor sizes: [1, 514]
Dropping review at index 79 due to error: The expanded size of the tensor (1121) must match the existing size (514) at non-singleton dimension 1.  Target sizes: [1, 1121].  Tensor sizes: [1, 514]
Dropping review at index 93 due to error: The expanded size of the tensor (679) must match the existing size (514) at non-singleton dimension 1.  Target sizes: [1, 679].  Tensor sizes: [1, 514]
Dropping review at index 94 due to error: The expanded size of the tensor (577) must match the existing size (514) at non-singleton dimension 1.  Target sizes: [1, 577].  Tensor sizes: [1, 514]
Dropping review at index 106 due to error: The expanded size of the tensor (614) must match the existing size (514) at non-singleton dimension 1.  Target sizes: [1, 614].  Tensor sizes: [1, 514]
Dropping review at index 14

In [None]:
# Hold out one professor entirely: fit on three professors, evaluate on the fourth
train_profs = ["Cliff Bakalian", "Ilchul Yoon", "Clyde Kruskal"]
test_prof = ["Maksym Morawski"]

# Select rows by professor-name membership in each list
train_data = filtered_data.loc[filtered_data['professor'].isin(train_profs)]
test_data = filtered_data.loc[filtered_data['professor'].isin(test_prof)]


In [None]:
# Tree Classifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from tabulate import tabulate
# Every non-empty combination of the three engineered features
feature_sets = [
    ['sentiment_to_star'],
    ['review_length'],
    ['grade_number'],
    ['sentiment_to_star', 'review_length'],
    ['sentiment_to_star', 'grade_number'],
    ['review_length', 'grade_number'],
    ['sentiment_to_star', 'review_length', 'grade_number']
]

# The targets and the true class distribution are the same for every feature subset
y_train = train_data['rating_category']
y_test = test_data['rating_category']
actual_counts = test_data['rating_category'].value_counts()

results = []
for features in feature_sets:
    X_train, X_test = train_data[features], test_data[features]

    # Shallow tree with a fixed seed so the comparison is repeatable
    model = DecisionTreeClassifier(max_depth=4, random_state=42)
    model.fit(X_train, y_train)
    predicted_rating = model.predict(X_test)

    # How often each class was predicted, next to how often it really occurs
    pred_counts = pd.Series(predicted_rating).value_counts()
    accuracy = accuracy_score(y_test, predicted_rating)
    results.append(dict([
        ('Features', features),
        ('Accuracy', round(accuracy, 3)),
        ('Pred Good', pred_counts.get('good', 0)),
        ('Pred Neutral', pred_counts.get('neutral', 0)),
        ('Pred Bad', pred_counts.get('bad', 0)),
        ('Actual Good', actual_counts.get('good', 0)),
        ('Actual Neutral', actual_counts.get('neutral', 0)),
        ('Actual Bad', actual_counts.get('bad', 0)),
    ]))

# Best-scoring feature subsets first
results_df = pd.DataFrame(results).sort_values(by='Accuracy', ascending=False)
print(tabulate(results_df, headers='keys', tablefmt='pretty', showindex=False))




+--------------------------------------------------------+----------+-----------+--------------+----------+-------------+----------------+------------+
|                        Features                        | Accuracy | Pred Good | Pred Neutral | Pred Bad | Actual Good | Actual Neutral | Actual Bad |
+--------------------------------------------------------+----------+-----------+--------------+----------+-------------+----------------+------------+
|                 ['sentiment_to_star']                  |  0.691   |    57     |      0       |   105    |     72      |       22       |     68     |
|         ['sentiment_to_star', 'grade_number']          |  0.691   |    56     |      11      |    95    |     72      |       22       |     68     |
|         ['sentiment_to_star', 'review_length']         |  0.673   |    57     |      24      |    81    |     72      |       22       |     68     |
| ['sentiment_to_star', 'review_length', 'grade_number'] |  0.667   |    53     |      2

In [None]:
# Tree Regressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score, mean_squared_error
from tabulate import tabulate
# Every non-empty combination of the three engineered features
feature_sets = [
    ['sentiment_to_star'],
    ['review_length'],
    ['grade_number'],
    ['sentiment_to_star', 'review_length'],
    ['sentiment_to_star', 'grade_number'],
    ['review_length', 'grade_number'],
    ['sentiment_to_star', 'review_length', 'grade_number']
]

results = []

for features in feature_sets:
    X_train = train_data[features]
    y_train = train_data['rating']
    X_test = test_data[features]
    y_test = test_data['rating']

    # Shallow tree with a fixed seed so the comparison is repeatable
    tree_model = DecisionTreeRegressor(max_depth=4, random_state=42)
    tree_model.fit(X_train, y_train)
    predicted_rating = tree_model.predict(X_test)
    r2 = r2_score(y_test, predicted_rating)
    # mean_squared_error returns the MSE; take the square root so the value
    # reported under 'RMSE' really is a root-mean-squared error in rating units.
    rmse = mean_squared_error(y_test, predicted_rating) ** 0.5
    average_predicted_rating = predicted_rating.mean()

    results.append({
        'Features': features,
        'R²': round(r2, 3),
        'RMSE': round(rmse, 2),
        'Average Predicted Rating': round(average_predicted_rating, 2)
    })

# Best-fitting feature subsets first
results_df = pd.DataFrame(results)
results_df = results_df.sort_values(by='R²', ascending=False)
print(tabulate(results_df, headers='keys', tablefmt='pretty', showindex=False))

+--------------------------------------------------------+--------+------+--------------------------+
|                        Features                        |   R²   | RMSE | Average Predicted Rating |
+--------------------------------------------------------+--------+------+--------------------------+
|                 ['sentiment_to_star']                  | 0.452  | 1.37 |           2.92           |
|         ['sentiment_to_star', 'grade_number']          | 0.424  | 1.43 |           2.95           |
| ['sentiment_to_star', 'review_length', 'grade_number'] | 0.353  | 1.61 |           3.0            |
|         ['sentiment_to_star', 'review_length']         | 0.305  | 1.73 |           2.89           |
|                    ['grade_number']                    | 0.112  | 2.21 |           3.02           |
|           ['review_length', 'grade_number']            | 0.043  | 2.39 |           3.03           |
|                   ['review_length']                    | -0.045 | 2.6  |        

In [None]:
# Linear regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error
from tabulate import tabulate
# Every non-empty combination of the three engineered features
feature_sets = [
    ['sentiment_to_star'],
    ['review_length'],
    ['grade_number'],
    ['sentiment_to_star', 'review_length'],
    ['sentiment_to_star', 'grade_number'],
    ['review_length', 'grade_number'],
    ['sentiment_to_star', 'review_length', 'grade_number']
]

results = []

for features in feature_sets:
    X_train = train_data[features]
    y_train = train_data['rating']
    X_test = test_data[features]
    y_test = test_data['rating']

    linear_model = LinearRegression()
    linear_model.fit(X_train, y_train)
    predicted_ratings = linear_model.predict(X_test)
    r2 = r2_score(y_test, predicted_ratings)
    # mean_squared_error returns the MSE; take the square root so the value
    # reported under 'RMSE' really is a root-mean-squared error in rating units.
    rmse = mean_squared_error(y_test, predicted_ratings) ** 0.5
    average_predicted_rating = predicted_ratings.mean()
    results.append({
        'Features': features,
        'R²': round(r2, 3),
        'RMSE': round(rmse, 2),
        'Average Predicted Rating': round(average_predicted_rating, 2)
    })

# Best-fitting feature subsets first
results_df = pd.DataFrame(results)
results_df = results_df.sort_values(by='R²', ascending=False)
print(tabulate(results_df, headers='keys', tablefmt='pretty', showindex=False))

+--------------------------------------------------------+-------+------+--------------------------+
|                        Features                        |  R²   | RMSE | Average Predicted Rating |
+--------------------------------------------------------+-------+------+--------------------------+
| ['sentiment_to_star', 'review_length', 'grade_number'] | 0.543 | 1.14 |           2.96           |
|         ['sentiment_to_star', 'grade_number']          | 0.528 | 1.18 |           2.96           |
|         ['sentiment_to_star', 'review_length']         | 0.498 | 1.25 |           2.94           |
|                 ['sentiment_to_star']                  | 0.479 | 1.3  |           2.92           |
|           ['review_length', 'grade_number']            | 0.118 | 2.2  |           2.97           |
|                    ['grade_number']                    | 0.117 | 2.2  |           2.97           |
|                   ['review_length']                    | 0.005 | 2.48 |           2.91   

In [None]:
# Random forest classifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from tabulate import tabulate
# Every non-empty combination of the three engineered features
feature_sets = [
    ['sentiment_to_star'],
    ['review_length'],
    ['grade_number'],
    ['sentiment_to_star', 'review_length'],
    ['sentiment_to_star', 'grade_number'],
    ['review_length', 'grade_number'],
    ['sentiment_to_star', 'review_length', 'grade_number'],
]

results = []

# The targets and the true class distribution are the same for every feature subset
actual_counts = test_data['rating_category'].value_counts()
y_train = train_data['rating_category']
y_test = test_data['rating_category']

for features in feature_sets:
    X_train = train_data[features]
    X_test = test_data[features]

    # Depth-limited forest with a fixed seed so the comparison is repeatable
    model = RandomForestClassifier(max_depth=4, random_state=42)
    model.fit(X_train, y_train)
    predicted_rating = model.predict(X_test)

    # How often each class was predicted, next to how often it really occurs
    pred_counts = pd.Series(predicted_rating).value_counts()
    accuracy = accuracy_score(y_test, predicted_rating)
    results.append(dict([
        ('Features', features),
        ('Accuracy', round(accuracy, 3)),
        ('Pred Good', pred_counts.get('good', 0)),
        ('Pred Neutral', pred_counts.get('neutral', 0)),
        ('Pred Bad', pred_counts.get('bad', 0)),
        ('Actual Good', actual_counts.get('good', 0)),
        ('Actual Neutral', actual_counts.get('neutral', 0)),
        ('Actual Bad', actual_counts.get('bad', 0)),
    ]))

# Best-scoring feature subsets first
results_df = pd.DataFrame(results).sort_values(by='Accuracy', ascending=False)
print(tabulate(results_df, headers='keys', tablefmt='pretty', showindex=False))


+--------------------------------------------------------+----------+-----------+--------------+----------+-------------+----------------+------------+
|                        Features                        | Accuracy | Pred Good | Pred Neutral | Pred Bad | Actual Good | Actual Neutral | Actual Bad |
+--------------------------------------------------------+----------+-----------+--------------+----------+-------------+----------------+------------+
|                 ['sentiment_to_star']                  |  0.691   |    57     |      0       |   105    |     72      |       22       |     68     |
|         ['sentiment_to_star', 'grade_number']          |  0.691   |    56     |      11      |    95    |     72      |       22       |     68     |
|         ['sentiment_to_star', 'review_length']         |  0.691   |    60     |      9       |    93    |     72      |       22       |     68     |
| ['sentiment_to_star', 'review_length', 'grade_number'] |  0.691   |    60     |      8

In [None]:
# Random forest regressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error
from tabulate import tabulate
# Every non-empty combination of the three engineered features
feature_sets = [
    ['sentiment_to_star'],
    ['review_length'],
    ['grade_number'],
    ['sentiment_to_star', 'review_length'],
    ['sentiment_to_star', 'grade_number'],
    ['review_length', 'grade_number'],
    ['sentiment_to_star', 'review_length', 'grade_number']]
result = []


for features in feature_sets:
    X_train = train_data[features]
    y_train = train_data['rating']
    X_test = test_data[features]
    y_test = test_data['rating']

    # Depth-limited forest with a fixed seed so the comparison is repeatable
    forest_model = RandomForestRegressor(n_estimators=100, max_depth=4, random_state=42)

    forest_model.fit(X_train, y_train)
    predicted_ratings = forest_model.predict(X_test)
    r2 = r2_score(y_test, predicted_ratings)
    # mean_squared_error returns the MSE; take the square root so the value
    # reported under 'RMSE' really is a root-mean-squared error in rating units.
    rmse = mean_squared_error(y_test, predicted_ratings) ** 0.5
    average_predicted_rating = predicted_ratings.mean()
    result.append({
        'Features': features,
        'R²': round(r2, 3),
        'RMSE': round(rmse, 2),
        'Average Predicted Rating': round(average_predicted_rating, 2)
    })

# Best-fitting feature subsets first
results_df = pd.DataFrame(result)
results_df = results_df.sort_values(by='R²', ascending=False)
print(tabulate(results_df, headers='keys', tablefmt='pretty', showindex=False))

+--------------------------------------------------------+--------+------+--------------------------+
|                        Features                        |   R²   | RMSE | Average Predicted Rating |
+--------------------------------------------------------+--------+------+--------------------------+
|                 ['sentiment_to_star']                  | 0.453  | 1.36 |           2.93           |
| ['sentiment_to_star', 'review_length', 'grade_number'] | 0.439  | 1.4  |           3.0            |
|         ['sentiment_to_star', 'review_length']         | 0.437  | 1.4  |           2.94           |
|         ['sentiment_to_star', 'grade_number']          | 0.418  | 1.45 |           2.98           |
|                    ['grade_number']                    | 0.112  | 2.21 |           3.04           |
|           ['review_length', 'grade_number']            | 0.086  | 2.28 |           3.01           |
|                   ['review_length']                    | -0.037 | 2.58 |        