In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split

In [None]:
# Step 1: Load and preprocess the dataset
data = pd.read_csv('restaurant_reviews.csv')
# Keep only rows that have both a review and a label: TfidfVectorizer rejects
# NaN documents and LogisticRegression rejects NaN targets. `data` itself is
# left untouched so the raw frame stays available to other cells.
labelled_rows = data.dropna(subset=['Review_Text', 'Taste_of_Food'])
X = labelled_rows['Review_Text'].astype(str)  # Input: Review text
y = labelled_rows['Taste_of_Food']  # Output: Sentiment labels for Taste of Food category

In [None]:
# Preprocessing (tokenization, stop-word removal, etc.)
# TODO: not implemented yet -- this cell is currently a placeholder.


In [None]:
# Step 2: Feature Engineering
tfidf_vectorizer = TfidfVectorizer(max_features=5000)
X_tfidf = tfidf_vectorizer.fit_transform(X)

In [None]:
# Step 3: Model Selection and Training
X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y, test_size=0.2, random_state=42)
model = LogisticRegression()
model.fit(X_train, y_train)

In [None]:
# Step 4: Model Evaluation
# Predict labels for the held-out split, then print the per-class report.
y_pred = model.predict(X_test)
report_text = classification_report(y_test, y_pred)
print(report_text)

In [None]:
# Step 6: Threshold Selection (for each category)
# ROC analysis here is binary: fail early with a clear message instead of
# silently scoring one class of a multi-class target.
if len(model.classes_) != 2:
    raise ValueError(
        "ROC threshold selection expects a binary target, got classes: "
        f"{list(model.classes_)}"
    )

# predict_proba columns follow model.classes_, so column 1 is the probability
# of model.classes_[1]. Pass that label to roc_curve explicitly so string or
# non-{0, 1} labels are scored against the matching class.
positive_label = model.classes_[1]
y_prob = model.predict_proba(X_test)[:, 1]  # Probability of being positive class

# Calculate ROC curve
fpr, tpr, thresholds = roc_curve(y_test, y_prob, pos_label=positive_label)
roc_auc = auc(fpr, tpr)

# Plot ROC curve
fig, ax = plt.subplots()
ax.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = {:.2f})'.format(roc_auc))
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')  # chance diagonal
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic')
ax.legend(loc='lower right')
plt.show()

# Find the optimal threshold (You can customize this part)
# Picks the point maximizing TPR - FPR (Youden's J statistic).
# NOTE(review): the threshold is tuned on the same split used for evaluation;
# use a separate validation split if an unbiased estimate is needed.
optimal_idx = np.argmax(tpr - fpr)
optimal_threshold = thresholds[optimal_idx]
print("Optimal Threshold:", optimal_threshold)

In [None]:
# Step 7: Inference
# Vectorize unseen reviews with the already-fitted TF-IDF vectorizer, then ask
# the trained classifier for one predicted label per review.
new_reviews = [
    "The food was delicious!",
    "Terrible experience.",
]
new_reviews_tfidf = tfidf_vectorizer.transform(new_reviews)
predicted_sentiment = model.predict(new_reviews_tfidf)
print(predicted_sentiment)