In [43]:
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import nltk

# Load the data
data = pd.read_csv('student_feedback.csv')


In [44]:
# Function to clean the text
def preprocess_text(text):
    # Remove punctuation and digits
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\d+', '', text)
    
    # Convert to lowercase
    text = text.lower()
    
    # Tokenization
    words = word_tokenize(text)
    
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    words = [word for word in words if word not in stop_words]
    
    return ' '.join(words)

# Apply preprocessing to the data
data['cleaned_feedback'] = data['feedback'].apply(preprocess_text)


In [45]:
# Use TfidfVectorizer with n-grams
vectorizer = TfidfVectorizer(ngram_range=(1, 2))  # Use both unigrams and bigrams
X = vectorizer.fit_transform(data['cleaned_feedback'])
y = data['label']


In [46]:
# Apply SMOTE to balance the dataset
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled)


In [47]:
# Model Building
model = LogisticRegression()
model.fit(X_train, y_train)

# Prediction and Evaluation
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

    negative       0.67      0.50      0.57         4
     neutral       0.75      0.60      0.67         5
    positive       0.43      0.60      0.50         5

    accuracy                           0.57        14
   macro avg       0.62      0.57      0.58        14
weighted avg       0.61      0.57      0.58        14



In [48]:
# Function to predict sentiment
def predict_sentiment(text):
    # Preprocess the input text
    cleaned_text = preprocess_text(text)
    
    # Transform the text to TF-IDF features
    text_vector = vectorizer.transform([cleaned_text])
    
    # Predict the sentiment
    prediction = model.predict(text_vector)
    
    # Ensure prediction is an integer
    '''prediction_index = int(prediction[0])
    
    # Map the prediction to the corresponding label
    labels = ['negative', 'neutral', 'positive']
    return labels[prediction_index]'''
    return prediction

# Example usage
user_input = input('Enter your Evaluation')
print(f"Predicted sentiment: {predict_sentiment(user_input)}")


Predicted sentiment: ['neutral']
