In [None]:
# Install required packages
!pip install pandas scikit-learn nltk

import pandas as pd
import numpy as np
import nltk
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Download the NLTK 'stopwords' corpus; stopwords.words() below reads from it.
nltk.download('stopwords')

# Step 1: Load the Dataset
# Replace 'feedback_data.csv' with the path to your actual CSV file.
# The CSV is expected to have two columns: 'feedback' and 'label'.
# 'feedback' contains the user feedback text, and 'label' is either
# 'positive' or 'negative'.
data = pd.read_csv('feedback_data.csv')

# Display the first few rows as a quick sanity check of the loaded columns.
print(data.head())

# Step 2: Preprocess the Text Data
# English stop words, kept in a set because preprocess_text tests membership
# once per word.
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    """Normalize one feedback string for vectorization.

    The text is lowercased, every character that is neither a letter nor
    whitespace is dropped, and words found in the module-level
    ``stop_words`` set are removed. The remaining words are joined with
    single spaces.

    Args:
        text: Raw feedback. Anything that is not a ``str`` (for example the
            float NaN pandas uses for an empty CSV cell, or ``None``) is
            treated as empty feedback.

    Returns:
        The cleaned text, or ``''`` when nothing is left or the input was
        not a string.
    """
    # Missing cells arrive as float NaN, which has no .lower(); return an
    # empty document instead of raising AttributeError in the middle of
    # DataFrame.apply.
    if not isinstance(text, str):
        return ''
    lowered = text.lower()
    # Punctuation and digits are deleted outright (not replaced by a space),
    # so "don't" becomes "dont".
    letters_only = ''.join(
        char for char in lowered if char.isalpha() or char.isspace()
    )
    # split() with no argument also collapses runs of whitespace.
    kept_words = [
        word for word in letters_only.split() if word not in stop_words
    ]
    return ' '.join(kept_words)

# Run preprocess_text on every value of the 'feedback' column and store the
# result in a new 'processed_feedback' column; 'feedback' itself is unchanged.
data['processed_feedback'] = data['feedback'].apply(preprocess_text)

# Step 3: Vectorization
# Split the data into training and testing sets: 20% of the rows are held out
# for testing, and the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    data['processed_feedback'], data['label'], test_size=0.2, random_state=42
)

# Create a TF-IDF Vectorizer.
# It is fitted on the training text only; the test text is merely transformed
# with the already-fitted vectorizer, so the held-out rows do not influence
# what the vectorizer learns.
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Step 4: Train a Machine Learning Model
# Logistic regression with the library's default hyperparameters.
model = LogisticRegression()
model.fit(X_train_tfidf, y_train)

# Step 5: Make Predictions on the held-out test set
y_pred = model.predict(X_test_tfidf)

# Evaluate the model by comparing predictions with the true test labels.
print(classification_report(y_test, y_pred))

# Function to predict new feedback
def predict_feedback(feedback):
    """Classify a single piece of raw feedback text.

    The text is cleaned with ``preprocess_text``, converted to a one-row
    TF-IDF matrix by the module-level fitted ``vectorizer``, and passed to
    the module-level trained ``model``.

    Args:
        feedback: Raw feedback text.

    Returns:
        The first (and only) element of ``model.predict`` for that row.
    """
    cleaned = preprocess_text(feedback)
    # transform() takes an iterable of documents, hence the one-item list.
    features = vectorizer.transform([cleaned])
    return model.predict(features)[0]

# Example usage: classify one hand-written sentence with the trained model
# and print the predicted label next to the original text.
new_feedback = "The product was excellent and I loved it!"
result = predict_feedback(new_feedback)
print(f"Feedback: '{new_feedback}' is classified as: {result}")