In [12]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
import joblib
import re
from sklearn.multioutput import MultiOutputClassifier

In [9]:

# Read the latin1-encoded CSV and keep only rows without any missing value
df = pd.read_csv('file.csv', encoding='latin1').dropna()

# Helper functions used to clean the 'text' column
def remove_urls(text):
    """Return ``text`` with URL-like substrings deleted.

    Every occurrence of ``http``, ``www`` or ``https`` followed by one or
    more non-whitespace characters is replaced with the empty string; the
    surrounding whitespace is left untouched.
    """
    url_pattern = r'http\S+|www\S+|https\S+'
    return re.sub(url_pattern, '', text, flags=re.MULTILINE)

def remove_non_ascii(text):
    """Return ``text`` with every character outside the ASCII range removed."""
    # Encoding to ASCII with errors='ignore' silently drops any character
    # whose code point is 128 or above; decoding restores a str.
    ascii_bytes = text.encode('ascii', errors='ignore')
    return ascii_bytes.decode('ascii')

def remove_digits(text):
    """Return ``text`` with every run of digit characters deleted."""
    digit_runs = re.compile(r'\d+')
    return digit_runs.sub('', text)

def remove_special_characters(text):
    """Return ``text`` stripped of everything except word characters and whitespace.

    Word characters are those matched by ``\\w`` (letters, digits and the
    underscore), so underscores are kept while punctuation is removed.
    """
    not_word_or_space = re.compile(r'[^\w\s]')
    return not_word_or_space.sub('', text)

def normalize_case(text):
    """Return a lowercase copy of ``text``."""
    lowered = text.lower()
    return lowered

def clean_text(text):
    """Run ``text`` through the full cleaning sequence and return the result.

    The steps are applied in this fixed order: URL removal, non-ASCII
    removal, digit removal, special-character removal, lowercasing, and
    finally whitespace normalisation.
    """
    cleaning_steps = (
        remove_urls,
        remove_non_ascii,
        remove_digits,
        remove_special_characters,
        normalize_case,
    )
    for step in cleaning_steps:
        text = step(text)
    # split() with no argument breaks on any whitespace run and discards
    # leading/trailing whitespace, so re-joining yields single-spaced text.
    words = text.split()
    return ' '.join(words)

# Pass every entry of the 'text' column through clean_text and store the result back
df['text'] = df['text'].map(clean_text)


In [10]:
# Names of the emotion columns in the dataset; the order of this list fixes
# the order of the target columns wherever the list is used for selection.
emotion_columns = [
    'admiration', 'amusement', 'anger', 'annoyance',
    'approval', 'caring', 'confusion', 'curiosity',
    'desire', 'disappointment', 'disapproval', 'disgust',
    'embarrassment', 'excitement', 'fear', 'gratitude',
    'grief', 'joy', 'love', 'nervousness',
    'optimism', 'pride', 'realization', 'relief',
    'remorse', 'sadness', 'surprise', 'neutral',
]


In [13]:
# Features (X) are the text column; targets (y) are the emotion columns,
# giving one output per emotion.
X = df['text']
y = df[emotion_columns]

# Hold out 30% of the rows for testing; the fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Pipeline: TF-IDF features (English stop words removed) feeding a
# MultiOutputClassifier, which fits one linear-kernel SVC per target column.
# NOTE(review): scikit-learn documents that SVC fit time scales at least
# quadratically with the number of samples; LinearSVC may be worth evaluating
# if training is too slow on this dataset.
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(stop_words='english')),
    ('multi_output_svc', MultiOutputClassifier(SVC(kernel='linear')))
])

# Train the model
pipeline.fit(X_train, y_train)

# Predict on the test set
y_pred = pipeline.predict(X_test)

# Evaluate on the held-out split. For multilabel targets accuracy_score is
# subset accuracy: a row counts as correct only when every emotion matches.
# NOTE(review): assumes the emotion columns hold 0/1 indicators — confirm.
test_accuracy = accuracy_score(y_test, y_pred)
print(f'Subset accuracy on the test set: {test_accuracy:.4f}')

# Save the fitted pipeline (vectorizer + classifiers) to a file
joblib.dump(pipeline, 'multi_emotion_detection_model.pkl')

# Load the model back from the file that was just written
model = joblib.load('multi_emotion_detection_model.pkl')



In [None]:
# Example comments used to try out the trained model
new_comments = [
    'The product is fantastic and exceeded my expectations!',
    'I am not satisfied with the quality of the item.',
    'Great service, but the delivery was late.',
    'The experience was okay, nothing special.',
    'Absolutely love this! Will definitely recommend.',
    'The item arrived damaged and had to be returned.',
    'Customer support was very helpful and responsive.',
    'I am disappointed with the product. It did not match the description.',
    'Highly recommend this to anyone looking for a quality product.',
    'The product is decent but could be improved.',
    'Excellent quality and fast shipping.',
    'Not worth the price. I expected better.',
    'Perfect! Exactly what I needed.',
    'The service was slow, but the product is good.',
    'I will not buy this again. It was a waste of money.',
    'Very happy with the purchase. It was as described.',
    'The product exceeded my expectations. Great buy!',
    'I had a great shopping experience overall.'
]

# Create DataFrame for new comments
new_data = pd.DataFrame({'comment': new_comments})

# Clean the new comments with the same clean_text function that was applied
# to the training text, so the model sees input preprocessed the same way
# (URL / non-ASCII / digit / punctuation removal, lowercasing, whitespace
# collapsing) instead of a separate, slightly different set of steps.
new_data['comment'] = new_data['comment'].apply(clean_text)

# Predict using the loaded model; the result has one column per emotion,
# in the order given by emotion_columns
new_predictions = model.predict(new_data['comment'])

# Add predictions to the new data for each emotion
for i, emotion in enumerate(emotion_columns):
    new_data[emotion] = new_predictions[:, i]

# Display the results
print(new_data[['comment'] + emotion_columns])

                                              comment  sentiment_prediction
0   the product is fantastic and exceeded my expec...                     1
1     i am not satisfied with the quality of the item                     1
2             great service but the delivery was late                     1
3             the experience was okay nothing special                     1
4      absolutely love this will definitely recommend                     1
5     the item arrived damaged and had to be returned                     1
6    customer support was very helpful and responsive                     1
7   i am disappointed with the product it did not ...                     1
8   highly recommend this to anyone looking for a ...                     1
9         the product is decent but could be improved                     1
10                excellent quality and fast shipping                     1
11              not worth the price i expected better                     1
12          