In [76]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

In [110]:
# Read data from Excel file
data = pd.read_excel('/content/Sentiment.xlsx')

In [111]:
data.head()

Unnamed: 0,Sentence,Sentiment
0,I absolutely love this product; it exceeded my...,Positive
1,Today is a beautiful day with clear skies and ...,Positive
2,"The concert was amazing, and the performers we...",Positive
3,I'm really disappointed with the customer serv...,Negative
4,The weather is terrible; it's been raining all...,Negative


In [112]:
data.tail()

Unnamed: 0,Sentence,Sentiment
2299,The sound quality is bad; it distorts at high ...,Negative
2300,Neutral about the pricing; it's neither too hi...,Neutral
2301,The minimalist design is bad; it looks outdate...,Negative
2302,Battery life is bad for regular use; falls sho...,Negative
2303,The pricing is bad for the quality; overpriced...,Negative


In [113]:
data.shape

(2304, 2)

In [114]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [115]:
import nltk
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [116]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [117]:
import pandas as pd
import string
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from bs4 import BeautifulSoup
import unicodedata
from nltk.stem import WordNetLemmatizer

def clean_text(sentence):
    # Lowercasing
    text = sentence.lower()

    # Removing HTML tags
    text = BeautifulSoup(text, 'html.parser').get_text()

    # Removing URLs
    text = ' '.join([word for word in text.split() if not word.startswith('http')])

    # Removing punctuation
    text = ''.join([char for char in text if char not in string.punctuation + '’‘'])

    # Removing numbers
    text = ''.join([i for i in text if not i.isdigit()])

    # Removing stopwords
    stop_words = set(stopwords.words('english'))
    word_tokens = word_tokenize(text)
    text = ' '.join([word for word in word_tokens if word.lower() not in stop_words])

    # Handling special characters
    text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('utf-8')

    # Lemmatization
    lemmatizer = WordNetLemmatizer()
    tokens = word_tokenize(text)
    text = ' '.join([lemmatizer.lemmatize(word) for word in tokens])

    return text


In [118]:
# Display the first few rows of the dataset
print("Before Text Cleaning:")
print(data['Sentence'].head())

Before Text Cleaning:
0    I absolutely love this product; it exceeded my...
1    Today is a beautiful day with clear skies and ...
2    The concert was amazing, and the performers we...
3    I'm really disappointed with the customer serv...
4    The weather is terrible; it's been raining all...
Name: Sentence, dtype: object


In [119]:
print("After Text Cleaning:")
# Apply text cleaning to the 'text' column
data['cleaned_text'] = data['Sentence'].apply(clean_text)
print(data.head())

After Text Cleaning:
                                            Sentence Sentiment  \
0  I absolutely love this product; it exceeded my...  Positive   
1  Today is a beautiful day with clear skies and ...  Positive   
2  The concert was amazing, and the performers we...  Positive   
3  I'm really disappointed with the customer serv...  Negative   
4  The weather is terrible; it's been raining all...  Negative   

                                       cleaned_text  
0      absolutely love product exceeded expectation  
1            today beautiful day clear sky sunshine  
2             concert amazing performer outstanding  
3  im really disappointed customer service received  
4                      weather terrible raining day  


In [120]:
data.value_counts('Sentiment')

Sentiment
Positive    1161
Negative     692
Neutral      287
Mixed        163
dtype: int64

In [121]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2304 entries, 0 to 2303
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Sentence      2304 non-null   object
 1   Sentiment     2303 non-null   object
 2   cleaned_text  2304 non-null   object
dtypes: object(3)
memory usage: 54.1+ KB


In [122]:
data.head()

Unnamed: 0,Sentence,Sentiment,cleaned_text
0,I absolutely love this product; it exceeded my...,Positive,absolutely love product exceeded expectation
1,Today is a beautiful day with clear skies and ...,Positive,today beautiful day clear sky sunshine
2,"The concert was amazing, and the performers we...",Positive,concert amazing performer outstanding
3,I'm really disappointed with the customer serv...,Negative,im really disappointed customer service received
4,The weather is terrible; it's been raining all...,Negative,weather terrible raining day


In [123]:
data.isnull().sum()

Sentence        0
Sentiment       1
cleaned_text    0
dtype: int64

In [124]:
data_1=data.dropna()

In [125]:
data_1.isna().sum()

Sentence        0
Sentiment       0
cleaned_text    0
dtype: int64

In [126]:
data_1.shape

(2303, 3)

In [127]:
data_1.duplicated

<bound method DataFrame.duplicated of                                                Sentence Sentiment  \
0     I absolutely love this product; it exceeded my...  Positive   
1     Today is a beautiful day with clear skies and ...  Positive   
2     The concert was amazing, and the performers we...  Positive   
3     I'm really disappointed with the customer serv...  Negative   
4     The weather is terrible; it's been raining all...  Negative   
...                                                 ...       ...   
2299  The sound quality is bad; it distorts at high ...  Negative   
2300  Neutral about the pricing; it's neither too hi...   Neutral   
2301  The minimalist design is bad; it looks outdate...  Negative   
2302  Battery life is bad for regular use; falls sho...  Negative   
2303  The pricing is bad for the quality; overpriced...  Negative   

                                           cleaned_text  
0          absolutely love product exceeded expectation  
1                

In [130]:
import pandas as pd

def clean_and_save(input_file_path, output_file_path):
    # Read Excel file into a pandas DataFrame
    df = pd.read_excel(input_file_path)

    # Remove duplicate rows
    cleaned_data = df.drop_duplicates()

    if cleaned_data.empty:
        print("No redundancy found.")
    else:
        print("Number of redundant rows removed:", len(df) - len(cleaned_data))
        print("Cleaned DataFrame shape:", cleaned_data.shape)

        # Save the cleaned data to an Excel file
        cleaned_data.to_excel(output_file_path, index=False)
        print("Cleaned data saved to:", output_file_path)

# Example usage:
input_path = '/content/Sentiment.xlsx'
output_path = '/content/Cleaned_Sentiment.xlsx'
clean_and_save(input_path, output_path)


Number of redundant rows removed: 0
Cleaned DataFrame shape: (2304, 2)
Cleaned data saved to: /content/Cleaned_Sentiment_1.xlsx


In [131]:
data_1.head()

Unnamed: 0,Sentence,Sentiment,cleaned_text
0,I absolutely love this product; it exceeded my...,Positive,absolutely love product exceeded expectation
1,Today is a beautiful day with clear skies and ...,Positive,today beautiful day clear sky sunshine
2,"The concert was amazing, and the performers we...",Positive,concert amazing performer outstanding
3,I'm really disappointed with the customer serv...,Negative,im really disappointed customer service received
4,The weather is terrible; it's been raining all...,Negative,weather terrible raining day


In [132]:
data_1.shape

(2303, 3)

In [133]:
# Split data into features (X) and labels (y)
X = data_1['cleaned_text']
y = data_1['Sentiment']

In [134]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [135]:
# Create a CountVectorizer to convert text data to a bag-of-words representation
vectorizer = CountVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)


In [136]:
# Create an SVM classifier
classifier = SVC(kernel='linear')

In [137]:
# Train the classifier
classifier.fit(X_train_vectorized, y_train)

In [138]:
# Make predictions on the test set
predictions = classifier.predict(X_test_vectorized)

# Evaluate the model
accuracy = accuracy_score(y_test, predictions)
report = classification_report(y_test, predictions, zero_division=1)


print(f"Accuracy: {accuracy * 100:}%")
print("Classification Report:\n", report)

Accuracy: 90.88277858176555%
Classification Report:
               precision    recall  f1-score   support

       Mixed       0.88      0.92      0.90        48
    Negative       0.91      0.92      0.91       208
     Neutral       0.80      0.77      0.78        78
    Positive       0.94      0.93      0.93       357

    accuracy                           0.91       691
   macro avg       0.88      0.88      0.88       691
weighted avg       0.91      0.91      0.91       691



In [139]:
import joblib  # If you're using scikit-learn version < 0.23
# For scikit-learn version 0.23 and above, use:
# from joblib import dump

# Assuming 'classifier' is your trained classifier
# Replace 'classifier' with the actual variable name of your trained classifier
# Save the trained classifier to a file
model_filename = 'Sentiment_classifier_model.joblib'
joblib.dump(classifier, model_filename)

print(f"Classifier model saved to {model_filename}")

# Now, you can load the model later if needed
loaded_classifier = joblib.load(model_filename)

Classifier model saved to Sentiment_classifier_model.joblib


In [140]:
from sklearn.feature_extraction.text import TfidfVectorizer
from joblib import dump, load

# Assuming 'vectorizer' is your TfidfVectorizer model
# Replace 'vectorizer' with the actual variable name of your vectorizer

# Save the vectorizer to a file
vectorizer_filename = 'vectorizer_model.joblib'
dump(vectorizer, vectorizer_filename)

print(f"TfidfVectorizer model saved to {vectorizer_filename}")

# Now, you can load the vectorizer later if needed
loaded_vectorizer = load(vectorizer_filename)


TfidfVectorizer model saved to vectorizer_model.joblib


In [141]:
# Load the saved model
model = joblib.load('/content/Sentiment_classifier_model.joblib')  # Replace with your actual filename

# Load the TF-IDF vectorizer used during training
tfidf_vectorizer = joblib.load('/content/vectorizer_model.joblib')  # Replace with your actual filename


# Take user input
user_input = input("Enter a sentence: ")

# Clean the user input
cleaned_input = clean_text(user_input)

# Transform the cleaned text data using the TF-IDF vectorizer
input_matrix = tfidf_vectorizer.transform([cleaned_input])

# Make prediction
prediction = model.predict(input_matrix)[0]

# Display the prediction
print(f"Predicted Sentiment: {prediction}")
# Create a DataFrame with the results
df_result = pd.DataFrame({'User_Input': [user_input], 'Predicted_Sentiment': [prediction]})

# Save the DataFrame to an Excel file (append if the file already exists)
excel_filename = '/content/output_predictions.xlsx'  # Replace with your desired filename
try:
    # Load existing predictions from the Excel file
    df_existing = pd.read_excel(excel_filename)

    # Append the new predictions to the existing DataFrame
    df_combined = pd.concat([df_existing, df_result], ignore_index=True)

except FileNotFoundError:
    # If the file doesn't exist, create a new DataFrame
    df_combined = df_result

# Save the combined DataFrame to the Excel file
df_combined.to_excel(excel_filename, index=False)

Enter a sentence: i think this is good
Predicted Sentiment: Positive
