In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import re

import nltk
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC

In [3]:
# Fetch the NLTK resources the preprocessing step relies on: the English stop
# word list and the Punkt tokenizer models used by word_tokenize.
# Recent NLTK releases (3.9+) load the tokenizer from 'punkt_tab' instead of
# the pickled 'punkt' package, so both are requested to keep word_tokenize
# working regardless of the installed NLTK version.
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('punkt_tab')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\anmol\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\anmol\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [4]:
# Load the labelled corpus and drop rows that cannot be used for training:
# a missing "text" leaves nothing to preprocess, and a missing "sentiment"
# would put NaN into the label vector that is later passed to the classifier.
train_data = (
    pd.read_csv("Sentimental Analysis Data.csv")
    .dropna(subset=["text", "sentiment"])
)

In [5]:
# Shared NLP helpers, created once so preprocess_text can reuse them on every call.
# The stop word list is stored as a set for constant-time membership checks.
stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

In [6]:
def preprocess_text(text):
    """Normalize raw text into a space-separated string of stemmed tokens.

    The text is stripped of the punctuation characters ``, . ! / * -``,
    lowercased, and tokenized with ``word_tokenize``. Tokens that are not
    purely alphabetic or that appear in ``stop_words`` are discarded, and the
    remaining tokens are reduced to their Porter stems.

    Args:
        text: Raw input string.

    Returns:
        The surviving stems joined by single spaces; an empty string when no
        token survives the filtering.
    """
    # Replace punctuation with a space rather than deleting it, so that words
    # separated only by punctuation ("good,bad", "price/quality") stay separate
    # instead of being fused into one unknown token.
    text = re.sub(r'[,.!\/*-]', ' ', text)
    words = word_tokenize(text.lower())
    # The stop word test runs on the lowercased surface form, before stemming.
    words = [stemmer.stem(word) for word in words if word.isalpha() and word not in stop_words]
    return ' '.join(words)

In [7]:
# Labels are taken from the "sentiment" column as-is; every entry of the
# "text" column is run through preprocess_text to build the training documents.
train_labels = train_data["sentiment"]
train_text = list(map(preprocess_text, train_data["text"]))

In [8]:
# Learn the TF-IDF vocabulary and weights from the preprocessed training
# documents and encode those documents as the training matrix in one pass.
# The fitted `vectorizer` is reused later to encode new text.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(train_text)

In [9]:
# Train a linear SVM on the TF-IDF features. The liblinear solver behind
# LinearSVC shuffles the data using a random number generator, so the seed is
# fixed to make the fitted model identical across notebook re-runs.
classifier = LinearSVC(random_state=42)
classifier.fit(X_train, train_labels)

In [10]:
# Read one sentence from the user and push it through the same preprocessing
# and the already-fitted TF-IDF vectorizer that were used for the training data.
user_input = input("Enter text: ")
user_input_preprocessed = preprocess_text(user_input)
user_input_vectorized = vectorizer.transform([user_input_preprocessed])

# When no token of the input is in the fitted vocabulary (empty input, only
# stop words, only unseen words) the TF-IDF row is all zeros and the linear
# model's prediction is decided by its intercept alone. Say so explicitly.
if user_input_vectorized.nnz == 0:
    print("Warning: none of the words in the input are known to the model; "
          "the prediction is not meaningful.")

Enter text:  Product quality is good from my view


In [11]:
# Classify the single vectorized user sentence. predict() returns one label
# per input row, so the result holds exactly one element (read as [0] below).
predicted_label = classifier.predict(user_input_vectorized)

In [12]:
# Identity mapping from each sentiment label to its display name.
predicted_label_names = {
    label: label for label in ("positive", "negative", "neutral")
}

In [13]:
test_data = [(user_input_preprocessed, predicted_label[0])]
test_text = [data[0] for data in test_data]
test_labels = [data[1] for data in test_data]
X_test = vectorizer.transform(test_text)
predicted_labels = classifier.predict(X_test)

In [14]:
precision = precision_score(test_labels, predicted_labels, pos_label="negative")
recall = recall_score(test_labels, predicted_labels, pos_label="negative")
accuracy = accuracy_score(test_labels, predicted_labels)
f1 = f1_score(test_labels, predicted_labels, pos_label="negative")

In [15]:
# Report the prediction for the typed sentence followed by the evaluation
# metrics, one "caption value" pair per line.
summary_rows = (
    ("Predicted Label:", predicted_label[0]),
    ("Precision:", precision),
    ("Recall:", recall),
    ("Accuracy:", accuracy),
    ("F1_Measure_Score:", f1),
)
for caption, value in summary_rows:
    print(caption, value)

Predicted Label: positive
Precision: 0.0
Recall: 0.0
Accuracy: 1.0
F1_Measure_Score: 0.0
