<a href="https://colab.research.google.com/github/SKB11/SenTexAI/blob/main/sentiment_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix

import nltk
# Fetch the NLTK resources this notebook relies on:
#   - 'punkt' / 'punkt_tab': tokenizer models behind nltk.word_tokenize.
#     Recent NLTK releases load 'punkt_tab' instead of the pickled 'punkt'
#     models, so both are requested to keep the notebook working across versions.
#   - 'stopwords': the English stop-word list used during preprocessing.
for nltk_resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(nltk_resource)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [2]:
# Step 1: Load the dataset (2490 words, per the original author's note)
# The cells below read two columns from this file: 'text' (the document)
# and 'sentiment' (the label used as the training target).
df = pd.read_csv('s1.csv')

# Fail fast with a clear message if the columns used downstream are absent.
missing_columns = {'text', 'sentiment'} - set(df.columns)
if missing_columns:
    raise KeyError(f"s1.csv is missing required column(s): {sorted(missing_columns)}")

# Rows without text or without a label cannot be cleaned (re.sub rejects NaN)
# or used for training, so drop them up front and keep a contiguous index.
df = df.dropna(subset=['text', 'sentiment']).reset_index(drop=True)



In [3]:
# Step 2: Preprocess the data

# Shared cleaning resources: the English stop-word set and a Porter stemmer.
stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()


def clean_text(text):
    """Normalise one raw document into a space-separated string of stems.

    Steps, in order: lowercase the text, delete every character that is not
    an ASCII letter or whitespace, tokenize with NLTK, discard English stop
    words, Porter-stem the remaining tokens, and join them with single spaces.

    Args:
        text: The raw document as a string.

    Returns:
        The cleaned document as a single string.
    """
    letters_only = re.sub(r'[^a-zA-Z\s]', '', text.lower())
    tokens = nltk.word_tokenize(letters_only)
    return ' '.join(stemmer.stem(token) for token in tokens if token not in stop_words)


# Keep an untouched copy of the raw text the first time this cell runs, and
# always clean from that copy. Re-running the cell is then idempotent:
# stemming is never applied on top of already-stemmed text.
if 'text_raw' not in df.columns:
    df['text_raw'] = df['text']

# Downstream cells read the cleaned documents from df['text'].
df['text'] = df['text_raw'].apply(clean_text)


In [4]:
# Step 3: Separate the model inputs from the targets.
# 'text' holds the documents as cleaned by the preprocessing step above;
# 'sentiment' holds the label that the classifier learns to predict.
X, y = df['text'], df['sentiment']



In [5]:
# Step 4: Turn each document into a vector of token counts.
# The vocabulary is learned from every document in X, and the same
# documents are then encoded against that vocabulary.
vectorizer = CountVectorizer().fit(X)
X_vectorized = vectorizer.transform(X)



In [6]:
# Step 5: Hold out 20% of the rows for testing.
# The fixed random_state makes the partition reproducible between runs.
split_options = {'test_size': 0.2, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y, **split_options)



In [7]:
# Step 6: Create a multinomial Naive Bayes classifier and fit it on the
# training split. fit() returns the estimator itself, so construction and
# training can be chained.
naive_bayes = MultinomialNB().fit(X_train, y_train)

# Echo the fitted estimator as the cell's output.
naive_bayes



In [8]:
# Step 7: Evaluate the model on the held-out test split.
y_pred = naive_bayes.predict(X_test)

# Every scalar metric takes the same (true labels, predicted labels) pair.
accuracy, precision, recall, f1 = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)
confusion_mat = confusion_matrix(y_test, y_pred)

# Report the scalar scores first, then the confusion matrix.
for label, value in (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
    ("F1 Score", f1),
):
    print(f"{label}:", value)
print("Confusion Matrix:")
print(confusion_mat)

Accuracy: 0.9411764705882353
Precision: 0.96875
Recall: 0.8611111111111112
F1 Score: 0.911764705882353
Confusion Matrix:
[[65  1]
 [ 5 31]]


In [9]:
# Step 8: Predict sentiment for new text
new_text = ["good morning this is scared help shraddha KB from Reba University Bangalore doing PTEC in computer science and information technology"]

# Clean the new text exactly like the training documents (lowercase, letters
# only, NLTK tokenization, stop-word removal, Porter stemming). The fitted
# vocabulary contains stems, so un-stemmed words such as "scared" would
# otherwise be silently ignored by the vectorizer.
# Uses `stop_words` and `stemmer` created in the preprocessing cell.
new_text_clean = [
    ' '.join(
        stemmer.stem(token)
        for token in nltk.word_tokenize(re.sub(r'[^a-zA-Z\s]', '', document.lower()))
        if token not in stop_words
    )
    for document in new_text
]

new_text_vectorized = vectorizer.transform(new_text_clean)
predicted_sentiment = naive_bayes.predict(new_text_vectorized)

# Label 1 is reported as positive; any other label is reported as negative.
if predicted_sentiment[0] == 1:
    print("Positive sentiment")
    print("   Have a Happy day! (: ")
else:
    print("Negative sentiment")
    print("   Do not worry! Stay STRONG. Your help is on the Way")


Negative sentiment
   Do not worry! Stay STRONG. Your help is on the Way


In [11]:
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

# Step 1: Preprocess and clean the text
def preprocess_text(text):
    """Clean one document the same way the training documents were cleaned.

    Applies, in order: lowercasing, removal of every character that is not an
    ASCII letter or whitespace, NLTK tokenization, English stop-word removal,
    and Porter stemming. Mirroring the training pipeline matters because the
    vectorizer's vocabulary is built from stemmed tokens; text cleaned any
    other way would mostly fail to match it.

    Relies on `stop_words` and `stemmer` created in the preprocessing cell.

    Args:
        text: The raw input string.

    Returns:
        The cleaned tokens joined by single spaces.
    """
    letters_only = re.sub(r'[^a-zA-Z\s]', '', text.lower())
    tokens = nltk.word_tokenize(letters_only)
    cleaned_text = ' '.join(stemmer.stem(token) for token in tokens if token not in stop_words)
    return cleaned_text

# Step 2: Fit a TF-IDF vectorizer and train the classifier on its features
# X and y are the preprocessed text and labels from the earlier cells.
# NOTE: this rebinds `vectorizer` and `naive_bayes`, replacing the count-based
# objects created earlier; re-run those cells before reusing them.
vectorizer = TfidfVectorizer()
X_tfidf = vectorizer.fit_transform(X)

# Train on the TF-IDF matrix (not the earlier count matrix) so the model is
# fitted on the same kind of features it receives at prediction time.
# This model is fitted on all rows of X and is not evaluated in this cell.
naive_bayes = MultinomialNB()
naive_bayes.fit(X_tfidf, y)

# Step 3: Prompt user for text input
text_input = input("Enter the text for sentiment analysis: ")

# Step 4: Preprocess the user input
cleaned_text_input = preprocess_text(text_input)

# Step 5: Vectorize the preprocessed text
text_vectorized = vectorizer.transform([cleaned_text_input])

# Step 6: Predict sentiment for the input text
predicted_sentiment = naive_bayes.predict(text_vectorized)

# Step 7: Print the result and professional messages
# Label 1 is reported as positive; any other label is reported as negative.
if predicted_sentiment[0] == 1:
    print("Positive sentiment")
    print("Have a Happy day! (: ")
else:
    print("Negative sentiment")
    print("Do not worry! Stay STRONG. Your help is on the way.")


Enter the text for sentiment analysis: I spotted a robber and i am scared 
Negative sentiment
Do not worry! Stay STRONG. Your help is on the way.
