# **SENTIMENT ANALYSIS PROJECT**

### 1) Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import nltk
import random

# Fetch the NLTK data used by the pre-processing step: WordNet (+ omw-1.4)
# for lemmatisation, the Punkt tokenizer models and the stop-word lists.
# 'punkt_tab' is the resource word_tokenize loads on NLTK 3.9+; 'punkt' is
# kept so the notebook still runs on older NLTK releases.
for resource in ('wordnet', 'omw-1.4', 'punkt', 'punkt_tab', 'stopwords'):
    nltk.download(resource)
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# Use matplotlib's 'ggplot' style sheet for every figure drawn below.
plt.style.use('ggplot')

### 2) Loading Data

In [None]:
# Read the review file and rename the 'verified_reviews' column to 'Reviews'
# in one step, then preview the first 20 rows.
df = pd.read_csv('amazon_alexa.csv').rename(columns={'verified_reviews': 'Reviews'})
df.head(20)


In [None]:
# Show the (rows, columns) dimensions of the loaded frame.
print(df.shape)

### 3) Preliminary data analysis

In [None]:
# Count the reviews per rating value, ordered by the rating itself rather
# than by frequency, and draw the counts as a bar chart.
rating_counts = df['rating'].value_counts().sort_index()
visual = rating_counts.plot(
    kind='bar',
    title='Number of reviews VS Stars',
    figsize=(5, 2),
    xlabel='Rating in stars',
    ylabel='Number of reviews',
)
plt.show()

### 4) Data Cleaning

In [None]:
def number_word(stars):
    """Return the spelled-out label for a star rating.

    Ratings 5, 4, 3 and 2 map to their own label; every other value
    (including 1) falls through to "One star".
    """
    labelled_ratings = (
        (5, "Five stars"),
        (4, "Four Stars"),
        (3, "Three stars"),
        (2, "Two stars"),
    )
    for rating, label in labelled_ratings:
        if stars == rating:
            return label
    return "One star"

# Reviews with no text get a placeholder derived from their star rating.
fallback_text = df['rating'].apply(number_word)
df['Reviews'] = df['Reviews'].fillna(fallback_text)
# Display the row at position 12 (presumably one that had no review text — confirm).
df.iloc[12]


### 5) Assign Sentiment

In [None]:
def _sentiment_from_rating(rating):
    """Label a rating 'positive' (> 3), 'neutral' (== 3) or 'negative' (anything else)."""
    if rating > 3:
        return 'positive'
    if rating == 3:
        return 'neutral'
    return 'negative'


# Derive the target label for every review from its star rating.
df['sentiment'] = df['rating'].apply(_sentiment_from_rating)
df.head()

In [None]:
# Number of reviews in each sentiment class (most frequent first).
sentiment_counts = df['sentiment'].value_counts()

print("Sentiment Counts:")
for sentiment, count in sentiment_counts.items():
    print(f"{sentiment}: {count} reviews")

y = np.array(sentiment_counts)
# Take the slice labels from the counts' own index so each label always
# matches its value; a hard-coded list silently mislabels the slices
# whenever the frequency order of the classes changes.
mylabels = [str(label).capitalize() for label in sentiment_counts.index]

plt.pie(y, labels=mylabels)
plt.show()

### 5) Pre-processing

In [None]:
# Filled on first use by _preprocess_resources(); shared by every call.
_PREPROCESS_RESOURCES = {}


def _preprocess_resources():
    """Return the shared (stop-word set, lemmatizer) pair, building it on first use."""
    if not _PREPROCESS_RESOURCES:
        # Built once instead of once per review: the original re-created the
        # stop-word set and the lemmatizer on every single call.
        _PREPROCESS_RESOURCES['stop_words'] = frozenset(stopwords.words('english'))
        _PREPROCESS_RESOURCES['lemmatizer'] = WordNetLemmatizer()
    return _PREPROCESS_RESOURCES['stop_words'], _PREPROCESS_RESOURCES['lemmatizer']


def preprocess_text(text):
    """Normalise one review for vectorisation.

    Steps, in order: replace every non-ASCII-letter character with a space,
    lower-case, tokenise, drop English stop words, lemmatise each remaining
    token, and join the tokens back with single spaces.

    Args:
        text: The raw review as a ``str``.

    Returns:
        The cleaned, space-separated token string (may be empty).
    """
    stop_words, lemmatizer = _preprocess_resources()
    letters_only = re.sub(r"[^a-zA-Z]", " ", text).lower()
    kept_tokens = (
        token for token in word_tokenize(letters_only) if token not in stop_words
    )
    return ' '.join(lemmatizer.lemmatize(token) for token in kept_tokens)

# Coerce every review to str, then clean it with preprocess_text.
df['Reviews'] = df['Reviews'].astype(str).apply(preprocess_text)
df.head(20)


### 6) Splitting the data into training and test sets

In [None]:
# Hold out 20% of the reviews for evaluation; the fixed seed keeps the split reproducible.
features, labels = df['Reviews'], df['sentiment']
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

### 7) TF-IDF vectoriser

In [None]:
# TF-IDF featuriser configured with the built-in 'english' stop-word option.
vectorizer = TfidfVectorizer(stop_words='english')
# Fit on the training split only, then reuse that fitted vectorizer for the
# test split so both are encoded with the same vocabulary.
X_train_vectors = vectorizer.fit_transform(X_train)
X_test_vectors = vectorizer.transform(X_test)

### 8) Using sklearn.svm to build the model

In [None]:
# Linear support-vector classifier with default settings, trained on the
# TF-IDF vectors of the training split.
model = LinearSVC()
model.fit(X_train_vectors, y_train)

### 9) Evaluating the model

In [None]:
# Score the held-out split.
y_pred = model.predict(X_test_vectors)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# List every test review next to its true and predicted label.
print("Detailed Results:")
for review, actual, prediction in zip(X_test, y_test, y_pred):
    print(f"Review: {review}")
    print(f"Actual sentiment: {actual}")
    print(f"Predicted sentiment: {prediction}")
    print()

### 10) Testing the model

In [None]:
new_reviews = ["This product is amazing!","product is average, expected more" ,"Horrible product"]
# The model was trained on text cleaned by preprocess_text, so new text must
# go through the same cleaning before it is vectorised.
new_reviews_clean = [preprocess_text(review) for review in new_reviews]
new_reviews_vectors = vectorizer.transform(new_reviews_clean)
predictions = model.predict(new_reviews_vectors)

# Report against the original wording, not the cleaned form.
for review, prediction in zip(new_reviews, predictions):
    print(f"Review: {review}\nSentiment: {prediction}\n")


Enter your own review and check its sentiment

In [None]:
new = [input('Enter your review: ')]
# Clean the typed review exactly like the training text before vectorising it.
new_clean = [preprocess_text(review) for review in new]
new_reviews_vector = vectorizer.transform(new_clean)
new_prediction = model.predict(new_reviews_vector)

# Echo the review as the user typed it, with the predicted label.
for review, prediction in zip(new, new_prediction):
    print(f"Review: {review}\nSentiment: {prediction}\n")

### 11) SAVE MODEL

In [None]:
import joblib

# Persist the trained classifier. Both file names are kept so anything that
# already loads either of them continues to work.
joblib.dump(model, 'model.pkl')
joblib.dump(model, 'model.joblib')
# The classifier only understands vectors produced by this fitted TF-IDF
# vectorizer, so it has to be saved as well for the model to be usable later.
joblib.dump(vectorizer, 'vectorizer.joblib')