# **Sentiment Analysis Model on E-Commerce Website**


### Install necessary packages

In [2]:
!pip install streamlit
!pip install joblib

Collecting streamlit
  Downloading streamlit-1.25.0-py2.py3-none-any.whl (8.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 8.1/8.1 MB 56.0 MB/s eta 0:00:00
Collecting pympler<2,>=0.9 (from streamlit)
  Downloading Pympler-1.0.1-py3-none-any.whl (164 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 164.8/164.8 kB 19.6 MB/s eta 0:00:00
Collecting tzlocal<5,>=1.1 (from streamlit)
  Downloading tzlocal-4.3.1-py3-none-any.whl (20 kB)
Collecting validators<1,>=0.2 (from streamlit)
  Downloading validators-0.21.2-py3-none-any.whl (25 kB)
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Downloading GitPython-3.1.32-py3-none-any.whl (188 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 188.5/188.5 kB 24.1 MB/s eta 0:00:00
Collecting pydeck<1,>=0.8 (from streamlit)
  Downloading pydeck-0.8.0-py2.py3-none-any.whl (4.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━

### Imports

In [3]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score
import joblib
import streamlit as st

### Download NLTK resources

In [4]:
# word_tokenize needs the Punkt sentence models. NLTK 3.9 and later load them
# from the 'punkt_tab' resource, older releases from 'punkt'; fetching both
# keeps the notebook runnable on either. 'stopwords' supplies the English
# stop-word list read by preprocess_text.
for nltk_resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(nltk_resource)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

### Define preprocessing function

In [5]:
def preprocess_text(text):
    """Normalize a raw comment into a space-joined string of stemmed tokens.

    The text is lower-cased, every non-word character is replaced by a space,
    the result is tokenized with ``word_tokenize``, English stop words are
    dropped, and the remaining tokens are Porter-stemmed.

    Args:
        text: The raw comment as a ``str``.

    Returns:
        The stemmed tokens joined by single spaces; an empty string when no
        token survives the filtering.
    """
    # Built once per call instead of once per token: both expressions are
    # loop-invariant, and a set gives constant-time membership tests.
    english_stopwords = set(stopwords.words("english"))
    stemmer = PorterStemmer()

    text = text.lower()
    text = re.sub(r'\W', ' ', text)
    return " ".join(
        stemmer.stem(word)
        for word in word_tokenize(text)
        if word not in english_stopwords
    )


### Load training data and parse labels

In [10]:
# Each non-blank line is expected to look like "__label__<digits> <comment text>".
# The encoding is pinned so the read does not depend on the platform default
# (assumes the file is UTF-8 — TODO confirm). Blank lines are skipped because
# the label lookup below would raise IndexError on them.
with open('train.ft.txt', 'r', encoding='utf-8') as train_file:
    train_data = [line for line in train_file if line.strip()]

# Comment text: the line with its leading "__label__<digits> " prefix removed.
train_comments = [re.sub(r'^__label__[0-9]+ ', '', line).strip() for line in train_data]
# Label: the digits of that prefix, kept as a string.
train_labels = [re.findall(r'^__label__([0-9]+)', line)[0] for line in train_data]


### Train the model

In [11]:
# Two-stage text classifier: TF-IDF features feeding a logistic regression.
pipeline_steps = [
    ('tfidf', TfidfVectorizer()),
    ('model', LogisticRegression(max_iter=100)),
]
train_pipeline = Pipeline(steps=pipeline_steps)

# Fit both stages on the training comments and their labels.
train_pipeline.fit(train_comments, train_labels)


### Save the trained model

In [12]:
# Persist the fitted pipeline (vectorizer + classifier) to disk with joblib.
model_filename = 'sentiment_model.pkl'
joblib.dump(value=train_pipeline, filename=model_filename)


['sentiment_model.pkl']

### Load testing data and parse labels

In [13]:
# Each non-blank line is expected to look like "__label__<digits> <comment text>".
# The encoding is pinned so the read does not depend on the platform default
# (assumes the file is UTF-8 — TODO confirm). Blank lines are skipped because
# the label lookup below would raise IndexError on them.
with open('test.ft.txt', 'r', encoding='utf-8') as test_file:
    test_data = [line for line in test_file if line.strip()]

# Comment text: the line with its leading "__label__<digits> " prefix removed.
test_comments = [re.sub(r'^__label__[0-9]+ ', '', line).strip() for line in test_data]
# Label: the digits of that prefix, kept as a string.
test_labels = [re.findall(r'^__label__([0-9]+)', line)[0] for line in test_data]


### Predict on testing data

In [14]:
# Classify every test comment with the fitted pipeline.
test_predictions = train_pipeline.predict(X=test_comments)

### Calculate evaluation metrics

In [15]:
# Score the test-set predictions against the parsed labels. Precision and
# recall use the 'weighted' average across classes.
accuracy = accuracy_score(y_true=test_labels, y_pred=test_predictions)
precision = precision_score(y_true=test_labels, y_pred=test_predictions, average='weighted')
recall = recall_score(y_true=test_labels, y_pred=test_predictions, average='weighted')


## Streamlit app

In [16]:
# Minimal Streamlit front end: read a comment, classify it on demand.
st.title("E-commerce Sentiment Analysis")
comment = st.text_area("Enter your comment:")
if st.button("Analyze"):
    if not comment.strip():
        # Nothing to classify; ask for input instead of scoring an empty string.
        st.warning("Please enter a comment before clicking Analyze.")
    else:
        # train_pipeline was fitted and evaluated on raw comment text (no
        # preprocess_text pass), so the raw comment is fed here as well.
        # Stemming it first would produce tokens the TF-IDF vocabulary never
        # saw during training.
        sentiment = train_pipeline.predict([comment])[0]
        st.write("Predicted Sentiment:", sentiment)


2023-08-16 19:59:27.627 
  command:

    streamlit run /usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py [ARGUMENTS]


### Display evaluation metrics

In [17]:
# Render the evaluation scores under their own subheading, one line per metric.
st.subheader("Model Evaluation Metrics")
for metric_caption, metric_value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
):
    st.write(metric_caption, metric_value)