In [1]:
# Import necessary libraries
# Pandas- Dataset manipulation
import pandas as pd
import numpy as np

# Regex and string manipulation
import re
import string

# NLTK
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import stopwords
# Uncomment if not downloaded
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

# Scikit-Learn
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

# Models
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC


# Metrics
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Visualizations
import matplotlib.pyplot as plt
import seaborn as sns

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [48]:
# Import data set
# reviews = pd.read_csv("/content/Datafiniti_Amazon_Consumer_Reviews_of_Amazon_Products.csv", on_bad_lines='skip')
reviews = pd.read_csv("/content/Datafiniti_Amazon_Consumer_Reviews_of_Amazon_Products.csv", encoding='utf-8')

In [49]:
reviews_df = pd.DataFrame(reviews)

In [50]:
reviews_df.head()

Unnamed: 0,id,dateAdded,dateUpdated,name,asins,brand,categories,primaryCategories,imageURLs,keys,...,reviews.dateSeen,reviews.doRecommend,reviews.id,reviews.numHelpful,reviews.rating,reviews.sourceURLs,reviews.text,reviews.title,reviews.username,sourceURLs
0,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,...,"2018-05-27T00:00:00Z,2017-09-18T00:00:00Z,2017...",False,,0,3,http://reviews.bestbuy.com/3545/5442403/review...,I thought it would be as big as small paper bu...,Too small,llyyue,https://www.newegg.com/Product/Product.aspx%25...
1,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,...,"2018-05-27T00:00:00Z,2017-07-07T00:00:00Z,2017...",True,,0,5,http://reviews.bestbuy.com/3545/5442403/review...,This kindle is light and easy to use especiall...,Great light reader. Easy to use at the beach,Charmi,https://www.newegg.com/Product/Product.aspx%25...
2,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,...,2018-05-27T00:00:00Z,True,,0,4,https://reviews.bestbuy.com/3545/5442403/revie...,Didnt know how much i'd use a kindle so went f...,Great for the price,johnnyjojojo,https://www.newegg.com/Product/Product.aspx%25...
3,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,...,2018-10-09T00:00:00Z,True,177283626.0,3,5,https://redsky.target.com/groot-domain-api/v1/...,I am 100 happy with my purchase. I caught it o...,A Great Buy,Kdperry,https://www.newegg.com/Product/Product.aspx%25...
4,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,...,2018-05-27T00:00:00Z,True,,0,5,https://reviews.bestbuy.com/3545/5442403/revie...,Solid entry level Kindle. Great for kids. Gift...,Solid entry-level Kindle. Great for kids,Johnnyblack,https://www.newegg.com/Product/Product.aspx%25...


# Exploratory Data Analysis

In [51]:
# Verify size of the dataset
print(f"Number of rows in the dataset: {reviews_df.shape[0]}")
print(f"Number of columns in the dataset: {reviews_df.shape[1]}")

Number of rows in the dataset: 5000
Number of columns in the dataset: 24


In [52]:
# Check dataset types
reviews.dtypes

Unnamed: 0,0
id,object
dateAdded,object
dateUpdated,object
name,object
asins,object
brand,object
categories,object
primaryCategories,object
imageURLs,object
keys,object


In [53]:
# Check for NaN values
reviews_df.isnull().sum()

Unnamed: 0,0
id,0
dateAdded,0
dateUpdated,0
name,0
asins,0
brand,0
categories,0
primaryCategories,0
imageURLs,0
keys,0


### Keep the following columns for model use
- name
- categories
- primary category
- reviews.doRecommend
- reviews.numHelpful
- reviews.rating
- reviews.text

In [54]:
# Keep necessary columns for analysis
reduced_review_df = reviews_df.iloc[:, [6, 7, 18, 20]]
reduced_review_df.head()

Unnamed: 0,categories,primaryCategories,reviews.rating,reviews.text
0,"Computers,Electronics Features,Tablets,Electro...",Electronics,3,I thought it would be as big as small paper bu...
1,"Computers,Electronics Features,Tablets,Electro...",Electronics,5,This kindle is light and easy to use especiall...
2,"Computers,Electronics Features,Tablets,Electro...",Electronics,4,Didnt know how much i'd use a kindle so went f...
3,"Computers,Electronics Features,Tablets,Electro...",Electronics,5,I am 100 happy with my purchase. I caught it o...
4,"Computers,Electronics Features,Tablets,Electro...",Electronics,5,Solid entry level Kindle. Great for kids. Gift...


In [55]:
rows = reduced_review_df.shape[0]
cols = reduced_review_df.shape[1]

print(f"Num of rows for the reduced df: {rows}")
print(f"Num of columns for the reduced df: {cols}")

Num of rows for the reduced df: 5000
Num of columns for the reduced df: 4


In [56]:
# Measure the length of each review in the reviews.text column
each_review_length = reduced_review_df['reviews.text'].apply(lambda rev: len(rev.split()))

In [57]:
reduced_review_df.head()

Unnamed: 0,categories,primaryCategories,reviews.rating,reviews.text
0,"Computers,Electronics Features,Tablets,Electro...",Electronics,3,I thought it would be as big as small paper bu...
1,"Computers,Electronics Features,Tablets,Electro...",Electronics,5,This kindle is light and easy to use especiall...
2,"Computers,Electronics Features,Tablets,Electro...",Electronics,4,Didnt know how much i'd use a kindle so went f...
3,"Computers,Electronics Features,Tablets,Electro...",Electronics,5,I am 100 happy with my purchase. I caught it o...
4,"Computers,Electronics Features,Tablets,Electro...",Electronics,5,Solid entry level Kindle. Great for kids. Gift...


# Traditional ML Model Approach

In [68]:
# Data cleaning and preprocessing
# Remove punctuation
pattern = f"[{re.escape(string.punctuation)}]"

chosen_cols = reduced_review_df['reviews.text']
no_punkt_reviews = chosen_cols.apply(lambda rev: re.sub(pattern, "", rev))
no_punkt_reviews

Unnamed: 0,reviews.text
0,I thought it would be as big as small paper bu...
1,This kindle is light and easy to use especiall...
2,Didnt know how much id use a kindle so went fo...
3,I am 100 happy with my purchase I caught it on...
4,Solid entry level Kindle Great for kids Gifted...
...,...
4995,This is a great tablet for the price Amazon is...
4996,This tablet is the perfect size and so easy to...
4997,Purchased this for my son Has room to upgrade ...
4998,I had some thoughts about getting this for a 5...


In [74]:
import re

def clean_special_syllables(text):
    # Eliminar palabras que contienen caracteres especiales no alfanuméricos como ä, ú, etc.
    cleaned_text = re.sub(r'\b\w*[^a-zA-Z0-9\s]+\w*\b', '', text)
    # Remover espacios adicionales
    # cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()
    return cleaned_text

# Aplicar la función al dataset
clean_special_chars = no_punkt_reviews.apply(lambda rev: clean_special_syllables(rev))
# clean_special_chars = no_punkt_reviews['reviews.text'].apply(clean_special_syllables)
clean_special_chars

Unnamed: 0,reviews.text
0,I thought it would be as big as small paper bu...
1,This kindle is light and easy to use especiall...
2,Didnt know how much id use a kindle so went fo...
3,I am 100 happy with my purchase I caught it on...
4,Solid entry level Kindle Great for kids Gifted...
...,...
4995,This is a great tablet for the price Amazon is...
4996,This tablet is the perfect size and so easy to...
4997,Purchased this for my son Has room to upgrade ...
4998,I had some thoughts about getting this for a 5...


In [75]:
# Convert entire text into lowercase for consistency
lower_case = clean_special_chars.apply(lambda rev: rev.lower())
lower_case

Unnamed: 0,reviews.text
0,i thought it would be as big as small paper bu...
1,this kindle is light and easy to use especiall...
2,didnt know how much id use a kindle so went fo...
3,i am 100 happy with my purchase i caught it on...
4,solid entry level kindle great for kids gifted...
...,...
4995,this is a great tablet for the price amazon is...
4996,this tablet is the perfect size and so easy to...
4997,purchased this for my son has room to upgrade ...
4998,i had some thoughts about getting this for a 5...


In [59]:
# Convert entire text into lowercase for consistency
# lower_case = no_punkt_reviews.apply(lambda rev: rev.lower())
# lower_case

Unnamed: 0,reviews.text
0,i thought it would be as big as small paper bu...
1,this kindle is light and easy to use especiall...
2,didnt know how much id use a kindle so went fo...
3,i am 100 happy with my purchase i caught it on...
4,solid entry level kindle great for kids gifted...
...,...
4995,this is a great tablet for the price amazon is...
4996,this tablet is the perfect size and so easy to...
4997,purchased this for my son has room to upgrade ...
4998,i had some thoughts about getting this for a 5...


In [76]:
# Tokenize words
tokenized_reviews = lower_case.apply(lambda rev: word_tokenize(rev))

# Check output
tokenized_reviews

Unnamed: 0,reviews.text
0,"[i, thought, it, would, be, as, big, as, small..."
1,"[this, kindle, is, light, and, easy, to, use, ..."
2,"[didnt, know, how, much, id, use, a, kindle, s..."
3,"[i, am, 100, happy, with, my, purchase, i, cau..."
4,"[solid, entry, level, kindle, great, for, kids..."
...,...
4995,"[this, is, a, great, tablet, for, the, price, ..."
4996,"[this, tablet, is, the, perfect, size, and, so..."
4997,"[purchased, this, for, my, son, has, room, to,..."
4998,"[i, had, some, thoughts, about, getting, this,..."


In [77]:
# Check for stop words
stop_words = set(stopwords.words('english'))

# Remove stops words
no_stopwords_reviews = tokenized_reviews.apply(lambda rev: [word for word in rev if word.lower() not in stop_words])

no_stopwords_reviews

Unnamed: 0,reviews.text
0,"[thought, would, big, small, paper, turn, like..."
1,"[kindle, light, easy, use, especially, beach]"
2,"[didnt, know, much, id, use, kindle, went, low..."
3,"[100, happy, purchase, caught, sale, really, g..."
4,"[solid, entry, level, kindle, great, kids, gif..."
...,...
4995,"[great, tablet, price, amazon, good, job]"
4996,"[tablet, perfect, size, easy, use, read, play,..."
4997,"[purchased, son, room, upgrade, memory, allow,..."
4998,"[thoughts, getting, 5, year, old, get, screen,..."


In [78]:
# Lemmatize words
wordnet_lemma = WordNetLemmatizer()

lemmatized_reviews = no_stopwords_reviews.apply(lambda rev: [wordnet_lemma.lemmatize(word, pos='v') for word in rev])

lemmatized_reviews

Unnamed: 0,reviews.text
0,"[think, would, big, small, paper, turn, like, ..."
1,"[kindle, light, easy, use, especially, beach]"
2,"[didnt, know, much, id, use, kindle, go, lower..."
3,"[100, happy, purchase, catch, sale, really, go..."
4,"[solid, entry, level, kindle, great, kid, gift..."
...,...
4995,"[great, tablet, price, amazon, good, job]"
4996,"[tablet, perfect, size, easy, use, read, play,..."
4997,"[purchase, son, room, upgrade, memory, allow, ..."
4998,"[thoughts, get, 5, year, old, get, screen, pro..."


In [79]:
# Join processed reviews
processed_reviews = lemmatized_reviews.apply(lambda tokens:" ".join(tokens))
processed_reviews

Unnamed: 0,reviews.text
0,think would big small paper turn like palm thi...
1,kindle light easy use especially beach
2,didnt know much id use kindle go lower end im ...
3,100 happy purchase catch sale really good pric...
4,solid entry level kindle great kid gift kid fr...
...,...
4995,great tablet price amazon good job
4996,tablet perfect size easy use read play game pu...
4997,purchase son room upgrade memory allow book ga...
4998,thoughts get 5 year old get screen protector c...


In [94]:
# Vectorizartion to convert textual data into numerical vectors
# tfidf_vectorizer = TfidfVectorizer(ngram_range=(1,2),  max_features=5000)
tfidf_vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')

tfidf_matrix = tfidf_vectorizer.fit_transform(processed_reviews)

vocab = tfidf_vectorizer.get_feature_names_out()

print(vocab)

['10' '100' '1000' ... 'zone' 'zoo' 'zoom']


In [95]:
#Create document term matrix representing the frequency of the words
doc_term_matrix_tfidf = pd.DataFrame(tfidf_matrix.toarray(), columns=vocab)

In [96]:
doc_term_matrix_tfidf

Unnamed: 0,10,100,1000,1000s,101,1012,1013,105,1080,10th,...,youve,yr,yrs,zero,zigbee,zinio,zippy,zone,zoo,zoom
0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.134614,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4996,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4997,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4998,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [91]:
# Drop the original 'review.text' column from the DataFrame
reduced_review_df.drop('reviews.text', axis=1, inplace=True)

# Concatenate the TF-IDF DataFrame with the original DataFrame (which no longer contains 'review.text')
reduced_review_df = pd.concat([reduced_review_df, doc_term_matrix_tfidf], axis=1)

KeyError: "['reviews.text'] not found in axis"

## Model Selection

In [25]:
# Test the following models to see which is best by using an ML pipeline with GridSearchCV:
models = {
    # Naive-Bayes
    'Naive-Bayes' : MultinomialNB(),

    # Logistic Regression
    'Logistic Regression' : LogisticRegression(),

    # Random Forest
    'Random Forest' : RandomForestClassifier(),

    # Support Vector Machine
    'Support Vector Machine' : SVC(),
}

In [26]:
# Set up GridSearchCV
param_grid = {
    'Naive-Bayes': {
        'classifier__alpha': [0.5, 1.0, 2.0],
    },
    'Logistic Regression': {
        'classifier__C': [0.1, 1, 10],
        'classifier__solver': ['liblinear', 'saga'],
    },
    'Random Forest': {
        'classifier__n_estimators': [50, 100, 200],
        'classifier__max_depth': [None, 10, 20],
        'classifier__min_samples_split': [2, 5],
    },
    'Support Vector Machine': {
        'classifier__C': [0.1, 1, 10],
        'classifier__kernel': ['linear', 'rbf'],
    },
}

## Model Training

In [87]:
display(reduced_review_df.head())

Unnamed: 0,categories,primaryCategories,reviews.rating,10 15,10 dollars,10 minutes,10 read,10 tons,10 way,10 year,...,youtube read,youtube take,youtube television,youtube video,youtube videos,youtube work,youve already,youve receive,yr old,yrs old
0,"Computers,Electronics Features,Tablets,Electro...",Electronics,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Computers,Electronics Features,Tablets,Electro...",Electronics,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Computers,Electronics Features,Tablets,Electro...",Electronics,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Computers,Electronics Features,Tablets,Electro...",Electronics,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Computers,Electronics Features,Tablets,Electro...",Electronics,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [33]:
def map_sentiment(rating):
    if rating in [1, 2, 3]:
        return 'Negativo'
    elif rating == 4:
        return 'Neutral'
    elif rating == 5:
        return 'Positivo'

In [92]:
# Partition data into train and test

X = tfidf_matrix
y = reduced_review_df['reviews.rating'].apply(map_sentiment)

# Split data into train/test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(4000, 5000)
(1000, 5000)
(4000,)
(1000,)


In [99]:
# Loop through the models to set up the pipeline and perform GridSearchCV
for model_name, model in models.items():
    # Create a pipeline for each model
    if model_name == 'Naive-Bayes':  # Check if the current model is Naive-Bayes
        pipeline = Pipeline([
            # Remove StandardScaler for Naive-Bayes
            ('classifier', model)
        ])
    else:
        pipeline = Pipeline([
            ('scaler', StandardScaler(with_mean=False)),  # Disable centering for sparse data
            ('classifier', model)
        ])
    # Set up GridSearchCV for the current model
    grid_search = GridSearchCV(pipeline, param_grid[model_name], cv=5, scoring='accuracy')

    # Fit the grid search
    grid_search.fit(X_train, y_train)

    # Best parameters and model
    print(f"Best parameters for {model_name}: {grid_search.best_params_}")
    print(f"Best score for {model_name}: {grid_search.best_score_}")

    # Predict and evaluate the model
    y_pred = grid_search.predict(X_test)
    print(f"Accuracy on test set for {model_name}: {accuracy_score(y_test, y_pred)}\n")

Best parameters for Naive-Bayes: {'classifier__alpha': 0.5}
Best score for Naive-Bayes: 0.7260000000000001
Accuracy on test set for Naive-Bayes: 0.737





Best parameters for Logistic Regression: {'classifier__C': 0.1, 'classifier__solver': 'saga'}
Best score for Logistic Regression: 0.702
Accuracy on test set for Logistic Regression: 0.716

Best parameters for Random Forest: {'classifier__max_depth': None, 'classifier__min_samples_split': 5, 'classifier__n_estimators': 50}
Best score for Random Forest: 0.7589999999999999
Accuracy on test set for Random Forest: 0.772

Best parameters for Support Vector Machine: {'classifier__C': 1, 'classifier__kernel': 'rbf'}
Best score for Support Vector Machine: 0.73575
Accuracy on test set for Support Vector Machine: 0.759



In [98]:
# Train and Test Neural Networks
from sklearn.neural_network import MLPClassifier

# Define and train the neural network
nn_model = MLPClassifier(hidden_layer_sizes=(128, 64), max_iter=500)
nn_model.fit(X_train, y_train)

# Predict and evaluate
y_pred = nn_model.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

    Negativo       0.83      0.46      0.59        63
     Neutral       0.48      0.46      0.47       234
    Positivo       0.80      0.84      0.82       703

    accuracy                           0.73      1000
   macro avg       0.70      0.59      0.63      1000
weighted avg       0.73      0.73      0.73      1000



## Select best model based on accuracy, precision, recall, and F1-score.
The best model for this sentiment analysis task is:

1. Best parameters for Naive-Bayes: {'classifier__alpha': 0.5}
- Best score for Naive-Bayes: 0.7074285714285715
- Accuracy on test set for Naive-Bayes: 0.7046666666666667

2. Best parameters for Logistic Regression: {'classifier__C': 1, 'classifier__solver': 'saga'}
- Best score for Logistic Regression: 0.64
- Accuracy on test set for Logistic Regression: 0.6706666666666666

3. Best parameters for Random Forest: {'classifier__max_depth': None, 'classifier__min_samples_split': 5, 'classifier__n_estimators': 200}
- Best score for Random Forest: 0.7237142857142856
- Accuracy on test set for Random Forest: 0.7406666666666667

4. Best parameters for Support Vector Machine: {'classifier__C': 1, 'classifier__kernel': 'rbf'}
- Best score for Support Vector Machine: 0.7337142857142858
- Accuracy on test set for Support Vector Machine: 0.7366666666666667


## Model Metrics

In [None]:
# Evaluate model performance on seperate test dataset based on various evaluation metrics
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           1       1.00      0.19      0.32        16
           2       1.00      0.05      0.09        22
           3       1.00      0.09      0.17        65
           4       0.95      0.15      0.26       353
           5       0.73      1.00      0.84      1044

    accuracy                           0.74      1500
   macro avg       0.93      0.30      0.34      1500
weighted avg       0.80      0.74      0.66      1500



In [None]:
#Output preview
"""
Model achieve an accuracy of X% on the test dataset.
Precision, recall, and F1-score for each class are as follows:
Class Positive: Precision=X%, Recall=X%, F1-score=X%
Class Negative: Precision=X%, Recall=X%, F1-score=X%
Class Neutral: Precision=X%, Recall=X%, F1-score=X%
Confusion matrix showing table and graphical representations
"""