<a href="https://colab.research.google.com/github/GDharan10/Dataset21_AmazonReviews_/blob/main/AmazonReviews.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Installation**

In [None]:
pip install lazypredict

Collecting lazypredict
  Downloading lazypredict-0.2.12-py2.py3-none-any.whl (12 kB)
Installing collected packages: lazypredict
Successfully installed lazypredict-0.2.12


# **Libraries**

In [None]:
import pandas as pd
import numpy as np

#Connections
from google.colab import drive

#Preprocessing
from sklearn.preprocessing import OrdinalEncoder, MinMaxScaler, OneHotEncoder, StandardScaler

from imblearn.under_sampling import NearMiss
from imblearn.combine import SMOTETomek

#Machine Learning identifying algorithms/Model
from sklearn.model_selection import train_test_split
from lazypredict.Supervised import LazyRegressor, LazyClassifier

#Machine Learning Training

#Machine Learning Evaluation
from sklearn.metrics import confusion_matrix, accuracy_score, mean_squared_error, r2_score # Regression
from sklearn.metrics import top_k_accuracy_score, precision_score, recall_score # Classification

# **Connections**

In [None]:
# Mount Google Drive at /content/drive so the dataset path used below resolves (Colab-only)
drive.mount('/content/drive')

Mounted at /content/drive


# **Loading dataset**

In [None]:
# Dataset lives on the mounted Google Drive; despite the .csv name it is read as tab-separated
file_path = '/content/drive/My Drive/DTM15/Dataset/amazonreviews.csv'
df = pd.read_csv(file_path,sep="\t")

In [None]:
# Preview the first rows to confirm the file parsed into the label/review columns
df.head()

Unnamed: 0,label,review
0,pos,Stuning even for the non-gamer: This sound tra...
1,pos,The best soundtrack ever to anything.: I'm rea...
2,pos,Amazing!: This soundtrack is my favorite music...
3,pos,Excellent Soundtrack: I truly like this soundt...
4,pos,"Remember, Pull Your Jaw Off The Floor After He..."


In [None]:
# (rows, columns) of the loaded frame
df.shape

(10000, 2)

# **Data cleaning using pandas**


In [None]:
# Column dtypes and non-null counts
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   label   10000 non-null  object
 1   review  10000 non-null  object
dtypes: object(2)
memory usage: 156.4+ KB


In [None]:
# Missing-value count per column
df.isna().sum()

label     0
review    0
dtype: int64

In [None]:
# Number of rows that are exact duplicates of an earlier row
df.duplicated().sum()

0

In [None]:
# Class balance of the target column.
# NOTE(review): the saved output lists 0.00/1.00 rather than 'pos'/'neg', so this cell
# was last executed after the label-encoding cell further down; a fresh top-to-bottom
# run reports counts for the string labels instead.
df["label"].value_counts()

label
0.00    5097
1.00    4903
Name: count, dtype: int64

# **Preprocessing**

In [None]:
# Distinct raw label values before encoding
df["label"].unique()

array(['pos', 'neg'], dtype=object)

In [None]:
# Replace the string sentiment labels with numeric codes. In the saved outputs that
# follow, 'pos' rows appear as 1.0 and the remaining ('neg') rows as 0.0.
encode = OrdinalEncoder()
df["label"] = encode.fit_transform(df[["label"]])

# **NLP**

In [None]:
# Download the "vader_lexicon" resource into nltk_data; presumably required by the
# VADER analyzer created in the next cell.
import nltk
nltk.download("vader_lexicon")

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [None]:
# Build one VADER analyzer instance; `sid` is reused to score every review below
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sid = SentimentIntensityAnalyzer()

In [None]:
# Score every review with VADER. Each entry of "scores" is the dict returned by
# polarity_scores (the saved output shows 'neg', 'neu', 'pos' and a compound entry).
df["scores"] = df["review"].apply(sid.polarity_scores)
df

Unnamed: 0,label,review,scores
0,1.00,Stuning even for the non-gamer: This sound tra...,"{'neg': 0.088, 'neu': 0.669, 'pos': 0.243, 'co..."
1,1.00,The best soundtrack ever to anything.: I'm rea...,"{'neg': 0.018, 'neu': 0.837, 'pos': 0.145, 'co..."
2,1.00,Amazing!: This soundtrack is my favorite music...,"{'neg': 0.04, 'neu': 0.692, 'pos': 0.268, 'com..."
3,1.00,Excellent Soundtrack: I truly like this soundt...,"{'neg': 0.09, 'neu': 0.615, 'pos': 0.295, 'com..."
4,1.00,"Remember, Pull Your Jaw Off The Floor After He...","{'neg': 0.0, 'neu': 0.746, 'pos': 0.254, 'comp..."
...,...,...,...
9995,1.00,A revelation of life in small town America in ...,"{'neg': 0.017, 'neu': 0.846, 'pos': 0.136, 'co..."
9996,1.00,Great biography of a very interesting journali...,"{'neg': 0.0, 'neu': 0.868, 'pos': 0.132, 'comp..."
9997,0.00,Interesting Subject; Poor Presentation: You'd ...,"{'neg': 0.084, 'neu': 0.754, 'pos': 0.162, 'co..."
9998,0.00,Don't buy: The box looked used and it is obvio...,"{'neg': 0.091, 'neu': 0.909, 'pos': 0.0, 'comp..."


In [None]:
# Pull the "compound" entry out of each score dict into its own column
df["compound"] = df["scores"].apply(
    lambda score_dict: score_dict["compound"]
)
df

Unnamed: 0,label,review,scores,compound
0,1.00,Stuning even for the non-gamer: This sound tra...,"{'neg': 0.088, 'neu': 0.669, 'pos': 0.243, 'co...",0.95
1,1.00,The best soundtrack ever to anything.: I'm rea...,"{'neg': 0.018, 'neu': 0.837, 'pos': 0.145, 'co...",0.90
2,1.00,Amazing!: This soundtrack is my favorite music...,"{'neg': 0.04, 'neu': 0.692, 'pos': 0.268, 'com...",0.99
3,1.00,Excellent Soundtrack: I truly like this soundt...,"{'neg': 0.09, 'neu': 0.615, 'pos': 0.295, 'com...",0.98
4,1.00,"Remember, Pull Your Jaw Off The Floor After He...","{'neg': 0.0, 'neu': 0.746, 'pos': 0.254, 'comp...",0.98
...,...,...,...,...
9995,1.00,A revelation of life in small town America in ...,"{'neg': 0.017, 'neu': 0.846, 'pos': 0.136, 'co...",0.96
9996,1.00,Great biography of a very interesting journali...,"{'neg': 0.0, 'neu': 0.868, 'pos': 0.132, 'comp...",0.95
9997,0.00,Interesting Subject; Poor Presentation: You'd ...,"{'neg': 0.084, 'neu': 0.754, 'pos': 0.162, 'co...",0.91
9998,0.00,Don't buy: The box looked used and it is obvio...,"{'neg': 0.091, 'neu': 0.909, 'pos': 0.0, 'comp...",-0.36


In [None]:
# Compound-score cut-off at or above which a review is predicted positive.
# Kept at this notebook's original value.
# NOTE(review): VADER's own guidance uses 0.05 as the positive cut-off; 0.6 looks like a
# choice made for this dataset — confirm, and avoid tuning it on the same rows that the
# accuracy below is computed on.
POSITIVE_THRESHOLD = 0.6

# Vectorised threshold instead of a per-row Python lambda:
# 1 = predicted positive, 0 = predicted negative (int64, same as the original column).
df["comp_out"] = (df["compound"] >= POSITIVE_THRESHOLD).astype("int64")
df.head()

Unnamed: 0,label,review,scores,compound,comp_out
0,1.0,Stuning even for the non-gamer: This sound tra...,"{'neg': 0.088, 'neu': 0.669, 'pos': 0.243, 'co...",0.95,1
1,1.0,The best soundtrack ever to anything.: I'm rea...,"{'neg': 0.018, 'neu': 0.837, 'pos': 0.145, 'co...",0.9,1
2,1.0,Amazing!: This soundtrack is my favorite music...,"{'neg': 0.04, 'neu': 0.692, 'pos': 0.268, 'com...",0.99,1
3,1.0,Excellent Soundtrack: I truly like this soundt...,"{'neg': 0.09, 'neu': 0.615, 'pos': 0.295, 'com...",0.98,1
4,1.0,"Remember, Pull Your Jaw Off The Floor After He...","{'neg': 0.0, 'neu': 0.746, 'pos': 0.254, 'comp...",0.98,1


In [None]:
from sklearn.metrics import accuracy_score

In [None]:
# Share of reviews where the thresholded VADER prediction matches the encoded label
accuracy_score(y_true=df["label"], y_pred=df["comp_out"])

0.7523

# **Machine Learning**