In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report

In [2]:
# Load dataset
data = pd.read_csv('APPLE_iphone_.csv') 
data.head()

Unnamed: 0,Ratings,Comment,Reviews
0,5,Super!,Great camera for pics and videos Battery life ...
1,5,Must buy!,Great device. Let me tell the Pros..1. Superb ...
2,5,Great product,"Who all loves older size i.e., 4.7 inch type s..."
3,5,Simply awesome,This iPhone SE is the best phone ever you get....
4,5,Classy product,This is my second iphone after iphone 4s. I’ve...


In [3]:
data.shape

(9713, 3)

In [4]:
data.size

29139

In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9713 entries, 0 to 9712
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Ratings  9713 non-null   int64 
 1   Comment  9713 non-null   object
 2   Reviews  9713 non-null   object
dtypes: int64(1), object(2)
memory usage: 227.8+ KB


In [6]:
data.isnull().sum()

Ratings    0
Comment    0
Reviews    0
dtype: int64

In [7]:
data.duplicated().sum()

649

In [8]:
data.drop_duplicates()

Unnamed: 0,Ratings,Comment,Reviews
0,5,Super!,Great camera for pics and videos Battery life ...
1,5,Must buy!,Great device. Let me tell the Pros..1. Superb ...
2,5,Great product,"Who all loves older size i.e., 4.7 inch type s..."
3,5,Simply awesome,This iPhone SE is the best phone ever you get....
4,5,Classy product,This is my second iphone after iphone 4s. I’ve...
...,...,...,...
9708,5,Terrific purchase,Absolutely brilliantREAD MORE
9709,5,Classy product,"Superb phone. This is my 4th iPhone, I feel SE..."
9710,5,Awesome,very niceREAD MORE
9711,5,Super!,Loving it as of now. Good Product .READ MORE


In [9]:
data['Sentiment'] = data['Ratings'].apply(lambda x: 'Positive' if x in [3, 4, 5] else 'Negative') #positive and neutral taken as positive

In [10]:
# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data['Reviews'], data['Sentiment'], test_size=0.2, random_state=42)

In [11]:
# Feature extraction using TF-IDF Vectorizer
tfidf_vectorizer = TfidfVectorizer(max_features=10000)
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

In [12]:
# Model training (Support Vector Machine classifier)
svm_classifier = SVC(kernel='linear')
svm_classifier.fit(X_train_tfidf, y_train)

In [13]:
# Model evaluation
y_pred = svm_classifier.predict(X_test_tfidf)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

    Negative       0.62      0.30      0.40       127
    Positive       0.95      0.99      0.97      1816

    accuracy                           0.94      1943
   macro avg       0.79      0.64      0.69      1943
weighted avg       0.93      0.94      0.93      1943



In [18]:
# Function to map sentiment to ratings based on text content
def map_sentiment_to_rating(sentiment, text):
    if sentiment == 'Positive':
        if 'excellent' in text.lower() or 'highly recommend' in text.lower():
            return [5]
        elif 'good' in text.lower() or 'satisfied' in text.lower():
            return [4]
        else:
            return [3]  # Default positive rating
    else:
        if 'poor quality' in text.lower() or 'disappointed' in text.lower() or 'unsatisfied' in text.lower():
            return [1]
        elif 'bad' in text.lower() or 'not good' in text.lower():
            return [2]
        else:
            return [1]  # Default negative rating

In [33]:
# Predict sentiment on new data
new_texts = ["Great phone, highly recommend it!", "Poor quality, disappointed with the purchase.","happy", "unsatisfied", "bad experience","satisfied"]
new_texts_tfidf = tfidf_vectorizer.transform(new_texts)
predictions = svm_classifier.predict(new_texts_tfidf)

In [34]:
# Map predictions to ratings
for text, prediction in zip(new_texts, predictions):
    predicted_rating = map_sentiment_to_rating(prediction, text)
    print(f"Text: {text}\nPredicted Sentiment: {prediction} Predicted Rating: {predicted_rating}\n")


Text: Great phone, highly recommend it!
Predicted Sentiment: Positive Predicted Rating: [5]

Text: Poor quality, disappointed with the purchase.
Predicted Sentiment: Negative Predicted Rating: [1]

Text: happy
Predicted Sentiment: Positive Predicted Rating: [3]

Text: unsatisfied
Predicted Sentiment: Negative Predicted Rating: [1]

Text: bad experience
Predicted Sentiment: Negative Predicted Rating: [2]

Text: satisfied
Predicted Sentiment: Positive Predicted Rating: [4]

