# **1. Import the necessary packages**

In [35]:
import pandas as pd
from sklearn import svm
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import RidgeClassifier
from sklearn.metrics import classification_report

# **2. Read and explore the dataset**

## 2.1 Read and explore the train dataset

In [36]:
# Load the labelled training reviews from a path relative to the working
# directory. Malformed CSV rows are still skipped, but 'warn' reports each one
# instead of discarding it silently, so any data loss shows up in the output.
movie_reviews_training = pd.read_csv("movie_reviews_train.csv", on_bad_lines='warn')

In [68]:
# Preview the training frame (columns: "Unnamed: 0", "Content", "Label").
movie_reviews_training

Unnamed: 0,Content,Label
0,every once in a while you see a film that is s...,1
1,the love for family is one of the strongest dr...,1
2,after the terminally bleak reservoir dogs and ...,1
3,( warning to those who have not seen seven : ...,1
4,"having not seen , "" who framed roger rabbit "" ...",1
...,...,...
1795,""" holy man "" boasts a sweet , gentle , comic ...",0
1796,alexander dumas' the three musketeers is one o...,0
1797,""" have you ever heard the one about a movie s...",0
1798,this is the first film in what would become th...,0


In [82]:
# Count the reviews per label value to check how balanced the classes are.
movie_reviews_training['Label'].value_counts()

Unnamed: 0_level_0,count
Label,Unnamed: 1_level_1
1,900
0,900


## 2.2 Read and explore the test dataset

In [39]:
# Load the held-out test reviews from a path relative to the working directory.
movie_reviews_testing = pd.read_csv(
    "movie_reviews_test.csv",
)

In [69]:
# Preview the test frame (columns: "Unnamed: 0", "Content", "Label").
movie_reviews_testing

Unnamed: 0,Content,Label
0,hedwig ( john cameron mitchell ) was born a bo...,1
1,one of the more unusual and suggestively viole...,1
2,what do you get when you combine clueless and ...,1
3,>from the man who presented us with henry : th...,1
4,tibet has entered the american consciousness s...,1
...,...,...
195,my inner flag was at half-mast last year when ...,0
196,"if anything , "" stigmata "" should be taken as ...",0
197,woof ! too bad that leap of faith was the titl...,0
198,the plot of big momma's house is martin lawren...,0


# **3. Preprocessing-Feature Extraction**

In [70]:
# TF-IDF feature extractor shared by every model in this notebook.
extractor = TfidfVectorizer(
    min_df=5,           # drop terms that occur in fewer than 5 documents
    max_df=0.8,         # drop terms that occur in more than 80% of documents
    sublinear_tf=True,  # scale term frequency as 1 + log(tf)
    use_idf=True,       # apply inverse-document-frequency reweighting
)

In [71]:
# Fit the vectorizer on the training text only, then vectorize that text.
train_features = extractor.fit_transform(movie_reviews_training["Content"])

In [72]:
# Vectorize the test text with the already-fitted extractor (transform only,
# no refit), so the test matrix has the same columns as the training matrix.
test_features = extractor.transform(movie_reviews_testing["Content"])

In [73]:
# Sanity check: (number of training reviews, number of extracted features).
train_features.shape

(1800, 12495)

In [74]:
# Sanity check: the feature count must match the training matrix.
test_features.shape

(200, 12495)

# **4. Create SVM model**

## 4.1 Construct the model

In [47]:
# Support-vector classifier with a linear kernel; every other hyperparameter
# is left at its scikit-learn default.
svm_model = svm.SVC(kernel='linear')

## 4.2 Train the model

In [48]:
# Train on the TF-IDF training matrix against the "Label" column, then print a
# marker so it is obvious when fitting has finished.
svm_model.fit(train_features, movie_reviews_training["Label"])
print("Done!")

Done!


## 4.3 Model Prediction

In [49]:
# Predict a label for every review in the test matrix.
predictions = svm_model.predict(test_features)

## 4.4 Model Evaluation

In [50]:
# Display names for the rows of the classification reports.
# NOTE(review): assumes label 0 means negative and label 1 means positive —
# confirm against the dataset description.
target_names = ['Negative', 'Positive']

In [51]:
# Build the per-class precision/recall/F1 report for the SVM predictions.
# labels=[0, 1] pins which class each entry of target_names refers to, instead
# of relying on the sorted order of whatever labels happen to be observed.
# NOTE(review): the 0 -> 'Negative', 1 -> 'Positive' mapping is assumed — confirm.
summary = classification_report(
    movie_reviews_testing['Label'],
    predictions,
    labels=[0, 1],
    target_names=target_names,
)

In [52]:
# print() renders the report's embedded newlines as an aligned table.
print(summary)

              precision    recall  f1-score   support

    Negative       0.91      0.92      0.92       100
    Positive       0.92      0.91      0.91       100

    accuracy                           0.92       200
   macro avg       0.92      0.92      0.91       200
weighted avg       0.92      0.92      0.91       200



Use the SVM model to classify the following review:


In [53]:
# Hand-written, clearly negative review used as a quick smoke test.
my_review = "I hate this product. It is not working at all. It is so bad and is not suitable for my problem."

1. Feature Extraction:

In [75]:
# transform() expects an iterable of documents, so the single review is
# wrapped in a list.
my_review_features = extractor.transform([my_review])

2. Prediction using the SVM model

In [76]:
# Predict the label for the single review and print the resulting array.
# NOTE(review): the model was fitted on the 0/1 "Label" column, so a saved
# output such as ['pos'] does not match this code — re-run to refresh it.
print(svm_model.predict(my_review_features))

['pos']


Use the SVM model to classify the following review:


In [56]:
# Hand-written, clearly positive review used as a quick smoke test.
# Note: this rebinds my_review, replacing the review defined earlier.
my_review = "I love this product. It is very effective in solving my problem!"

1. Feature Extraction:

In [77]:
# Vectorize the current my_review; the single review is wrapped in a list
# because transform() expects an iterable of documents.
my_review_features = extractor.transform([my_review])

2. Prediction using the SVM model

In [78]:
# Predict the label for the single review and print the resulting array.
# NOTE(review): the model was fitted on the 0/1 "Label" column, so a saved
# output such as ['pos'] does not match this code — re-run to refresh it.
print(svm_model.predict(my_review_features))

['pos']


# **5. Create Ridge model**

In [181]:
# Ridge classifier with default hyperparameters, trained on the same TF-IDF
# features and labels as the SVM so the two models can be compared directly.
# RidgeClassifier is imported in the import cell at the top of the notebook,
# which keeps all dependencies in one place.
ridge_model = RidgeClassifier()
ridge_model.fit(train_features, movie_reviews_training["Label"])

# Predict a label for every review in the test matrix.
ridge_predictions = ridge_model.predict(test_features)

In [None]:
# Build and print the per-class precision/recall/F1 report for the Ridge
# predictions. labels=[0, 1] pins which class each entry of target_names
# refers to, instead of relying on the sorted order of the observed labels.
# NOTE(review): the 0 -> 'Negative', 1 -> 'Positive' mapping is assumed — confirm.
ridge_summary = classification_report(
    movie_reviews_testing['Label'],
    ridge_predictions,
    labels=[0, 1],
    target_names=target_names,
)
print(ridge_summary)

In [None]:
# Smoke test of the Ridge model on a clearly negative hand-written review.
my_review = "I hate this product. It is not working at all. It is so bad and is not suitable for my problem."
# The single review is wrapped in a list because transform() expects an
# iterable of documents.
my_review_features = extractor.transform([my_review])
print(ridge_model.predict(my_review_features))

In [None]:
# Smoke test of the Ridge model on a clearly positive hand-written review.
my_review = "I love this product. It is very effective in solving my problem!"
# The single review is wrapped in a list because transform() expects an
# iterable of documents.
my_review_features = extractor.transform([my_review])
print(ridge_model.predict(my_review_features))