##**Using SVM Model**##

##**Importing required library**##

In [46]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

##**Load required dataset**##

In [47]:
data = pd.read_csv('/content/drive/MyDrive/Code Clause /movie_reviews.csv')

In [48]:
data.head()

Unnamed: 0,Summary,Sentiment
0,rock destined st century new conan going make ...,1
1,gorgeously elaborate continuation lord ring tr...,1
2,effective tepid biopic,1
3,sometimes like go movie fun wasabi good place ...,1
4,emerges something rare issue movie honest keen...,1


##**Splitting the dataset into training and testing**##

In [49]:
train_data, test_data, train_labels, test_labels = train_test_split(data['Summary'], data['Sentiment'], test_size=0.2, random_state=42)

##**Vectorize the text data using TF-IDF**##

In [50]:
vectorizer = TfidfVectorizer()
train_vectors = vectorizer.fit_transform(train_data)
test_vectors = vectorizer.transform(test_data)

##**Trainning the SVM model on the training data**##

In [51]:
svm = SVC(kernel='linear')
svm.fit(train_vectors, train_labels)

SVC(kernel='linear')

##**Predictions on the testing data**##

In [52]:
predictions = svm.predict(test_vectors)

In [53]:
predictions

array([0, 1, 0, ..., 1, 1, 0])

##**Evaluate the accuracy of the model**##

In [54]:
accuracy = accuracy_score(test_labels, predictions)
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 75.34%


##**Sentiment analysis using a random forest classifier**##

In [55]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

##**Train a random forest classifier on the training data**##

In [56]:
rfc = RandomForestClassifier(n_estimators=100, random_state=42)
rfc.fit(train_vectors, train_labels)

RandomForestClassifier(random_state=42)

##**Make predictions on the testing data**##

In [57]:
predictions = rfc.predict(test_vectors)

In [58]:
predictions

array([0, 1, 0, ..., 1, 1, 0])

##**Evaluate the accuracy of the model**##

In [59]:
accuracy = accuracy_score(test_labels, predictions)
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 68.82%


##**Sentiment analysis using a LSTM (Long Short-Term Memory) neural network**##

In [60]:
!pip install keras

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [61]:
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense

##**Tokenize the text data**##

In [62]:
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(train_data)
train_sequences = tokenizer.texts_to_sequences(train_data)
test_sequences = tokenizer.texts_to_sequences(test_data)

##**Pad the sequences to a fixed length**##

In [63]:
max_length = 100
train_vectors = pad_sequences(train_sequences, maxlen=max_length)
test_vectors = pad_sequences(test_sequences, maxlen=max_length)

##**Define the LSTM model**##

In [64]:
embedding_dim = 100
model = Sequential()
model.add(Embedding(input_dim=5000, output_dim=embedding_dim, input_length=max_length))
model.add(LSTM(units=128))
model.add(Dense(units=1, activation='sigmoid'))

##**Compile the model**##

In [65]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

##**Train the model**##

In [66]:
model.fit(train_vectors, train_labels, epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fed70fb29d0>

##**Evaluate the model**##

In [67]:
accuracy = model.evaluate(test_vectors, test_labels)[1]
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 70.84%
