In [1]:
# Attach Google Drive to the Colab runtime; the dataset path read later in the
# notebook lives underneath this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import tensorflow as tf
from tensorflow import keras
# Record the TensorFlow and Keras versions this notebook was run with,
# one per line (TensorFlow first).
print(tf.__version__, keras.__version__, sep="\n")

2.8.2
2.8.0


In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Fixed seed so the shuffled, stratified split (and every model trained on it)
# is the same after a kernel restart.
SEED = 42

# Load the movie-review CSV from the mounted Drive. The "text" column holds the
# review body and "label" holds the integer class (0 or 1 in the recorded run).
dataset = pd.read_csv('/content/drive/MyDrive/datasets/movie.csv', sep=",")
X = dataset["text"]
y = dataset["label"]

# NOTE(review): test_size=0.8 leaves only 20% of the rows for training
# (8000 rows in the recorded run). Kept as-is because it may be deliberate to
# keep the RBF SVM tractable; confirm this is not meant to be 0.2.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.8, shuffle=True, stratify=y, random_state=SEED
)

X_train, y_train

(651      If you have read the book - do not set your ho...
 9452     The first one was the best. The second one suc...
 37943    Let's see how many ways you can insult my gend...
 24928    Snap, crackle, pop! The jarring sound of every...
 2286     Wallace and Gromit are the main characters in ...
                                ...                        
 19547    Some of the posters seem less than gruntled be...
 37183    I recently saw House of Wax and must say i rea...
 10506    Zipperface is the kind of experience one waits...
 22076    Like most musicals of the era, one must check ...
 12792    Having long disdained network television progr...
 Name: text, Length: 8000, dtype: object, 651      0
 9452     0
 37943    0
 24928    1
 2286     1
         ..
 19547    1
 37183    1
 10506    0
 22076    0
 12792    1
 Name: label, Length: 8000, dtype: int64)

## Ensemble
### Bidirectional GRU 32
### SVM

In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline

# Convert the raw training reviews into TF-IDF features, dropping English stop words.
vectorizer = TfidfVectorizer(stop_words='english')
X_train_sv = vectorizer.fit_transform(X_train)

# RBF-kernel SVM fitted on those features; verbose=True lets LibSVM emit its log.
svm_settings = dict(C=10, gamma=0.1, kernel="rbf", verbose=True)
clf1 = SVC(**svm_settings)
clf1.fit(X_train_sv, y_train)


[LibSVM]

SVC(C=10, gamma=0.1, verbose=True)

In [6]:
from keras.models import Sequential
from keras.callbacks import History, EarlyStopping
from keras.layers import Embedding, TextVectorization, Dense, LSTM, Input, Bidirectional

# Learn the token vocabulary (capped at 50,000 entries) from the training reviews.
vectorize_layer = TextVectorization(max_tokens=50000)
vectorize_layer.adapt(X_train)

# Callback instance that is passed to model.fit() in the training cell.
history_Adam = History()

# The embedding table needs one row per vocabulary entry the vectorizer learned.
vocab_size = len(vectorize_layer.get_vocabulary())

# Pipeline: raw string -> token ids -> 64-d embeddings -> bidirectional GRU(32)
# -> 24-unit ReLU layer -> single sigmoid output.
model = Sequential([
    Input(shape=(1,), dtype=tf.string),
    vectorize_layer,
    Embedding(input_dim=vocab_size, output_dim=64),
    Bidirectional(tf.keras.layers.GRU(32)),
    Dense(24, activation='relu'),
    Dense(1, activation='sigmoid'),
])

model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVec  (None, None)             0         
 torization)                                                     
                                                                 
 embedding (Embedding)       (None, None, 64)          3200000   
                                                                 
 bidirectional (Bidirectiona  (None, 64)               18816     
 l)                                                              
                                                                 
 dense (Dense)               (None, 24)                1560      
                                                                 
 dense_1 (Dense)             (None, 1)                 25        
                                                                 
Total params: 3,220,401
Trainable params: 3,220,401
Non-

In [7]:
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Early stopping must not look at the test split: monitoring val_loss on
# (X_test, y_test) lets the test data choose the stopping epoch. Carve a
# validation set out of the training data instead and keep X_test untouched.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.1, shuffle=True, stratify=y_train, random_state=42
)

# restore_best_weights=True rolls the model back to the epoch with the lowest
# val_loss when training is stopped early, rather than keeping the weights from
# `patience` epochs later.
early_stopping = EarlyStopping(
    monitor='val_loss', patience=3, mode='min', verbose=1, restore_best_weights=True
)
model.fit(
    X_fit,
    y_fit,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=100,
    callbacks=[history_Adam, early_stopping],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 5: early stopping


<keras.callbacks.History at 0x7fa21cfdae90>

In [None]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.ensemble import VotingClassifier

def create_model(max_tokens=50000, embedding_dim=64, gru_units=32, dense_units=24):
  """Build and compile a fresh bidirectional-GRU binary text classifier.

  Intended as the ``build_fn`` of a ``KerasClassifier``: every call returns a
  new, untrained model. The text vectorizer is adapted on the notebook-level
  ``X_train`` each time the function is called.

  Args:
    max_tokens: Maximum vocabulary size passed to ``TextVectorization``.
    embedding_dim: Output dimension of the embedding layer.
    gru_units: Number of units of the GRU wrapped by ``Bidirectional``.
    dense_units: Width of the ReLU layer before the sigmoid output.

  Returns:
    A compiled ``Sequential`` model (binary cross-entropy loss, Adam optimizer,
    accuracy metric) that accepts raw strings as input.
  """
  vectorize_layer = TextVectorization(max_tokens=max_tokens)
  vectorize_layer.adapt(X_train)

  model = Sequential()

  # Vectorization: raw string -> sequence of token ids
  model.add(Input(shape=(1,), dtype=tf.string))
  model.add(vectorize_layer)

  # Embedding: one row per vocabulary entry the vectorizer learned
  model.add(Embedding(input_dim=len(vectorize_layer.get_vocabulary()), output_dim=embedding_dim))

  model.add(Bidirectional(tf.keras.layers.GRU(gru_units)))
  model.add(Dense(dense_units, activation='relu'))
  model.add(Dense(1, activation='sigmoid'))
  model.compile(loss="binary_crossentropy",optimizer="adam", metrics=["accuracy"])
  return model

# Expose the Keras network through the scikit-learn estimator interface.
# NOTE(review): no epochs/batch_size are passed here, unlike the standalone
# model.fit() call (epochs=20, batch_size=100) — confirm the wrapper's
# defaults are what is wanted for the ensemble member.
keras_model = KerasClassifier(build_fn=create_model)
# Presumably needed so VotingClassifier accepts the wrapper as a classifier — confirm.
setattr(keras_model, "_estimator_type", "classifier")

# TF-IDF + RBF SVM member, with the same hyperparameters as the standalone SVM
# plus probability=True (presumably required for soft voting — confirm).
steps = [
    ('TF', TfidfVectorizer(stop_words='english')),
    ('SVM', SVC(C=10, gamma=0.1, kernel='rbf', probability=True)),
]
clf = Pipeline(steps)

# Soft-voting ensemble over the SVM pipeline and the Keras model.
ensemble_members = [('svc', clf), ('keras_model', keras_model)]
eclf = VotingClassifier(estimators=ensemble_members, voting='soft')

eclf.fit(X_train, y_train)



In [15]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import numpy as np

# Predict once and reuse the labels for every metric, instead of re-running
# the whole ensemble four times.
y_pred = eclf.predict(X_test)

# scikit-learn metrics take (y_true, y_pred). With the arguments reversed the
# value printed as "recall" is really precision and vice versa.
for name, score in [("accuracy", accuracy_score), ("recall", recall_score), ("precision", precision_score), ("f1", f1_score)]:
    print(name, score(y_test, y_pred))


AttributeError: ignored