In [None]:
import tensorflow as tf
# Detect the available accelerator and pick a matching tf.distribute strategy.
try:
    # TPUClusterResolver() needs no arguments when the TPU_NAME environment
    # variable is set: this is always the case on Kaggle. A ValueError from
    # the resolver is treated as "no TPU available".
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

if tpu:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    # `tf.distribute.experimental.TPUStrategy` is deprecated in favour of
    # `tf.distribute.TPUStrategy`. Use the stable name when this TensorFlow
    # build provides it and fall back to the experimental alias otherwise.
    tpu_strategy_cls = getattr(tf.distribute, 'TPUStrategy', None)
    if tpu_strategy_cls is None:
        tpu_strategy_cls = tf.distribute.experimental.TPUStrategy
    strategy = tpu_strategy_cls(tpu)
else:
    # Default distribution strategy in Tensorflow. Works on CPU and single GPU.
    strategy = tf.distribute.get_strategy()

print("REPLICAS: ", strategy.num_replicas_in_sync)

In [None]:
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Embedding, Bidirectional, LSTM, Conv1D, MaxPooling1D,Dropout
from keras.layers import Dense, Activation, BatchNormalization, Flatten
from keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
from sklearn.utils import resample

# Set constants
VOCAB_SIZE = 10000  # Vocabulary cap; shared by the IMDb loader and the Embedding layer
MAX_SEQUENCE_LENGTH = 500  # Set your desired sequence length
EMBEDDING_DIM = 100  # Adjust as needed
QA_EMBED_SIZE = 64  # Not referenced in this cell
DROPOUT_RATE = 0.2  # Rate of the two Dropout layers in model1

# Load IMDb dataset, capped at VOCAB_SIZE so every token id is a valid
# Embedding row.
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=VOCAB_SIZE)

# Pad sequences
train_data = pad_sequences(train_data, maxlen=MAX_SEQUENCE_LENGTH)
test_data = pad_sequences(test_data, maxlen=MAX_SEQUENCE_LENGTH)


def model1():
    """Build and compile the BiLSTM + Conv1D sentiment classifier.

    Reads the module-level VOCAB_SIZE, EMBEDDING_DIM, MAX_SEQUENCE_LENGTH and
    DROPOUT_RATE constants.

    Returns:
        A compiled ``Sequential`` model ending in a single sigmoid unit,
        using Adam (learning rate 1e-3), binary cross-entropy loss and the
        accuracy metric.
    """
    model = Sequential()
    model.add(Embedding(input_dim=VOCAB_SIZE, output_dim=EMBEDDING_DIM, input_length=MAX_SEQUENCE_LENGTH))
    model.add(Bidirectional(LSTM(128, return_sequences=True, dropout=0.3, recurrent_dropout=0.3)))
    model.add(Conv1D(filters=128, kernel_size=3, padding='valid', activation='relu'))
    model.add(Dropout(DROPOUT_RATE))
    model.add(BatchNormalization())
    # This convolution already applies ReLU, so no separate Activation('relu')
    # layer follows it: relu(relu(x)) == relu(x).
    model.add(Conv1D(filters=64, kernel_size=3, padding='valid', activation='relu'))
    model.add(MaxPooling1D(4))
    model.add(Flatten())
    model.add(Dropout(DROPOUT_RATE))
    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(Dense(1))
    model.add(Activation("sigmoid"))
    model.compile(optimizer=Adam(learning_rate=1e-3), loss='binary_crossentropy', metrics=['accuracy'])
    return model


# Only model creation has to happen under the strategy scope; the builder
# itself is an ordinary function defined at module level.
with strategy.scope():
    modelt1 = model1()
    # validation_split=0.2 holds out part of the training arrays for validation.
    modelt1.fit(train_data, train_labels, batch_size=128, epochs=4, validation_split=0.2)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential, Model
from keras.layers import Embedding, Reshape, Dropout, Input, Conv2D, Activation, MaxPooling2D, concatenate, Flatten, Dense, BatchNormalization
from keras import optimizers

# Hyper-parameters shared by the CNN cells
num_features = 5000          # vocabulary cap handed to the IMDb loader
sequence_length = 500        # length every review is padded/truncated to
embedding_dimension = 200    # width of each word vector

# Fetch the IMDb reviews restricted to `num_features` distinct token ids
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=num_features)

# Bring both splits to a common length of `sequence_length` tokens
X_train, X_test = (
    pad_sequences(split, maxlen=sequence_length) for split in (X_train, X_test)
)

# Heights (in tokens) of the parallel convolution kernels
filter_sizes = [2, 3, 4, 5]



# Multi-width convolution tower used by the IMDb CNN
def convolution():
    """Build the parallel-convolution feature extractor.

    One branch is created per entry of the module-level ``filter_sizes``
    (four with the values defined in this notebook). Each branch applies a
    Conv2D with 200 filters and a kernel of shape
    ``(f, embedding_dimension)``, a ReLU, and a MaxPooling2D with pool size
    ``(sequence_length - f + 1, 1)``. The branch outputs are concatenated.

    Returns:
        A functional ``Model`` whose input has shape
        ``(sequence_length, embedding_dimension, 1)``.
    """
    def pooled_branch(window, tensor):
        # Convolve -> ReLU -> max-pool for one kernel height.
        features = Conv2D(filters=200, kernel_size=(window, embedding_dimension))(tensor)
        features = Activation('relu')(features)
        return MaxPooling2D(pool_size=(sequence_length - window + 1, 1))(features)

    inputs = Input(shape=(sequence_length, embedding_dimension, 1))
    branches = [pooled_branch(window, inputs) for window in filter_sizes]

    return Model(inputs=inputs, outputs=concatenate(branches))

# Define the IMDb CNN model
def model2():
    """Build and compile the multi-kernel CNN sentiment classifier.

    Reads the module-level ``num_features``, ``sequence_length`` and
    ``embedding_dimension`` values and embeds the ``convolution()`` model as
    a layer.

    Returns:
        A compiled ``Sequential`` model ending in a single sigmoid unit,
        using a default-configured Adam optimizer, binary cross-entropy loss
        and the accuracy metric.
    """
    model = Sequential()
    # Declare the input shape up front. An `input_shape` argument on a layer
    # that is not the first one in a Sequential model is ignored, so it is
    # stated here instead of on the Reshape layer.
    model.add(Input(shape=(sequence_length,)))
    model.add(Embedding(input_dim=num_features, output_dim=embedding_dimension))
    # Add a trailing channel axis so the embedded text can be fed to Conv2D.
    model.add(Reshape((sequence_length, embedding_dimension, 1)))
    model.add(convolution())
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(8))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.1))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    adam = optimizers.Adam()

    model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])

    return model

# Create the CNN under the distribution strategy's scope and train it,
# holding out a 0.2 fraction of the training arrays for validation.
with strategy.scope():
    modelt2 = model2()
    modelt2.fit(
        X_train,
        y_train,
        batch_size=64,
        epochs=10,
        validation_split=0.2,
    )




In [None]:
from keras.models import Sequential
from keras.layers import Embedding, Conv1D, MaxPooling1D, GRU, Dense, Dropout, BatchNormalization
from keras.optimizers import Adam
from keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Vocabulary cap shared by the loader and the Embedding layer, so the
# embedding table has no rows that the data can never index.
MODEL3_VOCAB_SIZE = 5000
MODEL3_SEQUENCE_LENGTH = 500

# Load and preprocess IMDb dataset. This is plain NumPy work, so it does not
# need to run inside the distribution strategy scope.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=MODEL3_VOCAB_SIZE)
x_train = pad_sequences(x_train, maxlen=MODEL3_SEQUENCE_LENGTH)
x_test = pad_sequences(x_test, maxlen=MODEL3_SEQUENCE_LENGTH)


def model3():
    """Build and compile the Conv1D + GRU sentiment classifier.

    Layer order: Embedding, Conv1D(128), Dropout, BatchNormalization,
    Conv1D(64), MaxPooling1D(4), GRU(250), Dropout, Dense(1, sigmoid).

    Returns:
        A compiled ``Sequential`` model using the 'adam' optimizer, binary
        cross-entropy loss and the accuracy metric.
    """
    model = Sequential()

    # Embedding layer (one row per token id the loader can emit)
    model.add(Embedding(input_dim=MODEL3_VOCAB_SIZE, output_dim=200))

    # Convolutional layers
    model.add(Conv1D(filters=128, kernel_size=3, padding='valid', activation='relu'))
    model.add(Dropout(0.35))
    model.add(BatchNormalization())
    model.add(Conv1D(filters=64, kernel_size=3, padding='valid', activation='relu'))
    model.add(MaxPooling1D(4))

    # GRU layer
    model.add(GRU(250))
    model.add(Dropout(0.35))

    # Dense layer for classification
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model


with strategy.scope():
    modelt3 = model3()

    # Train the model
    modelt3.fit(x_train, y_train, batch_size=64, epochs=2, validation_split=0.2)

In [None]:
from sklearn.metrics import accuracy_score

# Requires the three trained models from the cells above: modelt1, modelt2
# and modelt3.

# Get predictions from each individual model on the test set. Every model is
# scored on test reviews encoded the same way as its own training data:
#   * modelt1 was fitted on `train_data` (num_words=10000), so it is scored
#     on `test_data`. `X_test` is loaded with num_words=5000, which is a
#     different encoding from the one modelt1 was trained on.
#   * modelt2 was fitted on `X_train`, so it is scored on `X_test`.
#   * modelt3 was fitted on `x_train`, so it is scored on `x_test`.
predictions_model1 = modelt1.predict(test_data)
predictions_model2 = modelt2.predict(X_test)
predictions_model3 = modelt3.predict(x_test)


In [None]:
# Weighted Average Ensemble

# Blending weights in the order (model1, model2, model3); they can be tuned
# against performance on a validation set.
weights = [0.2, 0.5, 0.3]

# Accumulate the weighted score one model at a time
ensemble_predictions = weights[0] * predictions_model1
ensemble_predictions = ensemble_predictions + weights[1] * predictions_model2
ensemble_predictions = ensemble_predictions + weights[2] * predictions_model3

# Scores of 0.5 or more are labelled 1, everything else 0
ensemble_binary = (ensemble_predictions >= 0.5).astype(int)

# Compare the blended labels with the test labels
accuracy = accuracy_score(y_test, ensemble_binary)
print("Ensemble Model Accuracy (Weighted Average Ensemble):", accuracy)

In [None]:
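# Max-confidence ensemble: for each sample, use the prediction of the most confident model (this is not majority/hard voting)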

import numpy as np
from sklearn.metrics import accuracy_score

# Decision boundary for the sigmoid outputs. Confidence is measured as the
# distance from this same value, so the selection rule and the final
# binarisation agree.
DECISION_THRESHOLD = 0.5

# Make predictions with each model, feeding each one test reviews encoded
# like its own training data: modelt1 was fitted on the num_words=10000
# arrays (`test_data`), modelt2 on `X_test`'s encoding, modelt3 on `x_test`'s.
predictions_model1 = modelt1.predict(test_data)
predictions_model2 = modelt2.predict(X_test)
predictions_model3 = modelt3.predict(x_test)

# One column per model. Each model ends in a single output unit, so the
# stacked array has three columns.
all_predictions = np.column_stack((predictions_model1, predictions_model2, predictions_model3))

# Confidence of each prediction = absolute distance from the decision threshold
confidence = np.abs(all_predictions - DECISION_THRESHOLD)

# Column index of the most confident model for every sample
most_confident_model = confidence.argmax(axis=1)

# Keep, per sample, the prediction of the most confident model. Every row
# gets a value, including rows where all confidences are equal.
ensemble_predictions = np.take_along_axis(
    all_predictions, most_confident_model[:, np.newaxis], axis=1
)

# Convert to binary predictions
ensemble_binary = (ensemble_predictions >= DECISION_THRESHOLD).astype(int)

# Evaluate the ensemble model
accuracy = accuracy_score(y_test, ensemble_binary)
print("Ensemble Model Accuracy (Max Voting):", accuracy)


In [None]:
#Stacking using Logistic Regression

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# Every base model was fitted with validation_split=0.2. Keras takes that
# slice from the END of the training arrays, so the samples from
# `holdout_start` onwards were never used to update the base models' weights.
# The meta-model is fitted on those samples only; fitting it on predictions
# for data the base models trained on would teach it to trust over-confident
# in-sample outputs.
# NOTE(review): assumes all three IMDb loads return the reviews in the same
# order, as the shared label arrays already imply.
HOLDOUT_FRACTION = 0.2  # must match the validation_split used in the fit() calls
holdout_start = int(len(y_train) * (1 - HOLDOUT_FRACTION))
holdout_labels = y_train[holdout_start:]

# Generate predictions from each model on the held-out training samples.
# Each model reads the encoding it was trained on: modelt1 uses the
# num_words=10000 arrays (`train_data` / `test_data`), modelt2 uses
# `X_train` / `X_test`, and modelt3 uses `x_train` / `x_test`.
train_predictions_model1 = modelt1.predict(train_data[holdout_start:])
train_predictions_model2 = modelt2.predict(X_train[holdout_start:])
train_predictions_model3 = modelt3.predict(x_train[holdout_start:])

# Stack predictions together as features for the meta-model (one column per model)
stacked_train_predictions = np.column_stack((train_predictions_model1, train_predictions_model2, train_predictions_model3))

# Train the meta-model
meta_model = LogisticRegression()
meta_model.fit(stacked_train_predictions, holdout_labels)

# Generate predictions from each model on the test data
test_predictions_model1 = modelt1.predict(test_data)
test_predictions_model2 = modelt2.predict(X_test)
test_predictions_model3 = modelt3.predict(x_test)

# Stack predictions together as features for the meta-model
stacked_test_predictions = np.column_stack((test_predictions_model1, test_predictions_model2, test_predictions_model3))

# Make final predictions with the meta-model
final_predictions = meta_model.predict(stacked_test_predictions)

# Evaluate the ensemble model
accuracy = accuracy_score(y_test, final_predictions)
print("Ensemble Model Accuracy (Stacking):", accuracy)


In [None]:
#Stacking using SVM

import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# Every base model was fitted with validation_split=0.2. Keras takes that
# slice from the END of the training arrays, so the samples from
# `holdout_start` onwards were never used to update the base models' weights.
# The meta-model is fitted on those samples only; fitting it on predictions
# for data the base models trained on would teach it to trust over-confident
# in-sample outputs.
# NOTE(review): assumes all three IMDb loads return the reviews in the same
# order, as the shared label arrays already imply.
HOLDOUT_FRACTION = 0.2  # must match the validation_split used in the fit() calls
holdout_start = int(len(y_train) * (1 - HOLDOUT_FRACTION))
holdout_labels = y_train[holdout_start:]

# Generate predictions from each model on the held-out training samples.
# Each model reads the encoding it was trained on: modelt1 uses the
# num_words=10000 arrays (`train_data` / `test_data`), modelt2 uses
# `X_train` / `X_test`, and modelt3 uses `x_train` / `x_test`.
train_predictions_model1 = modelt1.predict(train_data[holdout_start:])
train_predictions_model2 = modelt2.predict(X_train[holdout_start:])
train_predictions_model3 = modelt3.predict(x_train[holdout_start:])

# Stack predictions together as features for the meta-model (one column per model)
stacked_train_predictions = np.column_stack((train_predictions_model1, train_predictions_model2, train_predictions_model3))

# Train the meta-model (SVM)
meta_model = SVC()
meta_model.fit(stacked_train_predictions, holdout_labels)

# Generate predictions from each model on the test data
test_predictions_model1 = modelt1.predict(test_data)
test_predictions_model2 = modelt2.predict(X_test)
test_predictions_model3 = modelt3.predict(x_test)

# Stack predictions together as features for the meta-model
stacked_test_predictions = np.column_stack((test_predictions_model1, test_predictions_model2, test_predictions_model3))

# Make final predictions with the meta-model
final_predictions = meta_model.predict(stacked_test_predictions)

# Evaluate the ensemble model
accuracy = accuracy_score(y_test, final_predictions)
print("Ensemble Model Accuracy (Stacking with SVM):", accuracy)
