In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, LSTM, Dense

# Load IMDb dataset function
def load_imdb_dataset(
    csv_path='/kaggle/input/imdb-review-dataset/imdb_master.csv',
    num_words=10000,
    maxlen=250,
    seed=42,
):
    """Load the IMDb review CSV and return padded train/test token sequences.

    Rows labelled 'unsup' are dropped, the remaining rows are shuffled with a
    seeded generator, and the first 80% become the training split. The
    tokenizer is fitted on the training reviews only.

    Args:
        csv_path: Location of the CSV file; it must contain 'review' and
            'label' columns.
        num_words: Vocabulary size passed to the Keras tokenizer. A model
            consuming the sequences needs an embedding input dimension of at
            least this value.
        maxlen: Length every token sequence is padded/truncated to.
        seed: Seed for the shuffle that precedes the split. Pass None for a
            non-reproducible shuffle.

    Returns:
        Tuple ``(train_data, train_labels, test_data, test_labels)`` where the
        data entries are 2-D integer arrays of width ``maxlen`` and the labels
        map 'neg' to 0 and 'pos' to 1.
    """
    imdb_data = pd.read_csv(csv_path, encoding='latin-1')

    # Exclude unsupervised data
    imdb_data = imdb_data[imdb_data['label'] != 'unsup']

    # Extract reviews and labels
    imdb_reviews = imdb_data['review'].values
    imdb_labels = imdb_data['label'].map({'neg': 0, 'pos': 1}).values

    # A head/tail split is only representative when the rows are in random
    # order. NOTE(review): the Kaggle CSV appears to be grouped by split and
    # label - confirm. Shuffling first keeps both classes in both splits
    # regardless of the file's ordering.
    order = np.random.default_rng(seed).permutation(len(imdb_reviews))
    imdb_reviews = imdb_reviews[order]
    imdb_labels = imdb_labels[order]

    # Split data into training and testing (80/20 split)
    split_index = int(0.8 * len(imdb_reviews))
    imdb_train_data = imdb_reviews[:split_index]
    imdb_train_labels = imdb_labels[:split_index]
    imdb_test_data = imdb_reviews[split_index:]
    imdb_test_labels = imdb_labels[split_index:]

    # Fit the vocabulary on the training reviews only so that nothing from the
    # test split leaks into preprocessing.
    tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=num_words)
    tokenizer.fit_on_texts(imdb_train_data)

    imdb_train_data = tokenizer.texts_to_sequences(imdb_train_data)
    imdb_test_data = tokenizer.texts_to_sequences(imdb_test_data)

    # Pad/truncate every review to the same length
    imdb_train_data = tf.keras.preprocessing.sequence.pad_sequences(imdb_train_data, maxlen=maxlen)
    imdb_test_data = tf.keras.preprocessing.sequence.pad_sequences(imdb_test_data, maxlen=maxlen)

    return imdb_train_data, imdb_train_labels, imdb_test_data, imdb_test_labels

# Load HAR dataset function
def load_har_dataset(
    train_path='/kaggle/input/human-activity-recognition-with-smartphones/train.csv',
    test_path='/kaggle/input/human-activity-recognition-with-smartphones/test.csv',
    drop_columns=('subject',),
):
    """Load the HAR train/test CSVs and return feature arrays and integer labels.

    The last column of each file is taken as the label. Every other column is
    a feature, except those named in ``drop_columns``.

    Args:
        train_path: Path of the training CSV.
        test_path: Path of the testing CSV.
        drop_columns: Names of non-feature columns to remove from the feature
            matrix; names that are not present are ignored.
            NOTE(review): the Kaggle HAR files are believed to carry a
            'subject' participant-id column next to the sensor features -
            confirm. Pass ``()`` to keep every non-label column.

    Returns:
        Tuple ``(train_data, train_labels, test_data, test_labels)``. Labels
        are integer indices starting at 0, assigned in sorted order over the
        labels found in both files so that the two splits share one encoding.
    """
    har_train_frame = pd.read_csv(train_path)
    har_test_frame = pd.read_csv(test_path)

    # Labels are in the last column
    har_train_labels = har_train_frame.iloc[:, -1].values
    har_test_labels = har_test_frame.iloc[:, -1].values

    # Features are the remaining columns minus any identifier columns
    ignored = list(drop_columns)
    har_train_data = har_train_frame.iloc[:, :-1].drop(columns=ignored, errors='ignore').values
    har_test_data = har_test_frame.iloc[:, :-1].drop(columns=ignored, errors='ignore').values

    # Encode labels as indices into the sorted unique labels of both splits;
    # return_inverse yields exactly those indices in one pass.
    _, encoded = np.unique(
        np.concatenate((har_train_labels, har_test_labels)), return_inverse=True
    )
    n_train = len(har_train_labels)
    har_train_labels = encoded[:n_train]
    har_test_labels = encoded[n_train:]

    return har_train_data, har_train_labels, har_test_data, har_test_labels

# Normalize HAR data
def normalize_data(data):
    """Scale every row of a 2-D array to unit Euclidean (L2) norm.

    Args:
        data: 2-D array-like of shape (n_samples, n_features).

    Returns:
        Array of the same shape whose rows have norm 1. Rows that are entirely
        zero are returned unchanged instead of becoming NaN.
    """
    data = np.asarray(data)
    norms = np.linalg.norm(data, axis=1, keepdims=True)
    # A zero norm would make the division produce NaN; dividing by 1 instead
    # leaves the all-zero row as it is.
    norms[norms == 0] = 1
    return data / norms

# Function to train and evaluate models
def train_model(X_train, y_train, X_test, y_test, input_shape, n_classes, model_type='RNN', units=32, epochs=5,
                vocab_size=10000, embedding_dim=128, batch_size=64):
    """Build, train and evaluate a single-layer recurrent classifier.

    A 1-element ``input_shape`` is treated as a sequence of token ids and gets
    an Embedding layer in front of the recurrent layer; any other shape is fed
    to the recurrent layer directly.

    Args:
        X_train: Training inputs.
        y_train: Integer class labels for ``X_train``.
        X_test: Held-out inputs, used both for per-epoch validation metrics
            and for the final evaluation.
        y_test: Integer class labels for ``X_test``.
        input_shape: Shape of one sample, without the batch dimension.
        n_classes: Number of output classes (size of the softmax layer).
        model_type: 'RNN' for SimpleRNN or 'LSTM' for LSTM.
        units: Number of units in the recurrent layer.
        epochs: Number of training epochs.
        vocab_size: Input dimension of the Embedding layer; must be larger
            than the largest token id in the data.
        embedding_dim: Output dimension of the Embedding layer.
        batch_size: Mini-batch size used for training.

    Returns:
        Accuracy on ``(X_test, y_test)`` after training.

    Raises:
        ValueError: If ``model_type`` is neither 'RNN' nor 'LSTM'.
    """
    # Fail early: an unknown type used to fall through every branch and train
    # a model without any recurrent layer.
    if model_type not in ('RNN', 'LSTM'):
        raise ValueError(f"model_type must be 'RNN' or 'LSTM', got {model_type!r}")
    recurrent_layer = SimpleRNN if model_type == 'RNN' else LSTM

    model = Sequential()

    # Declare the input with an explicit Input object rather than the
    # input_shape=/input_length= layer arguments, which Keras 3 deprecates.
    if len(input_shape) == 1:  # IMDb dataset: integer token ids
        model.add(tf.keras.Input(shape=tuple(input_shape), dtype='int32'))
        model.add(Embedding(vocab_size, embedding_dim))
    else:  # HAR dataset: real-valued sequences
        model.add(tf.keras.Input(shape=tuple(input_shape)))
    model.add(recurrent_layer(units))

    model.add(Dense(n_classes, activation='softmax'))

    # Sparse loss because the labels are integer class indices, not one-hot
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
              validation_data=(X_test, y_test), verbose=1)

    _, accuracy = model.evaluate(X_test, y_test, verbose=0)
    return accuracy

# Load datasets
X_imdb_train, y_imdb_train, X_imdb_test, y_imdb_test = load_imdb_dataset()
X_har_train, y_har_train, X_har_test, y_har_test = load_har_dataset()

# Normalize HAR data
X_har_train = normalize_data(X_har_train)
X_har_test = normalize_data(X_har_test)

# Reshape HAR data for RNN/LSTM input: (samples, features) -> (samples, features, 1),
# i.e. every feature becomes one time step carrying a single value
X_har_train = X_har_train.reshape((X_har_train.shape[0], X_har_train.shape[1], 1))
X_har_test = X_har_test.reshape((X_har_test.shape[0], X_har_test.shape[1], 1))

# Define input shapes and number of classes
input_shape_imdb = (X_imdb_train.shape[1],)
n_classes_imdb = 2

input_shape_har = (X_har_train.shape[1], 1)
# Count classes over both splits: the label indices are assigned over train and
# test together, so a class missing from the training split must still get an
# output unit.
n_classes_har = len(np.unique(np.concatenate((y_har_train, y_har_test))))

# Configurations to test (number of recurrent units)
rnn_configs = [32, 64, 128]
lstm_configs = [32, 64, 128]

# Results storage
results = {
    'Dataset': [],
    'Model': [],
    'Units': [],
    'Accuracy': []
}

# One entry per (dataset, model type) pair, in the order the runs are executed:
# (dataset name, model type, unit configurations, data splits, input shape, classes)
imdb_splits = (X_imdb_train, y_imdb_train, X_imdb_test, y_imdb_test)
har_splits = (X_har_train, y_har_train, X_har_test, y_har_test)
experiments = [
    ('IMDb', 'RNN', rnn_configs, imdb_splits, input_shape_imdb, n_classes_imdb),
    ('IMDb', 'LSTM', lstm_configs, imdb_splits, input_shape_imdb, n_classes_imdb),
    ('HAR', 'RNN', rnn_configs, har_splits, input_shape_har, n_classes_har),
    ('HAR', 'LSTM', lstm_configs, har_splits, input_shape_har, n_classes_har),
]

# Train and evaluate every configuration and record its test accuracy
for dataset_name, model_name, unit_options, splits, sample_shape, class_count in experiments:
    for units in unit_options:
        accuracy = train_model(*splits, sample_shape, class_count,
                               model_type=model_name, units=units, epochs=5)
        results['Dataset'].append(dataset_name)
        results['Model'].append(model_name)
        results['Units'].append(units)
        results['Accuracy'].append(accuracy)

# Convert results to DataFrame and display
results_df = pd.DataFrame(results)
print(results_df)


Epoch 1/5




[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 22ms/step - accuracy: 0.7101 - loss: 0.5524 - val_accuracy: 0.8279 - val_loss: 0.4542
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 21ms/step - accuracy: 0.8757 - loss: 0.3079 - val_accuracy: 0.6137 - val_loss: 0.9903
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 20ms/step - accuracy: 0.9223 - loss: 0.2057 - val_accuracy: 0.7824 - val_loss: 0.5944
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 20ms/step - accuracy: 0.9654 - loss: 0.1029 - val_accuracy: 0.7536 - val_loss: 0.7624
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 20ms/step - accuracy: 0.9891 - loss: 0.0417 - val_accuracy: 0.7665 - val_loss: 0.9085
Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 22ms/step - accuracy: 0.7149 - loss: 0.5447 - val_accuracy: 0.7406 - val_loss: 0.5822
Epoch 2/5
[1m625/625[0m [32m━

  super().__init__(**kwargs)


[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 46ms/step - accuracy: 0.1819 - loss: 1.7994 - val_accuracy: 0.1781 - val_loss: 1.7720
Epoch 2/5
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 32ms/step - accuracy: 0.2235 - loss: 1.7538 - val_accuracy: 0.1805 - val_loss: 1.8548
Epoch 3/5
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 32ms/step - accuracy: 0.1918 - loss: 1.8008 - val_accuracy: 0.1849 - val_loss: 1.7778
Epoch 4/5
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 32ms/step - accuracy: 0.1970 - loss: 1.7741 - val_accuracy: 0.2073 - val_loss: 1.7756
Epoch 5/5
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 33ms/step - accuracy: 0.2132 - loss: 1.7676 - val_accuracy: 0.2674 - val_loss: 1.7685
Epoch 1/5
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 48ms/step - accuracy: 0.1822 - loss: 1.8118 - val_accuracy: 0.1822 - val_loss: 1.7968
Epoch 2/5
[1m115/115[0m [32m━━━━━━━