In [5]:
import tensorflow as tf

# Check if TensorFlow is using the GPU
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

# This should show the available GPU(s)
if tf.config.list_physical_devices('GPU'):
    print("TensorFlow is using the GPU!")
else:
    print("TensorFlow is using the CPU.")


Num GPUs Available:  2
TensorFlow is using the GPU!


In [10]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Embedding

# Load IMDb dataset function
def load_imdb_dataset():
    # IMDb dataset loading (from imdb_master.csv)
    imdb_data = pd.read_csv('/kaggle/input/imdb-review-dataset/imdb_master.csv', encoding='latin-1')
    
    # Assuming 'review' column has the text and 'label' is the sentiment
    imdb_data = imdb_data[imdb_data['label'] != 'unsup']  # Exclude unsupervised data
    
    imdb_train_data = imdb_data['review'].values
    imdb_train_labels = imdb_data['label'].map({'pos': 1, 'neg': 0}).values  # Mapping labels to 0 (neg) and 1 (pos)

    # Split data into training and testing (using an 80/20 split)
    split_index = int(0.8 * len(imdb_train_data))
    imdb_test_data = imdb_train_data[split_index:]
    imdb_test_labels = imdb_train_labels[split_index:]
    imdb_train_data = imdb_train_data[:split_index]
    imdb_train_labels = imdb_train_labels[:split_index]

    # Preprocess text data (e.g., tokenization and padding)
    tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=10000)
    tokenizer.fit_on_texts(imdb_train_data)
    
    imdb_train_data = tokenizer.texts_to_sequences(imdb_train_data)
    imdb_test_data = tokenizer.texts_to_sequences(imdb_test_data)
    
    imdb_train_data = tf.keras.preprocessing.sequence.pad_sequences(imdb_train_data, maxlen=250)
    imdb_test_data = tf.keras.preprocessing.sequence.pad_sequences(imdb_test_data, maxlen=250)
    
    return imdb_train_data, imdb_train_labels, imdb_test_data, imdb_test_labels

# Load HAR dataset function
def load_har_dataset():
    # Load HAR dataset
    har_train_data = pd.read_csv('/kaggle/input/human-activity-recognition-with-smartphones/train.csv')
    har_test_data = pd.read_csv('/kaggle/input/human-activity-recognition-with-smartphones/test.csv')

    # Extract features and labels
    har_train_labels = har_train_data.iloc[:, -1].values  # Extract labels as-is
    har_train_data = har_train_data.iloc[:, :-1].values  # Features
    har_test_labels = har_test_data.iloc[:, -1].values  # Extract labels as-is
    har_test_data = har_test_data.iloc[:, :-1].values  # Features

    # Check unique labels to convert them to numeric
    unique_labels = np.unique(np.concatenate((har_train_labels, har_test_labels)))
    label_to_index = {label: index for index, label in enumerate(unique_labels)}

    # Convert labels to numeric format
    har_train_labels = np.array([label_to_index[label] for label in har_train_labels])
    har_test_labels = np.array([label_to_index[label] for label in har_test_labels])

    return har_train_data, har_train_labels, har_test_data, har_test_labels

# Normalize HAR data
def normalize_data(data):
    return data / np.linalg.norm(data, axis=1, keepdims=True)

# Function to train and evaluate an RNN on both IMDb and HAR datasets
def train_rnn_model(train_data, train_labels, test_data, test_labels, input_shape, n_classes, units, model_type="many_to_one"):
    model = Sequential()
    
    if model_type == "many_to_one":
        # For IMDb, using Embedding layer (which outputs 3D tensors)
        if len(input_shape) == 1:  # IMDb dataset (with embedding layer)
            model.add(Embedding(10000, 128))  # No need to specify input_length explicitly
            model.add(SimpleRNN(units, activation='relu', return_sequences=False))  # Many-to-One model

        # For HAR dataset, reshape input as (batch_size, timesteps, features)
        else:  
            model.add(SimpleRNN(units, activation='relu', input_shape=input_shape, return_sequences=False))
    
    model.add(Dense(n_classes, activation='softmax'))

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(train_data, train_labels, epochs=5, batch_size=64, verbose=1)

    # Evaluate model
    loss, accuracy = model.evaluate(test_data, test_labels, verbose=0)
    return accuracy



# Varying configurations for IMDb (short sequences) and HAR (long sequences)
imdb_configs = [32, 64]
har_configs = [32, 64]
models = ["many_to_one", "one_to_one", "one_to_many"]

# Load datasets
imdb_train_data, imdb_train_labels, imdb_test_data, imdb_test_labels = load_imdb_dataset()
har_train_data, har_train_labels, har_test_data, har_test_labels = load_har_dataset()
har_train_data = normalize_data(har_train_data)
har_test_data = normalize_data(har_test_data)

# Storing the results for different configurations
imdb_results = []
har_results = []

# Train and evaluate models on IMDb dataset
for units in imdb_configs:
    for model_type in models:
        accuracy = train_rnn_model(imdb_train_data, imdb_train_labels, imdb_test_data, imdb_test_labels, (250,), 2, units, model_type=model_type)
        imdb_results.append([f"Units: {units}, Model: {model_type}", accuracy])

# Train and evaluate models on HAR dataset
for units in har_configs:
    for model_type in models:
        accuracy = train_rnn_model(har_train_data, har_train_labels, har_test_data, har_test_labels, (561,), 6, units, model_type=model_type)
        har_results.append([f"Units: {units}, Model: {model_type}", accuracy])

# Tabulate results
print("IMDb Dataset (Short Sequences) Results:")
print("+---------------------------+-----------------+")
print("| Configuration              |   Test Accuracy |")
print("+===========================+=================+")
for result in imdb_results:
    print(f"| {result[0]:<27} | {result[1]:>15.4f} |")
print("+---------------------------+-----------------+")

print("\nHAR Dataset (Long Sequences) Results:")
print("+---------------------------+-----------------+")
print("| Configuration              |   Test Accuracy |")
print("+===========================+=================+")
for result in har_results:
    print(f"| {result[0]:<27} | {result[1]:>15.4f} |")
print("+---------------------------+-----------------+")


Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 20ms/step - accuracy: 0.6932 - loss: 0.5593
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 20ms/step - accuracy: 0.8876 - loss: 0.2712
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 19ms/step - accuracy: 0.8967 - loss: 0.2524
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 19ms/step - accuracy: 0.7934 - loss: 0.4283
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 19ms/step - accuracy: 0.9201 - loss: 0.2151
Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.5239 - loss: 509.3690
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.5269 - loss: 27.4890
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.5285 - loss: 27.4408
Epoch 4/5
[1m625/625[0m [32m━━━