In [None]:
#1- Task: Given a sequence of alphabets (with some missing values), use an RNN and a
#Bidirectional RNN model to predict the missing values in the sequence.

import string
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Bidirectional
from tensorflow.keras.utils import to_categorical

# STEP 1: Training pairs of (word with a '_' gap, letter that belongs in the gap)
# NOTE(review): "A_CHINE" -> "M" does not spell MACHINE once the gap is filled;
# possibly "M_CHINE" -> "A" was intended - confirm before changing the data.
sequences_with_missing = [
    ("MACHINE_", "E"),
    ("A_CHINE", "M"),
    ("_ACHINE", "M"),
    ("MA_HINE", "C"),
    ("MAC_INE", "H"),
]

# STEP 2: Integer-encode, pad to a common length, then one-hot encode
alphabets = list(string.ascii_uppercase)
# 'A'..'Z' map to 1..26; index 0 is the '_' placeholder. pad_sequences also pads
# with 0, so padded positions are indistinguishable from a missing character.
char2idx = dict(zip(alphabets, range(1, len(alphabets) + 1)))
char2idx['_'] = 0  # placeholder for missing
idx2char = {index: symbol for symbol, index in char2idx.items()}

X_encoded = [[char2idx[symbol] for symbol in word] for word, _ in sequences_with_missing]
y_encoded = np.array([char2idx[answer] for _, answer in sequences_with_missing])

# Shorter words receive trailing zeros so every row has the same length
X_padded = pad_sequences(X_encoded, padding='post')

# One class per symbol in the mapping (26 letters + placeholder)
num_classes = len(char2idx)
seq_length = X_padded.shape[1]
X_onehot = to_categorical(X_padded, num_classes=num_classes)
y_onehot = to_categorical(y_encoded, num_classes=num_classes)

# STEP 3: Unidirectional SimpleRNN classifier over the one-hot sequences
rnn_layers = [
    SimpleRNN(64, input_shape=(seq_length, num_classes)),
    Dense(num_classes, activation='softmax'),
]
model_rnn = Sequential(rnn_layers)
model_rnn.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model_rnn.fit(X_onehot, y_onehot, epochs=100, verbose=0)
# Scored on the same samples the model was fitted on, i.e. training accuracy.
loss_rnn, acc_rnn = model_rnn.evaluate(X_onehot, y_onehot, verbose=0)
print(f"✅ RNN Training Complete - Accuracy: {acc_rnn:.2f}")

# STEP 4: Same classifier, but the recurrent layer reads the sequence both ways
birnn_layers = [
    Bidirectional(SimpleRNN(64), input_shape=(seq_length, num_classes)),
    Dense(num_classes, activation='softmax'),
]
model_birnn = Sequential(birnn_layers)
model_birnn.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model_birnn.fit(X_onehot, y_onehot, epochs=100, verbose=0)
# Again evaluated on the training samples themselves.
loss_bi, acc_bi = model_birnn.evaluate(X_onehot, y_onehot, verbose=0)
print(f"✅ Bidirectional RNN Training Complete - Accuracy: {acc_bi:.2f}")

# STEP 5: Predict and compare
def predict_and_decode(model, X_input, idx2char):
    """Run ``model`` on ``X_input`` and translate each prediction to a symbol.

    Args:
        model: Object exposing ``predict(X_input)`` that returns a 2-D array
            of per-class scores, one row per sample.
        X_input: Batch passed to ``model.predict`` unchanged.
        idx2char: Mapping from class index to the symbol it stands for.

    Returns:
        list: One decoded symbol per sample, taken from the highest-scoring
        class of each row.
    """
    probabilities = model.predict(X_input)
    decoded = []
    for class_index in probabilities.argmax(axis=1):
        decoded.append(idx2char[class_index])
    return decoded

# Decode what each model predicts for the training inputs
pred_rnn = predict_and_decode(model_rnn, X_onehot, idx2char)
pred_birnn = predict_and_decode(model_birnn, X_onehot, idx2char)

# Ground-truth letters and the (padded) inputs, mapped back to symbols
actual_targets = list(map(idx2char.__getitem__, y_encoded))
input_sequences = []
for padded_row in X_padded:
    # Any index without a symbol is rendered as the '_' placeholder
    input_sequences.append([idx2char.get(index, '_') for index in padded_row])

print("\n📊 Prediction Comparison:\n")
comparison_rows = zip(input_sequences, actual_targets, pred_rnn, pred_birnn)
for number, (symbols, expected, from_rnn, from_birnn) in enumerate(comparison_rows, start=1):
    print(f"Seq {number}: {' '.join(symbols)}")
    print(f"  Actual Target     : {expected}")
    print(f"  RNN Prediction    : {from_rnn}")
    print(f"  Bi-RNN Prediction : {from_birnn}\n")

# STEP 6: Persist both trained models to HDF5 files in the working directory
for trained_model, target_path in (
    (model_rnn, "alphabet_rnn_model.h5"),
    (model_birnn, "alphabet_birnn_model.h5"),
):
    trained_model.save(target_path)
print("✅ Models saved as 'alphabet_rnn_model.h5' and 'alphabet_birnn_model.h5'")


  super().__init__(**kwargs)


✅ RNN Training Complete - Accuracy: 1.00


  super().__init__(**kwargs)


✅ Bidirectional RNN Training Complete - Accuracy: 1.00




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 268ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 289ms/step





📊 Prediction Comparison:

Seq 1: M A C H I N E _
  Actual Target     : E
  RNN Prediction    : E
  Bi-RNN Prediction : E

Seq 2: A _ C H I N E _
  Actual Target     : M
  RNN Prediction    : M
  Bi-RNN Prediction : M

Seq 3: _ A C H I N E _
  Actual Target     : M
  RNN Prediction    : M
  Bi-RNN Prediction : M

Seq 4: M A _ H I N E _
  Actual Target     : C
  RNN Prediction    : C
  Bi-RNN Prediction : C

Seq 5: M A C _ I N E _
  Actual Target     : H
  RNN Prediction    : H
  Bi-RNN Prediction : H

✅ Models saved as 'alphabet_rnn_model.h5' and 'alphabet_birnn_model.h5'


In [None]:
import random
import string
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Bidirectional
from tensorflow.keras.utils import to_categorical

# Step 1: Create large dataset of synthetic alphabet sequences
def generate_sequences(num_samples=500):
    """Create random letter strings that each have exactly one hidden letter.

    Every sample draws 7 or 8 distinct uppercase letters, picks one position
    at random and replaces that letter with ``'_'``.

    Args:
        num_samples: How many (sequence, answer) pairs to produce.

    Returns:
        list[tuple[str, str]]: ``(masked_sequence, hidden_letter)`` pairs.
    """
    def _one_sample():
        # Draw order (length, letters, gap position) determines the result
        # for a given random seed.
        length = random.choice([7, 8])
        letters = random.sample(string.ascii_uppercase, length)
        gap = random.randint(0, len(letters) - 1)
        masked = letters[:gap] + ['_'] + letters[gap + 1:]
        return ''.join(masked), letters[gap]

    return [_one_sample() for _ in range(num_samples)]

# Build the synthetic training set
sequences_with_missing = generate_sequences(500)

# Step 2: Preprocessing
# 'A'..'Z' map to 1..26; index 0 is the '_' placeholder (and, because
# pad_sequences pads with 0, also what padded positions look like).
char2idx = {symbol: index for index, symbol in enumerate(string.ascii_uppercase, start=1)}
char2idx['_'] = 0
idx2char = {index: symbol for symbol, index in char2idx.items()}

X_encoded = [[char2idx[symbol] for symbol in masked] for masked, _ in sequences_with_missing]
y_encoded = np.array([char2idx[answer] for _, answer in sequences_with_missing])

# Shorter sequences get trailing zeros so all rows share the longest length
X_padded = pad_sequences(X_encoded, padding='post')

# One-hot encode inputs and targets over the 27 symbols of the mapping
num_classes = len(char2idx)
seq_length = X_padded.shape[1]
X_onehot = to_categorical(X_padded, num_classes=num_classes)
y_onehot = to_categorical(y_encoded, num_classes=num_classes)

# Step 3: Unidirectional SimpleRNN classifier
rnn_layers = [
    SimpleRNN(64, input_shape=(seq_length, num_classes)),
    Dense(num_classes, activation='softmax'),
]
model_rnn = Sequential(rnn_layers)
model_rnn.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model_rnn.fit(X_onehot, y_onehot, batch_size=32, epochs=10, verbose=1)

# Step 4: Bidirectional variant of the same classifier
birnn_layers = [
    Bidirectional(SimpleRNN(64), input_shape=(seq_length, num_classes)),
    Dense(num_classes, activation='softmax'),
]
model_birnn = Sequential(birnn_layers)
model_birnn.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model_birnn.fit(X_onehot, y_onehot, batch_size=32, epochs=10, verbose=1)

# Step 5: Score both models on the data they were trained on (no held-out set)
loss_rnn, acc_rnn = model_rnn.evaluate(X_onehot, y_onehot, verbose=0)
loss_birnn, acc_birnn = model_birnn.evaluate(X_onehot, y_onehot, verbose=0)
print(f"✅ RNN Accuracy: {acc_rnn:.2f}")
print(f"✅ Bi-RNN Accuracy: {acc_birnn:.2f}")

# Step 6: Persist both models
# NOTE(review): these are the same file names the earlier cell saves to, so
# running both cells overwrites the earlier files - confirm this is intended.
for trained_model, target_path in (
    (model_rnn, "alphabet_rnn_model.h5"),
    (model_birnn, "alphabet_birnn_model.h5"),
):
    trained_model.save(target_path)
print("✅ Models saved.")


Epoch 1/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.0385 - loss: 3.3554
Epoch 2/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.0694 - loss: 3.2401
Epoch 3/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.0812 - loss: 3.1854
Epoch 4/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.1008 - loss: 3.1097
Epoch 5/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.1458 - loss: 3.0320
Epoch 6/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.1732 - loss: 2.9465
Epoch 7/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.2407 - loss: 2.8688
Epoch 8/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.2132 - loss: 2.8418
Epoch 9/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m



✅ RNN Accuracy: 0.29
✅ Bi-RNN Accuracy: 0.51
✅ Models saved.


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LSTM, Dense, Embedding, Input
from sklearn.preprocessing import LabelEncoder

# Step 1: Create dataset
def create_dataset():
    """Return the demo sequence with '?' gaps and the alphabet it is drawn from.

    The sequence is three passes over A-Z: two passes with D, H, M, S and Z
    hidden, followed by one pass with B, F, J, O and U hidden.

    Returns:
        tuple[list[str], list[str]]: ``(sequence, alphabet)`` where
        ``sequence`` has 78 symbols and ``alphabet`` is the 26 uppercase
        letters in order.
    """
    alphabet = [chr(code) for code in range(ord('A'), ord('Z') + 1)]

    def _hide(hidden_letters):
        # One pass over the alphabet with the given letters replaced by '?'
        return ['?' if letter in hidden_letters else letter for letter in alphabet]

    first_pass = _hide('DHMSZ')
    last_pass = _hide('BFJOU')
    sequence = first_pass * 2 + last_pass
    return sequence, alphabet

# Step 2: Preprocess data (Fixed)
def preprocess_data(sequence, alphabet, seq_length=6):
    """Encode ``sequence`` and build sliding-window training pairs.

    A window of ``seq_length`` consecutive symbols becomes a training input
    and the symbol right after it becomes the target. Only pairs in which
    both the whole window and the target are known (not ``'?'``) are kept.

    Gaps are detected on the raw symbols, not on the encoded ids: the encoder
    assigns id 0 to the first class, which is the same value used to store
    ``'?'`` in ``encoded_seq``. Testing the encoded window for 0 would
    therefore also discard every window containing that first letter, and an
    unknown target would be learned as if it were that letter.

    Args:
        sequence: List of single-character symbols; ``'?'`` marks a gap.
        alphabet: All symbols the encoder should know.
        seq_length: Number of symbols in each input window.

    Returns:
        tuple: ``(X, y, encoder, encoded_seq)`` where ``X`` has shape
        ``(n_samples, seq_length)``, ``y`` has shape ``(n_samples, 1)``,
        ``encoder`` is the fitted ``LabelEncoder`` and ``encoded_seq`` is the
        integer form of ``sequence`` with ``'?'`` stored as 0.
    """
    encoder = LabelEncoder()
    encoder.fit(alphabet)

    # '?' is kept as 0 here so callers see the same encoded sequence as before.
    encoded_seq = [encoder.transform([c])[0] if c != '?' else 0 for c in sequence]

    X, y = [], []
    for start in range(len(sequence) - seq_length):
        end = start + seq_length
        # Skip pairs with a gap in the window or an unknown target.
        if '?' in sequence[start:end] or sequence[end] == '?':
            continue
        X.append(encoded_seq[start:end])
        y.append(encoded_seq[end])

    # Fixed 2-D shapes even when no pair qualifies, so callers can rely on them.
    X = np.array(X, dtype=np.int64).reshape(-1, seq_length)
    y = np.array(y, dtype=np.int64).reshape(-1, 1)

    return X, y, encoder, encoded_seq

# Step 3: Build LSTM Model
def build_lstm_model(input_dim, embedding_dim=16, lstm_units=128):
    """Build and compile an Embedding -> LSTM -> softmax classifier.

    Args:
        input_dim: Number of distinct token ids; used both as the embedding
            vocabulary size and as the number of output classes.
        embedding_dim: Size of each embedding vector.
        lstm_units: Number of units in the LSTM layer.

    Returns:
        A compiled Keras ``Model`` that accepts variable-length id sequences
        and expects one-hot targets (``categorical_crossentropy``).
    """
    token_ids = Input(shape=(None,))

    # NOTE(review): mask_zero=True makes id 0 a padding value; the encoder used
    # elsewhere in this notebook also gives a real letter id 0 - confirm.
    embed_layer = Embedding(input_dim=input_dim, output_dim=embedding_dim, mask_zero=True)
    recurrent_layer = LSTM(lstm_units, return_sequences=False)
    classifier = Dense(input_dim, activation='softmax')

    class_probabilities = classifier(recurrent_layer(embed_layer(token_ids)))

    model = Model(token_ids, class_probabilities)
    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model

# Step 4: Predict missing values
def predict_sequence(model, sequence, encoder, encoded_seq, seq_length=6):
    """Fill every '?' in ``sequence`` with the model's prediction, left to right.

    For each gap the ``seq_length`` preceding symbols are encoded and passed
    to ``model.predict``; positions before the start of the sequence (or
    still unknown) are fed as 0. Earlier gaps are already filled by the time
    later ones are predicted, so predictions feed into later contexts.

    Args:
        model: Object exposing ``predict(batch, verbose=0)``.
        sequence: List of symbols; left unmodified.
        encoder: Object exposing ``transform`` and ``inverse_transform``.
        encoded_seq: Integer form of ``sequence``; updated in place with the
            predicted ids.
        seq_length: Number of context symbols fed to the model.

    Returns:
        list: Copy of ``sequence`` with each '?' replaced by a predicted symbol.
    """
    filled = sequence.copy()

    for position, symbol in enumerate(filled):
        if symbol != '?':
            continue

        context = []
        for offset in range(position - seq_length, position):
            is_known = offset >= 0 and filled[offset] != '?'
            context.append(encoder.transform([filled[offset]])[0] if is_known else 0)

        batch = np.array(context).reshape(1, seq_length)
        class_scores = model.predict(batch, verbose=0)
        best_idx = np.argmax(class_scores)

        filled[position] = encoder.inverse_transform([best_idx])[0]
        encoded_seq[position] = best_idx

    return filled

# Main Execution
def main():
    """Train the LSTM on the demo sequence and print the gap-filled result."""
    sequence, alphabet = create_dataset()
    X, y, encoder, encoded_seq = preprocess_data(sequence, alphabet)

    # Nothing to train on: report and stop
    if not len(X):
        print("Error: No valid training sequences found")
        return

    # The model is compiled for one-hot targets
    num_letters = len(alphabet)
    y = tf.keras.utils.to_categorical(y, num_classes=num_letters)

    # First 80% of the samples train the model, the remainder validates it
    cut = int(0.8 * len(X))
    X_train, y_train = X[:cut], y[:cut]
    X_test, y_test = X[cut:], y[cut:]

    model = build_lstm_model(input_dim=num_letters)
    print("Training LSTM model...")
    model.fit(
        X_train,
        y_train,
        validation_data=(X_test, y_test),
        epochs=50,
        batch_size=32,
        verbose=1,
    )

    # Show the sequence before and after the gaps are filled
    print("\nOriginal sequence:", ' '.join(sequence))
    lstm_predictions = predict_sequence(model, sequence, encoder, encoded_seq)

    print("LSTM predictions:", ' '.join(lstm_predictions))


if __name__ == "__main__":
    main()


Training LSTM model...
Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step - accuracy: 0.0000e+00 - loss: 3.2617 - val_accuracy: 1.0000 - val_loss: 3.2451
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 486ms/step - accuracy: 1.0000 - loss: 3.2451 - val_accuracy: 1.0000 - val_loss: 3.2281
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 133ms/step - accuracy: 1.0000 - loss: 3.2281 - val_accuracy: 1.0000 - val_loss: 3.2099
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 141ms/step - accuracy: 1.0000 - loss: 3.2099 - val_accuracy: 1.0000 - val_loss: 3.1900
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step - accuracy: 1.0000 - loss: 3.1900 - val_accuracy: 1.0000 - val_loss: 3.1677
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 152ms/step - accuracy: 1.0000 - loss: 3.1677 - val_accuracy: 1.0000 - val_loss: 3.1422
Epoch 7/50
[1m1

In [None]:
# Predict the next word in a sentence using an RNN. Consider the following
# sentence dataset:
#   The cat sat on the mat.
#   The dog sat on the rug.
#   The bird flew in the sky.
#   The cat jumped over the fence.
# Then predict the word that completes "The cat sat on ___".


import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Training corpus: four short sentences, written without trailing punctuation
sentences = [
    'The cat sat on the mat',
    'The dog sat on the rug',
    'The bird flew in the sky',
    'The cat jumped over the fence',
]

# 1. Text Preprocessing
def preprocess_data(sentences):
    """Turn sentences into (prefix, next-word) training pairs.

    Each sentence is tokenized to word ids; every proper prefix of at least
    one word becomes an input and the word following it becomes the target.

    Args:
        sentences: List of sentence strings.

    Returns:
        tuple: ``(X, y, tokenizer, max_length, vocab_size)`` where ``X`` is
        the left-padded prefix matrix, ``y`` the integer target word ids,
        ``tokenizer`` the fitted ``Tokenizer``, ``max_length`` the longest
        prefix length and ``vocab_size`` the number of known words plus one.
    """
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(sentences)

    X, y = [], []
    for token_ids in tokenizer.texts_to_sequences(sentences):
        for cut in range(1, len(token_ids)):
            X.append(token_ids[:cut])
            y.append(token_ids[cut])

    # Left-pad every prefix up to the longest one
    max_length = max(map(len, X))
    X = pad_sequences(X, maxlen=max_length, padding='pre')

    # Targets stay as integer word ids (the model uses a sparse loss)
    y = np.array(y)
    vocab_size = len(tokenizer.word_index) + 1

    return X, y, tokenizer, max_length, vocab_size

# 2. Model Building
def build_model(vocab_size, max_length):
    """Build and compile an Embedding -> SimpleRNN -> softmax next-word model.

    Args:
        vocab_size: Number of token ids; sizes the embedding and the output.
        max_length: Length of the padded input sequences.

    Returns:
        A compiled Keras ``Sequential`` model expecting integer targets
        (``sparse_categorical_crossentropy``).
    """
    layers = [
        Embedding(vocab_size, 10, input_length=max_length),
        SimpleRNN(50, return_sequences=False),
        Dense(vocab_size, activation='softmax'),
    ]
    model = Sequential(layers)
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model

# 3. Training and Prediction
def train_and_predict(sentences, predict_sentence="The cat sat on"):
    """Train the next-word model on ``sentences`` and complete a prompt.

    Args:
        sentences: Training sentences.
        predict_sentence: Prompt whose next word should be predicted.

    Returns:
        str: The vocabulary word the model scores highest as the next word.
    """
    # Preprocess data
    X, y, tokenizer, max_length, vocab_size = preprocess_data(sentences)

    # Build and train model
    model = build_model(vocab_size, max_length)
    model.fit(X, y, epochs=100, verbose=0)

    # Encode and left-pad the prompt the same way as the training prefixes
    predict_seq = tokenizer.texts_to_sequences([predict_sentence])[0]
    predict_seq = pad_sequences([predict_seq], maxlen=max_length, padding='pre')

    prediction = model.predict(predict_seq, verbose=0)

    # Output index 0 is the padding id and has no entry in index_word, so a
    # plain argmax landing on it would raise KeyError. Choose among real word
    # ids (1..vocab_size-1) only.
    word_scores = prediction[0][1:]
    predicted_word_idx = int(np.argmax(word_scores)) + 1
    predicted_word = tokenizer.index_word[predicted_word_idx]

    return predicted_word

# Main execution
def main():
    """Train the model, print its completion of the prompt and the vocabulary."""
    prompt = "The cat sat on"
    predicted_word = train_and_predict(sentences, prompt)

    print(f"Input sentence: '{prompt}'")
    print(f"Predicted next word: {predicted_word}")
    print(f"Complete prediction: '{prompt} {predicted_word}'")

    # A fresh tokenizer is fitted only to display the word -> id mapping
    vocabulary_tokenizer = Tokenizer()
    vocabulary_tokenizer.fit_on_texts(sentences)
    print("\nVocabulary:", vocabulary_tokenizer.word_index)

if __name__ == "__main__":
    # Seeding NumPy alone leaves TensorFlow's weight initialisation and batch
    # shuffling unseeded, so the predicted word could change between runs.
    # set_random_seed seeds Python's `random`, NumPy and the Keras backend.
    # (Some GPU kernels may still be nondeterministic.)
    from tensorflow.keras.utils import set_random_seed

    np.random.seed(42)
    set_random_seed(42)
    main()



Input sentence: 'The cat sat on'
Predicted next word: the
Complete prediction: 'The cat sat on the'

Vocabulary: {'the': 1, 'cat': 2, 'sat': 3, 'on': 4, 'mat': 5, 'dog': 6, 'rug': 7, 'bird': 8, 'flew': 9, 'in': 10, 'sky': 11, 'jumped': 12, 'over': 13, 'fence': 14}


In [None]:
##3- Develop a sequence generator for Indian Classical Music Raga using an RNN to predict the
#next note in a series. The notes involved are Sa, Re, Ga, Ma, Pa, Dha, Ni, and Sha.

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import random

# 1. Dataset Preparation
# All eight notes, in ascending order
notes = "Sa Re Ga Ma Pa Dha Ni Sha".split()

# Simplified note sets for the ragas this notebook can generate
raga_scales = {
    raga: spelled_out.split()
    for raga, spelled_out in (
        ('Bhairav', "Sa Re Ga Ma Pa Dha Ni"),
        ('Bhopali', "Sa Re Ga Pa Dha"),
        ('Bageshree', "Sa Ga Ma Dha Ni"),
    )
}

# 2. Preprocess Data
def create_sequences(scale, sequence_length=5, num_sequences=100):
    """Sample note windows from ``scale`` together with the note that follows.

    The scale is treated as circular: a window may start at any note and
    wraps past the last note back to the first, exactly as the target note
    does. Without wrapping, a scale no longer than ``sequence_length`` would
    yield the same single window for every sample, and a scale shorter than
    ``sequence_length`` could not be sampled at all.

    Args:
        scale: Ordered list of note names.
        sequence_length: Number of notes in each input window.
        num_sequences: Number of (window, next note) pairs to draw.

    Returns:
        tuple: ``(sequences, next_notes, note_to_int, int_to_note)`` where
        ``sequences`` is an integer array of shape
        ``(num_sequences, sequence_length)``, ``next_notes`` an integer array
        of shape ``(num_sequences,)`` and the two dicts map between note
        names and their positions in ``scale``.

    Raises:
        ValueError: If ``scale`` is empty.
    """
    if not scale:
        raise ValueError("scale must contain at least one note")

    # Convert notes to numerical values
    note_to_int = {note: i for i, note in enumerate(scale)}
    int_to_note = {i: note for i, note in enumerate(scale)}

    scale_size = len(scale)
    sequences = []
    next_notes = []

    for _ in range(num_sequences):
        start_idx = random.randint(0, scale_size - 1)
        # Walk the scale circularly from the chosen start
        window = [scale[(start_idx + step) % scale_size] for step in range(sequence_length)]
        next_note = scale[(start_idx + sequence_length) % scale_size]

        sequences.append([note_to_int[note] for note in window])
        next_notes.append(note_to_int[next_note])

    return np.array(sequences), np.array(next_notes), note_to_int, int_to_note

# 3. Model Building
def build_model(input_shape, num_notes):
    """Build and compile a two-layer LSTM classifier over note sequences.

    Args:
        input_shape: ``(timesteps, features)`` shape of one input sample.
        num_notes: Number of output classes.

    Returns:
        A compiled Keras ``Sequential`` model expecting integer targets
        (``sparse_categorical_crossentropy``).
    """
    layers = [
        # The first LSTM returns the full sequence so the second can consume it
        LSTM(128, input_shape=input_shape, return_sequences=True),
        Dropout(0.2),
        LSTM(64),
        Dropout(0.2),
        Dense(32, activation='relu'),
        Dense(num_notes, activation='softmax'),
    ]
    model = Sequential(layers)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
    return model

# 4. Training and Generation
def train_and_generate(raga_name, epochs=20, sequence_length=5, generate_length=20):
    """Train a model on one raga's scale and generate a note sequence from it.

    Args:
        raga_name: Key into ``raga_scales``; unknown names fall back to the
            full ``notes`` list.
        epochs: Number of training epochs.
        sequence_length: Number of notes the model sees per prediction.
        generate_length: Number of notes to append after the seed window.

    Returns:
        list[str]: The seed notes followed by the generated notes.
    """
    scale = raga_scales[raga_name] if raga_name in raga_scales else notes
    X, y, _, int_to_note = create_sequences(scale, sequence_length)

    # LSTM input layout: [samples, timesteps, features], one feature per step
    X = X.reshape((X.shape[0], X.shape[1], 1))

    model = build_model((sequence_length, 1), len(scale))
    model.fit(X, y, epochs=epochs, batch_size=32, verbose=1)

    # Start from a randomly chosen training window
    seed_idx = random.randint(0, len(X) - 1)
    generated = X[seed_idx, :, 0].tolist()

    # Repeatedly predict the next note from the most recent window and append it
    for _ in range(generate_length):
        recent = generated[-sequence_length:]
        x_pred = np.array(recent).reshape(1, sequence_length, 1)
        note_scores = model.predict(x_pred, verbose=0)
        generated.append(np.argmax(note_scores))

    return [int_to_note[index] for index in generated]

# Main execution
def main():
    """Train and print a generated sequence for each of the three ragas."""
    print("Generating Raga Sequences:")
    print("-" * 50)

    for raga in ('Bhairav', 'Bhopali', 'Bageshree'):
        print(f"\nRaga {raga}:")
        generated_notes = train_and_generate(raga, epochs=10)
        print("Generated sequence:", " -> ".join(generated_notes))

        # Show which notes this raga was allowed to use
        scale_listing = ', '.join(raga_scales[raga])
        print(f"Scale used: {scale_listing}")

if __name__ == "__main__":
    # Seed every random source for reproducibility. `random` and NumPy drive
    # the data sampling; TensorFlow's weight initialisation and batch
    # shuffling have their own generator, which set_random_seed covers (it
    # also re-seeds `random` and NumPy with the same value).
    # (Some GPU kernels may still be nondeterministic.)
    from tensorflow.keras.utils import set_random_seed

    np.random.seed(42)
    random.seed(42)
    set_random_seed(42)

    main()

Generating Raga Sequences:
--------------------------------------------------

Raga Bhairav:
Epoch 1/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 20ms/step - loss: 1.9417
Epoch 2/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 1.8027
Epoch 3/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 1.7048
Epoch 4/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 1.6088
Epoch 5/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 1.4700
Epoch 6/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 1.4568
Epoch 7/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 1.3678
Epoch 8/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 1.2316
Epoch 9/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 1.1901
Epoch 10/10
[1m4/4[0m 