## Step 1: Set Up Packages

In [11]:
!pip install langdetect



In [2]:
import pandas as pd
import numpy as np

from langdetect import detect

from sklearn.model_selection import train_test_split

from keras.utils import to_categorical
from keras.layers import Dense, TimeDistributed
from keras.models import load_model, Model
from keras.optimizers import Adam

## Step 2: Load Title Column

In [3]:
# Only the 'title' column is needed for language detection, so skip the rest.
df = pd.read_csv(
    filepath_or_buffer='Online Content.csv',
    usecols=['title'],
)

## Step 3: Map Detected Language

In [4]:
# Integer class id for each supported ISO 639-1 language code; the id is the
# position of the code in this tuple.
language_map = {
    code: class_id
    for class_id, code in enumerate(('en', 'fr', 'es', 'de', 'it'))
}

# Collects one integer label per title.
languages = list()

In [5]:
# Detect the language of every title, encode it with `language_map`, and
# persist the resulting integer labels to 'language_labels.npy'.
from langdetect import DetectorFactory

# langdetect is non-deterministic by default; fixing the seed before the first
# detect() call makes the generated labels reproducible across runs.
DetectorFactory.seed = 0

# Start from an empty list so that re-running this cell does not append a
# second copy of the labels to a list left over from a previous run.
languages = []

for title in df['title']:
    try:
        lang = detect(title)
        # Languages outside `language_map` are encoded as -1 (unknown).
        lang_label = language_map.get(lang, -1)
    except Exception:
        # Best effort: any title that detect() cannot process is also
        # labelled -1 instead of aborting the whole loop.
        lang_label = -1
    languages.append(lang_label)

language_labels = np.array(languages)

np.save('language_labels.npy', language_labels)

## Step 4: Load Data

In [6]:
# Load the model inputs and the targets for both tasks from disk.
X = np.load('audio.npy')
y_speech = np.load('labels.npy')
y_language = np.load('language_labels.npy')

# 'language_labels.npy' holds ids 0..4 for the five mapped languages and -1
# for titles whose language was unknown or could not be detected.
NUM_KNOWN_LANGUAGES = 5
UNKNOWN_LANGUAGE_CLASS = NUM_KNOWN_LANGUAGES

# to_categorical() indexes the one-hot matrix with the raw label, so a -1
# would wrap around to the last column and be silently treated as class 4.
# Give unknown samples their own explicit class instead.
y_language = np.where(y_language < 0, UNKNOWN_LANGUAGE_CLASS, y_language)

y_language_onehot = to_categorical(y_language, num_classes=NUM_KNOWN_LANGUAGES + 1)

# Split all three arrays together so each sample keeps its own targets;
# the fixed random_state keeps the 80/20 split reproducible.
X_train, X_val, y_speech_train, y_speech_val, y_lang_train, y_lang_val = train_test_split(
    X, y_speech, y_language_onehot, test_size=0.2, random_state=44)

## Step 5: Load Pre-trained Model

In [7]:
# Restore the previously trained model that the new output heads are attached to.
pretrained_model = load_model(
    filepath='New_ASR_model.keras',
)

## Step 6: Update Model Architecture

In [8]:
# Build a two-headed model on top of the pretrained network: one head for
# speech recognition and one for language detection.

# Reuse the pretrained model's input tensor so the new model takes the same input.
input_layer = pretrained_model.input

# Shared features: the output of the second-to-last pretrained layer.
# NOTE(review): assumed to be the last LSTM layer and to return a full sequence
# (both heads below rely on a time axis) - confirm with pretrained_model.summary().
lstm_layer = pretrained_model.layers[-2].output

# Task 1: Speech Recognition Output - a softmax at every time step, sized from
# the last axis of the speech labels.
speech_output = TimeDistributed(Dense(y_speech.shape[2], activation='softmax'), name='speech_output')(lstm_layer)

# Task 2: Language Detection Output - a single softmax per sample, computed
# from the features of the last time step only.
language_output = Dense(y_language_onehot.shape[1], activation='softmax', name='language_output')(lstm_layer[:, -1])

# Create the new Model instance
new_model = Model(inputs=input_layer, outputs=[speech_output, language_output])

# Compile the new model. Metrics are given per output name: a flat list is
# ambiguous for a multi-output model and is not accepted by every Keras version.
# The loss weights make the speech task dominate the combined loss (0.7 vs 0.3).
new_model.compile(optimizer=Adam(learning_rate=0.001),
                  loss={'speech_output': 'categorical_crossentropy', 'language_output': 'categorical_crossentropy'},
                  metrics={'speech_output': ['accuracy'], 'language_output': ['accuracy']},
                  loss_weights={'speech_output': 0.7, 'language_output': 0.3})

## Step 7: Train Model

In [9]:
# Targets are keyed by output-layer name so each head is trained against its
# own labels.
train_targets = {'speech_output': y_speech_train, 'language_output': y_lang_train}
val_targets = {'speech_output': y_speech_val, 'language_output': y_lang_val}

# Train both heads jointly and keep the per-epoch history for later inspection.
history = new_model.fit(
    x=X_train,
    y=train_targets,
    validation_data=(X_val, val_targets),
    epochs=10,
    batch_size=32,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Step 8: Save Trained Model

In [10]:
# Persist the trained multi-task model in the native Keras format.
new_model.save(
    filepath='multitask_ASR_model.keras',
)