In [None]:
import pandas as pd
import librosa
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split

# Load the metadata table: SCENE values are used to build the audio file names,
# and the Sarcasm column supplies the target labels.
data = pd.read_csv("data.csv")
scenes, labels = data["SCENE"], data["Sarcasm"].values

# Step 1: Find the longest utterance clip so every clip can be padded to it
def get_max_duration(scenes, sr=22050):
    """Return the duration, in seconds, of the longest utterance clip.

    Args:
        scenes: Iterable of scene identifiers; each one is expanded to
            ``audio_utterance/<scene>_u.wav``.
        sr: Sample rate passed to ``librosa.load`` and used to convert the
            sample count into seconds.

    Returns:
        The largest clip duration found, or ``0`` when ``scenes`` is empty.
    """
    def clip_seconds(scene_id):
        samples, _ = librosa.load("audio_utterance/" + scene_id + "_u.wav", sr=sr)
        return librosa.get_duration(y=samples, sr=sr)

    # Seeding the candidates with 0 keeps the empty-input result at 0.
    return max([0] + [clip_seconds(scene_id) for scene_id in scenes])

# Longest utterance clip in seconds; the spectrogram helper defined below uses
# this value as its default pad/truncate length.
max_duration = get_max_duration(scenes=scenes)
print(f"Maximum audio duration: {max_duration} seconds")

# Step 2: Convert audio to fixed-size mel spectrograms based on max duration
def audio_to_mel_spectrogram(file_path, n_mels=126, sr=22050, duration=max_duration):
    """Load one utterance clip and return its mel spectrogram in decibels.

    Args:
        file_path: Scene identifier; expanded to
            ``audio_utterance/<file_path>_u.wav``.
        n_mels: Number of mel bands in the spectrogram.
        sr: Sample rate used when loading the audio.
        duration: Target clip length in seconds. The default is bound when this
            ``def`` executes, so ``max_duration`` must already be computed.

    Returns:
        The dB-scaled mel spectrogram of the clip after it has been
        zero-padded or truncated to ``duration`` seconds.
    """
    wav_path = "audio_utterance/" + file_path + "_u.wav"
    y, sr = librosa.load(wav_path, sr=sr)
    target_length = int(duration * sr)

    # Positive shortfall: clip is too short, append zeros. Negative: cut the tail.
    shortfall = target_length - len(y)
    if shortfall > 0:
        y = np.pad(y, (0, shortfall), mode='constant')
    elif shortfall < 0:
        y = y[:target_length]

    # Power spectrogram, then dB relative to this clip's own peak
    power = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
    return librosa.power_to_db(power, ref=np.max)

# Step 3: Prepare the dataset
mel_spectrograms = [audio_to_mel_spectrogram(scene) for scene in scenes]
print("here")

# Bring every spectrogram to the widest frame count so they stack into one array
max_frames = max(mel.shape[1] for mel in mel_spectrograms)
mel_spectrograms = [
    librosa.util.fix_length(mel, size=max_frames, axis=1)
    for mel in mel_spectrograms
]
X = np.array(mel_spectrograms)[..., np.newaxis]  # trailing channel axis for CNN input
y = np.array(labels)
print("split")

# Fixed seed and test fraction: the context-audio cell repeats this exact call so
# its rows end up in the same train/val partitions as the labels saved here.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Cache the splits on disk for the training cells
for out_path, split in (
    ("x_train_sar.npy", X_train),
    ("x_val_sar.npy", X_val),
    ("y_train.npy", y_train),
    ("y_val.npy", y_val),
):
    np.save(out_path, split)



In [None]:
import pandas as pd
import librosa
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split

# Re-read the metadata table so this cell can run on its own: SCENE values are
# used to build the audio file names, Sarcasm holds the target labels.
data = pd.read_csv("data.csv")
scenes, labels = data["SCENE"], data["Sarcasm"].values

# Step 1: Find the longest context clip so every clip can be padded to it
def get_max_duration(scenes, sr=22050):
    """Return the duration, in seconds, of the longest context clip.

    Args:
        scenes: Iterable of scene identifiers; each one is expanded to
            ``audio_context/<scene>_c.wav``.
        sr: Sample rate passed to ``librosa.load`` and used to convert the
            sample count into seconds.

    Returns:
        The largest clip duration found, or ``0`` when ``scenes`` is empty.
    """
    def clip_seconds(scene_id):
        samples, _ = librosa.load("audio_context/" + scene_id + "_c.wav", sr=sr)
        return librosa.get_duration(y=samples, sr=sr)

    # Seeding the candidates with 0 keeps the empty-input result at 0.
    return max([0] + [clip_seconds(scene_id) for scene_id in scenes])

# Longest context clip in seconds; the spectrogram helper defined below uses
# this value as its default pad/truncate length.
max_duration = get_max_duration(scenes=scenes)
print(f"Maximum audio duration: {max_duration} seconds")

# Step 2: Convert context audio to fixed-size mel spectrograms based on max duration
def audio_to_mel_spectrogram(file_path, n_mels=126, sr=22050, duration=max_duration):
    """Load one context clip and return its mel spectrogram in decibels.

    This cell produces the context features (saved as ``x_*_con.npy``), so the
    clip is read from ``audio_context/<file_path>_c.wav`` -- the same files
    ``get_max_duration`` in this cell measures. The previous version read the
    utterance file (``audio_utterance/<file_path>_u.wav``), which made the
    "context" features a re-padded copy of the utterance audio.

    Args:
        file_path: Scene identifier used to build the context file name.
        n_mels: Number of mel bands in the spectrogram.
        sr: Sample rate used when loading the audio.
        duration: Target clip length in seconds. The default is bound when this
            ``def`` executes, so ``max_duration`` must already be computed and
            the ``def`` must be re-run if ``max_duration`` changes.

    Returns:
        The dB-scaled mel spectrogram of the clip after it has been
        zero-padded or truncated to ``duration`` seconds.
    """
    n_f = "audio_context/" + file_path + "_c.wav"
    y, sr = librosa.load(n_f, sr=sr)
    target_length = int(duration * sr)

    # Pad with trailing zeros or truncate so every clip has the same length
    if len(y) > target_length:
        y = y[:target_length]
    elif len(y) < target_length:
        y = np.pad(y, (0, target_length - len(y)), mode='constant')

    # Power mel spectrogram, then dB relative to this clip's own peak
    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
    return mel_spec_db

# Step 3: Prepare the dataset
mel_spectrograms = [audio_to_mel_spectrogram(scene) for scene in scenes]
print("here")

# Bring every spectrogram to the widest frame count so they stack into one array
max_frames_1 = max(mel.shape[1] for mel in mel_spectrograms)
mel_spectrograms = [
    librosa.util.fix_length(mel, size=max_frames_1, axis=1)
    for mel in mel_spectrograms
]
X = np.array(mel_spectrograms)[..., np.newaxis]  # trailing channel axis for CNN input
y = np.array(labels)
print("split")

# Same test fraction and seed as the utterance cell, so these rows land in the
# same train/val partitions as the labels that cell saved.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Only the features are cached here; this cell does not write the label files
for out_path, split in (("x_train_con.npy", X_train), ("x_val_con.npy", X_val)):
    np.save(out_path, split)




In [None]:
import numpy as np
from tensorflow.keras import layers, models
import tensorflow as tf
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report

# Step 1: Load the cached spectrogram tensors and labels from disk
X_context_train = np.load("x_train_con.npy")
X_context_val = np.load("x_val_con.npy")
X_sarcasm_train = np.load("x_train_sar.npy")
X_sarcasm_val = np.load("x_val_sar.npy")
y_train = np.load("y_train.npy")
y_val = np.load("y_val.npy")

# Step 2: Take the per-sample input shapes from the loaded arrays.
# The .npy files are written with the layout (samples, n_mels, frames, 1), so no
# reshape is required, and this cell no longer needs `max_frames` /
# `max_frames_1` to still exist in the kernel from the preprocessing cells.
context_shape = X_context_train.shape[1:]
sarcasm_shape = X_sarcasm_train.shape[1:]
assert X_context_val.shape[1:] == context_shape, "context train/val shapes differ"
assert X_sarcasm_val.shape[1:] == sarcasm_shape, "sarcasm train/val shapes differ"

# Step 3: Build the two-branch dense (ANN) model
context_input = layers.Input(shape=context_shape, name="context_input")
sarcasm_input = layers.Input(shape=sarcasm_shape, name="sarcasm_input")

# Dense layers need one flat feature vector per sample
context_branch = layers.Flatten()(context_input)
sarcasm_branch = layers.Flatten()(sarcasm_input)

# Context branch: 128 -> 64, ReLU
context_branch = layers.Dense(128, activation="relu")(context_branch)
context_branch = layers.Dense(64, activation="relu")(context_branch)

# Sarcasm (utterance) branch: 128 -> 64, ReLU
sarcasm_branch = layers.Dense(128, activation="relu")(sarcasm_branch)
sarcasm_branch = layers.Dense(64, activation="relu")(sarcasm_branch)

# Fuse both branches; dropout is disabled in this configuration
combined = layers.Concatenate()([context_branch, sarcasm_branch])
x = layers.Dense(64, activation="relu")(combined)
output = layers.Dense(1, activation="sigmoid")(x)

# Create and compile the model (binary classification, Adam with lr=1e-4)
model = models.Model(inputs=[context_input, sarcasm_input], outputs=output)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              loss="binary_crossentropy",
              metrics=["accuracy"])

# Model summary
model.summary()

# Step 4: Train the model
history = model.fit(
    [X_context_train, X_sarcasm_train], y_train,
    epochs=10,
    batch_size=32,
    validation_data=([X_context_val, X_sarcasm_val], y_val),
)

# Step 5: Evaluate the model on the validation split
loss, accuracy = model.evaluate([X_context_val, X_sarcasm_val], y_val)
print(f"Validation Loss: {loss}")
print(f"Validation Accuracy: {accuracy}")

# Step 6: Generate predictions and calculate metrics
y_pred = model.predict([X_context_val, X_sarcasm_val])  # sigmoid probabilities
y_pred_classes = (y_pred > 0.5).astype(int)  # threshold at 0.5 -> class 0 or 1

# Precision, recall and F1 for the positive class
precision = precision_score(y_val, y_pred_classes)
recall = recall_score(y_val, y_pred_classes)
f1 = f1_score(y_val, y_pred_classes)

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

# Per-class breakdown
print("\nClassification Report:\n")
print(classification_report(y_val, y_pred_classes))

In [None]:
import numpy as np
from tensorflow.keras import layers, models
import tensorflow as tf
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report

# Step 1: Load the cached spectrogram tensors and labels from disk
X_context_train = np.load("x_train_con.npy")
X_context_val = np.load("x_val_con.npy")
X_sarcasm_train = np.load("x_train_sar.npy")
X_sarcasm_val = np.load("x_val_sar.npy")
y_train = np.load("y_train.npy")
y_val = np.load("y_val.npy")

# Step 2: Take the per-sample input shapes from the loaded arrays.
# The .npy files are written with the layout (samples, n_mels, frames, 1), so no
# reshape is required, and this cell no longer needs `max_frames` /
# `max_frames_1` to still exist in the kernel from the preprocessing cells.
context_shape = X_context_train.shape[1:]
sarcasm_shape = X_sarcasm_train.shape[1:]
assert X_context_val.shape[1:] == context_shape, "context train/val shapes differ"
assert X_sarcasm_val.shape[1:] == sarcasm_shape, "sarcasm train/val shapes differ"

# Step 3: Build the two-branch dense (ANN) model
context_input = layers.Input(shape=context_shape, name="context_input")
sarcasm_input = layers.Input(shape=sarcasm_shape, name="sarcasm_input")

# Dense layers need one flat feature vector per sample
context_branch = layers.Flatten()(context_input)
sarcasm_branch = layers.Flatten()(sarcasm_input)

# Context branch: 128 -> 64, ReLU
context_branch = layers.Dense(128, activation="relu")(context_branch)
context_branch = layers.Dense(64, activation="relu")(context_branch)

# Sarcasm (utterance) branch: 128 -> 64, ReLU
sarcasm_branch = layers.Dense(128, activation="relu")(sarcasm_branch)
sarcasm_branch = layers.Dense(64, activation="relu")(sarcasm_branch)

# Fuse both branches; dropout is disabled in this configuration
combined = layers.Concatenate()([context_branch, sarcasm_branch])
x = layers.Dense(64, activation="relu")(combined)
output = layers.Dense(1, activation="sigmoid")(x)

# Create and compile the model (this run uses Adam with lr=1e-3)
model = models.Model(inputs=[context_input, sarcasm_input], outputs=output)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss="binary_crossentropy",
              metrics=["accuracy"])

# Model summary
model.summary()

# Step 4: Train the model
history = model.fit(
    [X_context_train, X_sarcasm_train], y_train,
    epochs=10,
    batch_size=32,
    validation_data=([X_context_val, X_sarcasm_val], y_val),
)

# Step 5: Evaluate the model on the validation split
loss, accuracy = model.evaluate([X_context_val, X_sarcasm_val], y_val)
print(f"Validation Loss: {loss}")
print(f"Validation Accuracy: {accuracy}")

# Step 6: Generate predictions and calculate metrics
y_pred = model.predict([X_context_val, X_sarcasm_val])  # sigmoid probabilities
y_pred_classes = (y_pred > 0.5).astype(int)  # threshold at 0.5 -> class 0 or 1

# Precision, recall and F1 for the positive class
precision = precision_score(y_val, y_pred_classes)
recall = recall_score(y_val, y_pred_classes)
f1 = f1_score(y_val, y_pred_classes)

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

# Per-class breakdown
print("\nClassification Report:\n")
print(classification_report(y_val, y_pred_classes))

In [None]:
import numpy as np
from tensorflow.keras import layers, models
import tensorflow as tf
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report

# Step 1: Load the cached spectrogram tensors and labels from disk
X_context_train = np.load("x_train_con.npy")
X_context_val = np.load("x_val_con.npy")
X_sarcasm_train = np.load("x_train_sar.npy")
X_sarcasm_val = np.load("x_val_sar.npy")
y_train = np.load("y_train.npy")
y_val = np.load("y_val.npy")

# Step 2: Take the per-sample input shapes from the loaded arrays.
# The .npy files are written with the layout (samples, n_mels, frames, 1), so no
# reshape is required, and this cell no longer needs `max_frames` /
# `max_frames_1` to still exist in the kernel from the preprocessing cells.
context_shape = X_context_train.shape[1:]
sarcasm_shape = X_sarcasm_train.shape[1:]
assert X_context_val.shape[1:] == context_shape, "context train/val shapes differ"
assert X_sarcasm_val.shape[1:] == sarcasm_shape, "sarcasm train/val shapes differ"

# Step 3: Build the two-branch dense (ANN) model
context_input = layers.Input(shape=context_shape, name="context_input")
sarcasm_input = layers.Input(shape=sarcasm_shape, name="sarcasm_input")

# Dense layers need one flat feature vector per sample
context_branch = layers.Flatten()(context_input)
sarcasm_branch = layers.Flatten()(sarcasm_input)

# Context branch: 128 -> 64, ReLU
context_branch = layers.Dense(128, activation="relu")(context_branch)
context_branch = layers.Dense(64, activation="relu")(context_branch)

# Sarcasm (utterance) branch: 128 -> 64, ReLU
sarcasm_branch = layers.Dense(128, activation="relu")(sarcasm_branch)
sarcasm_branch = layers.Dense(64, activation="relu")(sarcasm_branch)

# Fuse both branches; dropout is disabled in this configuration
combined = layers.Concatenate()([context_branch, sarcasm_branch])
x = layers.Dense(64, activation="relu")(combined)
output = layers.Dense(1, activation="sigmoid")(x)

# Create and compile the model (this run uses Adam with lr=1e-2)
model = models.Model(inputs=[context_input, sarcasm_input], outputs=output)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
              loss="binary_crossentropy",
              metrics=["accuracy"])

# Model summary
model.summary()

# Step 4: Train the model
history = model.fit(
    [X_context_train, X_sarcasm_train], y_train,
    epochs=10,
    batch_size=32,
    validation_data=([X_context_val, X_sarcasm_val], y_val),
)

# Step 5: Evaluate the model on the validation split
loss, accuracy = model.evaluate([X_context_val, X_sarcasm_val], y_val)
print(f"Validation Loss: {loss}")
print(f"Validation Accuracy: {accuracy}")

# Step 6: Generate predictions and calculate metrics
y_pred = model.predict([X_context_val, X_sarcasm_val])  # sigmoid probabilities
y_pred_classes = (y_pred > 0.5).astype(int)  # threshold at 0.5 -> class 0 or 1

# Precision, recall and F1 for the positive class
precision = precision_score(y_val, y_pred_classes)
recall = recall_score(y_val, y_pred_classes)
f1 = f1_score(y_val, y_pred_classes)

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

# Per-class breakdown
print("\nClassification Report:\n")
print(classification_report(y_val, y_pred_classes))

In [None]:
import numpy as np
from tensorflow.keras import layers, models
import tensorflow as tf
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report

# Step 1: Load the cached spectrogram tensors and labels from disk
X_context_train = np.load("x_train_con.npy")
X_context_val = np.load("x_val_con.npy")
X_sarcasm_train = np.load("x_train_sar.npy")
X_sarcasm_val = np.load("x_val_sar.npy")
y_train = np.load("y_train.npy")
y_val = np.load("y_val.npy")

# Step 2: Take the per-sample input shapes from the loaded arrays.
# The .npy files are written with the layout (samples, n_mels, frames, 1), so no
# reshape is required, and this cell no longer needs `max_frames` /
# `max_frames_1` to still exist in the kernel from the preprocessing cells.
context_shape = X_context_train.shape[1:]
sarcasm_shape = X_sarcasm_train.shape[1:]
assert X_context_val.shape[1:] == context_shape, "context train/val shapes differ"
assert X_sarcasm_val.shape[1:] == sarcasm_shape, "sarcasm train/val shapes differ"

# Step 3: Build the two-branch dense (ANN) model
context_input = layers.Input(shape=context_shape, name="context_input")
sarcasm_input = layers.Input(shape=sarcasm_shape, name="sarcasm_input")

# Dense layers need one flat feature vector per sample
context_branch = layers.Flatten()(context_input)
sarcasm_branch = layers.Flatten()(sarcasm_input)

# Context branch: deeper funnel 128 -> 64 -> 32 -> 8, ReLU
context_branch = layers.Dense(128, activation="relu")(context_branch)
context_branch = layers.Dense(64, activation="relu")(context_branch)
context_branch = layers.Dense(32, activation="relu")(context_branch)
context_branch = layers.Dense(8, activation="relu")(context_branch)

# Sarcasm (utterance) branch: same funnel 128 -> 64 -> 32 -> 8, ReLU
sarcasm_branch = layers.Dense(128, activation="relu")(sarcasm_branch)
sarcasm_branch = layers.Dense(64, activation="relu")(sarcasm_branch)
sarcasm_branch = layers.Dense(32, activation="relu")(sarcasm_branch)
sarcasm_branch = layers.Dense(8, activation="relu")(sarcasm_branch)

# Fuse both branches; dropout is disabled in this configuration
combined = layers.Concatenate()([context_branch, sarcasm_branch])
x = layers.Dense(64, activation="relu")(combined)
output = layers.Dense(1, activation="sigmoid")(x)

# Create and compile the model (binary classification, Adam with lr=1e-4)
model = models.Model(inputs=[context_input, sarcasm_input], outputs=output)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              loss="binary_crossentropy",
              metrics=["accuracy"])

# Model summary
model.summary()

# Step 4: Train the model
history = model.fit(
    [X_context_train, X_sarcasm_train], y_train,
    epochs=10,
    batch_size=32,
    validation_data=([X_context_val, X_sarcasm_val], y_val),
)

# Step 5: Evaluate the model on the validation split
loss, accuracy = model.evaluate([X_context_val, X_sarcasm_val], y_val)
print(f"Validation Loss: {loss}")
print(f"Validation Accuracy: {accuracy}")

# Step 6: Generate predictions and calculate metrics
y_pred = model.predict([X_context_val, X_sarcasm_val])  # sigmoid probabilities
y_pred_classes = (y_pred > 0.5).astype(int)  # threshold at 0.5 -> class 0 or 1

# Precision, recall and F1 for the positive class
precision = precision_score(y_val, y_pred_classes)
recall = recall_score(y_val, y_pred_classes)
f1 = f1_score(y_val, y_pred_classes)

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

# Per-class breakdown
print("\nClassification Report:\n")
print(classification_report(y_val, y_pred_classes))

In [None]:
import numpy as np
from tensorflow.keras import layers, models
import tensorflow as tf
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report

# Step 1: Load the cached spectrogram tensors and labels from disk
X_context_train = np.load("x_train_con.npy")
X_context_val = np.load("x_val_con.npy")
X_sarcasm_train = np.load("x_train_sar.npy")
X_sarcasm_val = np.load("x_val_sar.npy")
y_train = np.load("y_train.npy")
y_val = np.load("y_val.npy")

# Step 2: Take the per-sample input shapes from the loaded arrays.
# The .npy files are written with the layout (samples, n_mels, frames, 1), so no
# reshape is required, and this cell no longer needs `max_frames` /
# `max_frames_1` to still exist in the kernel from the preprocessing cells.
context_shape = X_context_train.shape[1:]
sarcasm_shape = X_sarcasm_train.shape[1:]
assert X_context_val.shape[1:] == context_shape, "context train/val shapes differ"
assert X_sarcasm_val.shape[1:] == sarcasm_shape, "sarcasm train/val shapes differ"

# Step 3: Build the two-branch dense (ANN) model
context_input = layers.Input(shape=context_shape, name="context_input")
sarcasm_input = layers.Input(shape=sarcasm_shape, name="sarcasm_input")

# Dense layers need one flat feature vector per sample
context_branch = layers.Flatten()(context_input)
sarcasm_branch = layers.Flatten()(sarcasm_input)

# Context branch: 64 -> 32, ReLU
context_branch = layers.Dense(64, activation="relu")(context_branch)
context_branch = layers.Dense(32, activation="relu")(context_branch)

# Sarcasm (utterance) branch: 64 -> 32, ReLU
sarcasm_branch = layers.Dense(64, activation="relu")(sarcasm_branch)
sarcasm_branch = layers.Dense(32, activation="relu")(sarcasm_branch)

# Fuse both branches, then chain the fusion layers 64 -> 32 -> 8.
# Each layer must consume the previous layer's output: feeding `combined` to
# all three left only the 8-unit layer connected to the output.
combined = layers.Concatenate()([context_branch, sarcasm_branch])
x = layers.Dense(64, activation="relu")(combined)
x = layers.Dense(32, activation="relu")(x)
x = layers.Dense(8, activation="relu")(x)
# Dropout is disabled in this configuration
output = layers.Dense(1, activation="sigmoid")(x)

# Create and compile the model (binary classification, Adam with lr=1e-4)
model = models.Model(inputs=[context_input, sarcasm_input], outputs=output)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              loss="binary_crossentropy",
              metrics=["accuracy"])

# Model summary
model.summary()

# Step 4: Train the model
history = model.fit(
    [X_context_train, X_sarcasm_train], y_train,
    epochs=10,
    batch_size=32,
    validation_data=([X_context_val, X_sarcasm_val], y_val),
)

# Step 5: Evaluate the model on the validation split
loss, accuracy = model.evaluate([X_context_val, X_sarcasm_val], y_val)
print(f"Validation Loss: {loss}")
print(f"Validation Accuracy: {accuracy}")

# Step 6: Generate predictions and calculate metrics
y_pred = model.predict([X_context_val, X_sarcasm_val])  # sigmoid probabilities
y_pred_classes = (y_pred > 0.5).astype(int)  # threshold at 0.5 -> class 0 or 1

# Precision, recall and F1 for the positive class
precision = precision_score(y_val, y_pred_classes)
recall = recall_score(y_val, y_pred_classes)
f1 = f1_score(y_val, y_pred_classes)

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

# Per-class breakdown
print("\nClassification Report:\n")
print(classification_report(y_val, y_pred_classes))

In [None]:
import numpy as np
from tensorflow.keras import layers, models
import tensorflow as tf
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report

# Step 1: Load the cached spectrogram tensors and labels from disk
X_context_train = np.load("x_train_con.npy")
X_context_val = np.load("x_val_con.npy")
X_sarcasm_train = np.load("x_train_sar.npy")
X_sarcasm_val = np.load("x_val_sar.npy")
y_train = np.load("y_train.npy")
y_val = np.load("y_val.npy")

# Step 2: Take the per-sample input shapes from the loaded arrays.
# The .npy files are written with the layout (samples, n_mels, frames, 1), so no
# reshape is required, and this cell no longer needs `max_frames` /
# `max_frames_1` to still exist in the kernel from the preprocessing cells.
context_shape = X_context_train.shape[1:]
sarcasm_shape = X_sarcasm_train.shape[1:]
assert X_context_val.shape[1:] == context_shape, "context train/val shapes differ"
assert X_sarcasm_val.shape[1:] == sarcasm_shape, "sarcasm train/val shapes differ"

# Step 3: Build the two-branch dense (ANN) model
context_input = layers.Input(shape=context_shape, name="context_input")
sarcasm_input = layers.Input(shape=sarcasm_shape, name="sarcasm_input")

# Dense layers need one flat feature vector per sample
context_branch = layers.Flatten()(context_input)
sarcasm_branch = layers.Flatten()(sarcasm_input)

# Context branch: 64 -> 32, ReLU
context_branch = layers.Dense(64, activation="relu")(context_branch)
context_branch = layers.Dense(32, activation="relu")(context_branch)

# Sarcasm (utterance) branch: 64 -> 32, ReLU
sarcasm_branch = layers.Dense(64, activation="relu")(sarcasm_branch)
sarcasm_branch = layers.Dense(32, activation="relu")(sarcasm_branch)

# Fuse both branches, then chain the fusion layers 64 -> 32 -> 8.
# Each layer must consume the previous layer's output: feeding `combined` to
# all three left only the 8-unit layer connected to the output.
combined = layers.Concatenate()([context_branch, sarcasm_branch])
x = layers.Dense(64, activation="relu")(combined)
x = layers.Dense(32, activation="relu")(x)
x = layers.Dense(8, activation="relu")(x)
# Dropout (rate 0.2) on the fused representation to reduce overfitting
x = layers.Dropout(0.2)(x)
output = layers.Dense(1, activation="sigmoid")(x)

# Create and compile the model (binary classification, Adam with lr=1e-4)
model = models.Model(inputs=[context_input, sarcasm_input], outputs=output)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              loss="binary_crossentropy",
              metrics=["accuracy"])

# Model summary
model.summary()

# Step 4: Train the model
history = model.fit(
    [X_context_train, X_sarcasm_train], y_train,
    epochs=10,
    batch_size=32,
    validation_data=([X_context_val, X_sarcasm_val], y_val),
)

# Step 5: Evaluate the model on the validation split
loss, accuracy = model.evaluate([X_context_val, X_sarcasm_val], y_val)
print(f"Validation Loss: {loss}")
print(f"Validation Accuracy: {accuracy}")

# Step 6: Generate predictions and calculate metrics
y_pred = model.predict([X_context_val, X_sarcasm_val])  # sigmoid probabilities
y_pred_classes = (y_pred > 0.5).astype(int)  # threshold at 0.5 -> class 0 or 1

# Precision, recall and F1 for the positive class
precision = precision_score(y_val, y_pred_classes)
recall = recall_score(y_val, y_pred_classes)
f1 = f1_score(y_val, y_pred_classes)

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

# Per-class breakdown
print("\nClassification Report:\n")
print(classification_report(y_val, y_pred_classes))

In [None]:
import numpy as np
from tensorflow.keras import layers, models
import tensorflow as tf
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report

# Step 1: Load the cached spectrogram tensors and labels from disk
X_context_train = np.load("x_train_con.npy")
X_context_val = np.load("x_val_con.npy")
X_sarcasm_train = np.load("x_train_sar.npy")
X_sarcasm_val = np.load("x_val_sar.npy")
y_train = np.load("y_train.npy")
y_val = np.load("y_val.npy")

# Step 2: Take the per-sample input shapes from the loaded arrays.
# The .npy files are written with the layout (samples, n_mels, frames, 1), so no
# reshape is required, and this cell no longer needs `max_frames` /
# `max_frames_1` to still exist in the kernel from the preprocessing cells.
context_shape = X_context_train.shape[1:]
sarcasm_shape = X_sarcasm_train.shape[1:]
assert X_context_val.shape[1:] == context_shape, "context train/val shapes differ"
assert X_sarcasm_val.shape[1:] == sarcasm_shape, "sarcasm train/val shapes differ"

# Step 3: Build the two-branch dense (ANN) model
context_input = layers.Input(shape=context_shape, name="context_input")
sarcasm_input = layers.Input(shape=sarcasm_shape, name="sarcasm_input")

# Dense layers need one flat feature vector per sample
context_branch = layers.Flatten()(context_input)
sarcasm_branch = layers.Flatten()(sarcasm_input)

# Context branch: 128 -> 64, ReLU
context_branch = layers.Dense(128, activation="relu")(context_branch)
context_branch = layers.Dense(64, activation="relu")(context_branch)

# Sarcasm (utterance) branch: 128 -> 64, ReLU
sarcasm_branch = layers.Dense(128, activation="relu")(sarcasm_branch)
sarcasm_branch = layers.Dense(64, activation="relu")(sarcasm_branch)

# Fuse both branches
combined = layers.Concatenate()([context_branch, sarcasm_branch])
x = layers.Dense(64, activation="relu")(combined)
# Dropout (rate 0.2) on the fused representation to reduce overfitting
x = layers.Dropout(0.2)(x)
output = layers.Dense(1, activation="sigmoid")(x)

# Create and compile the model (binary classification, Adam with lr=1e-4)
model = models.Model(inputs=[context_input, sarcasm_input], outputs=output)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              loss="binary_crossentropy",
              metrics=["accuracy"])

# Model summary
model.summary()

# Step 4: Train the model (this run uses 100 epochs)
history = model.fit(
    [X_context_train, X_sarcasm_train], y_train,
    epochs=100,
    batch_size=32,
    validation_data=([X_context_val, X_sarcasm_val], y_val),
)

# Step 5: Evaluate the model on the validation split
loss, accuracy = model.evaluate([X_context_val, X_sarcasm_val], y_val)
print(f"Validation Loss: {loss}")
print(f"Validation Accuracy: {accuracy}")

# Step 6: Generate predictions and calculate metrics
y_pred = model.predict([X_context_val, X_sarcasm_val])  # sigmoid probabilities
y_pred_classes = (y_pred > 0.5).astype(int)  # threshold at 0.5 -> class 0 or 1

# Precision, recall and F1 for the positive class
precision = precision_score(y_val, y_pred_classes)
recall = recall_score(y_val, y_pred_classes)
f1 = f1_score(y_val, y_pred_classes)

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

# Per-class breakdown
print("\nClassification Report:\n")
print(classification_report(y_val, y_pred_classes))

In [None]:
import numpy as np
from tensorflow.keras import layers, models
import tensorflow as tf
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report

# Step 1: Load the cached spectrogram tensors and labels from disk
X_context_train = np.load("x_train_con.npy")
X_context_val = np.load("x_val_con.npy")
X_sarcasm_train = np.load("x_train_sar.npy")
X_sarcasm_val = np.load("x_val_sar.npy")
y_train = np.load("y_train.npy")
y_val = np.load("y_val.npy")

# Step 2: Take the per-sample input shapes from the loaded arrays.
# The .npy files are written with the layout (samples, n_mels, frames, 1), so no
# reshape is required, and this cell no longer needs `max_frames` /
# `max_frames_1` to still exist in the kernel from the preprocessing cells.
context_shape = X_context_train.shape[1:]
sarcasm_shape = X_sarcasm_train.shape[1:]
assert X_context_val.shape[1:] == context_shape, "context train/val shapes differ"
assert X_sarcasm_val.shape[1:] == sarcasm_shape, "sarcasm train/val shapes differ"

# Step 3: Build the two-branch dense (ANN) model
context_input = layers.Input(shape=context_shape, name="context_input")
sarcasm_input = layers.Input(shape=sarcasm_shape, name="sarcasm_input")

# Dense layers need one flat feature vector per sample
context_branch = layers.Flatten()(context_input)
sarcasm_branch = layers.Flatten()(sarcasm_input)

# Context branch: 128 -> 64, tanh activations in this variant
context_branch = layers.Dense(128, activation="tanh")(context_branch)
context_branch = layers.Dense(64, activation="tanh")(context_branch)

# Sarcasm (utterance) branch: 128 -> 64, tanh
sarcasm_branch = layers.Dense(128, activation="tanh")(sarcasm_branch)
sarcasm_branch = layers.Dense(64, activation="tanh")(sarcasm_branch)

# Fuse both branches; dropout is disabled in this configuration
combined = layers.Concatenate()([context_branch, sarcasm_branch])
x = layers.Dense(64, activation="tanh")(combined)
output = layers.Dense(1, activation="sigmoid")(x)

# Create and compile the model (binary classification, Adam with lr=1e-4)
model = models.Model(inputs=[context_input, sarcasm_input], outputs=output)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              loss="binary_crossentropy",
              metrics=["accuracy"])

# Model summary
model.summary()

# Step 4: Train the model (this run uses 100 epochs)
history = model.fit(
    [X_context_train, X_sarcasm_train], y_train,
    epochs=100,
    batch_size=32,
    validation_data=([X_context_val, X_sarcasm_val], y_val),
)

# Step 5: Evaluate the model on the validation split
loss, accuracy = model.evaluate([X_context_val, X_sarcasm_val], y_val)
print(f"Validation Loss: {loss}")
print(f"Validation Accuracy: {accuracy}")

# Step 6: Generate predictions and calculate metrics
y_pred = model.predict([X_context_val, X_sarcasm_val])  # sigmoid probabilities
y_pred_classes = (y_pred > 0.5).astype(int)  # threshold at 0.5 -> class 0 or 1

# Precision, recall and F1 for the positive class
precision = precision_score(y_val, y_pred_classes)
recall = recall_score(y_val, y_pred_classes)
f1 = f1_score(y_val, y_pred_classes)

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

# Per-class breakdown
print("\nClassification Report:\n")
print(classification_report(y_val, y_pred_classes))

In [None]:
import numpy as np
from tensorflow.keras import layers, models
import tensorflow as tf
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report

# Step 1: Load the cached spectrogram tensors and labels from disk
X_context_train = np.load("x_train_con.npy")
X_context_val = np.load("x_val_con.npy")
X_sarcasm_train = np.load("x_train_sar.npy")
X_sarcasm_val = np.load("x_val_sar.npy")
y_train = np.load("y_train.npy")
y_val = np.load("y_val.npy")

# Step 2: Take the per-sample input shapes from the loaded arrays.
# The .npy files are written with the layout (samples, n_mels, frames, 1), so no
# reshape is required, and this cell no longer needs `max_frames` /
# `max_frames_1` to still exist in the kernel from the preprocessing cells.
context_shape = X_context_train.shape[1:]
sarcasm_shape = X_sarcasm_train.shape[1:]
assert X_context_val.shape[1:] == context_shape, "context train/val shapes differ"
assert X_sarcasm_val.shape[1:] == sarcasm_shape, "sarcasm train/val shapes differ"

# Step 3: Build the two-branch dense (ANN) model
context_input = layers.Input(shape=context_shape, name="context_input")
sarcasm_input = layers.Input(shape=sarcasm_shape, name="sarcasm_input")

# Dense layers need one flat feature vector per sample
context_branch = layers.Flatten()(context_input)
sarcasm_branch = layers.Flatten()(sarcasm_input)

# Context branch: deeper funnel 128 -> 64 -> 32 -> 8, ReLU
context_branch = layers.Dense(128, activation="relu")(context_branch)
context_branch = layers.Dense(64, activation="relu")(context_branch)
context_branch = layers.Dense(32, activation="relu")(context_branch)
context_branch = layers.Dense(8, activation="relu")(context_branch)

# Sarcasm (utterance) branch: same funnel 128 -> 64 -> 32 -> 8, ReLU
sarcasm_branch = layers.Dense(128, activation="relu")(sarcasm_branch)
sarcasm_branch = layers.Dense(64, activation="relu")(sarcasm_branch)
sarcasm_branch = layers.Dense(32, activation="relu")(sarcasm_branch)
sarcasm_branch = layers.Dense(8, activation="relu")(sarcasm_branch)

# Fuse both branches; dropout is disabled in this configuration
combined = layers.Concatenate()([context_branch, sarcasm_branch])
x = layers.Dense(64, activation="relu")(combined)
output = layers.Dense(1, activation="sigmoid")(x)

# Create and compile the model (this run uses Adam with lr=1e-3)
model = models.Model(inputs=[context_input, sarcasm_input], outputs=output)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss="binary_crossentropy",
              metrics=["accuracy"])

# Model summary
model.summary()

# Step 4: Train the model (this run uses 100 epochs)
history = model.fit(
    [X_context_train, X_sarcasm_train], y_train,
    epochs=100,
    batch_size=32,
    validation_data=([X_context_val, X_sarcasm_val], y_val),
)

# Step 5: Evaluate the model on the validation split
loss, accuracy = model.evaluate([X_context_val, X_sarcasm_val], y_val)
print(f"Validation Loss: {loss}")
print(f"Validation Accuracy: {accuracy}")

# Step 6: Generate predictions and calculate metrics
y_pred = model.predict([X_context_val, X_sarcasm_val])  # sigmoid probabilities
y_pred_classes = (y_pred > 0.5).astype(int)  # threshold at 0.5 -> class 0 or 1

# Precision, recall and F1 for the positive class
precision = precision_score(y_val, y_pred_classes)
recall = recall_score(y_val, y_pred_classes)
f1 = f1_score(y_val, y_pred_classes)

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

# Per-class breakdown
print("\nClassification Report:\n")
print(classification_report(y_val, y_pred_classes))

### CNN

In [None]:
import pandas as pd
import librosa
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split


# Load the cached spectrogram tensors and labels from disk
X_context_train = np.load("x_train_con.npy")
X_context_val = np.load("x_val_con.npy")
X_sarcasm_train = np.load("x_train_sar.npy")
X_sarcasm_val = np.load("x_val_sar.npy")
y_train = np.load("y_train.npy")
y_val = np.load("y_val.npy")

# Step 4: Build the multimodal CNN model
# Input shapes are read from the data instead of a hard-coded (126, 512, 1), so
# the model matches whatever frame count the cached arrays actually have.
context_input = layers.Input(shape=X_context_train.shape[1:], name="context_input")
sarcasm_input = layers.Input(shape=X_sarcasm_train.shape[1:], name="sarcasm_input")

# Context branch: one 3x3 conv block (64 filters) + 2x2 max-pool, then flatten
context_branch = layers.Conv2D(64, (3, 3), activation="relu")(context_input)
context_branch = layers.MaxPooling2D((2, 2))(context_branch)
context_branch = layers.Flatten()(context_branch)

# Sarcasm (utterance) branch: same single conv block
sarcasm_branch = layers.Conv2D(64, (3, 3), activation="relu")(sarcasm_input)
sarcasm_branch = layers.MaxPooling2D((2, 2))(sarcasm_branch)
sarcasm_branch = layers.Flatten()(sarcasm_branch)

# Fuse both branches; dropout is disabled in this configuration
combined = layers.Concatenate()([context_branch, sarcasm_branch])
x = layers.Dense(64, activation="relu")(combined)
output = layers.Dense(1, activation="sigmoid")(x)

# Create and compile the model (binary classification, Adam with lr=1e-3)
model = models.Model(inputs=[context_input, sarcasm_input], outputs=output)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss="binary_crossentropy", metrics=["accuracy"])
model.summary()

# Step 5: Train the model
history = model.fit(
    [X_context_train, X_sarcasm_train], y_train,
    epochs=10,
    batch_size=32,
    validation_data=([X_context_val, X_sarcasm_val], y_val),
)

# Step 6: Evaluate the model on the validation split
loss, accuracy = model.evaluate([X_context_val, X_sarcasm_val], y_val)
print(f"Validation Loss: {loss}")
print(f"Validation Accuracy: {accuracy}")

# Saving stays off by default, as before, but the "saved" message is now only
# printed when the file is actually written.
SAVE_MODEL = False
if SAVE_MODEL:
    model.save("context_sarcasm_model.h5")
    print("Model saved as 'context_sarcasm_model.h5'")
