In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score, accuracy_score

# ---------------------------------------------------------------------------
# Data: context / sarcasm input arrays and labels, already split on disk.
# ---------------------------------------------------------------------------
X_context_train, X_context_val = np.load("x_context_train.npy"), np.load("x_context_val.npy")
X_sarcasm_train, X_sarcasm_val = np.load("x_sarcasm_train.npy"), np.load("x_sarcasm_val.npy")
y_train, y_val = np.load("y_train.npy"), np.load("y_val.npy")

# The speaker column is only available in the original CSV.
data = pd.read_csv("C:/Users/lenovo/Downloads/data.csv")

# NOTE(review): speakers are matched to samples purely by position — the first
# len(y_train) CSV rows go to train and the next len(y_val) rows to validation.
# This is only correct if the .npy split preserved the CSV row order; confirm
# against the preprocessing step.
n_train = len(y_train)
n_samples = n_train + len(y_val)
speakers = data["SPEAKER"][:n_samples]

# One-hot encode the speakers as a dense array. The encoder is fitted on the
# train and validation rows together, so both splits share the same columns.
encoder = OneHotEncoder(sparse_output=False)
speaker_column = speakers.values.reshape(-1, 1)
speaker_one_hot = encoder.fit_transform(speaker_column)

X_speaker_train = speaker_one_hot[:n_train]
X_speaker_val = speaker_one_hot[n_train:]


# ---------------------------------------------------------------------------
# Model: two convolutional branches plus a small dense speaker branch.
# ---------------------------------------------------------------------------
def _conv_branch(tensor):
    """Apply Conv2D(128) -> MaxPool -> Conv2D(64) -> MaxPool -> Flatten to `tensor`."""
    tensor = layers.Conv2D(128, (3, 3), activation="relu")(tensor)
    tensor = layers.MaxPooling2D((2, 2))(tensor)
    tensor = layers.Conv2D(64, (3, 3), activation="relu")(tensor)
    tensor = layers.MaxPooling2D((2, 2))(tensor)
    return layers.Flatten()(tensor)


context_input = layers.Input(shape=(128, 216, 1), name="context_input")
sarcasm_input = layers.Input(shape=(128, 216, 1), name="sarcasm_input")
# Width of the speaker input equals the number of one-hot columns.
speaker_input = layers.Input(shape=(speaker_one_hot.shape[1],), name="speaker_input")

context_branch = _conv_branch(context_input)
sarcasm_branch = _conv_branch(sarcasm_input)
speaker_branch = layers.Dense(32, activation="relu")(speaker_input)

# Fuse the three branches and classify with a single sigmoid unit.
combined = layers.Concatenate()([context_branch, sarcasm_branch, speaker_branch])
x = layers.Dense(64, activation="relu")(combined)
output = layers.Dense(1, activation="sigmoid")(x)

model = models.Model(
    inputs=[context_input, sarcasm_input, speaker_input],
    outputs=output,
)
model.compile(
    loss="binary_crossentropy",
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    metrics=["accuracy"],
)
model.summary()

# ---------------------------------------------------------------------------
# Training and evaluation (input order must match the model's `inputs` list).
# ---------------------------------------------------------------------------
train_inputs = [X_context_train, X_sarcasm_train, X_speaker_train]
val_inputs = [X_context_val, X_sarcasm_val, X_speaker_val]

history = model.fit(
    train_inputs,
    y_train,
    validation_data=(val_inputs, y_val),
    epochs=10,
    batch_size=64,
)

loss, accuracy = model.evaluate(val_inputs, y_val)
print(f"Validation Loss: {loss}")
print(f"Validation Accuracy: {accuracy}")

# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 predictions.
y_pred_probs = model.predict(val_inputs)
y_pred = (y_pred_probs > 0.5).astype(int)

# `accuracy` is recomputed here from the thresholded predictions.
precision = precision_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred)
accuracy = accuracy_score(y_val, y_pred)

print("\nEvaluation Metrics:")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")
print(f"Accuracy: {accuracy}")

print("\nClassification Report:")
print(classification_report(y_val, y_pred, target_names=["Not Sarcastic", "Sarcastic"]))

# Persist the trained model in HDF5 format.
model.save("context_sarcasm_model_with_speaker.h5")
print("Model saved as 'context_sarcasm_model_with_speaker.h5'")


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, classification_report

# Whether to write the trained model to disk at the end of this cell.
# Saving is off by default; the final message below reports what actually happened.
SAVE_MODEL = False
MODEL_PATH = "context_sarcasm_model.h5"

# Load preprocessed data (already split into train / validation on disk)
X_context_train = np.load("x_context_train.npy")
X_context_val = np.load("x_context_val.npy")
X_sarcasm_train = np.load("x_sarcasm_train.npy")
X_sarcasm_val = np.load("x_sarcasm_val.npy")
y_train = np.load("y_train.npy")
y_val = np.load("y_val.npy")

# Step 4: Build the two-branch CNN model (one branch per input)
context_input = layers.Input(shape=(128, 216, 1), name="context_input")
sarcasm_input = layers.Input(shape=(128, 216, 1), name="sarcasm_input")

# Context branch: Conv(128) -> pool -> Conv(64) -> pool -> flatten
context_branch = layers.Conv2D(128, (3, 3), activation="relu")(context_input)
context_branch = layers.MaxPooling2D((2, 2))(context_branch)
context_branch = layers.Conv2D(64, (3, 3), activation="relu")(context_branch)
context_branch = layers.MaxPooling2D((2, 2))(context_branch)
context_branch = layers.Flatten()(context_branch)

# Sarcasm branch: same architecture, separate weights
sarcasm_branch = layers.Conv2D(128, (3, 3), activation="relu")(sarcasm_input)
sarcasm_branch = layers.MaxPooling2D((2, 2))(sarcasm_branch)
sarcasm_branch = layers.Conv2D(64, (3, 3), activation="relu")(sarcasm_branch)
sarcasm_branch = layers.MaxPooling2D((2, 2))(sarcasm_branch)
sarcasm_branch = layers.Flatten()(sarcasm_branch)

# Combine both branches and classify with a single sigmoid unit
combined = layers.Concatenate()([context_branch, sarcasm_branch])
x = layers.Dense(64, activation="relu")(combined)
# x = layers.Dropout(0.2)(x)  # currently disabled
output = layers.Dense(1, activation="sigmoid")(x)

# Create and compile the model
model = models.Model(inputs=[context_input, sarcasm_input], outputs=output)
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
model.summary()

# Step 5: Train the model
history = model.fit(
    [X_context_train, X_sarcasm_train], y_train,
    epochs=10,
    batch_size=32,
    validation_data=([X_context_val, X_sarcasm_val], y_val),
)

# Step 6: Evaluate the model
loss, accuracy = model.evaluate([X_context_val, X_sarcasm_val], y_val)
print(f"Validation Loss: {loss}")
print(f"Validation Accuracy: {accuracy}")

# Step 7: Calculate F1 Score, Precision, Recall, and Accuracy
y_pred_probs = model.predict([X_context_val, X_sarcasm_val])  # Predicted probabilities
y_pred = (y_pred_probs > 0.5).astype(int)  # Threshold at 0.5 for binary predictions

# Calculate metrics (`accuracy` is recomputed from the thresholded predictions)
precision = precision_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred)
accuracy = accuracy_score(y_val, y_pred)

# Print metrics
print("\nEvaluation Metrics:")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")
print(f"Accuracy: {accuracy}")

# Generate a detailed classification report
print("\nClassification Report:")
print(classification_report(y_val, y_pred, target_names=["Not Sarcastic", "Sarcastic"]))

# Save the model only when requested, and report what actually happened.
# (Previously the "Model saved" message was printed although the save call
# was commented out.)
if SAVE_MODEL:
    model.save(MODEL_PATH)
    print(f"Model saved as '{MODEL_PATH}'")
else:
    print(f"Model not saved (SAVE_MODEL is False); set SAVE_MODEL = True to write '{MODEL_PATH}'")


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, classification_report

# ---------------------------------------------------------------------------
# Data: audio feature array and labels, then an 80/20 split with a fixed seed.
# ---------------------------------------------------------------------------
X_audio = np.load('C:/Users/lenovo/Downloads/ipwork/spectrogram_data/X.npy')
y_audio = np.load('C:/Users/lenovo/Downloads/ipwork/Y.npy')

X_audio_train, X_audio_test, y_train, y_test = train_test_split(
    X_audio,
    y_audio,
    test_size=0.2,
    random_state=42,
)

# ---------------------------------------------------------------------------
# Model: a Sequential CNN feature extractor wrapped in a functional model.
# ---------------------------------------------------------------------------
audio_input = layers.Input(shape=(128, 216, 1))

# Three conv layers (pooling after the first two), flattened into a 64-unit dense layer.
audio_model = models.Sequential()
audio_model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 216, 1)))
audio_model.add(layers.MaxPooling2D((2, 2)))
audio_model.add(layers.Conv2D(64, (3, 3), activation='relu'))
audio_model.add(layers.MaxPooling2D((2, 2)))
audio_model.add(layers.Conv2D(64, (3, 3), activation='relu'))
audio_model.add(layers.Flatten())
audio_model.add(layers.Dense(64, activation='relu'))

audio_output = audio_model(audio_input)

# Classification head: dense layers of 128, 64, 32 and 16 units, then one sigmoid unit.
x = audio_output
for units in (128, 64, 32, 16):
    x = layers.Dense(units, activation='relu')(x)
output = layers.Dense(1, activation='sigmoid')(x)

model = models.Model(inputs=audio_input, outputs=output)
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

# ---------------------------------------------------------------------------
# Training. The held-out split doubles as validation data, so the metrics
# below are reported on the same samples used for validation during training.
# ---------------------------------------------------------------------------
history = model.fit(
    X_audio_train,
    y_train,
    batch_size=32,
    epochs=20,
    validation_data=(X_audio_test, y_test),
)

# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 predictions.
y_pred_probs = model.predict(X_audio_test)
y_pred = (y_pred_probs > 0.5).astype(int)

precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print("\nEvaluation Metrics:")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")
print(f"Accuracy: {accuracy}")

print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=["Class 0", "Class 1"]))


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# ---------------------------------------------------------------------------
# Data: audio feature array, labels, and two text columns from the CSV.
# ---------------------------------------------------------------------------
X_audio = np.load('C:/Users/lenovo/Downloads/ipwork/spectrogram_data/X.npy')
y_audio = np.load('C:/Users/lenovo/Downloads/ipwork/Y.npy')

csv_data = pd.read_csv("C:/Users/lenovo/Downloads/data.csv")
# The `_7` suffix is only a name: this is column index 2 (the third column).
text_data_7 = csv_data.iloc[:, 2].values
# Column index 9 (the tenth column).
text_data_10 = csv_data.iloc[:, 9].values

# Each column gets its own tokenizer (ids capped by num_words=10000) and is
# padded/truncated to 100 tokens.
tokenizer_7 = Tokenizer(num_words=10000)
tokenizer_7.fit_on_texts(text_data_7)
text_sequences_7 = tokenizer_7.texts_to_sequences(text_data_7)
X_text_7 = pad_sequences(text_sequences_7, maxlen=100)

tokenizer_10 = Tokenizer(num_words=10000)
tokenizer_10.fit_on_texts(text_data_10)
text_sequences_10 = tokenizer_10.texts_to_sequences(text_data_10)
X_text_10 = pad_sequences(text_sequences_10, maxlen=100)

# Place the two padded columns side by side: 100 + 100 = 200 ids per sample.
# NOTE(review): the tokenizers are fitted independently, so the same id can
# stand for different words in the two halves, yet both halves go through one
# shared Embedding below.
X_text_combined = np.concatenate([X_text_7, X_text_10], axis=1)

# One 80/20 split with a fixed seed keeps audio, text and labels aligned.
X_audio_train, X_audio_test, X_text_train, X_text_test, y_train, y_test = train_test_split(
    X_audio,
    X_text_combined,
    y_audio,
    test_size=0.2,
    random_state=42,
)

# ---------------------------------------------------------------------------
# Audio branch: three 2x2 conv layers (pooling after the first two) -> Dense(64).
# ---------------------------------------------------------------------------
audio_input = layers.Input(shape=(128, 216, 1))

audio_model = models.Sequential()
audio_model.add(layers.Conv2D(32, (2, 2), activation='relu', input_shape=(128, 216, 1)))
audio_model.add(layers.MaxPooling2D((2, 2)))
audio_model.add(layers.Conv2D(64, (2, 2), activation='relu'))
audio_model.add(layers.MaxPooling2D((2, 2)))
audio_model.add(layers.Conv2D(64, (2, 2), activation='relu'))
audio_model.add(layers.Flatten())
audio_model.add(layers.Dense(64, activation='relu'))

audio_output = audio_model(audio_input)

# ---------------------------------------------------------------------------
# Text branch: Embedding -> SimpleRNN(64) -> Dense(64) over the 200 combined ids.
# ---------------------------------------------------------------------------
text_input = layers.Input(shape=(200,))

text_model = models.Sequential()
text_model.add(layers.Embedding(input_dim=10000, output_dim=128, input_length=200))
text_model.add(layers.SimpleRNN(64))
text_model.add(layers.Dense(64, activation='relu'))

text_output = text_model(text_input)

# ---------------------------------------------------------------------------
# Fusion head: concatenate both branches, then a funnel of dense layers.
# ---------------------------------------------------------------------------
combined_input = layers.concatenate([audio_output, text_output])

# The three widest layers are each followed by Dropout(0.1).
x = combined_input
for units in (256, 128, 64):
    x = layers.Dense(units, activation='relu')(x)
    x = layers.Dropout(0.1)(x)

# The two narrowest layers have no dropout.
for units in (32, 16):
    x = layers.Dense(units, activation='relu')(x)

output = layers.Dense(1, activation='sigmoid')(x)

model = models.Model(inputs=[audio_input, text_input], outputs=output)

optimizer = tf.keras.optimizers.Adam(learning_rate=0.0009)
model.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)
model.summary()

# ---------------------------------------------------------------------------
# Training. The held-out split doubles as validation data, so the metrics
# below are reported on the same samples used for validation during training.
# ---------------------------------------------------------------------------
history = model.fit(
    [X_audio_train, X_text_train],
    y_train,
    batch_size=32,
    epochs=20,
    validation_data=([X_audio_test, X_text_test], y_test),
)

# Predict probabilities, then overwrite `y_pred` with the 0.5-thresholded labels.
y_pred = model.predict([X_audio_test, X_text_test])
y_pred = (y_pred > 0.5).astype(int)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Text preprocessing settings shared by the tokenizers and the text model.
MAX_WORDS = 100000  # Tokenizer `num_words` cap (only ids below this are emitted)
SEQ_LEN = 200       # Padded length of each text column

# Load audio data
X_audio = np.load('C:/Users/lenovo/Downloads/ipwork/spectrogram_data/X.npy')
y_audio = np.load('C:/Users/lenovo/Downloads/ipwork/Y.npy')

# Load text data from CSV
csv_data = pd.read_csv("C:/Users/lenovo/Downloads/data.csv")
text_data_7 = csv_data.iloc[:, 2].values  # Column index 2 (3rd column); `_7` is only a name
text_data_10 = csv_data.iloc[:, 9].values  # Column index 9 (10th column)

# Tokenize and pad text data (3rd column)
tokenizer_7 = Tokenizer(num_words=MAX_WORDS)
tokenizer_7.fit_on_texts(text_data_7)
text_sequences_7 = tokenizer_7.texts_to_sequences(text_data_7)
X_text_7 = pad_sequences(text_sequences_7, maxlen=SEQ_LEN)

# Tokenize and pad text data (10th column)
tokenizer_10 = Tokenizer(num_words=MAX_WORDS)
tokenizer_10.fit_on_texts(text_data_10)
text_sequences_10 = tokenizer_10.texts_to_sequences(text_data_10)
X_text_10 = pad_sequences(text_sequences_10, maxlen=SEQ_LEN)

# Concatenate the processed columns horizontally: SEQ_LEN + SEQ_LEN ids per sample.
# NOTE(review): the tokenizers are fitted independently, so the same id can
# stand for different words in the two halves, yet both halves go through one
# shared Embedding below.
X_text_combined = np.hstack([X_text_7, X_text_10])

# Size of the embedding table. Word ids start at 1 (0 is padding) and the
# tokenizer only emits ids below `num_words`, so the largest possible id is
# min(MAX_WORDS - 1, len(word_index)). The Embedding's input_dim must exceed
# it; the previous hard-coded 10000 did not match the 100000-word tokenizer cap.
vocab_size = min(
    MAX_WORDS,
    max(len(tokenizer_7.word_index), len(tokenizer_10.word_index)) + 1,
)

# Split the data into training and testing sets (one call keeps all arrays aligned)
X_audio_train, X_audio_test, X_text_train, X_text_test, y_train, y_test = train_test_split(
    X_audio, X_text_combined, y_audio, test_size=0.2, random_state=42)

# Building the audio model

# Input layer for audio
audio_input = layers.Input(shape=(128, 216, 1))

audio_model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 216, 1)),  # First Conv Layer
    layers.MaxPooling2D((2, 2)),  # Max Pooling
    layers.Conv2D(64, (3, 3), activation='relu'),  # Second Conv Layer
    layers.MaxPooling2D((2, 2)),  # Max Pooling
    layers.Conv2D(64, (3, 3), activation='relu'),  # Third Conv Layer
    layers.Flatten(),  # Flatten the output
    layers.Dense(64, activation='relu'),  # Fully connected layer
])

audio_output = audio_model(audio_input)  # Pass the input through the audio model

# Build the text model

# Input layer for text: 2 * SEQ_LEN ids (SEQ_LEN from each text column)
text_input = layers.Input(shape=(2 * SEQ_LEN,))

text_model = models.Sequential([
    layers.Embedding(input_dim=vocab_size, output_dim=128, input_length=2 * SEQ_LEN),  # Embedding layer
    layers.SimpleRNN(64),  # RNN layer for sequence processing
    layers.Dense(64, activation='relu'),  # Fully connected layer
])

text_output = text_model(text_input)  # Pass the input through the text model

# Merge the models

# Concatenate the outputs of both models
combined_input = layers.concatenate([audio_output, text_output])

# Dense layers after combining both branches (this variant uses no dropout)
x = layers.Dense(128, activation='relu')(combined_input)
x = layers.Dense(64, activation='relu')(x)
x = layers.Dense(32, activation='relu')(x)
x = layers.Dense(16, activation='relu')(x)
output = layers.Dense(1, activation='sigmoid')(x)  # Single sigmoid unit for binary classification

# Create the final model
model = models.Model(inputs=[audio_input, text_input], outputs=output)

# Compile the model
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
model.compile(optimizer=optimizer,
              loss='binary_crossentropy',  # Binary crossentropy for binary classification
              metrics=['accuracy'])


# Print the model summary
model.summary()

# Train the model. The held-out split doubles as validation data, so the
# metrics below are reported on the same samples used for validation.
history = model.fit(
    [X_audio_train, X_text_train], y_train,
    validation_data=([X_audio_test, X_text_test], y_test),
    epochs=20,
    batch_size=32
)

# Model evaluation

# Predict probabilities on the test set
y_pred = model.predict([X_audio_test, X_text_test])

# Convert probabilities to binary values (0 or 1) with a 0.5 threshold
y_pred_binary = (y_pred > 0.5).astype(int)

# Calculate and print the evaluation metrics
accuracy = accuracy_score(y_test, y_pred_binary)
precision = precision_score(y_test, y_pred_binary)
recall = recall_score(y_test, y_pred_binary)
f1 = f1_score(y_test, y_pred_binary)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
