In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score, accuracy_score

# Load preprocessed data
def _load_flattened(path):
    """Load a ``.npy`` array and flatten every sample into one feature vector.

    The first axis is kept as the sample axis, so the number of rows always
    equals the number of samples stored in the file. A fixed
    ``reshape(-1, 128 * 216)`` would instead silently regroup the elements
    into a different number of rows whenever the per-sample size is not
    exactly 128 * 216.
    """
    array = np.load(path)
    return array.reshape(array.shape[0], -1)


X_context_train = _load_flattened("x_context_train.npy")
X_context_val = _load_flattened("x_context_val.npy")
X_sarcasm_train = _load_flattened("x_sarcasm_train.npy")
X_sarcasm_val = _load_flattened("x_sarcasm_val.npy")
y_train = np.load("y_train.npy")
y_val = np.load("y_val.npy")

# Load the original data to get speaker information
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
data = pd.read_csv("C:/Users/lenovo/Downloads/data.csv")

# Ensure the speaker data matches the training samples
n_train, n_val = len(y_train), len(y_val)
if len(data) < n_train + n_val:
    # Fail here with a clear message; otherwise the slice below is silently
    # too short and the mismatch only surfaces later inside model.fit().
    raise ValueError(
        f"data.csv has {len(data)} rows but {n_train + n_val} samples "
        f"({n_train} train + {n_val} val) were loaded"
    )
# NOTE(review): assumes the CSV rows are in the same order as the samples in
# the .npy files (all training rows first, then validation rows) — confirm
# against the preprocessing step that produced the splits.
speakers = data["SPEAKER"].iloc[: n_train + n_val]  # Limit to relevant rows

# One-hot encode the speaker column (dense output, one column per distinct speaker)
encoder = OneHotEncoder(sparse_output=False)
speaker_one_hot = encoder.fit_transform(speakers.values.reshape(-1, 1))

# Match the speaker one-hot encodings to training and validation splits
X_speaker_train = speaker_one_hot[:n_train]
X_speaker_val = speaker_one_hot[n_train:]

# Build the ANN model
# Two flattened feature inputs of length 128 * 216, plus the one-hot speaker
# vector whose width is taken from the encoded speaker matrix.
flat_feature_dim = 128 * 216
context_input = layers.Input(name="context_input", shape=(flat_feature_dim,))
sarcasm_input = layers.Input(name="sarcasm_input", shape=(flat_feature_dim,))
speaker_input = layers.Input(name="speaker_input", shape=(speaker_one_hot.shape[1],))

# One hidden ReLU layer per input (layers are created in input order)
context_branch = layers.Dense(units=128, activation="relu")(context_input)
sarcasm_branch = layers.Dense(units=128, activation="relu")(sarcasm_input)
speaker_branch = layers.Dense(units=32, activation="relu")(speaker_input)

# Merge the three branches, then a shared hidden layer and a sigmoid output unit
branch_outputs = [context_branch, sarcasm_branch, speaker_branch]
combined = layers.Concatenate()(branch_outputs)
x = layers.Dense(units=64, activation="relu")(combined)
output = layers.Dense(units=1, activation="sigmoid")(x)

# Create and compile the model
model_inputs = [context_input, sarcasm_input, speaker_input]
model = models.Model(inputs=model_inputs, outputs=output)

adam = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=adam, loss="binary_crossentropy", metrics=["accuracy"])
model.summary()

# Train the model
# Input arrays are listed in the same order as the model's inputs:
# context, sarcasm, speaker.
train_inputs = [X_context_train, X_sarcasm_train, X_speaker_train]
val_inputs = [X_context_val, X_sarcasm_val, X_speaker_val]

history = model.fit(
    train_inputs,
    y_train,
    validation_data=(val_inputs, y_val),
    batch_size=64,
    epochs=10,
)

# Evaluate the model on the validation split
loss, accuracy = model.evaluate(val_inputs, y_val)
print(f"Validation Loss: {loss}")
print(f"Validation Accuracy: {accuracy}")

# Predict probabilities and threshold them at 0.5 to obtain binary labels
y_pred_probs = model.predict(val_inputs)
y_pred = (y_pred_probs > 0.5).astype(int)

# Calculate metrics (`accuracy` is recomputed here and replaces the Keras value above)
precision, recall, f1, accuracy = (
    scorer(y_val, y_pred)
    for scorer in (precision_score, recall_score, f1_score, accuracy_score)
)

# Print metrics
print(
    "\nEvaluation Metrics:",
    f"Precision: {precision}",
    f"Recall: {recall}",
    f"F1 Score: {f1}",
    f"Accuracy: {accuracy}",
    sep="\n",
)

# Generate a detailed classification report
report = classification_report(y_val, y_pred, target_names=["Not Sarcastic", "Sarcastic"])
print("\nClassification Report:", report, sep="\n")

# Save the model (the .h5 extension selects the HDF5 file format)
model.save("context_sarcasm_ann_model.h5")
print("Model saved as 'context_sarcasm_ann_model.h5'")


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score, accuracy_score
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load audio data
# NOTE(review): absolute, machine-specific paths; consider a configurable data directory.
X_audio = np.load('C:/Users/lenovo/Downloads/ipwork/spectrogram_data/X.npy')
y_audio = np.load('C:/Users/lenovo/Downloads/ipwork/Y.npy')

# Load text data from CSV
csv_data = pd.read_csv("C:/Users/lenovo/Downloads/data.csv")
# Missing cells are read as float NaN, which the Keras Tokenizer cannot
# lower-case; replace them with empty strings and force everything to str.
# NOTE(review): the `_7` suffix does not match the column actually read
# (index 2); the names are kept so other cells that use them keep working.
text_data_7 = csv_data.iloc[:, 2].fillna("").astype(str).to_numpy(dtype=object)  # 3rd column
text_data_10 = csv_data.iloc[:, 9].fillna("").astype(str).to_numpy(dtype=object)  # 10th column

# Flatten audio data for ANN
X_audio_flat = X_audio.reshape(X_audio.shape[0], -1)

# Audio, text and labels are matched row by row, so their counts must agree.
n_samples = len(y_audio)
if not (X_audio_flat.shape[0] == len(csv_data) == n_samples):
    raise ValueError(
        "Found inputs with inconsistent numbers of samples: "
        f"audio={X_audio_flat.shape[0]}, text={len(csv_data)}, labels={n_samples}"
    )

# Decide the train/test rows BEFORE fitting the tokenizers, so the vocabulary
# is learned from training text only. Splitting an index array with the same
# test_size and random_state selects the same rows as splitting the arrays.
train_idx, test_idx = train_test_split(
    np.arange(n_samples), test_size=0.2, random_state=42)

# Tokenize and pad text data (3rd column); vocabulary fitted on training rows only
tokenizer_7 = Tokenizer(num_words=10000)
tokenizer_7.fit_on_texts(text_data_7[train_idx])
text_sequences_7 = tokenizer_7.texts_to_sequences(text_data_7)
X_text_7 = pad_sequences(text_sequences_7, maxlen=100)

# Tokenize and pad text data (10th column); vocabulary fitted on training rows only
tokenizer_10 = Tokenizer(num_words=10000)
tokenizer_10.fit_on_texts(text_data_10[train_idx])
text_sequences_10 = tokenizer_10.texts_to_sequences(text_data_10)
X_text_10 = pad_sequences(text_sequences_10, maxlen=100)

# Combine text data: the two padded sequences side by side (100 + 100 columns)
X_text_combined = np.hstack([X_text_7, X_text_10])

# Split data into training and testing sets using the row indices chosen above
X_audio_train, X_audio_test = X_audio_flat[train_idx], X_audio_flat[test_idx]
X_text_train, X_text_test = X_text_combined[train_idx], X_text_combined[test_idx]
y_train, y_test = y_audio[train_idx], y_audio[test_idx]

# Build the ANN model for combined audio and text
# Input widths are read from the prepared feature matrices.
audio_feature_dim = X_audio_flat.shape[1]
text_feature_dim = X_text_combined.shape[1]
audio_input = layers.Input(shape=(audio_feature_dim,))
text_input = layers.Input(shape=(text_feature_dim,))

# One 128-unit ReLU layer per modality (audio first, then text)
audio_branch = layers.Dense(units=128, activation='relu')(audio_input)
text_branch = layers.Dense(units=128, activation='relu')(text_input)

# Merge both branches, then two shrinking hidden layers and a sigmoid output
combined = layers.concatenate([audio_branch, text_branch])
x = combined
for hidden_units in (64, 32):
    x = layers.Dense(hidden_units, activation='relu')(x)
output = layers.Dense(units=1, activation='sigmoid')(x)

# Create and compile the ANN model
model_combined = models.Model(inputs=[audio_input, text_input], outputs=output)
model_combined.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print model summary
model_combined.summary()

# Train the ANN model
# The held-out test split doubles as the validation set during training.
train_inputs = [X_audio_train, X_text_train]
test_inputs = [X_audio_test, X_text_test]

history_combined = model_combined.fit(
    train_inputs,
    y_train,
    batch_size=32,
    epochs=20,
    validation_data=(test_inputs, y_test),
)

# Evaluate the model: threshold the predicted probabilities at 0.5
y_pred_probs = model_combined.predict(test_inputs)
y_pred = (y_pred_probs > 0.5).astype(int)

# Calculate metrics
precision, recall, f1, accuracy = (
    scorer(y_test, y_pred)
    for scorer in (precision_score, recall_score, f1_score, accuracy_score)
)

# Print metrics
print(
    "\nEvaluation Metrics:",
    f"Precision: {precision}",
    f"Recall: {recall}",
    f"F1 Score: {f1}",
    f"Accuracy: {accuracy}",
    sep="\n",
)

# Generate a detailed classification report
report = classification_report(y_test, y_pred, target_names=["Class 0", "Class 1"])
print("\nClassification Report:", report, sep="\n")


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score, accuracy_score

# Load audio data (features and labels)
X_audio, y_audio = map(
    np.load,
    (
        'C:/Users/lenovo/Downloads/ipwork/spectrogram_data/X.npy',
        'C:/Users/lenovo/Downloads/ipwork/Y.npy',
    ),
)

# Split audio data into training and testing sets (80/20, fixed seed)
audio_split = train_test_split(X_audio, y_audio, random_state=42, test_size=0.2)
X_audio_train, X_audio_test, y_train, y_test = audio_split

# Flatten audio data for ANN: one row per sample, all remaining axes collapsed
X_audio_train_flat, X_audio_test_flat = (
    part.reshape(part.shape[0], -1) for part in (X_audio_train, X_audio_test)
)

# Build the ANN model for audio
# Fully connected stack 128 -> 64 -> 32 -> 1, assembled layer by layer.
n_audio_features = X_audio_train_flat.shape[1]

model_audio = models.Sequential()
# First Dense layer also declares the input width
model_audio.add(layers.Dense(128, activation='relu', input_shape=(n_audio_features,)))
# Second and third Dense layers
for hidden_units in (64, 32):
    model_audio.add(layers.Dense(hidden_units, activation='relu'))
# Output layer: a single sigmoid unit for binary classification
model_audio.add(layers.Dense(1, activation='sigmoid'))

# Compile the model
model_audio.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print model summary
model_audio.summary()

# Train the ANN model
# The held-out test split doubles as the validation set during training.
test_pair = (X_audio_test_flat, y_test)
history_audio = model_audio.fit(
    X_audio_train_flat,
    y_train,
    batch_size=32,
    epochs=16,
    validation_data=test_pair,
)

# Evaluate the model on test data
loss, accuracy = model_audio.evaluate(*test_pair)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

# Generate predictions and convert probabilities to binary labels (threshold 0.5)
y_pred_probs = model_audio.predict(X_audio_test_flat)
y_pred = (y_pred_probs > 0.5).astype(int)

# Calculate metrics (`accuracy` is recomputed here and replaces the Keras value above)
precision, recall, f1, accuracy = (
    scorer(y_test, y_pred)
    for scorer in (precision_score, recall_score, f1_score, accuracy_score)
)

# Print evaluation metrics
print(
    "\nEvaluation Metrics:",
    f"Precision: {precision}",
    f"Recall: {recall}",
    f"F1 Score: {f1}",
    f"Accuracy: {accuracy}",
    sep="\n",
)

# Generate a detailed classification report
report = classification_report(y_test, y_pred, target_names=["Class 0", "Class 1"])
print("\nClassification Report:", report, sep="\n")


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, classification_report

# Load preprocessed data
(
    X_context_train,
    X_context_val,
    X_sarcasm_train,
    X_sarcasm_val,
    y_train,
    y_val,
) = map(
    np.load,
    (
        "x_context_train.npy",
        "x_context_val.npy",
        "x_sarcasm_train.npy",
        "x_sarcasm_val.npy",
        "y_train.npy",
        "y_val.npy",
    ),
)


def _flatten_samples(batch):
    """Collapse every axis after the first so each sample becomes one row."""
    return batch.reshape(batch.shape[0], -1)


# Flatten the input data for ANN (Dense layers take one vector per sample)
X_context_train_flat = _flatten_samples(X_context_train)
X_context_val_flat = _flatten_samples(X_context_val)
X_sarcasm_train_flat = _flatten_samples(X_sarcasm_train)
X_sarcasm_val_flat = _flatten_samples(X_sarcasm_val)

# Build the ANN model
def _dense_tower(tensor, widths=(128, 64, 32)):
    """Apply one ReLU Dense layer per entry of `widths`, in order, and return the result."""
    for units in widths:
        tensor = layers.Dense(units, activation="relu")(tensor)
    return tensor


# Input widths are read from the flattened training matrices
context_input = layers.Input(name="context_input", shape=(X_context_train_flat.shape[1],))
sarcasm_input = layers.Input(name="sarcasm_input", shape=(X_sarcasm_train_flat.shape[1],))

# Context branch, then sarcasm branch (each 128 -> 64 -> 32 Dense layers)
context_branch = _dense_tower(context_input)
sarcasm_branch = _dense_tower(sarcasm_input)

# Combine both branches, add a shared hidden layer and the sigmoid output unit
combined = layers.Concatenate()([context_branch, sarcasm_branch])
x = layers.Dense(units=64, activation="relu")(combined)
output = layers.Dense(units=1, activation="sigmoid")(x)

# Create and compile the model
model = models.Model(inputs=[context_input, sarcasm_input], outputs=output)
model.compile(
    loss="binary_crossentropy",
    metrics=["accuracy"],
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
)
model.summary()

# Train the model
# Input arrays are listed in the same order as the model's inputs: context, sarcasm.
train_inputs = [X_context_train_flat, X_sarcasm_train_flat]
val_inputs = [X_context_val_flat, X_sarcasm_val_flat]

history = model.fit(
    train_inputs,
    y_train,
    validation_data=(val_inputs, y_val),
    batch_size=32,
    epochs=6,
)

# Evaluate the model on the validation split
loss, accuracy = model.evaluate(val_inputs, y_val)
print(f"Validation Loss: {loss}")
print(f"Validation Accuracy: {accuracy}")

# Predict probabilities, then threshold at 0.5 to obtain binary predictions
y_pred_probs = model.predict(val_inputs)
y_pred = (y_pred_probs > 0.5).astype(int)

# Calculate metrics (`accuracy` is recomputed here and replaces the Keras value above)
precision, recall, f1, accuracy = (
    scorer(y_val, y_pred)
    for scorer in (precision_score, recall_score, f1_score, accuracy_score)
)

# Print metrics
print(
    "\nEvaluation Metrics:",
    f"Precision: {precision}",
    f"Recall: {recall}",
    f"F1 Score: {f1}",
    f"Accuracy: {accuracy}",
    sep="\n",
)

# Generate a detailed classification report
report = classification_report(y_val, y_pred, target_names=["Not Sarcastic", "Sarcastic"])
print("\nClassification Report:", report, sep="\n")

# Save the model (opt-in)
# The save call used to be commented out while the cell still printed
# "Model saved ...", so the output claimed a file that was never written.
# Saving is now controlled by an explicit flag and the message reports what
# actually happened.
# NOTE: the first cell of this notebook saves its speaker-aware model under
# this same filename; enabling the flag here overwrites that file when both
# cells run in the same working directory.
SAVE_MODEL = False
MODEL_PATH = "context_sarcasm_ann_model.h5"

if SAVE_MODEL:
    model.save(MODEL_PATH)
    print(f"Model saved as '{MODEL_PATH}'")
else:
    print(f"Model NOT saved (set SAVE_MODEL = True to write '{MODEL_PATH}')")
