In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Load the preprocessed feature arrays and labels for the train/validation splits.
X_context_train = np.load("x_context_train.npy")
X_context_val = np.load("x_context_val.npy")
X_sarcasm_train = np.load("x_sarcasm_train.npy")
X_sarcasm_val = np.load("x_sarcasm_val.npy")
y_train = np.load("y_train.npy")
y_val = np.load("y_val.npy")

# Load the original data to get speaker information.
data = pd.read_csv("data.csv")
speaker_column = data["SPEAKER"]

# Speakers are paired with samples purely by position: the first len(y_train)
# rows of the CSV go with the training samples, the next len(y_val) rows with
# the validation samples.
# NOTE(review): this presumes the .npy splits were written in the same row
# order as data.csv (i.e. no shuffling) -- confirm against the preprocessing.
n_train = len(y_train)
n_samples = n_train + len(y_val)
if len(speaker_column) < n_samples:
    # Slicing past the end of a Series silently yields fewer rows; without
    # this check the problem only shows up later as a shape mismatch inside
    # np.concatenate.
    raise ValueError(
        f"data.csv provides {len(speaker_column)} speaker rows but the "
        f"train/validation labels require {n_samples}"
    )
speakers = speaker_column[:n_samples]

# One-hot encode the speakers. The encoder is fitted on the train and
# validation rows together, so both splits share the same set of columns.
encoder = OneHotEncoder(sparse_output=False)
speaker_one_hot = encoder.fit_transform(speakers.values.reshape(-1, 1))

# Split the encoded speakers back into their train / validation parts.
X_speaker_train = speaker_one_hot[:n_train]
X_speaker_val = speaker_one_hot[n_train:]

# Collapse every axis after the first so each sample is a single flat row,
# then join context, sarcasm and speaker features column-wise.
X_context_flat = X_context_train.reshape(X_context_train.shape[0], -1)
X_sarcasm_flat = X_sarcasm_train.reshape(X_sarcasm_train.shape[0], -1)
X_train_combined = np.concatenate([X_context_flat, X_sarcasm_flat, X_speaker_train], axis=1)

X_context_flat_val = X_context_val.reshape(X_context_val.shape[0], -1)
X_sarcasm_flat_val = X_sarcasm_val.reshape(X_sarcasm_val.shape[0], -1)
X_val_combined = np.concatenate([X_context_flat_val, X_sarcasm_flat_val, X_speaker_val], axis=1)

# Classifiers to compare; every estimator uses the same seed so reruns are
# reproducible. Insertion order determines training and reporting order.
RANDOM_SEED = 42
models = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=RANDOM_SEED),
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_SEED),
    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=RANDOM_SEED),
    "MLP": MLPClassifier(hidden_layer_sizes=(128, 64), max_iter=500, random_state=RANDOM_SEED),
    "SVM": SVC(kernel="linear", probability=True, random_state=RANDOM_SEED),
}

# Report label -> scoring function, in the order the scores are computed
# and later printed.
metric_fns = {
    "Precision": precision_score,
    "Recall": recall_score,
    "F1-Score": f1_score,
    "Accuracy": accuracy_score,
}

# Fit each model on the combined training features and score its
# predictions on the validation split.
results = {}
for name, model in models.items():
    print(name)
    model.fit(X_train_combined, y_train)
    y_pred = model.predict(X_val_combined)
    results[name] = {
        label: score_fn(y_val, y_pred) for label, score_fn in metric_fns.items()
    }

# Print one section per model with each score rounded to four decimals.
for model_name, scores in results.items():
    print(f"\n{model_name} Results:")
    for label, score in scores.items():
        print(f"{label}: {score:.4f}")


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import pandas as pd

# Load the preprocessed feature arrays and labels for the train/validation splits.
X_context_train = np.load("x_context_train.npy")
X_context_val = np.load("x_context_val.npy")
X_sarcasm_train = np.load("x_sarcasm_train.npy")
X_sarcasm_val = np.load("x_sarcasm_val.npy")
y_train = np.load("y_train.npy")
y_val = np.load("y_val.npy")

# Load the original data to get speaker information.
data = pd.read_csv("data.csv")
speaker_column = data["SPEAKER"]

# Speakers are paired with samples purely by position: the first len(y_train)
# rows of the CSV go with the training samples, the next len(y_val) rows with
# the validation samples.
# NOTE(review): this presumes the .npy splits were written in the same row
# order as data.csv (i.e. no shuffling) -- confirm against the preprocessing.
n_train = len(y_train)
n_samples = n_train + len(y_val)
if len(speaker_column) < n_samples:
    # Slicing past the end of a Series silently yields fewer rows; without
    # this check the problem only shows up later as a shape mismatch inside
    # np.concatenate.
    raise ValueError(
        f"data.csv provides {len(speaker_column)} speaker rows but the "
        f"train/validation labels require {n_samples}"
    )
speakers = speaker_column[:n_samples]

# One-hot encode the speakers. The encoder is fitted on the train and
# validation rows together, so both splits share the same set of columns.
encoder = OneHotEncoder(sparse_output=False)
speaker_one_hot = encoder.fit_transform(speakers.values.reshape(-1, 1))

# Split the encoded speakers back into their train / validation parts.
X_speaker_train = speaker_one_hot[:n_train]
X_speaker_val = speaker_one_hot[n_train:]

# Collapse every axis after the first so each sample is a single flat row.
X_context_flat = X_context_train.reshape(X_context_train.shape[0], -1)
X_sarcasm_flat = X_sarcasm_train.reshape(X_sarcasm_train.shape[0], -1)
X_context_flat_val = X_context_val.reshape(X_context_val.shape[0], -1)
X_sarcasm_flat_val = X_sarcasm_val.reshape(X_sarcasm_val.shape[0], -1)

# Feature matrices WITH context: context + sarcasm + speaker columns.
# NOTE(review): these are built here but not used by the no-context
# evaluation that follows in this cell -- kept in case other cells rely on them.
X_train_with_context = np.concatenate([X_context_flat, X_sarcasm_flat, X_speaker_train], axis=1)
X_val_with_context = np.concatenate([X_context_flat_val, X_sarcasm_flat_val, X_speaker_val], axis=1)

# Feature matrices WITHOUT context: sarcasm + speaker columns only.
X_train_no_context = np.concatenate([X_sarcasm_flat, X_speaker_train], axis=1)
X_val_no_context = np.concatenate([X_sarcasm_flat_val, X_speaker_val], axis=1)

# Classifiers to compare; every estimator uses the same seed so reruns are
# reproducible. Insertion order determines training and reporting order.
RANDOM_SEED = 42
models = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=RANDOM_SEED),
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_SEED),
    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=RANDOM_SEED),
    "MLP": MLPClassifier(hidden_layer_sizes=(128, 64), max_iter=500, random_state=RANDOM_SEED),
    "SVM": SVC(kernel="linear", probability=True, random_state=RANDOM_SEED),
}

# Report label -> scoring function, in the order the scores are computed
# and later printed.
metric_fns = {
    "Precision": precision_score,
    "Recall": recall_score,
    "F1-Score": f1_score,
    "Accuracy": accuracy_score,
}

# Fit each model on the sarcasm + speaker features (context left out) and
# score its predictions on the validation split.
results_no_context = {}
for name, model in models.items():
    print(f"Training {name} without context data")
    model.fit(X_train_no_context, y_train)
    y_pred = model.predict(X_val_no_context)
    results_no_context[name] = {
        label: score_fn(y_val, y_pred) for label, score_fn in metric_fns.items()
    }

# Print one section per model with each score rounded to four decimals.
print("\nResults Without Context Data:")
for model_name, scores in results_no_context.items():
    print(f"\n{model_name} Results:")
    for label, score in scores.items():
        print(f"{label}: {score:.4f}")


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import numpy as np

# Read the sarcasm features and labels for both splits; the files are loaded
# in the order listed.
X_sarcasm_train, X_sarcasm_val, y_train, y_val = map(
    np.load,
    ("x_sarcasm_train.npy", "x_sarcasm_val.npy", "y_train.npy", "y_val.npy"),
)

# Keep the sample axis and collapse all remaining axes, so every sample
# becomes one flat feature row. No context or speaker columns are added.
n_train_rows = X_sarcasm_train.shape[0]
n_val_rows = X_sarcasm_val.shape[0]
X_train_no_context_no_speaker = X_sarcasm_train.reshape((n_train_rows, -1))
X_val_no_context_no_speaker = X_sarcasm_val.reshape((n_val_rows, -1))

# Classifiers to compare; every estimator uses the same seed so reruns are
# reproducible. Insertion order determines training and reporting order.
RANDOM_SEED = 42
models = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=RANDOM_SEED),
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_SEED),
    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=RANDOM_SEED),
    "MLP": MLPClassifier(hidden_layer_sizes=(128, 64), max_iter=500, random_state=RANDOM_SEED),
    "SVM": SVC(kernel="linear", probability=True, random_state=RANDOM_SEED),
}

# Report label -> scoring function, in the order the scores are computed
# and later printed.
metric_fns = {
    "Precision": precision_score,
    "Recall": recall_score,
    "F1-Score": f1_score,
    "Accuracy": accuracy_score,
}

# Fit each model on the flattened sarcasm features alone and score its
# predictions on the validation split.
results_no_context_no_speaker = {}
for name, model in models.items():
    print(f"Training {name} with only sarcasm features (no context, no speaker info)")
    model.fit(X_train_no_context_no_speaker, y_train)
    y_pred = model.predict(X_val_no_context_no_speaker)
    results_no_context_no_speaker[name] = {
        label: score_fn(y_val, y_pred) for label, score_fn in metric_fns.items()
    }

# Print one section per model with each score rounded to four decimals.
print("\nResults Without Context or Speaker Information:")
for model_name, scores in results_no_context_no_speaker.items():
    print(f"\n{model_name} Results:")
    for label, score in scores.items():
        print(f"{label}: {score:.4f}")
