### **Installing Necessary Libraries**

In [368]:
import numpy as np
import pandas as pd
import re
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score, classification_report
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

### **Data Loading and Preprocessing**

In [369]:
# Örnek: veriyi okuma
df = pd.read_excel("Tweets.xlsx")

df = df[df["Target"] != "Donald Trump"]

# Kolon isimlerinin aynen kaldığını varsayıyoruz:
# ["Tweet", "Target", "Train/Test", "Stance", "Opinion Toward", "Sentiment labels"]

def basic_preprocessing(text):
    # Küçük harfe çevir
    text = text.lower()
    # @ ile başlayan kelimeleri temizle
    text = re.sub(r'@\w+', '', text)
    # Özel karakterleri temizle, sadece harf, rakam, boşluk, '-', '_', ve '#' karakterlerini bırak
    text = re.sub(r"[^\w\s#_-]+", " ", text)
    # Fazla boşlukları sil
    text = re.sub(r"\s+", " ", text)
    text = text.strip()  # Boşlukları (trim) temizle
    return text

df["Preprocessed_Tweet"] = df["Tweet"].apply(basic_preprocessing)





### **Editing Category (Target) Classes and Stance (FAVOR / AGAINST / NEITHER) Classes**

In [370]:
# Target için LabelEncoder
le_target = LabelEncoder()
df["Target_label"] = le_target.fit_transform(df["Target"])

# Örnek olarak "Atheism=0, Hillary Clinton=1, Feminist Mov=2, ..." gibi dönüştürmüş olacak.
num_classes_target = len(df["Target_label"].unique())


# Stance için LabelEncoder
le_stance = LabelEncoder()
df["Stance_label"] = le_stance.fit_transform(df["Stance"])
num_classes_stance = len(df["Stance_label"].unique())

### **Training/Validation Spliting**

In [371]:
df_train = df[df["Train/Test"] == "Train"].copy()
df_test  = df[df["Train/Test"] == "Test"].copy()

X_train_text = df_train["Preprocessed_Tweet"].values
y_train_target = df_train["Target_label"].values
y_train_stance = df_train["Stance_label"].values

X_test_text = df_test["Preprocessed_Tweet"].values
y_test_target = df_test["Target_label"].values
y_test_stance = df_test["Stance_label"].values


X_tr_text, X_val_text, y_tr_target, y_val_target = train_test_split(
    X_train_text, y_train_target, test_size=0.2, random_state=42
)

X_tr_text_s, X_val_text_s, y_tr_stance, y_val_stance = train_test_split(
    X_train_text, y_train_stance, test_size=0.2, random_state=42
)

### **Tokenize and Sequence Texts**

In [372]:
MAX_VOCAB_SIZE = 10000   # Sözlük boyutu (isteğe göre artırılabilir)
MAX_SEQ_LEN = 30         # Maksimum token sayısı

tokenizer = Tokenizer(num_words=MAX_VOCAB_SIZE)
tokenizer.fit_on_texts(df_train["Preprocessed_Tweet"])

def text_to_seq(texts):
    sequences = tokenizer.texts_to_sequences(texts)
    padded = pad_sequences(sequences, maxlen=MAX_SEQ_LEN, padding='post', truncating='post')
    return padded

X_tr_seq = text_to_seq(X_tr_text)
X_val_seq = text_to_seq(X_val_text)
X_test_seq = text_to_seq(X_test_text)

### **Model–1: Target Classification - Model Architecture**

In [373]:
model_target = keras.models.Sequential([
    layers.Embedding(input_dim=MAX_VOCAB_SIZE, output_dim=128, 
                     input_length=MAX_SEQ_LEN),
    layers.LSTM(128, return_sequences=False),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes_target, activation='softmax')
])

model_target.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(0.001),
    metrics=['accuracy']
)



### **Traninig**

In [374]:
epochs = 20
batch_size = 32

history_target = model_target.fit(
    X_tr_seq, y_tr_target,
    validation_data=(X_val_seq, y_val_target),
    epochs=20,
    batch_size=batch_size
)



Epoch 1/20
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 35ms/step - accuracy: 0.2388 - loss: 1.5902 - val_accuracy: 0.3499 - val_loss: 1.4151
Epoch 2/20
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 30ms/step - accuracy: 0.4021 - loss: 1.2883 - val_accuracy: 0.3739 - val_loss: 1.3121
Epoch 3/20
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 30ms/step - accuracy: 0.4631 - loss: 1.0535 - val_accuracy: 0.3962 - val_loss: 1.3513
Epoch 4/20
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 30ms/step - accuracy: 0.5335 - loss: 0.9067 - val_accuracy: 0.4734 - val_loss: 1.5173
Epoch 5/20
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 30ms/step - accuracy: 0.7359 - loss: 0.6239 - val_accuracy: 0.6158 - val_loss: 1.1487
Epoch 6/20
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 30ms/step - accuracy: 0.8479 - loss: 0.4096 - val_accuracy: 0.6261 - val_loss: 1.2779
Epoch 7/20
[1m73/73[0m [32m━━━━

### **Evaluation (Target)**

In [375]:
# Validation set üzerinde
val_preds_target = model_target.predict(X_val_seq).argmax(axis=1)
print("F1 Score (Target classification) [Val]:", f1_score(y_val_target, val_preds_target, average="weighted"))
print(classification_report(y_val_target, val_preds_target))

# Test set üzerinde
X_test_seq = text_to_seq(X_test_text)  # test verisine de preprocess
test_preds_target = model_target.predict(X_test_seq).argmax(axis=1)
print("F1 Score (Target classification) [Test]:", 
      f1_score(y_test_target, test_preds_target, average="weighted"))

[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step
F1 Score (Target classification) [Val]: 0.6386058096606255
              precision    recall  f1-score   support

           0       0.62      0.68      0.65       117
           1       0.63      0.49      0.55        79
           2       0.61      0.72      0.66       135
           3       0.74      0.62      0.68       125
           4       0.63      0.62      0.62       127

    accuracy                           0.64       583
   macro avg       0.64      0.63      0.63       583
weighted avg       0.64      0.64      0.64       583

[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
F1 Score (Target classification) [Test]: 0.6338116733972252


### **Model 2 - Stance Classification - Model Input: Tweet Text + Target Information**

In [376]:
# Train aşamasında ground-truth target label kullanıyoruz
target_onehot_train = to_categorical(y_tr_target, num_classes_target)
target_onehot_val   = to_categorical(y_val_target, num_classes_target)

# Tweet metni embed’ine ek veri olarak one-hot vektörü eklemek için
# Keras’ta bir “functional API” model kuralım:
tweet_input = layers.Input(shape=(MAX_SEQ_LEN,), name="tweet_input")
target_input = layers.Input(shape=(num_classes_target,), name="target_input")

# Tweet embeding + RNN
embedding_layer = layers.Embedding(input_dim=MAX_VOCAB_SIZE, 
                                   output_dim=128, 
                                   input_length=MAX_SEQ_LEN)(tweet_input)
lstm_layer = layers.LSTM(128, return_sequences=False)(embedding_layer)

# RNN çıktısıyla target one-hot’u birleştir
concat = layers.concatenate([lstm_layer, target_input])

dense = layers.Dense(64, activation='relu')(concat)
output = layers.Dense(num_classes_stance, activation='softmax')(dense)

model_stance = keras.Model(inputs=[tweet_input, target_input], outputs=output)

model_stance.compile(
    loss='sparse_categorical_crossentropy',
    optimizer= tf.keras.optimizers.Adam(0.008),
    metrics=['accuracy']
)




### **Model Traning**

In [377]:
history_stance = model_stance.fit(
    [X_tr_seq, target_onehot_train],  # Girdi
    y_tr_stance,                      # Çıktı stance label
    validation_data=([X_val_seq, target_onehot_val], y_val_stance),
    epochs=8,
    batch_size=batch_size
)

Epoch 1/8
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 35ms/step - accuracy: 0.4826 - loss: 1.0474 - val_accuracy: 0.5455 - val_loss: 0.9813
Epoch 2/8
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 31ms/step - accuracy: 0.5460 - loss: 0.9677 - val_accuracy: 0.5043 - val_loss: 0.9542
Epoch 3/8
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 30ms/step - accuracy: 0.6550 - loss: 0.7690 - val_accuracy: 0.5472 - val_loss: 1.0700
Epoch 4/8
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 30ms/step - accuracy: 0.7728 - loss: 0.4879 - val_accuracy: 0.5523 - val_loss: 1.1920
Epoch 5/8
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 30ms/step - accuracy: 0.8220 - loss: 0.3688 - val_accuracy: 0.5472 - val_loss: 1.5532
Epoch 6/8
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 31ms/step - accuracy: 0.9274 - loss: 0.2071 - val_accuracy: 0.5763 - val_loss: 1.5917
Epoch 7/8
[1m73/73[0m [32m━━━━━━━━━━━

### **Evaluation (Stance)**

In [378]:
# 1) Model–1 ile test verisindeki target tahmini:
test_preds_target = model_target.predict(X_test_seq).argmax(axis=1)
test_preds_target_onehot = to_categorical(test_preds_target, num_classes_target)

# 2) Model–2 ile stance tahmini:
test_preds_stance = model_stance.predict([X_test_seq, test_preds_target_onehot]).argmax(axis=1)

# 3) Gerçek stance etiketleriyle kıyaslama:
print("F1 Score (Stance classification) [Test]:", 
      f1_score(y_test_stance, test_preds_stance, average="weighted"))
print(classification_report(y_test_stance, test_preds_stance))

[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
F1 Score (Stance classification) [Test]: 0.5527361859353696
              precision    recall  f1-score   support

           0       0.71      0.61      0.66       715
           1       0.43      0.45      0.44       304
           2       0.33      0.44      0.38       230

    accuracy                           0.54      1249
   macro avg       0.49      0.50      0.49      1249
weighted avg       0.57      0.54      0.55      1249

