### **Installing Necessary Libraries**

In [55]:
import numpy as np
import pandas as pd
import re
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score, classification_report
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

### **Data Loading and Preprocessing**

In [56]:
# Örnek: veriyi okuma
df = pd.read_excel("Tweets.xlsx")

# Kolon isimlerinin aynen kaldığını varsayıyoruz:
# ["Tweet", "Target", "Train/Test", "Stance", "Opinion Toward", "Sentiment labels"]

# Temel text preprocessing
def basic_preprocessing(text):
    # Küçük harfe çevir
    text = text.lower()
    
    # Özel karakterleri temizle, sadece harf, rakam, boşluk, '-', '_', ve '#' karakterlerini bırak
    text = re.sub(r"[^\w\s#_-]+", " ", text)
    
    text = re.sub(r"[^a-zA-Z0-9ığüşöçİĞÜŞÖÇ]+", " ", text)  # Noktalama ve özel karakterleri sil
    text = text.strip()                          # Boşlukları (trim) temizle
    return text

df["Preprocessed_Tweet"] = df["Tweet"].apply(basic_preprocessing)


### **Editing Category (Target) Classes and Stance (FAVOR / AGAINST / NEITHER) Classes**

In [57]:
# Target için LabelEncoder
le_target = LabelEncoder()
df["Target_label"] = le_target.fit_transform(df["Target"])

# Örnek olarak "Atheism=0, Hillary Clinton=1, Feminist Mov=2, ..." gibi dönüştürmüş olacak.
num_classes_target = len(df["Target_label"].unique())


# Stance için LabelEncoder
le_stance = LabelEncoder()
df["Stance_label"] = le_stance.fit_transform(df["Stance"])
num_classes_stance = len(df["Stance_label"].unique())

### **Training/Validation Spliting**

In [58]:
df_train = df[df["Train/Test"] == "Train"].copy()
df_test  = df[df["Train/Test"] == "Test"].copy()

X_train_text = df_train["Preprocessed_Tweet"].values
y_train_target = df_train["Target_label"].values
y_train_stance = df_train["Stance_label"].values

X_test_text = df_test["Preprocessed_Tweet"].values
y_test_target = df_test["Target_label"].values
y_test_stance = df_test["Stance_label"].values


X_tr_text, X_val_text, y_tr_target, y_val_target = train_test_split(
    X_train_text, y_train_target, test_size=0.2, random_state=42
)

X_tr_text_s, X_val_text_s, y_tr_stance, y_val_stance = train_test_split(
    X_train_text, y_train_stance, test_size=0.2, random_state=42
)

### **Tokenize and Sequence Texts**

In [59]:
MAX_VOCAB_SIZE = 10000   # Sözlük boyutu (isteğe göre artırılabilir)
MAX_SEQ_LEN = 30         # Maksimum token sayısı

tokenizer = Tokenizer(num_words=MAX_VOCAB_SIZE)
tokenizer.fit_on_texts(df_train["Preprocessed_Tweet"])

def text_to_seq(texts):
    sequences = tokenizer.texts_to_sequences(texts)
    padded = pad_sequences(sequences, maxlen=MAX_SEQ_LEN, padding='post', truncating='post')
    return padded

X_tr_seq = text_to_seq(X_tr_text)
X_val_seq = text_to_seq(X_val_text)
X_test_seq = text_to_seq(X_test_text)

### **Model–1: Target Classification - Model Architecture**

In [60]:
model_target = keras.models.Sequential([
    layers.Embedding(input_dim=MAX_VOCAB_SIZE, output_dim=128, 
                     input_length=MAX_SEQ_LEN),
    layers.LSTM(64, return_sequences=False),
    layers.Dense(64, activation='relu'),
    layers.Dense(num_classes_target, activation='softmax')
])

model_target.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)



### **Traninig**

In [61]:
epochs = 20
batch_size = 32

history_target = model_target.fit(
    X_tr_seq, y_tr_target,
    validation_data=(X_val_seq, y_val_target),
    epochs=100,
    batch_size=batch_size
)



Epoch 1/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - accuracy: 0.2420 - loss: 1.6797 - val_accuracy: 0.2196 - val_loss: 1.6099
Epoch 2/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.3225 - loss: 1.5411 - val_accuracy: 0.4254 - val_loss: 1.3174
Epoch 3/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.5606 - loss: 1.0497 - val_accuracy: 0.5455 - val_loss: 1.1579
Epoch 4/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.7541 - loss: 0.6330 - val_accuracy: 0.6158 - val_loss: 1.2200
Epoch 5/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.9129 - loss: 0.2919 - val_accuracy: 0.6449 - val_loss: 1.2845
Epoch 6/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.9649 - loss: 0.1489 - val_accuracy: 0.6484 - val_loss: 1.4652
Epoch 7/100
[1m73/73[0m [

### **Evaluation (Target)**

In [62]:
# Validation set üzerinde
val_preds_target = model_target.predict(X_val_seq).argmax(axis=1)
print("F1 Score (Target classification) [Val]:", f1_score(y_val_target, val_preds_target, average="weighted"))
print(classification_report(y_val_target, val_preds_target))

# Test set üzerinde
X_test_seq = text_to_seq(X_test_text)  # test verisine de preprocess
test_preds_target = model_target.predict(X_test_seq).argmax(axis=1)
print("F1 Score (Target classification) [Test]:", 
      f1_score(y_test_target, test_preds_target, average="weighted"))

[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
F1 Score (Target classification) [Val]: 0.687896412094745
              precision    recall  f1-score   support

           0       0.66      0.70      0.68       117
           1       0.54      0.59      0.57        79
           3       0.70      0.73      0.72       135
           4       0.82      0.70      0.76       125
           5       0.68      0.66      0.67       127

    accuracy                           0.69       583
   macro avg       0.68      0.68      0.68       583
weighted avg       0.69      0.69      0.69       583

[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
F1 Score (Target classification) [Test]: 0.3261751542573221


### **Model 2 - Stance Classification - Model Input: Tweet Text + Target Information**

In [63]:
# Train aşamasında ground-truth target label kullanıyoruz
target_onehot_train = to_categorical(y_tr_target, num_classes_target)
target_onehot_val   = to_categorical(y_val_target, num_classes_target)

# Tweet metni embed’ine ek veri olarak one-hot vektörü eklemek için
# Keras’ta bir “functional API” model kuralım:
tweet_input = layers.Input(shape=(MAX_SEQ_LEN,), name="tweet_input")
target_input = layers.Input(shape=(num_classes_target,), name="target_input")

# Tweet embeding + RNN
embedding_layer = layers.Embedding(input_dim=MAX_VOCAB_SIZE, 
                                   output_dim=128, 
                                   input_length=MAX_SEQ_LEN)(tweet_input)
lstm_layer = layers.LSTM(128, return_sequences=False)(embedding_layer)

# RNN çıktısıyla target one-hot’u birleştir
concat = layers.concatenate([lstm_layer, target_input])

dense = layers.Dense(64, activation='relu')(concat)
output = layers.Dense(num_classes_stance, activation='softmax')(dense)

model_stance = keras.Model(inputs=[tweet_input, target_input], outputs=output)

model_stance.compile(
    loss='sparse_categorical_crossentropy',
    optimizer= tf.keras.optimizers.SGD(0.01),
    metrics=['accuracy']
)




### **Model Traning**

In [64]:
history_stance = model_stance.fit(
    [X_tr_seq, target_onehot_train],  # Girdi
    y_tr_stance,                      # Çıktı stance label
    validation_data=([X_val_seq, target_onehot_val], y_val_stance),
    epochs=100,
    batch_size=batch_size
)

Epoch 1/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - accuracy: 0.3273 - loss: 1.1047 - val_accuracy: 0.5455 - val_loss: 1.0572
Epoch 2/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.5411 - loss: 1.0513 - val_accuracy: 0.5455 - val_loss: 1.0306
Epoch 3/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.5368 - loss: 1.0370 - val_accuracy: 0.5455 - val_loss: 1.0180
Epoch 4/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.5461 - loss: 1.0216 - val_accuracy: 0.5455 - val_loss: 1.0112
Epoch 5/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.5321 - loss: 1.0259 - val_accuracy: 0.5455 - val_loss: 1.0048
Epoch 6/100
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.5526 - loss: 1.0014 - val_accuracy: 0.5455 - val_loss: 1.0013
Epoch 7/100
[1m73/73[0m [

### **Evaluation (Stance)**

In [65]:
# 1) Model–1 ile test verisindeki target tahmini:
test_preds_target = model_target.predict(X_test_seq).argmax(axis=1)
test_preds_target_onehot = to_categorical(test_preds_target, num_classes_target)

# 2) Model–2 ile stance tahmini:
test_preds_stance = model_stance.predict([X_test_seq, test_preds_target_onehot]).argmax(axis=1)

# 3) Gerçek stance etiketleriyle kıyaslama:
print("F1 Score (Stance classification) [Test]:", 
      f1_score(y_test_stance, test_preds_stance, average="weighted"))
print(classification_report(y_test_stance, test_preds_stance))

[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step
F1 Score (Stance classification) [Test]: 0.4288603615692548
              precision    recall  f1-score   support

           0       0.56      0.90      0.69      1014
           1       0.38      0.26      0.31       452
           2       0.22      0.00      0.01       490

    accuracy                           0.53      1956
   macro avg       0.38      0.39      0.33      1956
weighted avg       0.43      0.53      0.43      1956

