In [71]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import layers, Model
import re
import string
from nltk.corpus import stopwords

# Ensure nltk stopwords are downloaded
import nltk
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

# Veri yükleme
data = pd.read_excel('Tweets.xlsx')

# Hedef (Target) ve Stance sütunlarını kontrol edin
tweets = data['Tweet']  # Tweetlerin bulunduğu sütun
targets = data['Target']  # Hedef sütunu
stances = data['Stance']  # Duruş sütunu
train_test = data['Train/Test']  # Veri bölünmesini kontrol eden sütun

# Etiketleme (Target ve Stance)
target_encoder = LabelEncoder()
stance_encoder = LabelEncoder()

encoded_targets = target_encoder.fit_transform(targets)
encoded_stances = stance_encoder.fit_transform(stances)

# Train/Test bölme
train_indices = train_test == 'Train'
test_indices = train_test == 'Test'

X_train = tweets[train_indices]
X_test = tweets[test_indices]
y_train_target = encoded_targets[train_indices]
y_train_stance = encoded_stances[train_indices]
y_test_target = encoded_targets[test_indices]
y_test_stance = encoded_stances[test_indices]

# Veri ön işleme fonksiyonu
def preprocess_text(text):
    # 1. Küçük harfe dönüştürme
    text = text.lower()
    
    # 2. Stopwords kaldırma
    text = ' '.join(word for word in text.split() if word not in stop_words)
    
    # 3. # ve @ sembollerini kaldırma
    text = re.sub(r"[@#]", "", text)
    
    # 4. Noktalama işaretlerini kaldırma
    text = text.translate(str.maketrans("", "", string.punctuation))
    
    # 5. Büyük harfleri ayırma (CamelCase)
    text = re.sub(r"([a-z])([A-Z])", r"\1 \2", text)
    
    # 6. Boşluk düzenleme
    text = re.sub(r"\s+", " ", text).strip()
    
    # 7. Lemmatizasyon
    lemmatizer = nltk.WordNetLemmatizer()
    nltk.download('wordnet')
    text = ' '.join(lemmatizer.lemmatize(word) for word in text.split())
    
    # 8. Sayıların normalleştirilmesi
    text = re.sub(r"\d+", "<NUM>", text)
    
    return text

# Tüm tweetlere ön işleme uygulama
data['Cleaned_Tweet'] = data['Tweet'].apply(preprocess_text)

# Tokenization ve padding
max_words = 50000
max_len = 50

X_train = data.loc[train_indices, 'Cleaned_Tweet']
X_test = data.loc[test_indices, 'Cleaned_Tweet']

tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

X_train_padded = pad_sequences(X_train_seq, maxlen=max_len, padding='post', truncating='post')
X_test_padded = pad_sequences(X_test_seq, maxlen=max_len, padding='post', truncating='post')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/macbook/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/macbook/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/macbook/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/macbook/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/macbook/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/macbook/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/macbook/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/macbook/nltk_data...
[nltk_data]   Package wordnet is already 

In [72]:
X_train_padded

array([[  18,   96,   82, ...,    0,    0,    0],
       [ 670,  119,  106, ...,    0,    0,    0],
       [   3, 1379,    3, ...,    0,    0,    0],
       ...,
       [  12,  125,   84, ...,    0,    0,    0],
       [1793, 2685, 1531, ...,    0,    0,    0],
       [8573, 8574, 8575, ...,    0,    0,    0]], dtype=int32)

In [None]:
model_target = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=max_words, output_dim=128, input_length=max_len),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(6, activation='softmax')  # Sınıf sayısı kadar çıktı
])



# Modeli derleme
model_target.compile(
    loss='sparse_categorical_crossentropy',        # Kayıp fonksiyonu
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),  # Daha düşük öğrenme oranı
    metrics=['accuracy']                           # Değerlendirme metriği
)

# Model özetini yazdırma
print(model_target.summary())

# Modeli eğitme
history = model_target.fit(
    X_train_padded,       # Eğitim veri girişleri
    y_train_target,       # Eğitim hedefleri
    validation_split=0.2, # Doğrulama için %20 ayırma
    batch_size=64,        # Daha büyük batch boyutu
    epochs=20,            # Daha az epoch sayısı, daha iyi hiperparametre optimizasyonu için
    verbose=1             # Eğitim süreci detaylarını göster
)



None
Epoch 1/20
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 89ms/step - accuracy: 0.1903 - loss: 5.7824 - val_accuracy: 0.1509 - val_loss: 1.7000
Epoch 2/20
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 98ms/step - accuracy: 0.2156 - loss: 1.6916 - val_accuracy: 0.2298 - val_loss: 1.6296
Epoch 3/20
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 108ms/step - accuracy: 0.2521 - loss: 1.6041 - val_accuracy: 0.2298 - val_loss: 1.6408
Epoch 4/20
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 162ms/step - accuracy: 0.2205 - loss: 1.6317 - val_accuracy: 0.2298 - val_loss: 1.5931
Epoch 5/20
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 190ms/step - accuracy: 0.2438 - loss: 1.6128 - val_accuracy: 0.1509 - val_loss: 1.7776
Epoch 6/20
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 200ms/step - accuracy: 0.2243 - loss: 1.6629 - val_accuracy: 0.1509 - val_loss: 1.6439
Epoch 7/20
[1m37/37[0m 

KeyboardInterrupt: 

In [None]:
model_stance = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=max_words, output_dim=128, input_length=max_len),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(len(np.unique(y_train_stance)), activation='softmax')  # Stance sınıfları kadar çıktı
])

model_stance.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

print(model_stance.summary())

# Eğitim
model_stance.fit(
    X_train_padded,
    y_train_stance,
    validation_split=0.2,
    batch_size=32,
    epochs=10,
    verbose=1
)


None
Epoch 1/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 55ms/step - accuracy: 0.4601 - loss: 1.0665 - val_accuracy: 0.4940 - val_loss: 1.0066
Epoch 2/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 60ms/step - accuracy: 0.6085 - loss: 0.8461 - val_accuracy: 0.5146 - val_loss: 0.9930
Epoch 3/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 69ms/step - accuracy: 0.8143 - loss: 0.4631 - val_accuracy: 0.5626 - val_loss: 1.1339
Epoch 4/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 75ms/step - accuracy: 0.9632 - loss: 0.1320 - val_accuracy: 0.5883 - val_loss: 1.4552
Epoch 5/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 62ms/step - accuracy: 0.9907 - loss: 0.0408 - val_accuracy: 0.5678 - val_loss: 1.6518
Epoch 6/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 64ms/step - accuracy: 0.9953 - loss: 0.0190 - val_accuracy: 0.5901 - val_loss: 1.9571
Epoch 7/10
[1m73/73[0m [32

<keras.src.callbacks.history.History at 0x29b4f4af0>

In [None]:
loss, accuracy = model_target.evaluate(X_test_padded, y_test_target, verbose=1)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")




# Stance Model Değerlendirme
stance_loss, stance_accuracy = model_stance.evaluate(X_test_padded, y_test_stance, verbose=1)
print(f"Stance Model - Test Loss: {stance_loss}, Test Accuracy: {stance_accuracy}")


[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 63ms/step - accuracy: 0.1905 - loss: 5.0842
Test Loss: 4.8854498863220215
Test Accuracy: 0.15081799030303955
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.4967 - loss: 3.9165
Stance Model - Test Loss: 3.8806703090667725, Test Accuracy: 0.5035787224769592
