In [1]:
import pandas as pd
import numpy as np
import re
import tensorflow as tf
import pickle
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report

# Sabitler
MAX_LEN = 30
VOCAB_SIZE = 5000
EMBED_DIM = 128

# 1) Veri Yükle
data = pd.read_csv("mood_dataset.csv")

# 2) Temizleme Fonksiyonu
def clean_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zçğıöşü0-9 ]', '', text)
    return text

data['clean_sentence'] = data['sentences'].apply(clean_text)

# 3) Tokenizer ve Padding
tokenizer = Tokenizer(num_words=VOCAB_SIZE, oov_token="<OOV>")
tokenizer.fit_on_texts(data['clean_sentence'])
sequences = tokenizer.texts_to_sequences(data['clean_sentence'])
padded = pad_sequences(sequences, maxlen=MAX_LEN, padding='post', truncating='post')
labels = data['label'].values

# 4) Veri Böl
X_train, X_test, y_train, y_test = train_test_split(padded, labels, test_size=0.2, random_state=42)

# 5) Class Weights
class_weights = compute_class_weight(class_weight="balanced", classes=np.unique(y_train), y=y_train)
class_weights = dict(enumerate(class_weights))

# 6) Model - Derin ve Bidirectional LSTM
model = Sequential([
    Embedding(input_dim=VOCAB_SIZE, output_dim=EMBED_DIM),
    Bidirectional(LSTM(64, return_sequences=True)),
    Dropout(0.3),
    Bidirectional(LSTM(32)),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(3, activation='softmax')
])

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# 7) Erken Durdurma
early_stop = EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)

# 8) Eğit
history = model.fit(
    X_train, y_train,
    epochs=25,
    validation_data=(X_test, y_test),
    class_weight=class_weights,
    callbacks=[early_stop],
    verbose=2
)

# 9) Değerlendirme
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
print(classification_report(y_test, y_pred_classes, zero_division=0))

# 10) Kaydet
model.save("model.keras")
with open("tokenizer.pickle", "wb") as f:
    pickle.dump(tokenizer, f)


Epoch 1/25
9/9 - 4s - 491ms/step - accuracy: 0.2955 - loss: 1.1043 - val_accuracy: 0.3333 - val_loss: 1.1001
Epoch 2/25
9/9 - 0s - 30ms/step - accuracy: 0.2917 - loss: 1.0979 - val_accuracy: 0.3030 - val_loss: 1.0978
Epoch 3/25
9/9 - 0s - 28ms/step - accuracy: 0.3598 - loss: 1.0916 - val_accuracy: 0.3333 - val_loss: 1.0933
Epoch 4/25
9/9 - 0s - 28ms/step - accuracy: 0.4470 - loss: 1.0758 - val_accuracy: 0.4091 - val_loss: 1.0790
Epoch 5/25
9/9 - 0s - 27ms/step - accuracy: 0.5076 - loss: 1.0237 - val_accuracy: 0.5909 - val_loss: 1.0333
Epoch 6/25
9/9 - 0s - 28ms/step - accuracy: 0.7197 - loss: 0.8516 - val_accuracy: 0.6212 - val_loss: 0.9664
Epoch 7/25
9/9 - 0s - 29ms/step - accuracy: 0.8561 - loss: 0.5349 - val_accuracy: 0.5303 - val_loss: 1.0068
Epoch 8/25
9/9 - 0s - 27ms/step - accuracy: 0.9470 - loss: 0.2328 - val_accuracy: 0.5303 - val_loss: 1.4263
Epoch 9/25
9/9 - 0s - 26ms/step - accuracy: 0.9621 - loss: 0.1303 - val_accuracy: 0.5758 - val_loss: 1.8092
Epoch 10/25
9/9 - 0s - 27ms