In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb

# Keep only the 10,000 most frequent words (exclude the rest).
num_words = 10000
train_split, test_split = imdb.load_data(num_words=num_words)
x_train, y_train = train_split
x_test, y_test = test_split

# Fetch the word -> index dictionary and invert it to index -> word.
word_index = imdb.get_word_index()
reverse_word_index = dict((idx, token) for token, idx in word_index.items())

# Turn the first training review back into text. Each stored ID is shifted
# down by 3 before the lookup; IDs with no matching word become '?'.
first_review_tokens = (
    reverse_word_index.get(token_id - 3, '?') for token_id in x_train[0]
)
decoded_review = ' '.join(first_review_tokens)

# Label 1 is reported as positive, anything else as negative.
first_label = "Pozitif" if y_train[0] == 1 else "Negatif"

print("Örnek yorum (metin):", decoded_review)
print("Etiketi:", first_label)

Örnek yorum (metin): ? this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert ? is an amazing actor and now the same being director ? father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for ? and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also ? to the two little boy's that played the ? of norman and paul they were just brilliant children are often left out of the ? list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they ha

In [None]:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# 1. Convert the integer sequences back to text (limited to the first 10,000 words)
def decode_review(sequence, index_to_word=None, index_from=3, unknown='?'):
    """Convert a sequence of integer token IDs into a space-joined string.

    Args:
        sequence: Iterable of integer token IDs.
        index_to_word: Mapping from word index to word. When omitted, the
            notebook-level ``reverse_word_index`` dictionary is used, which
            keeps existing ``decode_review(seq)`` calls working unchanged.
        index_from: Offset subtracted from every token ID before the lookup
            (3 by default, matching the original hard-coded value).
        unknown: Placeholder emitted for IDs that have no entry in the
            mapping after the offset is applied.

    Returns:
        The decoded words joined by single spaces; an empty string for an
        empty sequence.
    """
    if index_to_word is None:
        # Fall back to the global lookup table built in an earlier cell.
        index_to_word = reverse_word_index
    return ' '.join(index_to_word.get(i - index_from, unknown) for i in sequence)

# Decode every review of both splits into plain text.
x_train_text = list(map(decode_review, x_train))
x_test_text = list(map(decode_review, x_test))

# 2. Build TF-IDF features: fit the vocabulary on the training texts only,
#    then reuse that fitted vectorizer for the test texts.
vectorizer = TfidfVectorizer(max_features=10000)
X_train_tfidf = vectorizer.fit_transform(x_train_text)
X_test_tfidf = vectorizer.transform(x_test_text)

# 3. Logistic regression classifier (fit returns the estimator itself).
model = LogisticRegression(max_iter=1000).fit(X_train_tfidf, y_train)

# 4. Predict labels for the test set.
y_pred = model.predict(X_test_tfidf)

# 5. Evaluation: overall accuracy plus the per-class report.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print(f"Doğruluk: {accuracy:.4f}")
print("\nClassification Report:\n", report)

Doğruluk: 0.8842

Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.88      0.88     12500
           1       0.88      0.88      0.88     12500

    accuracy                           0.88     25000
   macro avg       0.88      0.88      0.88     25000
weighted avg       0.88      0.88      0.88     25000



In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable between runs (as far as the backend allows).
tf.keras.utils.set_random_seed(42)

# Maximum review length: every sequence is padded/truncated to this many tokens.
maxlen = 200

# Bring all reviews to the same length.
x_train_pad = pad_sequences(x_train, maxlen=maxlen)
x_test_pad = pad_sequences(x_test, maxlen=maxlen)

# Build the model: trainable embedding -> LSTM -> sigmoid output for the
# binary sentiment label.
# NOTE: `input_length` is intentionally not passed to Embedding; the argument
# is deprecated in Keras 3 and the sequence length is inferred from the data.
# NOTE: this rebinds `model`, replacing the logistic-regression estimator from
# the earlier cell; the embedding-export cells below rely on this Keras model.
model = Sequential([
    Embedding(input_dim=num_words, output_dim=128),
    LSTM(64),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Training: 20% of the training data is held out for validation.
history = model.fit(x_train_pad, y_train, epochs=3, batch_size=128, validation_split=0.2)

# Evaluate on the test data.
loss, accuracy = model.evaluate(x_test_pad, y_test)
print(f"\nTest Doğruluğu: {accuracy:.4f}")



Epoch 1/3
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 392ms/step - accuracy: 0.6923 - loss: 0.5689 - val_accuracy: 0.8294 - val_loss: 0.3913
Epoch 2/3
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 395ms/step - accuracy: 0.9001 - loss: 0.2597 - val_accuracy: 0.8634 - val_loss: 0.3120
Epoch 3/3
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 392ms/step - accuracy: 0.9350 - loss: 0.1782 - val_accuracy: 0.8290 - val_loss: 0.3739
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 33ms/step - accuracy: 0.8314 - loss: 0.3795

Test Doğruluğu: 0.8334


SONUÇ KARŞILAŞTIRMASI

Word Embedding KULLANMADAN:

-Kelime sırasını dikkate almaz.

-Daha hızlı çalışır.

-Temel modelleme için yeterlidir.

-Ancak bağlam ve anlam eksikliği nedeniyle sınırlı başarı sağlar.




Word Embedding KULLANARAK:

-Kelimeler arası bağlamı öğrenir.

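-Daha iyi genelleme yapabilir; ancak bu çalışmada 3. epoch'ta eğitim doğruluğu artarken doğrulama doğruluğunun 0.8634'ten 0.8290'a düşmesi aşırı öğrenmeye (overfitting) işaret etmektedir.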

-Eğitim süresi daha uzundur.

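-Sonuçlar genellikle daha başarılıdır; ancak bu deneyde 3 epoch eğitilen LSTM modeli (test doğruluğu 0.8334), TF-IDF + Lojistik Regresyon modelinin (doğruluk 0.8842) gerisinde kalmıştır.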

Embedding projector görselleştirmesi:

In [1]:
# Grab the embedding layer (first layer of the model) and its weights.
# This cell needs `model` to exist already: run on a fresh kernel it raises
# NameError, so execute the model-building cell first.
embedding_layer = model.layers[0]
layer_weights = embedding_layer.get_weights()
embedding_weights = layer_weights[0]

print("Embedding shape:", embedding_weights.shape)

NameError: name 'model' is not defined

In [None]:
# Fetch the word -> index dictionary and invert it to index -> word.
word_index = imdb.get_word_index()
reverse_word_index = {idx: token for token, idx in word_index.items()}

# Write one label per line for token IDs 3..1002 (the first 3 IDs are special
# tokens and are skipped). Each ID is shifted down by 3 before the lookup;
# IDs without a matching word are written as "?".
with open("/content/metadata.tsv", "w", encoding="utf-8") as metadata_file:
    metadata_file.writelines(
        reverse_word_index.get(token_id - 3, "?") + "\n"
        for token_id in range(3, 1003)
    )

In [None]:
# Write the embedding rows for token IDs 3..1002, one tab-separated vector
# per line.
with open("/content/vectors.tsv", "w", encoding="utf-8") as vectors_file:
    for token_id in range(3, 1003):
        row = embedding_weights[token_id]
        vectors_file.write('\t'.join(map(str, row)) + "\n")

In [None]:
from google.colab import files

# Download both generated TSV files through the Colab helper,
# metadata first, then vectors.
for tsv_path in ("/content/metadata.tsv", "/content/vectors.tsv"):
    files.download(tsv_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>