In [None]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import (
    Input, Embedding, LSTM, GlobalMaxPooling1D, GlobalAveragePooling1D, Dense, Dropout,
)
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.regularizers import L2

In [None]:
# Build a class-balanced, shuffled training frame.
#
# The frame is shuffled at the end on purpose: the training cells use
# `validation_split`, which slices the validation rows off the END of the data.
# A frame that is merely concatenated class by class would therefore be
# validated on a single class.
SEED = 42                   # fixes both the per-class sampling and the final shuffle
SAMPLES_PER_CLASS = 50000   # rows drawn from each sentiment class

splits = {'train': 'train.csv', 'test': 'test.csv'}
df = pd.read_csv("hf://datasets/winvoker/turkish-sentiment-analysis-dataset/" + splits["train"])
df = df.drop(["dataset"], axis=1)

# One sub-frame per sentiment label.
df_Positive = df[df['label'] == 'Positive']
df_Negative = df[df['label'] == 'Negative']
df_Notr = df[df['label'] == 'Notr']

# Down-sample every class to the same size. `sample` raises ValueError if a
# class holds fewer rows than requested, which keeps the balance guarantee explicit.
df_Notr_az = df_Notr.sample(SAMPLES_PER_CLASS, random_state=SEED)
df_Positive_az = df_Positive.sample(SAMPLES_PER_CLASS, random_state=SEED)
df_Negative_az = df_Negative.sample(SAMPLES_PER_CLASS, random_state=SEED)

# Concatenate, then shuffle the rows so that the classes are interleaved.
df = (
    pd.concat([df_Positive_az, df_Notr_az, df_Negative_az])
    .sample(frac=1, random_state=SEED)
    .reset_index(drop=True)
)

In [None]:
# Experiment 1: Embedding -> LSTM -> global max pooling -> dense softmax head.

# Query the GPU list once and reuse it for the report and the configuration.
gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpus))

if gpus:
    try:
        # Let TensorFlow grow GPU memory on demand instead of reserving it all.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPU memory growth enabled")
    except RuntimeError as e:
        print(e)

# `validation_split` takes the validation rows from the END of the arrays,
# before any shuffling. `df` is assembled from per-class samples, so unless the
# rows are shuffled here the validation set would come from a single class.
# The fixed seed keeps the split reproducible.
shuffled = df.sample(frac=1, random_state=42)
texts = shuffled['text'].tolist()
sentiments = shuffled['label'].tolist()

# Word-level vocabulary fitted on the training texts; +1 because index 0 is
# never assigned to a word by the tokenizer and is used for padding.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
vocab_size = len(tokenizer.word_index) + 1

max_length = 100
X = pad_sequences(tokenizer.texts_to_sequences(texts), maxlen=max_length)

# Integer-encode the labels, then one-hot them. `num_classes` is passed
# explicitly so the target always has one column per entry in `sentiment_map`.
sentiment_map = {'Positive': 0, 'Notr': 1, 'Negative': 2}
y = [sentiment_map[sentiment] for sentiment in sentiments]
y = to_categorical(y, num_classes=len(sentiment_map))

# Materialise both arrays as float32 tensors before handing them to fit().
X = tf.convert_to_tensor(X, dtype=tf.float32)
y = tf.convert_to_tensor(y, dtype=tf.float32)

with tf.device('/GPU:0'):
    inputs = Input(shape=(max_length,))
    x = Embedding(vocab_size, 128)(inputs)
    x = LSTM(64, return_sequences=True)(x)
    x = GlobalMaxPooling1D()(x)
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(3, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

with tf.device('/GPU:0'):
    history = model.fit(X, y, epochs=3, batch_size=8, validation_split=0.2, verbose=1)

Num GPUs Available:  1
GPU memory growth enabled
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [None]:
# Experiment 2: the LSTM model with L2 regularisation, heavier dropout,
# learning-rate scheduling and early stopping.

# Query the GPU list once and reuse it for the report and the configuration.
gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpus))

if gpus:
    try:
        # Let TensorFlow grow GPU memory on demand instead of reserving it all.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPU memory growth enabled")
    except RuntimeError as e:
        print(e)

# `validation_split` takes the validation rows from the END of the arrays,
# before any shuffling. `df` is assembled from per-class samples, so unless the
# rows are shuffled here the validation set would come from a single class,
# and both callbacks below monitor `val_loss`. The fixed seed keeps the split
# reproducible.
shuffled = df.sample(frac=1, random_state=42)
texts = shuffled['text'].tolist()
sentiments = shuffled['label'].tolist()

# Word-level vocabulary fitted on the training texts; +1 because index 0 is
# never assigned to a word by the tokenizer and is used for padding.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
vocab_size = len(tokenizer.word_index) + 1

max_length = 100
X = pad_sequences(tokenizer.texts_to_sequences(texts), maxlen=max_length)

# Integer-encode the labels, then one-hot them. `num_classes` is passed
# explicitly so the target always has one column per entry in `sentiment_map`.
sentiment_map = {'Positive': 0, 'Notr': 1, 'Negative': 2}
y = [sentiment_map[sentiment] for sentiment in sentiments]
y = to_categorical(y, num_classes=len(sentiment_map))

# Materialise both arrays as float32 tensors before handing them to fit().
X = tf.convert_to_tensor(X, dtype=tf.float32)
y = tf.convert_to_tensor(y, dtype=tf.float32)

with tf.device('/GPU:0'):
    inputs = Input(shape=(max_length,))
    x = Embedding(vocab_size, 128)(inputs)
    x = LSTM(64, return_sequences=True)(x)
    x = GlobalMaxPooling1D()(x)
    x = Dense(64, activation='relu', kernel_regularizer=L2(0.01))(x)
    x = Dropout(0.6)(x)
    outputs = Dense(3, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)


initial_learning_rate = 0.001
optimizer = tf.keras.optimizers.Adam(learning_rate=initial_learning_rate)

model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Halve the learning rate after 2 epochs without val_loss improvement, and stop
# after 3, restoring the best weights seen.
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-5, verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, verbose=1)

with tf.device('/GPU:0'):
    history = model.fit(
        X, y,
        epochs=10,
        batch_size=8,
        validation_split=0.2,
        verbose=1,
        callbacks=[lr_scheduler, early_stopping]
    )

Num GPUs Available:  1
GPU memory growth enabled
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [None]:
# Experiment 3: Embedding -> global average pooling -> small regularised dense
# head, with learning-rate scheduling and early stopping.

# Query the GPU list once and reuse it for the report and the configuration.
gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpus))

if gpus:
    try:
        # Let TensorFlow grow GPU memory on demand instead of reserving it all.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPU memory growth enabled")
    except RuntimeError as e:
        print(e)

# `validation_split` takes the validation rows from the END of the arrays,
# before any shuffling. `df` is assembled from per-class samples, so unless the
# rows are shuffled here the validation set would come from a single class,
# and both callbacks below monitor `val_loss`. The fixed seed keeps the split
# reproducible.
shuffled = df.sample(frac=1, random_state=42)
texts = shuffled['text'].tolist()
sentiments = shuffled['label'].tolist()

# Word-level vocabulary fitted on the training texts; +1 because index 0 is
# never assigned to a word by the tokenizer and is used for padding.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
vocab_size = len(tokenizer.word_index) + 1

max_length = 100
X = pad_sequences(tokenizer.texts_to_sequences(texts), maxlen=max_length)

# Integer-encode the labels, then one-hot them. `num_classes` is passed
# explicitly so the target always has one column per entry in `sentiment_map`.
sentiment_map = {'Positive': 0, 'Notr': 1, 'Negative': 2}
y = [sentiment_map[sentiment] for sentiment in sentiments]
y = to_categorical(y, num_classes=len(sentiment_map))

# Materialise both arrays as float32 tensors before handing them to fit().
X = tf.convert_to_tensor(X, dtype=tf.float32)
y = tf.convert_to_tensor(y, dtype=tf.float32)

with tf.device('/GPU:0'):
    inputs = Input(shape=(max_length,))
    x = Embedding(vocab_size, 128)(inputs)
    x = GlobalAveragePooling1D()(x)
    x = Dense(32, activation='relu', kernel_regularizer=L2(0.01))(x)
    x = Dropout(0.5)(x)
    outputs = Dense(3, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)

initial_learning_rate = 0.001
optimizer = tf.keras.optimizers.Adam(learning_rate=initial_learning_rate)

model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Halve the learning rate after 2 epochs without val_loss improvement, and stop
# after 3, restoring the best weights seen.
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-5, verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, verbose=1)

with tf.device('/GPU:0'):
    history = model.fit(
        X, y,
        epochs=10,
        batch_size=8,
        validation_split=0.2,
        verbose=1,
        callbacks=[lr_scheduler, early_stopping]
    )

Num GPUs Available:  1
GPU memory growth enabled
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 6: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 7/10
Epoch 7: early stopping


In [None]:
# Run a single sample sentence through `predict_sentiment` and display the result.
# NOTE(review): `predict_sentiment` is not defined in any cell visible in this
# notebook; confirm it is defined before this cell, otherwise a fresh
# "Restart & Run All" fails here with NameError.
text = "Turkcell normal"
predict_sentiment(text)



'nötr'

In [None]:
# Persist whatever `model` currently refers to (on a top-to-bottom run, the
# network built in the last training cell) under the path 'sentimentmodel'.
# NOTE(review): the path has no file extension; newer Keras releases may insist
# on a ".keras" or ".h5" suffix — confirm against the installed version.
model.save('sentimentmodel')

In [None]:
import pickle

# Write the fitted tokenizer to disk with the newest pickle protocol available.
# Pickle files can execute code when loaded, so only load this file back from a
# trusted location.
with open('tokenizer.pickle', mode='wb') as tokenizer_file:
    pickle.dump(
        tokenizer,
        tokenizer_file,
        protocol=pickle.HIGHEST_PROTOCOL,
    )