#### Import Dataset

In [6]:
import pandas as pd
data = pd.read_csv("Data\data.csv",sep=",",encoding="utf-8")
print(data.head())

                                           sentence  label
0                                   Cinema 4D kapat      3
1                       PowerPoint  uygulamasını aç      3
2  Şebnem Ferah Eşkıya Dünyaya Hükümdar Olmaz oynat      0
3                           Ebru Gündeş Üç Kalp çal      0
4                          Kahramanmaraş hava nasıl      1


#### Clean Dataset

In [7]:
import nltk
from nltk.corpus import stopwords 

nltk.download('stopwords')
stop_word_list = stopwords.words('turkish')

import re

def preprocess_text(sen):
    #sayıları silme
    sentence = re.sub('[\d\s]',' ',str(sen))
    #noktalama işaretlerini silme
    sentence = re.sub('[^\w\s]',' ',str(sentence))
    #birden çok boşluk silme
    sentence = re.sub(r'\s+',' ',sentence)
    #tek karakterleri silme
    sentence = re.sub(r"\b[\w\w]\b",' ',str(sentence))

    #engellenecek kelimeleri silme
    WPT = nltk.WordPunctTokenizer()
    tokens = WPT.tokenize(sentence)
    filtered_tokens = [token for token in tokens if token not in stop_word_list]
    single_doc = ' '.join(filtered_tokens)

    #hüçük harf dönüştürme
    return single_doc.lower()

x = data['sentence']
y = data['label']

x = x.apply(preprocess_text)

print(x.head())
print(y.head())

0                                        cinema kapat
1                          powerpoint uygulamasını aç
2    şebnem ferah eşkıya dünyaya hükümdar olmaz oynat
3                             ebru gündeş üç kalp çal
4                                  kahramanmaraş hava
Name: sentence, dtype: object
0    3
1    3
2    0
3    0
4    1
Name: label, dtype: int64


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\akinb\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


#### Kelimelerin Vektörlere Dönüştürülmesi(TF/IDF)

In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer

vect = TfidfVectorizer(analyzer='word', lowercase=False)
vect.fit(x)
sent_vector = vect.transform(x)


In [9]:
import joblib
joblib.dump(vect,"svmvectorizer.pkl")

['svmvectorizer.pkl']

# Veri Setinin Test ve Train olarak ayrılması

In [10]:
from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test = train_test_split(sent_vector,y,test_size=0.2,random_state=0)
print(x_train)
print(y_train)

  (0, 530)	0.4715683662502314
  (0, 474)	0.4295938445123894
  (0, 230)	0.7701119429726776
  (1, 542)	0.4162220011764153
  (1, 425)	0.3012769712441693
  (1, 413)	0.7570571435015578
  (1, 302)	0.2690330677239423
  (1, 185)	0.3007941526674786
  (2, 651)	0.4089952460071913
  (2, 535)	0.6092838017010285
  (2, 32)	0.6793350702917222
  (3, 542)	0.3605684814537448
  (3, 532)	0.41601339635972256
  (3, 425)	0.2609928829121422
  (3, 302)	0.2330603486021609
  (3, 290)	0.41601339635972256
  (3, 273)	0.5775140979554452
  (3, 185)	0.26057462255943864
  (4, 511)	0.3877063603244278
  (4, 455)	0.2146755538652099
  (4, 452)	0.3877063603244278
  (4, 445)	0.4792241314797847
  (4, 333)	0.4404207380625115
  (4, 148)	0.4792241314797847
  (5, 596)	0.6004636592286434
  :	:
  (2084, 561)	0.648891541589256
  (2084, 536)	0.648891541589256
  (2084, 530)	0.3973405774745865
  (2085, 332)	0.5113058235424321
  (2085, 131)	0.8593988333780744
  (2086, 542)	0.4321593801961708
  (2086, 521)	0.7347243647124332
  (2086, 425)

#### Geleneksel Makine Öğrenmesi Yöntemi : SVM

In [11]:
from sklearn.svm import SVC
import joblib

# Başarı oranının değiştiği gözlemlenecektir. ( ‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’ )
svc = SVC(C=0.5,kernel='linear')

svc.fit(x_train,y_train)
joblib.dump(svc, "svcmodel.pkl")

['svcmodel.pkl']

In [12]:
print(x_test)

  (0, 592)	0.4297793748965629
  (0, 455)	0.21285011017146235
  (0, 411)	0.3781710839889996
  (0, 124)	0.3844095893705985
  (0, 103)	0.3313303429736972
  (0, 48)	0.4297793748965629
  (0, 12)	0.4297793748965629
  (1, 584)	0.444446497274127
  (1, 433)	0.8151991384913686
  (1, 64)	0.37137269105803916
  (2, 342)	0.846723370378579
  (2, 302)	0.35468497432363516
  (2, 185)	0.3965578180338674
  (3, 455)	0.23845865449788142
  (3, 213)	0.48921330221554465
  (3, 98)	0.48921330221554465
  (3, 49)	0.47449816111461024
  (3, 21)	0.48921330221554465
  (4, 677)	0.3752604634518901
  (4, 516)	0.5409447492968189
  (4, 301)	0.5409447492968189
  (4, 64)	0.22612032139606097
  (4, 50)	0.4720240896797729
  (5, 566)	0.9382418273891142
  (5, 302)	0.3459801632139271
  :	:
  (518, 84)	0.29570190571586663
  (518, 47)	0.4237729142732062
  (518, 36)	0.40234947332533577
  (519, 651)	0.37857170348519037
  (519, 600)	0.6057849536203026
  (519, 584)	0.35033219271830657
  (519, 414)	0.6057849536203026
  (520, 332)	0.51130

In [13]:
resultsvm = svc.predict(x_test)
print(resultsvm)

[0 3 1 0 0 1 1 3 0 1 3 3 0 2 3 0 0 0 3 1 1 0 1 0 0 1 2 0 2 3 0 3 2 1 0 3 0
 0 3 0 3 0 0 0 0 3 3 2 0 1 3 2 3 3 3 0 3 1 2 3 2 1 1 1 3 0 0 3 0 3 1 0 0 3
 3 0 3 0 0 1 0 3 0 0 3 1 0 3 0 3 1 0 1 0 3 0 1 3 3 0 0 1 1 1 1 0 1 3 3 1 0
 1 0 3 1 2 1 1 3 0 3 1 2 3 1 0 3 1 0 1 3 3 2 0 0 2 0 3 2 0 1 1 3 3 3 3 3 0
 0 3 3 0 2 0 0 3 1 0 1 0 3 0 1 2 0 3 1 0 3 1 3 0 3 3 2 0 2 0 3 3 1 0 1 1 0
 1 0 3 0 1 2 1 3 3 3 3 3 0 3 1 2 3 3 1 0 0 0 0 0 1 1 0 0 3 0 2 1 2 3 3 2 0
 2 0 1 0 3 2 3 0 1 3 3 3 0 1 1 2 0 2 3 0 3 3 2 3 2 0 1 0 2 0 0 0 3 1 0 2 0
 3 1 0 3 1 0 2 2 3 3 0 3 2 1 3 2 0 3 0 2 1 2 2 1 2 0 0 3 0 0 2 3 0 0 1 1 0
 2 3 0 0 1 3 1 0 2 0 2 1 1 0 0 3 3 1 2 3 1 3 0 0 3 0 3 2 1 2 0 0 0 0 3 0 3
 0 2 2 3 2 0 0 1 2 3 0 3 2 3 2 0 1 0 1 0 2 0 3 0 2 1 2 0 3 1 3 0 3 2 1 1 0
 0 1 2 3 0 0 0 1 2 2 2 0 3 1 2 3 1 3 1 1 3 3 3 2 3 0 3 2 1 3 1 2 2 2 2 1 1
 3 1 2 3 3 3 3 0 1 3 2 3 3 2 0 0 3 0 1 0 3 0 0 0 1 2 0 0 1 1 2 0 3 0 0 3 2
 0 2 1 2 2 3 0 1 2 1 1 0 1 3 1 3 0 1 0 3 2 0 0 2 3 3 3 1 0 0 1 3 0 0 0 3 3
 2 0 0 0 2 1 0 2 1 1 3 0 

In [14]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

clf = LogisticRegression(solver="liblinear").fit(x_train, y_train)
aucsvm = roc_auc_score(y_test, clf.predict_proba(x_test), multi_class='ovr')
print(aucsvm)
accsvm = accuracy_score(y_test,resultsvm)
print(accsvm)

precision_recall_fscore_support(y_test, resultsvm, average='macro')

1.0
1.0


(1.0, 1.0, 1.0, None)

# Yapay Sinir Ağları : MLP

In [None]:
from keras.models import Sequential
from keras.layers import Dense

modelmlp = Sequential()

modelmlp.add(Dense(600,input_dim = 40065, activation = 'relu'))

modelmlp.add(Dense(600,input_dim = 40065, activation = 'relu'))

modelmlp.add(Dense(600,input_dim = 40065, activation = 'relu'))

modelmlp.add(Dense(600,input_dim = 40065, activation = 'tanh'))

modelmlp.add(Dense(5,input_dim = 40065, activation = 'sigmoid'))

In [None]:
modelmlp.compile(loss='sparse_categorical_crossentropy',optimizer = 'adam',metrics =['accuracy'])
modelmlp.summary()

In [None]:
historymlp = modelmlp.fit(x_train, y_train, epochs=10, batch_size=32, verbose=2,validation_data=(x_test,y_test))

In [None]:
#model test
scoremlptest = modelmlp.evaluate(x_test,y_test)

print("test Loss:",scoremlptest[0])
print("test Accuracy:",scoremlptest[1])


In [None]:
from sklearn.metrics import confusion_matrix
#confusion_matrix(y_test, resultmlp)
print(resultmlp)

#Derin Öğrenme Yöntemleri : RNN, LSTM


In [20]:
#Kelimeleri sayıya dönüştürme 
from keras.preprocessing.text import  Tokenizer
from keras.preprocessing.sequence import  pad_sequences

token = Tokenizer()
token.fit_on_texts(x)
xdl = token.texts_to_sequences(x)
xdl = pad_sequences(xdl)
print(xdl)

[[  0   0   0 ...   0 147  12]
 [  0   0   0 ... 215   8   5]
 [  0   0   0 ... 371 230   6]
 ...
 [  0   0   0 ... 576  30   6]
 [  0   0   0 ...   4  16  33]
 [  0   0   0 ...   0 129   5]]


In [21]:
#Verileri ölçeklendirme standartlaştırma ve normalleştirme
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
xdl=scaler.fit_transform(xdl)
print(xdl)

[[-0.02710634 -0.04167051 -0.09143805 ... -0.83658457  0.1064601
  -0.38938777]
 [-0.02710634 -0.04167051 -0.09143805 ...  0.33250456 -0.68332427
  -0.44107311]
 [-0.02710634 -0.04167051 -0.09143805 ...  1.18077388  0.57805796
  -0.43368949]
 ...
 [-0.02710634 -0.04167051 -0.09143805 ...  2.29548677 -0.55832242
  -0.43368949]
 [-0.02710634 -0.04167051 -0.09143805 ... -0.81483407 -0.63786905
  -0.23433175]
 [-0.02710634 -0.04167051 -0.09143805 ... -0.83658457  0.00418586
  -0.44107311]]


In [22]:
#One Hot Encoding 0,1,2 Olan labellerımızı 1,0,0(Notr)/0,1,0(olumlu)/0,0,1(olumsuz) şekline getiriyoruz
from keras.utils import  to_categorical

ydl=to_categorical(y)
print(ydl)

[[0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [1. 0. 0. 0.]
 ...
 [1. 0. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]


In [23]:
xdl_train,xdl_test,ydl_train,ydl_test = train_test_split(xdl,ydl,test_size=0.2,random_state=0)

In [24]:
print(xdl_train.shape)
print(ydl_train.shape)

(2090, 12)
(2090, 4)


RNN

In [25]:
#RNN Model17000
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, Embedding, Dropout

modelrnn=Sequential()

modelrnn.add(Embedding(480,256))

modelrnn.add(SimpleRNN(256,activation='tanh',return_sequences=True,))

modelrnn.add(SimpleRNN(256,activation='tanh'))

modelrnn.add(Dense(5,activation='softmax'))

In [26]:
#Model derlemesi

modelrnn.compile(loss='categorical_crossentropy',optimizer ='adam',metrics=['accuracy'])
modelrnn.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 256)         122880    
                                                                 
 simple_rnn (SimpleRNN)      (None, None, 256)         131328    
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 256)               131328    
                                                                 
 dense_1 (Dense)             (None, 5)                 1285      
                                                                 
Total params: 386821 (1.48 MB)
Trainable params: 386821 (1.48 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [27]:

#model eğitimi
historyrnn = modelrnn.fit(xdl_train, ydl_train, epochs=10, batch_size=32, verbose=2,validation_data=(xdl_test,ydl_test))

Epoch 1/10


ValueError: in user code:

    File "c:\Users\akinb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1401, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\akinb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1384, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\akinb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1373, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\akinb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1151, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "c:\Users\akinb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1209, in compute_loss
        return self.compiled_loss(
    File "c:\Users\akinb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\compile_utils.py", line 277, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "c:\Users\akinb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\losses.py", line 143, in __call__
        losses = call_fn(y_true, y_pred)
    File "c:\Users\akinb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\losses.py", line 270, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "c:\Users\akinb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\losses.py", line 2221, in categorical_crossentropy
        return backend.categorical_crossentropy(
    File "c:\Users\akinb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\backend.py", line 5573, in categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)

    ValueError: Shapes (None, 4) and (None, 5) are incompatible


In [None]:
#model test
scorernntest = modelrnn.evaluate(xdl_test,ydl_test)

print("Test Loss:",scorernntest[0])
print("Test Accuracy:",scorernntest[1])

In [None]:
#from sklearn.metrics import accuracy_score
#from sklearn.metrics import precision_recall_fscore_support
#from sklearn.linear_model import LogisticRegression
#from sklearn.metrics import roc_auc_score

#resultrnn = modelrnn.predict(xdl_test)
#clf = LogisticRegression(solver="liblinear").fit(xdl_train, ydl_train)
#aucrnn = roc_auc_score(ydl_test, clf.predict_proba(xdl_test), multi_class='ovr')
#print(aucrnn)

#precision_recall_fscore_support(ydl_test, resultrnn, average='macro')

LSTM


In [None]:
#LSTM 
from keras.layers import LSTM

modellstm=Sequential()

modellstm.add(Embedding(500,256))

modellstm.add(LSTM(256,activation='tanh',return_sequences=True))

modellstm.add(LSTM(256,activation='tanh'))

modellstm.add(Dense(3,activation='softmax'))

In [None]:
#Model derlemesi
modellstm.compile(loss='categorical_crossentropy',optimizer ='adam',metrics=['accuracy'])
modellstm.summary()

In [None]:
#model eğitimi
historylstm = modellstm.fit(xdl_train,ydl_train,epochs=10, batch_size=32,verbose=1,validation_data=(xdl_test,ydl_test))

In [None]:
#model test

scorelstmtest = modellstm.evaluate(xdl_test,ydl_test)

print("Test Loss:",scorelstmtest[0])
print("Test Accuracy:",scorelstmtest[1])

In [None]:
#from sklearn.metrics import accuracy_score
#from sklearn.metrics import precision_recall_fscore_support
#from sklearn.linear_model import LogisticRegression
#from sklearn.metrics import roc_auc_score

#resultrnn = model.predict(x_test)
#clf = LogisticRegression(solver="liblinear").fit(xdl_train, ydl_train)
#aucrnn = roc_auc_score(ydl_test, clf.predict_proba(xdl_test), multi_class='ovr')
#print(aucrnn)

#precision_recall_fscore_support(ydl_test, resultrnn, average='macro')

# Kelime Yerleştirme Yöntemleri : Word2Vec, Glove

Word2Vec

In [28]:
import numpy as np 
from gensim.models.word2vec import Word2Vec 
from keras.layers import  Dense,Dropout,Conv1D,MaxPool1D,GlobalMaxPool1D,Activation,LSTM
from keras.models import Sequential
from gensim.parsing.preprocessing import remove_stopwords
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt 
import seaborn as sns 

words = []
for i in x:
    words.append(i.split())

print(words[0])

word2vec_model = Word2Vec(words, window = 3, min_count=1, workers=16)
print(word2vec_model)

modelw2v = Sequential()

modelw2v.add(LSTM(units=150))
modelw2v.add(Dense(3,activation='sigmoid'))

['cinema', 'kapat']
Word2Vec<vocab=691, vector_size=100, alpha=0.025>


In [29]:
modelw2v.compile(optimizer='adam', loss='categorical_crossentropy',metrics=['accuracy'])
modelw2v.summary()

ValueError: This model has not yet been built. Build the model first by calling `build()` or by calling the model on a batch of data.

In [30]:
historyw2v = modelw2v.fit(xdl_train,ydl_train,batch_size=16,epochs=5,validation_data=(xdl_test,ydl_test))

Epoch 1/5


ValueError: in user code:

    File "c:\Users\akinb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1401, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\akinb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1384, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\akinb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1373, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\akinb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1150, in train_step
        y_pred = self(x, training=True)
    File "c:\Users\akinb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "c:\Users\akinb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\input_spec.py", line 235, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer 'sequential_3' (type Sequential).
    
    Input 0 of layer "lstm_1" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 12)
    
    Call arguments received by layer 'sequential_3' (type Sequential):
      • inputs=tf.Tensor(shape=(None, 12), dtype=float32)
      • training=True
      • mask=None


In [None]:
scorew2v = modelw2v.evaluate(xdl_test,ydl_test,verbose=0)

print('Test Score : ',scorew2v[0])
print('Test accuracy : ',scorew2v[1])

In [None]:
#from sklearn.metrics import accuracy_score
#from sklearn.metrics import precision_recall_fscore_support
#from sklearn.linear_model import LogisticRegression
#from sklearn.metrics import roc_auc_score

#resultw2v= modelw2v.predict(xdl_test)
#clf = LogisticRegression(solver="liblinear").fit(xdl_train, ydl_train)
#aucw2v = roc_auc_score(ydl_test, clf.predict_proba(xdl_test), multi_class='ovr')
#print(aucw2v)

#precision_recall_fscore_support(ydl_test, resultw2v, average='macro')

# Değerlendirme Metrikleri : Accuracy, F-Measure, Precision, Recall, Sensitivity, AUC, Mattheww, Correlation Coefficent¶

#Deep Learning Ezberlemediğinin Grafiği

In [None]:
import matplotlib.pyplot as plt
fig,ax =plt.subplots(2,1)
ax[0].plot(historymlp.history['loss'],color='b',label='Training loss')
ax[0].plot(historymlp.history['val_loss'],color='r',label='Validation loss',axes=ax[0])
legend=ax[0].legend(loc='best',shadow=True)

ax[1].plot(historymlp.history['accuracy'],color='b',label='Training accuracy')
ax[1].plot(historymlp.history['val_accuracy'],color='r',label='Validation accuracy')
legend=ax[1].legend(loc='best',shadow=True)

In [None]:
import matplotlib.pyplot as plt
fig,ax =plt.subplots(2,1)
ax[0].plot(historyrnn.history['loss'],color='b',label='Training loss')
ax[0].plot(historyrnn.history['val_loss'],color='r',label='Validation loss',axes=ax[0])
legend=ax[0].legend(loc='best',shadow=True)

ax[1].plot(historyrnn.history['accuracy'],color='b',label='Training accuracy')
ax[1].plot(historyrnn.history['val_accuracy'],color='r',label='Validation accuracy')
legend=ax[1].legend(loc='best',shadow=True)

In [None]:
import matplotlib.pyplot as plt
fig,ax =plt.subplots(2,1)
ax[0].plot(historylstm.history['loss'],color='b',label='Training loss')
ax[0].plot(historylstm.history['val_loss'],color='r',label='Validation loss',axes=ax[0])
legend=ax[0].legend(loc='best',shadow=True)

ax[1].plot(historylstm.history['accuracy'],color='b',label='Training accuracy')
ax[1].plot(historylstm.history['val_accuracy'],color='r',label='Validation accuracy')
legend=ax[1].legend(loc='best',shadow=True)