#### Import Dataset

In [6]:
import os

import pandas as pd

# Build the path with os.path.join instead of a "Data\data.csv" literal:
# "\d" is not a valid string escape (SyntaxWarning on recent Python versions)
# and a hard-coded backslash only works as a separator on Windows.
data = pd.read_csv(os.path.join("Data", "data.csv"), sep=",", encoding="utf-8")
print(data.head())

                                           sentence  label
0                                   Cinema 4D kapat      3
1                       PowerPoint  uygulamasını aç      3
2  Şebnem Ferah Eşkıya Dünyaya Hükümdar Olmaz oynat      0
3                           Ebru Gündeş Üç Kalp çal      0
4                          Kahramanmaraş hava nasıl      1


#### Clean Dataset

In [7]:
import nltk
from nltk.corpus import stopwords 

# Fetch the NLTK stop-word corpus (reports "already up-to-date" when it is present locally).
nltk.download('stopwords')
# Turkish stop words; preprocess_text drops every token found in this list.
stop_word_list = stopwords.words('turkish')

import re

def preprocess_text(sen):
    """Normalise one raw sentence before TF-IDF vectorisation.

    The sentence is stripped of digits and punctuation, lower-cased,
    stripped of single-character words and finally of Turkish stop words.

    Args:
        sen: Raw sentence. It is coerced with ``str()`` so that non-string
            cells do not raise.

    Returns:
        str: The remaining lower-case tokens joined by single spaces.
    """
    # Replace digits and every whitespace character with a plain space.
    sentence = re.sub(r'[\d\s]', ' ', str(sen))
    # Replace punctuation (anything that is neither a word char nor whitespace).
    sentence = re.sub(r'[^\w\s]', ' ', sentence)

    # Lower-case BEFORE stop-word filtering: the stop-word list is lower-case,
    # so a capitalised stop word (e.g. at the start of a sentence) would
    # otherwise slip through.
    # 'İ' is mapped by hand because str.lower() turns it into 'i' followed by a
    # combining dot (U+0307), which is not a word character and would later
    # split the token. Plain 'I' is left to str.lower() on purpose, since the
    # data also contains non-Turkish product names.
    sentence = sentence.replace('İ', 'i').lower()

    # Drop single-character words.
    sentence = re.sub(r'\b\w\b', ' ', sentence)
    # Collapse runs of whitespace into one space.
    sentence = re.sub(r'\s+', ' ', sentence)

    # Remove stop words.
    tokens = nltk.WordPunctTokenizer().tokenize(sentence)
    filtered_tokens = [token for token in tokens if token not in stop_word_list]
    return ' '.join(filtered_tokens)

# Targets are the integer labels; features are the cleaned sentences.
y = data['label']
x = data['sentence'].apply(preprocess_text)

# Preview the first rows of the cleaned text, then of the labels.
for _preview in (x, y):
    print(_preview.head())

0                                        cinema kapat
1                          powerpoint uygulamasını aç
2    şebnem ferah eşkıya dünyaya hükümdar olmaz oynat
3                             ebru gündeş üç kalp çal
4                                  kahramanmaraş hava
Name: sentence, dtype: object
0    3
1    3
2    0
3    0
4    1
Name: label, dtype: int64


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\akinb\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


#### Kelimelerin Vektörlere Dönüştürülmesi (TF-IDF)

In [8]:
import os

from sklearn.feature_extraction.text import TfidfVectorizer
import joblib

# lowercase=False: preprocess_text has already lower-cased the sentences.
# NOTE(review): the vectorizer is fitted on the whole corpus before the
# train/test split that follows, so the IDF weights also see the test
# sentences. Consider fitting on the training split only.
vect = TfidfVectorizer(analyzer='word', lowercase=False)
vect.fit(x)
# os.path.join replaces the "Models\svmvectorizer.pkl" literal: "\s" is not a
# valid string escape, and the backslash is a separator only on Windows.
joblib.dump(vect, os.path.join("Models", "svmvectorizer.pkl"))
sent_vector = vect.transform(x)


# Veri Setinin Test ve Train olarak ayrılması

In [9]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    sent_vector,
    y,
    test_size=0.2,
    random_state=0,
)
# Show the training features (sparse TF-IDF matrix) and then the training labels.
for _part in (x_train, y_train):
    print(_part)

  (0, 530)	0.4715683662502314
  (0, 474)	0.4295938445123894
  (0, 230)	0.7701119429726776
  (1, 542)	0.4162220011764153
  (1, 425)	0.3012769712441693
  (1, 413)	0.7570571435015578
  (1, 302)	0.2690330677239423
  (1, 185)	0.3007941526674786
  (2, 651)	0.4089952460071913
  (2, 535)	0.6092838017010285
  (2, 32)	0.6793350702917222
  (3, 542)	0.3605684814537448
  (3, 532)	0.41601339635972256
  (3, 425)	0.2609928829121422
  (3, 302)	0.2330603486021609
  (3, 290)	0.41601339635972256
  (3, 273)	0.5775140979554452
  (3, 185)	0.26057462255943864
  (4, 511)	0.3877063603244278
  (4, 455)	0.2146755538652099
  (4, 452)	0.3877063603244278
  (4, 445)	0.4792241314797847
  (4, 333)	0.4404207380625115
  (4, 148)	0.4792241314797847
  (5, 596)	0.6004636592286434
  :	:
  (2084, 561)	0.648891541589256
  (2084, 536)	0.648891541589256
  (2084, 530)	0.3973405774745865
  (2085, 332)	0.5113058235424321
  (2085, 131)	0.8593988333780744
  (2086, 542)	0.4321593801961708
  (2086, 521)	0.7347243647124332
  (2086, 425)

#### Geleneksel Makine Öğrenmesi Yöntemi : SVM

In [10]:
import os

from sklearn.svm import SVC

# Linear-kernel support vector classifier with regularisation strength C=0.5.
svc = SVC(C=0.5, kernel='linear')

svc.fit(x_train, y_train)
# os.path.join replaces the "Models\svcmodel.pkl" literal: "\s" is not a valid
# string escape, and the backslash is a separator only on Windows. On Windows
# the resulting path (and the value echoed by this cell) is unchanged.
joblib.dump(svc, os.path.join("Models", "svcmodel.pkl"))

['Models\\svcmodel.pkl']

In [11]:
# Predict labels for the held-out rows with the fitted SVM; the metrics cell
# that follows compares these predictions against y_test.
resultsvm = svc.predict(x_test)

In [12]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import label_binarize

# NOTE(review): this logistic-regression model is unrelated to the SVM and is
# no longer used for the AUC. It is still fitted only so that the name `clf`
# stays defined for any other cell that may use it; drop it if none does.
clf = LogisticRegression(solver="liblinear").fit(x_train, y_train)

# AUC of the SVM itself. SVC (without probability=True) has no predict_proba,
# so the one-vs-rest decision scores are evaluated against binarised labels
# and macro-averaged over the classes. Columns follow svc.classes_.
# Assumes more than two classes (labels 0, 1 and 3 appear in the data preview).
svm_scores = svc.decision_function(x_test)
y_test_onehot = label_binarize(y_test, classes=svc.classes_)
aucsvm = roc_auc_score(y_test_onehot, svm_scores, average='macro')
print(aucsvm)
accsvm = accuracy_score(y_test, resultsvm)
print(accsvm)

# Macro-averaged (precision, recall, F-score, support) shown as the cell output.
precision_recall_fscore_support(y_test, resultsvm, average='macro')

1.0
1.0


(1.0, 1.0, 1.0, None)

# Değerlendirme Metrikleri: Accuracy, F-Measure, Precision, Recall, Sensitivity, AUC, Matthews Correlation Coefficient

# Deep Learning Modellerinin Ezberlemediğini Gösteren Grafikler

In [None]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(2, 1)
# Top panel: loss; bottom panel: accuracy. Blue = training, red = validation.
for panel, metric in zip(ax, ('loss', 'accuracy')):
    panel.plot(historymlp.history[metric], color='b', label=f'Training {metric}')
    # No `axes=` keyword is needed: panel.plot already draws on that panel.
    panel.plot(historymlp.history[f'val_{metric}'], color='r', label=f'Validation {metric}')
    panel.set_ylabel(metric.capitalize())
    legend = panel.legend(loc='best', shadow=True)
# Title the figure so it can be told apart from the other models' curves.
ax[0].set_title('MLP: training vs. validation')
# Presumably one history entry per training epoch; TODO confirm.
ax[1].set_xlabel('Epoch')

In [None]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(2, 1)
# Top panel: loss; bottom panel: accuracy. Blue = training, red = validation.
for panel, metric in zip(ax, ('loss', 'accuracy')):
    panel.plot(historyrnn.history[metric], color='b', label=f'Training {metric}')
    # No `axes=` keyword is needed: panel.plot already draws on that panel.
    panel.plot(historyrnn.history[f'val_{metric}'], color='r', label=f'Validation {metric}')
    panel.set_ylabel(metric.capitalize())
    legend = panel.legend(loc='best', shadow=True)
# Title the figure so it can be told apart from the other models' curves.
ax[0].set_title('RNN: training vs. validation')
# Presumably one history entry per training epoch; TODO confirm.
ax[1].set_xlabel('Epoch')

In [None]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(2, 1)
# Top panel: loss; bottom panel: accuracy. Blue = training, red = validation.
for panel, metric in zip(ax, ('loss', 'accuracy')):
    panel.plot(historylstm.history[metric], color='b', label=f'Training {metric}')
    # No `axes=` keyword is needed: panel.plot already draws on that panel.
    panel.plot(historylstm.history[f'val_{metric}'], color='r', label=f'Validation {metric}')
    panel.set_ylabel(metric.capitalize())
    legend = panel.legend(loc='best', shadow=True)
# Title the figure so it can be told apart from the other models' curves.
ax[0].set_title('LSTM: training vs. validation')
# Presumably one history entry per training epoch; TODO confirm.
ax[1].set_xlabel('Epoch')