#### Import Dataset

In [None]:
import pandas as pd

# Load the semicolon-separated dataset.
# A forward slash is used because the original "Data\data.csv" only resolved on
# Windows, and "\d" is an invalid escape sequence in a normal string literal.
data = pd.read_csv("Data/data.csv", sep=";")
print(data.head())

#### Clean Dataset

In [None]:
import nltk
from nltk.corpus import stopwords 

# Download the NLTK 'stopwords' resource, then load its Turkish word list;
# preprocess_text() filters tokens against this list.
nltk.download('stopwords')
stop_word_list = stopwords.words('turkish')

import re

def preprocess_text(sen):
    """Normalise one raw sentence for the downstream vectorisers.

    Steps, in order: replace digits and punctuation with spaces, collapse
    whitespace, drop single-character words, lower-case using Turkish casing
    rules, and finally remove Turkish stop words.

    Args:
        sen: The raw sentence. Any object is accepted; it is converted with
            ``str()`` before processing.

    Returns:
        str: The cleaned, lower-cased sentence, tokens joined by single spaces.
    """
    # Replace digits (and every whitespace character) with a plain space.
    sentence = re.sub(r'[\d\s]', ' ', str(sen))
    # Replace punctuation, i.e. anything that is neither a word character nor whitespace.
    sentence = re.sub(r'[^\w\s]', ' ', sentence)
    # Collapse runs of whitespace into a single space.
    sentence = re.sub(r'\s+', ' ', sentence)
    # Drop single-character words.
    sentence = re.sub(r'\b\w\b', ' ', sentence)

    # Lower-case BEFORE the stop-word lookup: the stop-word list is lower-case,
    # so filtering first let capitalised stop words (e.g. at the start of a
    # sentence) slip through.
    # The two replace() calls apply Turkish casing, which str.lower() does not
    # know about: it maps 'I' to 'i' instead of 'ı', and 'İ' to 'i' followed by
    # a combining dot.
    sentence = sentence.replace('İ', 'i').replace('I', 'ı').lower()

    # Remove stop words.
    tokens = nltk.WordPunctTokenizer().tokenize(sentence)
    return ' '.join(token for token in tokens if token not in stop_word_list)

# Target labels, and the input sentences after cleaning with preprocess_text().
y = data['Tag']
x = data['Sentence'].apply(preprocess_text)

# Preview the first rows of both series.
print(x.head(), y.head(), sep='\n')

#### Transform Labels

In [None]:
# Convert the text labels (neutral, positive, negative) into the integers 0, 1, 2.
from sklearn import preprocessing

# Keep the fitted encoder instead of discarding it: label_encoder.classes_[i]
# is the original text of integer label i, and inverse_transform() can decode
# model predictions back to label text.
label_encoder = preprocessing.LabelEncoder()
y = label_encoder.fit_transform(y)
print(y)
print(dict(enumerate(label_encoder.classes_)))


#### Kelimelerin Vektörlere Dönüştürülmesi(TF/IDF)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Word-level TF-IDF. lowercase=False because preprocess_text() already
# returns lower-cased text.
vect = TfidfVectorizer(lowercase=False, analyzer='word')
# Learn the vocabulary / IDF weights and vectorise the corpus in one call.
sent_vector = vect.fit_transform(x)


In [None]:
import joblib
# Persist the fitted TF-IDF vectoriser to the relative path "svmvectorizer.pkl".
joblib.dump(vect,"svmvectorizer.pkl")

# Veri Setinin Test ve Train olarak ayrılması

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the TF-IDF rows (and their labels) as the test set;
# random_state=0 makes the split repeatable.
x_train,x_test,y_train,y_test = train_test_split(sent_vector,y,test_size=0.2,random_state=0)
print(x_train)
print(y_train)

#### Geleneksel Makine Öğrenmesi Yöntemi : SVM

In [None]:
from sklearn.svm import SVC
import joblib

# The accuracy can be observed to change with the kernel ('linear', 'poly', 'rbf', 'sigmoid').
svc = SVC(C=0.5,kernel='linear')

# Train on the TF-IDF training split, then persist the fitted model to "svcmodel.pkl".
svc.fit(x_train,y_train)
joblib.dump(svc, "svcmodel.pkl")

In [None]:
# Show the held-out TF-IDF matrix that the models are evaluated on.
print(x_test)

In [None]:
# Predicted integer labels of the SVM for the test split.
resultsvm = svc.predict(x_test)
print(resultsvm)

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import label_binarize

# Logistic-regression baseline. Its AUC used to be stored in `aucsvm` and
# reported as if it belonged to the SVM; it is now reported under its own name.
# `clf` is still defined for any later use.
clf = LogisticRegression(solver="liblinear").fit(x_train, y_train)
auclr = roc_auc_score(y_test, clf.predict_proba(x_test), multi_class='ovr')
print("LogisticRegression AUC:", auclr)

# AUC of the SVM itself, from its decision_function scores.
# roc_auc_score's multiclass mode only accepts probabilities that sum to 1, so
# the one-vs-rest macro AUC is computed by binarising the true labels and
# scoring each class column against its decision value.
svm_scores = svc.decision_function(x_test)
if svm_scores.ndim == 1:
    # Binary problem: decision_function returns a single score per sample.
    aucsvm = roc_auc_score(y_test, svm_scores)
else:
    aucsvm = roc_auc_score(label_binarize(y_test, classes=svc.classes_), svm_scores, average='macro')
print(aucsvm)
accsvm = accuracy_score(y_test,resultsvm)
print(accsvm)

# Macro-averaged precision, recall and F-score of the SVM (displayed as the cell output).
precision_recall_fscore_support(y_test, resultsvm, average='macro')

# Yapay Sinir Ağları : MLP

In [None]:
from keras.models import Sequential
from keras.layers import Dense

# Sizes are derived from the data instead of being hard-coded:
#  - the input width must equal the TF-IDF vocabulary size, which changes
#    whenever the corpus or the preprocessing changes (it was fixed at 40065);
#  - the output layer needs one unit per class (it was fixed at 5). The labels
#    are the integers 0..k-1 produced by LabelEncoder, so k = max + 1.
mlp_input_dim = x_train.shape[1]
mlp_n_classes = int(y.max()) + 1

modelmlp = Sequential()

# input_dim is only meaningful on the first layer; later layers infer their
# input size from the layer before them.
modelmlp.add(Dense(600, input_dim=mlp_input_dim, activation='relu'))

modelmlp.add(Dense(600, activation='relu'))

modelmlp.add(Dense(600, activation='relu'))

modelmlp.add(Dense(600, activation='tanh'))

# softmax (not sigmoid) yields one probability distribution over the classes,
# which is what the categorical cross-entropy losses expect.
modelmlp.add(Dense(mlp_n_classes, activation='softmax'))

In [None]:
# The MLP is trained on integer class labels (no one-hot encoding), hence the
# sparse variant of categorical cross-entropy.
modelmlp.compile(loss='sparse_categorical_crossentropy',optimizer = 'adam',metrics =['accuracy'])
modelmlp.summary()

In [None]:
# Train for 10 epochs; the test split is also passed as the validation set.
# The returned History object is plotted in the evaluation section below.
historymlp = modelmlp.fit(x_train, y_train, epochs=10, batch_size=32, verbose=2,validation_data=(x_test,y_test))

In [None]:
# Evaluate the MLP on the test split: index 0 is the loss, index 1 the
# accuracy metric requested in compile().
scoremlptest = modelmlp.evaluate(x_test,y_test)

print("test Loss:",scoremlptest[0])
print("test Accuracy:",scoremlptest[1])


In [None]:
from sklearn.metrics import confusion_matrix

# `resultmlp` was printed without ever being assigned, which raised a
# NameError on a fresh kernel. The network outputs one score per class, so the
# predicted label of each test row is the index of its largest score.
resultmlp = modelmlp.predict(x_test).argmax(axis=1)
print(resultmlp)
print(confusion_matrix(y_test, resultmlp))

# Derin Öğrenme Yöntemleri : RNN, LSTM


In [None]:
# Convert words to numbers
from keras.preprocessing.text import  Tokenizer
from keras.preprocessing.sequence import  pad_sequences

# Build the word index from the cleaned sentences, map every sentence to its
# list of word indices, then pad the lists (default pad_sequences settings).
token = Tokenizer()
token.fit_on_texts(x)
xdl = token.texts_to_sequences(x)
xdl = pad_sequences(xdl)
print(xdl)

In [None]:
# Scale / standardise the data
from sklearn.preprocessing import StandardScaler

# NOTE(review): xdl holds the Tokenizer's word indices, and later cells feed it
# to Embedding layers, which look rows up by integer index. Standardising
# replaces those indices with per-column z-scores, so this step looks
# unintended — confirm before relying on the sequence models.
# The scaler is also fitted on all rows, before the train/test split.
scaler = StandardScaler()
xdl=scaler.fit_transform(xdl)
print(xdl)

In [None]:
# One-hot encoding: the labels 0, 1, 2 become 1,0,0 (neutral) / 0,1,0 (positive) / 0,0,1 (negative).
from keras.utils import  to_categorical

ydl=to_categorical(y)
print(ydl)

In [None]:
# Split the sequence features and one-hot labels with the same test_size and
# random_state as the TF-IDF split above.
xdl_train,xdl_test,ydl_train,ydl_test = train_test_split(xdl,ydl,test_size=0.2,random_state=0)

In [None]:
# Sanity check: both shapes should report the same number of training rows.
print(xdl_train.shape)
print(ydl_train.shape)

RNN

In [None]:
# RNN model
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, Embedding, Dropout

# Sizes are derived from the data instead of being hard-coded:
#  - the embedding table gets one row per Tokenizer index (word indices start
#    at 1; index 0 is the padding value), replacing the fixed 480;
#  - the output layer gets one unit per one-hot label column. The fixed 5
#    disagreed with the LSTM model's 3 although both train on the same `ydl`,
#    and a mismatch makes fit() fail with a shape error.
rnn_vocab_size = len(token.word_index) + 1
rnn_n_classes = ydl.shape[1]

modelrnn = Sequential()

modelrnn.add(Embedding(rnn_vocab_size, 256))

# return_sequences=True so the second recurrent layer receives the whole sequence.
modelrnn.add(SimpleRNN(256, activation='tanh', return_sequences=True))

modelrnn.add(SimpleRNN(256, activation='tanh'))

modelrnn.add(Dense(rnn_n_classes, activation='softmax'))

In [None]:
# Compile the model (categorical cross-entropy, because the labels are one-hot encoded)

modelrnn.compile(loss='categorical_crossentropy',optimizer ='adam',metrics=['accuracy'])
modelrnn.summary()

In [None]:

# Train the model for 10 epochs; the test split is also passed as the validation set.
historyrnn = modelrnn.fit(xdl_train, ydl_train, epochs=10, batch_size=32, verbose=2,validation_data=(xdl_test,ydl_test))

In [None]:
# Test the model: index 0 is the loss, index 1 the accuracy metric requested in compile().
scorernntest = modelrnn.evaluate(xdl_test,ydl_test)

print("Test Loss:",scorernntest[0])
print("Test Accuracy:",scorernntest[1])

In [None]:
#from sklearn.metrics import accuracy_score
#from sklearn.metrics import precision_recall_fscore_support
#from sklearn.linear_model import LogisticRegression
#from sklearn.metrics import roc_auc_score

#resultrnn = modelrnn.predict(xdl_test)
#clf = LogisticRegression(solver="liblinear").fit(xdl_train, ydl_train)
#aucrnn = roc_auc_score(ydl_test, clf.predict_proba(xdl_test), multi_class='ovr')
#print(aucrnn)

#precision_recall_fscore_support(ydl_test, resultrnn, average='macro')

LSTM


In [None]:
# LSTM model
# Everything this cell uses is imported here, so it no longer depends on the
# RNN cell having been run first.
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM

# Sizes are derived from the data instead of being hard-coded:
#  - the embedding table gets one row per Tokenizer index (word indices start
#    at 1; index 0 is the padding value), replacing the fixed 500;
#  - the output layer gets one unit per one-hot label column, replacing the
#    fixed 3.
lstm_vocab_size = len(token.word_index) + 1
lstm_n_classes = ydl.shape[1]

modellstm = Sequential()

modellstm.add(Embedding(lstm_vocab_size, 256))

# return_sequences=True so the second LSTM layer receives the whole sequence.
modellstm.add(LSTM(256, activation='tanh', return_sequences=True))

modellstm.add(LSTM(256, activation='tanh'))

modellstm.add(Dense(lstm_n_classes, activation='softmax'))

In [None]:
# Compile the model (categorical cross-entropy, because the labels are one-hot encoded)
modellstm.compile(loss='categorical_crossentropy',optimizer ='adam',metrics=['accuracy'])
modellstm.summary()

In [None]:
# Train the model for 10 epochs; the test split is also passed as the validation set.
historylstm = modellstm.fit(xdl_train,ydl_train,epochs=10, batch_size=32,verbose=1,validation_data=(xdl_test,ydl_test))

In [None]:
# Test the model: index 0 is the loss, index 1 the accuracy metric requested in compile().

scorelstmtest = modellstm.evaluate(xdl_test,ydl_test)

print("Test Loss:",scorelstmtest[0])
print("Test Accuracy:",scorelstmtest[1])

In [None]:
#from sklearn.metrics import accuracy_score
#from sklearn.metrics import precision_recall_fscore_support
#from sklearn.linear_model import LogisticRegression
#from sklearn.metrics import roc_auc_score

#resultrnn = model.predict(x_test)
#clf = LogisticRegression(solver="liblinear").fit(xdl_train, ydl_train)
#aucrnn = roc_auc_score(ydl_test, clf.predict_proba(xdl_test), multi_class='ovr')
#print(aucrnn)

#precision_recall_fscore_support(ydl_test, resultrnn, average='macro')

# Kelime Yerleştirme Yöntemleri : Word2Vec, Glove

Word2Vec

In [None]:
import numpy as np
from gensim.models.word2vec import Word2Vec
from keras.layers import  Dense,Dropout,Conv1D,MaxPool1D,GlobalMaxPool1D,Activation,LSTM
from keras.models import Sequential
from gensim.parsing.preprocessing import remove_stopwords
from sklearn.metrics import confusion_matrix
from mlxtend.plotting import plot_confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Word2Vec expects an iterable of token lists: split every cleaned sentence on whitespace.
words = [sentence.split() for sentence in x]

print(words[0])

# 150-dimensional vectors, context window of 3; min_count=1 keeps every word.
word2vec_model = Word2Vec(words,size = 150 , window = 3, min_count=1, workers=16)
print(word2vec_model)

modelw2v = Sequential()

# NOTE(review): this embedding is indexed by the Word2Vec vocabulary, while the
# `xdl` sequences this model is trained on come from the Keras Tokenizer, which
# assigns its own indices. The pre-trained vectors therefore may not line up
# with the words they were learned for — confirm and re-encode if needed.
modelw2v.add(word2vec_model.wv.get_keras_embedding(train_embeddings=True))
modelw2v.add(LSTM(units=150))
# One unit per one-hot label column (was a fixed 3), with softmax instead of
# sigmoid so the outputs form a single probability distribution, consistent
# with the categorical cross-entropy loss and with the RNN / LSTM models.
modelw2v.add(Dense(ydl.shape[1], activation='softmax'))

In [None]:
# Categorical cross-entropy, because the labels are one-hot encoded.
modelw2v.compile(optimizer='adam', loss='categorical_crossentropy',metrics=['accuracy'])
modelw2v.summary()

In [None]:
# Train for 5 epochs with batches of 16; the test split is also passed as the validation set.
historyw2v = modelw2v.fit(xdl_train,ydl_train,batch_size=16,epochs=5,validation_data=(xdl_test,ydl_test))

In [None]:
# Evaluate on the test split without a progress bar (verbose=0):
# index 0 is the loss, index 1 the accuracy metric requested in compile().
scorew2v = modelw2v.evaluate(xdl_test,ydl_test,verbose=0)

print('Test Score : ',scorew2v[0])
print('Test accuracy : ',scorew2v[1])

In [None]:
#from sklearn.metrics import accuracy_score
#from sklearn.metrics import precision_recall_fscore_support
#from sklearn.linear_model import LogisticRegression
#from sklearn.metrics import roc_auc_score

#resultw2v= modelw2v.predict(xdl_test)
#clf = LogisticRegression(solver="liblinear").fit(xdl_train, ydl_train)
#aucw2v = roc_auc_score(ydl_test, clf.predict_proba(xdl_test), multi_class='ovr')
#print(aucw2v)

#precision_recall_fscore_support(ydl_test, resultw2v, average='macro')

# Değerlendirme Metrikleri : Accuracy, F-Measure, Precision, Recall, Sensitivity, AUC, Matthews Correlation Coefficient

# Deep Learning Ezberlemediğinin Grafiği

In [None]:
import matplotlib.pyplot as plt

# Learning curves of the MLP: loss on the top panel, accuracy on the bottom.
# A validation curve that drifts away from the training curve indicates
# overfitting (memorising the training data).
# The figure now carries a title and axis labels so it can be told apart from
# the other models' plots; the redundant `axes=` keyword was dropped.
fig, ax = plt.subplots(2, 1)
fig.suptitle('MLP')
for panel, metric in zip(ax, ('loss', 'accuracy')):
    panel.plot(historymlp.history[metric], color='b', label='Training ' + metric)
    panel.plot(historymlp.history['val_' + metric], color='r', label='Validation ' + metric)
    panel.set_ylabel(metric.capitalize())
    legend = panel.legend(loc='best', shadow=True)
ax[1].set_xlabel('Epoch')
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Learning curves of the RNN: loss on the top panel, accuracy on the bottom.
# A validation curve that drifts away from the training curve indicates
# overfitting (memorising the training data).
# The figure now carries a title and axis labels so it can be told apart from
# the other models' plots; the redundant `axes=` keyword was dropped.
fig, ax = plt.subplots(2, 1)
fig.suptitle('RNN')
for panel, metric in zip(ax, ('loss', 'accuracy')):
    panel.plot(historyrnn.history[metric], color='b', label='Training ' + metric)
    panel.plot(historyrnn.history['val_' + metric], color='r', label='Validation ' + metric)
    panel.set_ylabel(metric.capitalize())
    legend = panel.legend(loc='best', shadow=True)
ax[1].set_xlabel('Epoch')
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Learning curves of the LSTM: loss on the top panel, accuracy on the bottom.
# A validation curve that drifts away from the training curve indicates
# overfitting (memorising the training data).
# The figure now carries a title and axis labels so it can be told apart from
# the other models' plots; the redundant `axes=` keyword was dropped.
fig, ax = plt.subplots(2, 1)
fig.suptitle('LSTM')
for panel, metric in zip(ax, ('loss', 'accuracy')):
    panel.plot(historylstm.history[metric], color='b', label='Training ' + metric)
    panel.plot(historylstm.history['val_' + metric], color='r', label='Validation ' + metric)
    panel.set_ylabel(metric.capitalize())
    legend = panel.legend(loc='best', shadow=True)
ax[1].set_xlabel('Epoch')
plt.show()