In [1]:
# First of all we have an excel file "bank_data.xlsx" , which contains statements of users in the first column.
# The second column consists of the sentences after the statements being stemmed.
# The third column holds the corresponding operation names

In [2]:
# PART 1 : Extracting the data from "bank_data.xlsx"
# First we need to download the OpenPyXL Module lib

In [3]:
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
import openpyxl as xl
from snowballstemmer import TurkishStemmer
import random
import matplotlib.pyplot as plt
from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC

In [4]:
# To open the workbook  
# workbook object is created 
wb_obj = xl.load_workbook("bank_data.xlsx")

# Get workbook active sheet object 
# from the active attribute 
sheet_obj = wb_obj.active 

# Cell objects also have a row, column,  
# and coordinate attributes that provide 
# location information for the cell. 

# Note: The first row or  
# column integer is 1, not 0.

# Cell object is created by using  
# sheet object's cell() method. 
cell_obj = sheet_obj.cell(row = 1, column = 1) 

# Print value of cell object  
# using the value attribute 
print(cell_obj.value, type(cell_obj.value))

raw text <class 'str'>


In [5]:
tot_row = sheet_obj.max_row #the total number of rows, including the first line
tot_col = sheet_obj.max_column #the total number of col

print(tot_row)

4757


In [6]:
all_words = []
documents = []


for i in range(2, tot_row): 
    cell_obj_statement = sheet_obj.cell(row = i, column = 2) 
    cell_obj_intent = sheet_obj.cell(row = i, column = 3)
    if type(cell_obj_statement.value) == str:
        documents.append((cell_obj_statement.value.lower(), cell_obj_intent.value))
        words = word_tokenize(cell_obj_statement.value)
        for w in words:
            all_words.append(w.lower())
            
# option_pool = []
# for i in range (0,len(documents)-1):
#     option_pool.append(documents[:][i][1])

# option_set = set(option_pool)    

# option_dict = {}

# num_opt = 0
# for opt in option_set:
#     option_dict[opt] = num_opt
#     num_opt = num_opt + 1 

In [7]:
all_words = nltk.FreqDist(all_words)
print(len(list(all_words.keys())))

946


In [8]:
word_features = list(all_words.keys())[:200]

def find_features(document):
    words = word_tokenize(document)
    features = {}
    for w in word_features:
        features[w] = (w in words)
    
    return features

featuresets = [(find_features(rev),category) for (rev,category) in documents]
random.shuffle(featuresets)

In [9]:
print(len(featuresets))

4755


In [10]:
random.shuffle(featuresets)

training_set = featuresets[:4000]
testing_set = featuresets[4000:]

In [11]:
classifier = nltk.NaiveBayesClassifier.train(training_set)
print("Original Naive Bayes Algo accuracy", (nltk.classify.accuracy(classifier, testing_set))*100)
classifier.show_most_informative_features(15)

BernoulliNB_classifier = SklearnClassifier(BernoulliNB())
BernoulliNB_classifier.train(training_set)
print("BernoulliNB_classifier accuracy", (nltk.classify.accuracy(BernoulliNB_classifier, testing_set))*100)

SVC_classifier = SklearnClassifier(SVC())
SVC_classifier.train(training_set)
print("SVC_classifier accuracy: ", (nltk.classify.accuracy(SVC_classifier, testing_set))*100)

LinearSVC_classifier = SklearnClassifier(LinearSVC())
LinearSVC_classifier.train(training_set)
print("LinearSVC_classifier accuracy: ", (nltk.classify.accuracy(LinearSVC_classifier, testing_set))*100)

Original Naive Bayes Algo accuracy 59.735099337748345
Most Informative Features
                aktarmak = True           lower_ : irr    =    411.4 : 1.0
                kampanya = True           movie_ : irr    =    301.4 : 1.0
                     tür = True           view_a : irr    =    251.6 : 1.0
                   neden = True             name : irr    =    214.1 : 1.0
                     gün = True           weeken : irr    =    209.0 : 1.0
                   hangi = True             team : irr    =    181.5 : 1.0
                     atm = True           atm_ca : irr    =    169.9 : 1.0
                    vade = True           accoun : irr    =    158.9 : 1.0
                 avantaj = True           accoun : irr    =    137.9 : 1.0
                bulunmak = True           money_ : irr    =    132.4 : 1.0
                  bakiye = True           missin : irr    =    125.2 : 1.0
                      tl = True           accoun : irr    =    122.7 : 1.0
                   z

In [12]:
from nltk.classify import ClassifierI
from statistics import mode

In [13]:
class VoteClassifier(ClassifierI):
    def __init__(self, *classifiers):
        self._classifiers  = classifiers
        
    def classify(self, features):
        votes = []
        for c in self._classifiers:
            v = c.classify(features)
            votes.append(v)
        return mode(votes)
    
    def confidence(self,features):
        votes = []
        for c in self._classifiers:
            v = c.classify(features)
            votes.append(v)
        choice_votes = votes.count(mode(votes))
        conf = choice_votes / len(votes)
        return conf

In [14]:
voted_classifier = VoteClassifier(LinearSVC_classifier)
turkStem=TurkishStemmer()

In [15]:
def sentiment(text):
    e_text = ""
    for w in text.split(" "):
        w = w.lower()
        w = turkStem.stemWord(w) 
        e_text = e_text + " " + w

    feats = find_features(e_text)
    print(e_text)
    
    return voted_classifier.classify(feats), voted_classifier.confidence(feats)

In [16]:
print(sentiment("Kart hesapta para çekiminde kısıtlama var mı"))
print(sentiment("Kart hesaptan otomatik ödeme talimatı oluşturabilir miyim"))
print(sentiment("her şey yolunda gidiyor mu oksi"))
print(sentiment("Kart hesabını tanımlar mısın"))
print(sentiment("su faturamı Kart hesap ile yatırabilir miyim"))
print(sentiment("hesabının herhangi bir geçerlilik süresi var mı"))
print(sentiment("hafta sonu Kart hesaba para yatırabilir miyim"))
print(sentiment("Kart hesaba artı olarak bir hesap açacağım"))

 kart hesap par çekim kısıtla var mı
('expense', 1.0)
 kart hesap otomatik öde talimat oluşturabilir mi
('automatic_payment', 1.0)
 her şey yol gidiyor mu oksi
('whats_up', 1.0)
 kart hesap tanım mı
('expense', 1.0)
 su fatura kart hesap il yatırabilir mi
('payment', 1.0)
 hesap herhangi bir geçerlilik süres var mı
('expense', 1.0)
 haf so kart hesap par yatırabilir mi
('cancel', 1.0)
 kart hesap ar olarak bir hesap açacak
('cancel', 1.0)
