In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
import nltk
# Fetch the NLTK stopword corpus; it is read later through nltk.corpus.stopwords.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

## LOAD DATASET

In [None]:
# Load the SMS dataset from CSV and preview the first rows.
data = pd.read_csv('dataset_sms_spam_v1.csv')
data.head()

Unnamed: 0,teks,label
0,[PROMO] Beli paket Flash mulai 1GB di MY TELKO...,2
1,2.5 GB/30 hari hanya Rp 35 Ribu Spesial buat A...,2
2,"2016-07-08 11:47:11.Plg Yth, sisa kuota Flash ...",2
3,"2016-08-07 11:29:47.Plg Yth, sisa kuota Flash ...",2
4,4.5GB/30 hari hanya Rp 55 Ribu Spesial buat an...,2


## Text Preprocessing

## Case Folding

In [None]:
import re

# Case-folding helper: lower-case the text and strip URLs, numbers and punctuation
def casefolding(text):
    """Return a lower-cased copy of `text` with noise removed.

    The cleaning passes run in this order on the lower-cased text:
    URLs, then digit runs (with an optional leading sign), then every
    character that is neither a word character nor whitespace. Finally
    leading/trailing whitespace is stripped.

    Each match is replaced by the empty string, so two words separated only
    by punctuation end up joined (e.g. "a&b" becomes "ab"), and inner
    whitespace left behind by a removed token is not collapsed.
    """
    cleaned = text.lower()
    for pattern in (
        r'https?://\S+|www\.\S+',  # URLs
        r'[-+]?[0-9]+',            # numbers, including a leading +/- sign
        r'[^\w\s]',                # punctuation and other symbols
    ):
        cleaned = re.sub(pattern, '', cleaned)
    return cleaned.strip()


In [None]:
# Compare one sample before and after case folding
raw_sample = data['teks'].iloc[2]
case_folding = casefolding(raw_sample)

print('Raw data\t : ',raw_sample)
print('Case Folding\t :', case_folding)

Raw data	 :  2016-07-08 11:47:11.Plg Yth, sisa kuota Flash Anda 478KB. Download MyTelkomsel apps di http://tsel.me/tsel utk cek kuota&beli paket Flash atau hub *363#
Case Folding	 : plg yth sisa kuota flash anda kb download mytelkomsel apps di  utk cek kuotabeli paket flash atau hub


## Word Normalization

In [None]:
key_norm = pd.read_csv('key_norm.csv')

# Cache for the abbreviation lookup, keyed on the identity of the key_norm frame.
_norm_cache = {"frame": None, "lookup": None}


def _norm_lookup():
    """Return {abbreviation: expansion} built from the global `key_norm` frame.

    The dict is rebuilt only when `key_norm` is rebound to a different object.
    For duplicated 'singkat' entries the first row wins, which matches taking
    `.values[0]` of the filtered frame.
    """
    if _norm_cache["frame"] is not key_norm:
        first_rows = key_norm.drop_duplicates(subset="singkat", keep="first")
        _norm_cache["lookup"] = dict(zip(first_rows["singkat"], first_rows["hasil"]))
        _norm_cache["frame"] = key_norm
    return _norm_cache["lookup"]


def text_normalize(text, lookup=None):
    """Replace abbreviations in `text` by their expansion and lower-case the result.

    Parameters
    ----------
    text : str
        Whitespace-separated text; each token is looked up as-is.
    lookup : dict, optional
        Mapping from abbreviation to replacement. When omitted, the mapping is
        derived from the 'singkat' and 'hasil' columns of the global `key_norm`.

    Returns
    -------
    str
        Tokens joined by single spaces, with known abbreviations replaced,
        lower-cased.
    """
    # A dict lookup per word replaces two full scans of the key_norm frame per word.
    table = _norm_lookup() if lookup is None else lookup
    normalized = ' '.join(table.get(word, word) for word in text.split())
    return normalized.lower()

In [None]:
# Compare one sample before and after word normalization

raw_data = data['teks'].iloc[696]
# Normalize the same row that is printed as "Raw Data"; the previous version
# normalized a leftover `case_folding` value computed from a different row.
word_normal = text_normalize(casefolding(raw_data))

print('Raw Data\t :', raw_data)
print('Word Normalize\t :', word_normal)

Raw Data	 : Btw magicomnya yg sedang Gais, gaada yg gede
Word Normalize	 : pelanggan yang terhormat sisa kuota flash anda kb download mytelkomsel apps di untuk cek kuotabeli paket flash atau hubungi


## Filtering (Stopword Removal)

In [None]:
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords

# Indonesian stopword list shipped with NLTK.
stopwords_ind = stopwords.words('indonesian')

In [None]:
# Number of entries in the stopword list at this point.
len(stopwords_ind)

758

In [None]:
# Inspect the stopword list provided by NLTK

stopwords_ind

['ada',
 'adalah',
 'adanya',
 'adapun',
 'agak',
 'agaknya',
 'agar',
 'akan',
 'akankah',
 'akhir',
 'akhiri',
 'akhirnya',
 'aku',
 'akulah',
 'amat',
 'amatlah',
 'anda',
 'andalah',
 'antar',
 'antara',
 'antaranya',
 'apa',
 'apaan',
 'apabila',
 'apakah',
 'apalagi',
 'apatah',
 'artinya',
 'asal',
 'asalkan',
 'atas',
 'atau',
 'ataukah',
 'ataupun',
 'awal',
 'awalnya',
 'bagai',
 'bagaikan',
 'bagaimana',
 'bagaimanakah',
 'bagaimanapun',
 'bagi',
 'bagian',
 'bahkan',
 'bahwa',
 'bahwasanya',
 'baik',
 'bakal',
 'bakalan',
 'balik',
 'banyak',
 'bapak',
 'baru',
 'bawah',
 'beberapa',
 'begini',
 'beginian',
 'beginikah',
 'beginilah',
 'begitu',
 'begitukah',
 'begitulah',
 'begitupun',
 'bekerja',
 'belakang',
 'belakangan',
 'belum',
 'belumlah',
 'benar',
 'benarkah',
 'benarlah',
 'berada',
 'berakhir',
 'berakhirlah',
 'berakhirnya',
 'berapa',
 'berapakah',
 'berapalah',
 'berapapun',
 'berarti',
 'berawal',
 'berbagai',
 'berdatangan',
 'beri',
 'berikan',
 'berikut'

In [None]:
# Stopword-removal setup

# Extend the stopword list with extra corpus-specific tokens. Only tokens not
# already present are appended, so re-running this cell does not keep growing
# the list with duplicates.
more_stopwords = ['tsel', 'gb', 'rb', 'btw']
stopwords_ind = stopwords_ind + [word for word in more_stopwords if word not in stopwords_ind]

def remove_stop_word(text, stop_words=None):
    """Drop stopwords from whitespace-separated `text`.

    Parameters
    ----------
    text : str
        Input text; tokens are compared exactly as they appear after split().
    stop_words : iterable of str, optional
        Words to remove. Defaults to the global `stopwords_ind` list.

    Returns
    -------
    str
        The remaining tokens joined by single spaces.
    """
    # Build the set once per call so each token is a hash lookup instead of a
    # scan over the whole stopword list.
    blocked = set(stopwords_ind if stop_words is None else stop_words)
    return " ".join(word for word in text.split() if word not in blocked)



In [None]:
# Walk one sample through case folding and stopword removal
raw_sample = data['teks'].iloc[696]
case_folding = casefolding(raw_sample)
stopwords_removal = remove_stop_word(case_folding)

# Print this cell's own sample rather than a variable left over from another cell.
print('Raw Data \t\t :', raw_sample)
print('Case Folding \t\t :', case_folding)
print('Stop Removal \t\t', stopwords_removal)

Raw Data 		 : Btw magicomnya yg sedang Gais, gaada yg gede
Case Folding 		 : btw magicomnya yg sedang gais gaada yg gede
Stop Removal 		 magicomnya yg gais gaada yg gede


## Stemming

In [None]:
!pip -q install sastrawi

In [None]:
# Reduce words to their root form (stemming)

from Sastrawi.Stemmer.StemmerFactory import StemmerFactory

# Create the stemmer once; stemming() reuses this instance.
factory = StemmerFactory()
stemmer = factory.create_stemmer()

# Stemming helper for Indonesian text
def stemming(text):
    """Return `text` after passing it through the global Sastrawi `stemmer`."""
    return stemmer.stem(text)

In [None]:
# Walk one sample through case folding, stopword removal and stemming
raw_sample = data['teks'].iloc[696]
case_folding = casefolding(raw_sample)
stopword_removal = remove_stop_word(case_folding)
text_stemming = stemming(stopword_removal)

# Each label now prints the value of its own stage: previously "Case Folding"
# showed the stopword-removed text and "Stopword Removal" showed a variable
# left over from an earlier cell.
print('Raw Data \t\t :', raw_sample)
print('Case Folding \t\t :', case_folding)
print('Stopword Removal \t\t :', stopword_removal)
print('Stemming \t\t :', text_stemming)

Raw Data 		 : Btw magicomnya yg sedang Gais, gaada yg gede
Case Folding 		 : magicomnya yg gais gaada yg gede
Stopword Removal 		 : magicomnya yg gais gaada yg gede
Stemming 		 : magicomnya yg gais gaada yg gede


## Text Preprocessing Pipeline


In [None]:
# Single entry point that chains every text preprocessing step

def text_preprocessing_process(text):
    """Run the full cleaning pipeline on one message and return the result.

    Steps, in order: casefolding -> text_normalize -> remove_stop_word -> stemming.
    """
    for step in (casefolding, text_normalize, remove_stop_word, stemming):
        text = step(text)
    return text


In [None]:
%%time
# Apply the full preprocessing pipeline to every message; the recorded run took about 5 minutes.
data['clean_teks']= data['teks'].apply(text_preprocessing_process)

CPU times: user 4min 59s, sys: 695 ms, total: 4min 59s
Wall time: 5min 6s


In [None]:
# Show the frame with the new clean_teks column next to the raw text.
data

Unnamed: 0,teks,label,clean_teks
0,[PROMO] Beli paket Flash mulai 1GB di MY TELKO...,2,promo beli paket flash my telkomsel app extra ...
1,2.5 GB/30 hari hanya Rp 35 Ribu Spesial buat A...,2,rupiah ribu spesial pilih aktif promo sd novem...
2,"2016-07-08 11:47:11.Plg Yth, sisa kuota Flash ...",2,langgan hormat sisa kuota flash kb download my...
3,"2016-08-07 11:29:47.Plg Yth, sisa kuota Flash ...",2,langgan hormat sisa kuota flash kb download my...
4,4.5GB/30 hari hanya Rp 55 Ribu Spesial buat an...,2,rupiah ribu spesial pilih aktif buru skb
...,...,...,...
1138,"Yooo sama2, oke nanti aku umumin di grup kelas",0,yooo oke umumin grup kelas
1139,😁 sebelumnya ga ad nulis kerudung. Kirain warn...,0,nulis kerudung kirain warna jins
1140,Mba mau kirim 300 ya,0,mbak kirim ya
1141,nama1 beaok bwrangkat pagi...mau cas atay tra...,0,nama beaok bwrangkat pagimau cas atay tranfer


In [None]:
# Save the preprocessed data to a CSV file

data.to_csv('clean_data.csv')

## Feature Engineering

In [None]:
# Separate the feature column (cleaned text) from the target column (label)

x = data['clean_teks']
y = data['label']


In [None]:
# x holds the cleaned text of every message
x

0       promo beli paket flash my telkomsel app extra ...
1       rupiah ribu spesial pilih aktif promo sd novem...
2       langgan hormat sisa kuota flash kb download my...
3       langgan hormat sisa kuota flash kb download my...
4                rupiah ribu spesial pilih aktif buru skb
                              ...                        
1138                           yooo oke umumin grup kelas
1139                     nulis kerudung kirain warna jins
1140                                        mbak kirim ya
1141        nama beaok bwrangkat pagimau cas atay tranfer
1142                                       nomor bri nama
Name: clean_teks, Length: 1143, dtype: object

In [None]:
# y holds the label of every message
y

0       2
1       2
2       2
3       2
4       2
       ..
1138    0
1139    0
1140    0
1141    0
1142    0
Name: label, Length: 1143, dtype: int64

## Feature Extraction (TF-IDF dan N-Gram)

In [None]:
# Used to persist artefacts
import pickle

# TF-IDF
from sklearn.feature_extraction.text import TfidfVectorizer

# Unigram features only: learn the vocabulary and IDF weights from the cleaned
# text and build the document-term matrix in one pass.
vec_TF_IDF = TfidfVectorizer(ngram_range=(1,1))
x_tf_idf = vec_TF_IDF.fit_transform(x)

# The file keeps its existing ".csv" name, but its content is a pickle of the
# vocabulary dict. The context manager closes the handle once the dump is done.
with open("feature_tf-idf.csv", "wb") as vocab_file:
    pickle.dump(vec_TF_IDF.vocabulary_, vocab_file)


In [None]:
# Show the TF-IDF vocabulary (term -> column index)
vec_TF_IDF.vocabulary_

{'promo': 2295,
 'beli': 323,
 'paket': 2088,
 'flash': 870,
 'my': 1880,
 'telkomsel': 2875,
 'app': 162,
 'extra': 841,
 'kuota': 1549,
 'lte': 1652,
 'telpon': 2878,
 'mnthr': 1831,
 'buru': 480,
 'cek': 521,
 'tselmemytsel': 3013,
 'sk': 2691,
 'rupiah': 2503,
 'ribu': 2452,
 'spesial': 2750,
 'pilih': 2175,
 'aktif': 66,
 'sd': 2557,
 'november': 1989,
 'langgan': 1577,
 'hormat': 1116,
 'sisa': 2684,
 'kb': 1410,
 'download': 752,
 'mytelkomsel': 1882,
 'apps': 167,
 'kuotabeli': 1550,
 'hubung': 1140,
 'skb': 2692,
 'ekstra': 804,
 'pulsa': 2332,
 'internet': 1220,
 'bulan': 466,
 'sjk': 2690,
 'augsept': 217,
 'detail': 665,
 'iring': 1242,
 'tarif': 2841,
 'panjang': 2101,
 'hits': 1105,
 'armada': 180,
 'curi': 600,
 'hati': 1069,
 'tekan': 2869,
 'okcall': 2041,
 'informasi': 1191,
 'eks': 801,
 'loh': 1639,
 'internetan': 1221,
 'pakai': 2086,
 'volume': 3129,
 'ultima': 3066,
 'mbhr': 1738,
 'harga': 1057,
 'tariflokasi': 2843,
 'tselmefl': 3011,
 'coboy': 568,
 'jr': 1329

In [None]:
# Number of features in the TF-IDF vocabulary
print(len(vec_TF_IDF.get_feature_names_out()))


3253


In [None]:
# List the feature names found in the corpus
print(vec_TF_IDF.get_feature_names_out())

['aa' 'aamiiiin' 'aamiin' ... 'zjt' 'zona' 'ztkm']


In [None]:

# Dense TF-IDF matrix as a DataFrame, one column per vocabulary term.
x1 = vec_TF_IDF.transform(x).toarray()
data_tabular_tf_idf = pd.DataFrame(x1, columns=vec_TF_IDF.get_feature_names_out())

# Display the DataFrame
data_tabular_tf_idf

Unnamed: 0,aa,aamiiiin,aamiin,ab,abadi,abai,abbee,abdul,acara,acaratks,...,yudisium,yuk,yuks,yuni,yunit,zalora,zarkasi,zjt,zona,ztkm
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1138,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1139,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1140,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1141,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Feature Selection

In [None]:
# NOTE(review): despite the names, x_train / y_train here hold the FULL dataset;
# the train/test split only happens later in the notebook, so the chi-square
# selector below is fitted on rows that later land in the test set. Confirm
# this is acceptable for the reported metrics.
x_train = np.array(data_tabular_tf_idf)
y_train = np.array(y)

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

# Keep the 3000 features with the highest chi-square score against the label.
chi2_feature = SelectKBest(chi2, k=3000)
x_kbest_feature = chi2_feature.fit_transform(x_train, y_train)

# Report the feature count before and after selection
print('Original Feature Number', x_train.shape[1])
print('Reduced Feature Number', x_kbest_feature.shape[1])




Original Feature Number 3253
Reduced Feature Number 3000


In [None]:
# Chi-square score of every feature, in vocabulary column order.
Data = pd.DataFrame(chi2_feature.scores_,columns=['Nilai'])
Data

Unnamed: 0,Nilai
0,0.835608
1,0.419698
2,1.558607
3,0.716455
4,0.800674
...,...
3248,1.180239
3249,0.503162
3250,0.716455
3251,2.918739


In [None]:
# Show every feature name next to its chi-square score

feature = vec_TF_IDF.get_feature_names_out()
Data = pd.DataFrame({'Nilai': chi2_feature.scores_, 'Fitur': feature})
Data

Unnamed: 0,Nilai,Fitur
0,0.835608,aa
1,0.419698,aamiiiin
2,1.558607,aamiin
3,0.716455,ab
4,0.800674,abadi
...,...,...
3248,1.180239,zalora
3249,0.503162,zarkasi
3250,0.716455,zjt
3251,2.918739,zona


In [None]:
# Rank the features from highest to lowest score
Data.sort_values(by='Nilai', ascending=False)

Unnamed: 0,Nilai,Fitur
2088,49.080686,paket
1030,46.356893,hadiah
1549,45.614332,kuota
2178,39.469258,pin
1486,34.488003,klik
...,...,...
1520,0.044714,kopi
307,0.044468,bca
1694,0.031579,maksimal
531,0.013783,cepat


In [None]:
# Boolean mask over the original features: True where the selector kept the feature.
mask = chi2_feature.get_support()
mask

array([ True,  True,  True, ...,  True,  True,  True])

In [None]:
# List the features kept by the chi-square selector, i.e. those whose mask entry is True.
# The flag variable is no longer called `bool`, which used to shadow the builtin
# for the rest of the kernel session.
new_feature = [name for keep, name in zip(mask, feature) if keep]

selected_feature = new_feature
print(selected_feature)


['aa', 'aamiiiin', 'aamiin', 'ab', 'abadi', 'abai', 'abbee', 'abdul', 'acaratks', 'account', 'ada', 'adapromo', 'adi', 'adik', 'adison', 'admin', 'administrasi', 'adminlte', 'ado', 'adrian', 'adu', 'aduh', 'advertising', 'aea', 'aesthetic', 'afbe', 'affc', 'afr', 'afrika', 'agam', 'agen', 'agendain', 'agenpulsa', 'ags', 'agst', 'agsts', 'agt', 'agtskinfodlj', 'agua', 'agun', 'agus', 'agust', 'agustuskunjungi', 'ahaha', 'ahub', 'aidzin', 'aigoo', 'air', 'aja', 'ajaa', 'ajaaa', 'ajabri', 'ajak', 'ajeng', 'akang', 'akangteteh', 'akbar', 'akreditasi', 'akses', 'aksi', 'aktif', 'aktifasi', 'aktivasi', 'aktivitas', 'akucintaislam', 'akumulasi', 'akun', 'akurasi', 'akurat', 'alaikum', 'alaikumsaya', 'alaiqum', 'alam', 'alamat', 'alamsyah', 'alesannya', 'algoritma', 'alhamdulillah', 'alhuda', 'ali', 'aliando', 'all', 'allah', 'allahaamiin', 'alphard', 'alquran', 'alur', 'aman', 'amanda', 'ambil', 'amin', 'ampuun', 'an', 'anabdullah', 'anak', 'ananda', 'and', 'anda', 'andaafr', 'andabch', 'anda

In [None]:
# Build a new vocabulary restricted to the selected features

# Set membership avoids scanning the whole selected_feature list for every term.
selected_lookup = set(selected_feature)

# Values are copied from the fitted vocabulary, so they are the ORIGINAL column
# indices, not positions within the reduced matrix.
new_selected_feature = {
    term: index
    for term, index in vec_TF_IDF.vocabulary_.items()
    if term in selected_lookup
}

print(new_selected_feature)


{'promo': 2295, 'beli': 323, 'paket': 2088, 'flash': 870, 'my': 1880, 'telkomsel': 2875, 'app': 162, 'extra': 841, 'kuota': 1549, 'lte': 1652, 'telpon': 2878, 'mnthr': 1831, 'buru': 480, 'cek': 521, 'tselmemytsel': 3013, 'sk': 2691, 'rupiah': 2503, 'ribu': 2452, 'spesial': 2750, 'pilih': 2175, 'aktif': 66, 'sd': 2557, 'november': 1989, 'langgan': 1577, 'hormat': 1116, 'sisa': 2684, 'kb': 1410, 'download': 752, 'mytelkomsel': 1882, 'apps': 167, 'kuotabeli': 1550, 'hubung': 1140, 'skb': 2692, 'ekstra': 804, 'pulsa': 2332, 'internet': 1220, 'bulan': 466, 'sjk': 2690, 'augsept': 217, 'detail': 665, 'iring': 1242, 'tarif': 2841, 'panjang': 2101, 'hits': 1105, 'armada': 180, 'curi': 600, 'hati': 1069, 'tekan': 2869, 'okcall': 2041, 'informasi': 1191, 'eks': 801, 'loh': 1639, 'internetan': 1221, 'pakai': 2086, 'volume': 3129, 'ultima': 3066, 'mbhr': 1738, 'harga': 1057, 'tariflokasi': 2843, 'tselmefl': 3011, 'coboy': 568, 'jr': 1329, 'baru': 283, 'noah': 1964, 'engkau': 816, 'nya': 2010, 'hp'

In [None]:
# Number of terms kept in the reduced vocabulary.
len(new_selected_feature)

3000

In [None]:
# Persist the reduced vocabulary; the context manager closes the file once the dump is done.
with open("new_selected_feature_tf-idf.sav", "wb") as selected_file:
    pickle.dump(new_selected_feature, selected_file)

In [None]:
# Show the selected features as a table, one column per kept term

data_selected_feature = pd.DataFrame(x_kbest_feature, columns=selected_feature)
data_selected_feature

Unnamed: 0,aa,aamiiiin,aamiin,ab,abadi,abai,abbee,abdul,acaratks,account,...,yudisium,yuk,yuks,yuni,yunit,zalora,zarkasi,zjt,zona,ztkm
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1138,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1139,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1140,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1141,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Modeling

In [None]:
 # Alias the chi-square-selected feature matrix for the modelling section.
 selected_x = x_kbest_feature
 selected_x

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [None]:
# import library
import random
from sklearn.model_selection import train_test_split

# import algoritma naive bayes
from sklearn.naive_bayes import MultinomialNB

In [None]:
# Use the selected feature matrix as input and the label column as target,
# then hold out 20% of the rows for testing (fixed random_state for a repeatable split).
x = selected_x
y = data.label

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)


In [None]:
# Show how many rows ended up in the training and test sets
print('Banyaknya X_train : ', len(x_train))
print('Banyaknya X_test : ', len(x_test))
print('Banyaknya y_train : ', len(y_train))
print('Banyaknya y_test : ', len(y_test))

Banyaknya X_train :  914
Banyaknya X_test :  229
Banyaknya y_train :  914
Banyaknya y_test :  229


In [None]:
# Classifier used for training: multinomial Naive Bayes with default settings
text_algorithm = MultinomialNB()

In [None]:
# Fit the classifier on the training split.
model = text_algorithm.fit(x_train, y_train)

In [None]:
# Predict the class of a single raw SMS

data_input = ("Tolong belikan dulu mama pulsa 25rb di no AS baru mama 085245318024 soalnya teman mama kecelakaan, skrg kluarganya hrus dihubungi, nti mama ganti uangnya,penting")
data_input = text_preprocessing_process(data_input)

# Vectorize with the FITTED TF-IDF model and the FITTED chi-square selector so
# the message gets the same vocabulary, IDF weights and column selection the
# classifier was trained on. The previous version built a fresh TfidfVectorizer
# and called fit_transform on this one message, which re-estimated IDF from a
# single document.
# NOTE(review): to predict outside this notebook, persist vec_TF_IDF and
# chi2_feature themselves rather than only the vocabulary dict.
input_features = chi2_feature.transform(vec_TF_IDF.transform([data_input]))

hasil = model.predict(input_features)

# Label codes: 0 and 1 have explicit names, any other code is reported as promo.
label_names = {0: "SMS Normal", 1: "SMS Fraud"}
s = label_names.get(int(hasil[0]), "SMS Promo")

print("Hasil Prediksi : \n", s)

Hasil Prediksi : 
 SMS Fraud


## Evaluasi Model

In [None]:
# Import the metrics needed for the evaluation step
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

# Predict on the held-out test split.
predicted = model.predict(x_test)

# The confusion matrix is stored in CM; this cell does not display it.
CM = confusion_matrix(y_test, predicted)

print(classification_report(y_test, predicted))

              precision    recall  f1-score   support

           0       0.95      0.94      0.95       126
           1       0.89      0.89      0.89        66
           2       0.84      0.86      0.85        37

    accuracy                           0.92       229
   macro avg       0.90      0.90      0.90       229
weighted avg       0.92      0.92      0.92       229



In [None]:
# Save the trained model; the context manager closes the file once the dump is done.
with open("model_fraud.sav", "wb") as model_file:
    pickle.dump(model, model_file)