## Memasukan Library ##

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [6]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\parli\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

## LOAD DATASET ##

In [7]:
data = pd.read_csv('data (2) copy.csv')
data.head()

Unnamed: 0,teks,label
0,Big Data adalah istilah yang menggambarkan dat...,3
1,Data hanyalah angka dan huruf apabila tidak ad...,3
2,Data Analis adalah seseorang yang diminta meme...,3
3,Data analis akan diberikan akses ke data-data ...,3
4,Data Analis memerlukan keterampilan teknik vis...,3


In [8]:
data.tail()


Unnamed: 0,teks,label
1365,"Untuk menghitung jalur rute, pesan topologi se...",1
1366,Untuk mengurangi jumlah paket dan lost bandwid...,1
1367,OLSR menggunakan seperangkat node khusus yang ...,1
1368,Node ini digunakan untuk merelai paket. Setiap...,1
1369,"Pertama, diperkenalkannya set MPRs dan kedua p...",1


## PREPROCESSING ##

## 1. CASE FOLDING ##

In [10]:
import re

#membuat fungsi untuk case folding (pembersihan data)

def casefolding(text):
    text = text.lower()                             #merubah kalimat menjadi huruf kecil
    text = re.sub(r'[-+]?[0-9]+','',text)           #menghapus angka
    text = re.sub(r'[^\w\s]', '', text)             #menghapus simbol/tanda baca
    text = text.strip()                             #menghapus spasi awal dan akhir
    return text

In [11]:
# membandingkan before after case folding

raw_sample = data['teks'].iloc[219]
case_folding = casefolding(raw_sample)

print('Raw Data \t : ', raw_sample)
print('Case Folding\t : ', case_folding)

Raw Data 	 :  Salah satu soft skill penting yang wajib dimiliki seorang business intelligence adalah kemampuan analisis. Selain berkaitan erat dengan data, profesi yang satu ini tak dapat dilepaskan dari analisis. 
Case Folding	 :  salah satu soft skill penting yang wajib dimiliki seorang business intelligence adalah kemampuan analisis selain berkaitan erat dengan data profesi yang satu ini tak dapat dilepaskan dari analisis


## 2. Filtering (STOPWORD REMOVAL)

In [12]:
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords

stopwords_ind = stopwords.words('indonesian')

In [13]:
# Membuat Fungsi Stopword Removal (Menghilangkan kata tidak penting)

def remove_stopword(text):
    clean_words = []
    text = text.split()
    for word in text:
        if word not in stopwords_ind:
            clean_words.append(word)
    return " ".join(clean_words)

In [14]:
raw_sample = data['teks'].iloc[219]
case_folding = casefolding(raw_sample)
stopword_removal = remove_stopword(case_folding)

print('Raw Data\t\t : ', raw_sample)
print('Case Folding\t\t : ', case_folding)
print('Stop Word Removal\t: ', stopword_removal)

Raw Data		 :  Salah satu soft skill penting yang wajib dimiliki seorang business intelligence adalah kemampuan analisis. Selain berkaitan erat dengan data, profesi yang satu ini tak dapat dilepaskan dari analisis. 
Case Folding		 :  salah satu soft skill penting yang wajib dimiliki seorang business intelligence adalah kemampuan analisis selain berkaitan erat dengan data profesi yang satu ini tak dapat dilepaskan dari analisis
Stop Word Removal	:  salah soft skill wajib dimiliki business intelligence kemampuan analisis berkaitan erat data profesi dilepaskan analisis


## 3. STEMMING

In [17]:
# Merubah kata menjadi kata dasar

from Sastrawi.Stemmer.StemmerFactory import StemmerFactory

factory = StemmerFactory()
stemmer = factory.create_stemmer()


# membuat stemming untuk bahasa indonesia
def stemming(text):
    text = stemmer.stem(text)
    return text

In [18]:
#Before after stemming

raw_sample = data['teks'].iloc[219]
case_folding = casefolding(raw_sample)
stopword_removal = remove_stopword(case_folding)
text_stemming = stemming(stopword_removal)

print('Raw Data\t\t : ', raw_sample)
print('Case Folding\t\t : ', case_folding)
print('Stop Word Removal\t: ', stopword_removal)
print('Stemming \t\t : ', text_stemming)

Raw Data		 :  Salah satu soft skill penting yang wajib dimiliki seorang business intelligence adalah kemampuan analisis. Selain berkaitan erat dengan data, profesi yang satu ini tak dapat dilepaskan dari analisis. 
Case Folding		 :  salah satu soft skill penting yang wajib dimiliki seorang business intelligence adalah kemampuan analisis selain berkaitan erat dengan data profesi yang satu ini tak dapat dilepaskan dari analisis
Stop Word Removal	:  salah soft skill wajib dimiliki business intelligence kemampuan analisis berkaitan erat data profesi dilepaskan analisis
Stemming 		 :  salah soft skill wajib milik business intelligence mampu analisis kait erat data profesi lepas analisis


## TEXT PREPROCESSING PIPELINE

In [19]:
# Menggabungkan Langkah langkah PREPROCESSING

def text_preprocessing(text):
    text = casefolding(text)
    text = remove_stopword(text)
    text = stemming(text)
    
    return text


In [20]:
%%time
data['clean_teks'] = data['teks'].apply(text_preprocessing)

CPU times: total: 1min 34s
Wall time: 1min 35s


In [21]:
data

Unnamed: 0,teks,label,clean_teks
0,Big Data adalah istilah yang menggambarkan dat...,3,big data istilah gambar data volume sifat komp...
1,Data hanyalah angka dan huruf apabila tidak ad...,3,data angka huruf tindak olah informasi dapat o...
2,Data Analis adalah seseorang yang diminta meme...,3,data analis pecah masalah analisa data kait
3,Data analis akan diberikan akses ke data-data ...,3,data analis akses datadata butuh transformasi ...
4,Data Analis memerlukan keterampilan teknik vis...,3,data analis terampil teknik visualisasi data s...
...,...,...,...
1365,"Untuk menghitung jalur rute, pesan topologi se...",1,hitung jalur rute pesan topologi kala tingkat ...
1366,Untuk mengurangi jumlah paket dan lost bandwid...,1,kurang paket lost bandwidth
1367,OLSR menggunakan seperangkat node khusus yang ...,1,olsr perangkat node khusus multi point relaysm...
1368,Node ini digunakan untuk merelai paket. Setiap...,1,node relai paket node pilih kumpul node tetang...


In [22]:
# simpan clean_teks (sudah did preprocessing)
data.to_csv('clean_data.csv')

## FEAUTURE ENGINEERING

In [23]:
#Memisahkan kolom fitur dan label
x = data['clean_teks']
y = data['label']

In [24]:
x

0       big data istilah gambar data volume sifat komp...
1       data angka huruf tindak olah informasi dapat o...
2             data analis pecah masalah analisa data kait
3       data analis akses datadata butuh transformasi ...
4       data analis terampil teknik visualisasi data s...
                              ...                        
1365    hitung jalur rute pesan topologi kala tingkat ...
1366                          kurang paket lost bandwidth
1367    olsr perangkat node khusus multi point relaysm...
1368    node relai paket node pilih kumpul node tetang...
1369    kenal set mprs kurang ukur pesan kontrol tingk...
Name: clean_teks, Length: 1370, dtype: object

In [25]:
y

0       3
1       3
2       3
3       3
4       3
       ..
1365    1
1366    1
1367    1
1368    1
1369    1
Name: label, Length: 1370, dtype: int64

## Feature Extraction (TF-IDF dan N-Gram)

In [27]:
# save model
import pickle

#TF-IDF
from sklearn.feature_extraction.text import TfidfVectorizer

#UNIGram
vec_TF_IDF = TfidfVectorizer(ngram_range=(1,1))
vec_TF_IDF.fit(x)

x_tf_idf = vec_TF_IDF.transform(x)

pickle.dump(vec_TF_IDF.vocabulary_,open("Feautue_tf-idf.sav","wb"))

In [28]:
# Menampilkan vocabulary dari tf-idf

vec_TF_IDF.vocabulary_

{'big': 238,
 'data': 402,
 'istilah': 892,
 'gambar': 656,
 'volume': 2304,
 'sifat': 1923,
 'kompleks': 1035,
 'struktur': 2024,
 'tambah': 2065,
 'angka': 103,
 'huruf': 764,
 'tindak': 2154,
 'olah': 1433,
 'informasi': 814,
 'dapat': 394,
 'pakai': 1489,
 'maju': 1224,
 'aktivitas': 50,
 'pribadi': 1623,
 'institusi': 839,
 'organisasi': 1463,
 'usaha': 2265,
 'analis': 84,
 'pecah': 1518,
 'masalah': 1259,
 'analisa': 85,
 'kait': 945,
 'akses': 45,
 'datadata': 405,
 'butuh': 289,
 'transformasi': 2191,
 'model': 1333,
 'ambil': 79,
 'simpul': 1933,
 'visualisasi': 2298,
 'terampil': 2121,
 'teknik': 2095,
 'statistik': 2009,
 'ringkas': 1783,
 'inferensial': 812,
 'presentasi': 1619,
 'komunikasi': 1045,
 'scientist': 1847,
 'algoritma': 65,
 'machine': 1217,
 'learning': 1147,
 'prediksi': 1614,
 'peristiwa': 1554,
 'sulap': 2034,
 'sihir': 1927,
 'cenayang': 311,
 'yaa': 2355,
 'bangun': 194,
 'dasar': 399,
 'nali': 1378,
 'klasifikasi': 1009,
 'pola': 1582,
 'programming': 1

In [29]:
# melihat jumlah feature

print(len(vec_TF_IDF.get_feature_names_out()))

2364


In [30]:
# melihat fitur apa saja yang ada dalam corpus

print(vec_TF_IDF.get_feature_names_out())

['abai' 'absolut' 'abstraksi' ... 'zigbee' 'zona' 'zone']


In [31]:
x1 = vec_TF_IDF.transform(x).toarray()
data_tabular_tf_idf = pd.DataFrame(x1,columns=vec_TF_IDF.get_feature_names_out())

data_tabular_tf_idf

Unnamed: 0,abai,absolut,abstraksi,acak,acara,acces,access,accessadalah,accesspre,accurate,...,ya,yaa,yaitulower,yanti,yogyakarta,yup,zain,zigbee,zona,zone
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1365,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1366,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1367,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1368,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [32]:
data_tabular_tf_idf.iloc[10:20,60:70]

Unnamed: 0,alat,alatalat,alert,algorithm,algorithms,algoritma,algoritmaalgoritma,algoritmanya,algoritme,alir
10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17,0.0,0.0,0.0,0.0,0.0,0.134144,0.0,0.26635,0.0,0.0
18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Feature Selection

In [33]:
x_train = np.array(data_tabular_tf_idf)
y_train = np.array(y)

In [34]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

chi2_features = SelectKBest(chi2, k=2000)
x_kbest_features = chi2_features.fit_transform(x_train,y_train)

# untuk reduced features

print('Original Feature Number' , x_train.shape[1])
print('Reduce Feature Number ', x_kbest_features.shape[1])

Original Feature Number 2364
Reduce Feature Number  2000


In [35]:
Data = pd.DataFrame(chi2_features.scores_,columns=['Nilai'])
Data

Unnamed: 0,Nilai
0,0.916082
1,0.363400
2,0.366807
3,0.304541
4,0.800274
...,...
2359,0.655398
2360,1.198734
2361,9.263965
2362,0.791559


In [36]:
# Menampilkan Feature beserta nilainya 

feature = vec_TF_IDF.get_feature_names_out()
feature

Data['Fitur'] = feature
Data

Unnamed: 0,Nilai,Fitur
0,0.916082,abai
1,0.363400,absolut
2,0.366807,abstraksi
3,0.304541,acak
4,0.800274,acara
...,...,...
2359,0.655398,yup
2360,1.198734,zain
2361,9.263965,zigbee
2362,0.791559,zona


In [37]:
# Mengurutkan nilai fitur terbaik

Data.sort_values(by='Nilai',ascending=False)

Unnamed: 0,Nilai,Fitur
402,99.818488,data
912,93.351756,jaring
695,64.061403,guna
1212,58.485732,lunak
1539,56.741738,perangkat
...,...,...
1392,0.018688,negatif
859,0.015034,interface
183,0.007290,bagi
2216,0.005701,tumbuh


In [38]:
mask = chi2_features.get_support()
mask

array([ True, False, False, ...,  True,  True,  True])

In [39]:
# menampilkan fitur yang terpilih berdasarkan nilai mask/nilai tertinggi yang sudah ditetapkan pada chi square

new_feature =[]
for bool, f in zip(mask,feature):
    if bool :
        new_feature.append(f)
    selected_feature = new_feature
    
selected_feature

['abai',
 'acara',
 'acces',
 'access',
 'accessadalah',
 'accesspre',
 'acl',
 'active',
 'acu',
 'ad',
 'adaboost',
 'adaptasi',
 'adaptif',
 'address',
 'adhoc',
 'adi',
 'admin',
 'adrian',
 'advanced',
 'agam',
 'agency',
 'aggregation',
 'agile',
 'agregasi',
 'agresi',
 'ahli',
 'ahx',
 'ai',
 'air',
 'airflow',
 'airmas',
 'ajar',
 'akademik',
 'akal',
 'akar',
 'akibat',
 'akses',
 'aksesibilitas',
 'aksi',
 'aktifitas',
 'aktivitas',
 'aktual',
 'aku',
 'akurasi',
 'akurat',
 'alam',
 'alamat',
 'alami',
 'alarm',
 'alas',
 'alatalat',
 'alert',
 'algorithms',
 'algoritma',
 'algoritmanya',
 'algoritme',
 'alir',
 'alliance',
 'allocation',
 'alokasi',
 'alur',
 'alya',
 'aman',
 'ambigu',
 'ambil',
 'amerika',
 'anakanak',
 'analis',
 'analisa',
 'analisis',
 'analitik',
 'analogy',
 'analysis',
 'analyst',
 'analysts',
 'analytical',
 'analytics',
 'ancam',
 'ancang',
 'and',
 'andal',
 'andapenyimpanan',
 'aneka',
 'anggap',
 'anggota',
 'angka',
 'angkaangka',
 'angkat',


In [40]:
# membuat vocabulary baru berdasarkan fitur yang terseleksi

new_selected_feature = {}

for (k,v) in vec_TF_IDF.vocabulary_.items():
    if k in selected_feature:
        new_selected_feature[k]=v
        
new_selected_feature

{'big': 238,
 'data': 402,
 'istilah': 892,
 'gambar': 656,
 'volume': 2304,
 'sifat': 1923,
 'kompleks': 1035,
 'struktur': 2024,
 'tambah': 2065,
 'angka': 103,
 'huruf': 764,
 'tindak': 2154,
 'olah': 1433,
 'dapat': 394,
 'pakai': 1489,
 'maju': 1224,
 'aktivitas': 50,
 'pribadi': 1623,
 'organisasi': 1463,
 'usaha': 2265,
 'analis': 84,
 'pecah': 1518,
 'masalah': 1259,
 'analisa': 85,
 'akses': 45,
 'datadata': 405,
 'butuh': 289,
 'transformasi': 2191,
 'model': 1333,
 'ambil': 79,
 'simpul': 1933,
 'visualisasi': 2298,
 'terampil': 2121,
 'teknik': 2095,
 'statistik': 2009,
 'ringkas': 1783,
 'inferensial': 812,
 'presentasi': 1619,
 'komunikasi': 1045,
 'scientist': 1847,
 'algoritma': 65,
 'machine': 1217,
 'learning': 1147,
 'prediksi': 1614,
 'peristiwa': 1554,
 'sulap': 2034,
 'sihir': 1927,
 'cenayang': 311,
 'yaa': 2355,
 'bangun': 194,
 'dasar': 399,
 'klasifikasi': 1009,
 'pola': 1582,
 'adi': 20,
 'beda': 213,
 'analisis': 86,
 'analyst': 90,
 'tanggung': 2076,
 'rise

In [41]:
len(new_selected_feature)

2000

In [42]:
pickle.dump(new_selected_feature, open("new_selected_feature_tf-idf.sav","wb"))

In [43]:
# menampilkan fitur fitur yang sudah diseleksi

data_selected_feature = pd.DataFrame(x_kbest_features, columns=selected_feature)
data_selected_feature

Unnamed: 0,abai,acara,acces,access,accessadalah,accesspre,acl,active,acu,ad,...,xss,ya,yaa,yanti,yogyakarta,yup,zain,zigbee,zona,zone
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1365,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1366,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1367,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1368,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## PEMBUATAN MODEL / MODELING DENGAN NAIVE BAYES

In [44]:
selected_x = x_kbest_features
selected_x

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [47]:
 # import library
 
import random 
from sklearn.model_selection import train_test_split
 
 # import algorithm naive bayes
 
from sklearn.naive_bayes import MultinomialNB
 

In [48]:
x = selected_x
y = data.label

x_train , x_test , y_train , y_test = train_test_split(x,y,test_size=0.2)

In [49]:
# menampilkan jumlah data training dan data testing
print('Banyaknya X-Train : ', len(x_train))
print('Banyaknya Y-Train : ', len(y_train))
print('Banyaknya X-Test : ', len(x_test))
print('Banyaknya Y-Test : ', len(y_test))

Banyaknya X-Train :  1096
Banyaknya Y-Train :  1096
Banyaknya X-Test :  274
Banyaknya Y-Test :  274


In [50]:
# proses training dengan naive bayes
text_algorithm = MultinomialNB()

In [51]:
model = text_algorithm.fit(x_train,y_train)

In [52]:
# membuat model prediksi

data_input = ("Java salah satu bahasa pemrograman untuk mengembangkan perangkat lunak")
data_input = text_preprocessing(data_input)

# load
tfidf = TfidfVectorizer

loaded_vec = TfidfVectorizer(decode_error="replace",vocabulary=set(pickle.load(open("new_selected_feature_tf-idf.sav","rb"))))

hasil = model.predict(loaded_vec.fit_transform([data_input]))

if(hasil==1):
    s = "Jaringan"
elif(hasil==2):
    s = "Perangkat Lunak"
else:
    s = "Data"
    
print("Hasil Prediksi : \n",s)

Hasil Prediksi : 
 Perangkat Lunak


In [53]:
# masukkan library yg dibutuhkan

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

predicted = model.predict(x_test)

CM = confusion_matrix(y_test,predicted)

print(classification_report(y_test,predicted))

              precision    recall  f1-score   support

           1       1.00      0.97      0.99        78
           2       0.94      1.00      0.97       100
           3       1.00      0.96      0.98        96

    accuracy                           0.98       274
   macro avg       0.98      0.98      0.98       274
weighted avg       0.98      0.98      0.98       274



## Pembuata MODEL SVM


In [54]:
# import Algorithm SVC
from sklearn.svm import SVC


In [55]:

model_svm = SVC(kernel="linear",C=1.0,random_state=None)

In [56]:
model_svm.fit(x_train,y_train)

In [57]:
pred = model_svm.predict(x_test)

pred

array([2, 1, 3, 2, 3, 3, 3, 2, 2, 2, 1, 1, 1, 2, 3, 2, 3, 2, 1, 3, 3, 3,
       3, 3, 1, 1, 1, 3, 2, 1, 2, 1, 1, 2, 2, 3, 2, 1, 3, 2, 2, 1, 2, 1,
       1, 2, 3, 2, 2, 2, 3, 2, 1, 3, 2, 1, 3, 3, 3, 2, 2, 3, 2, 1, 3, 3,
       3, 2, 2, 2, 1, 1, 1, 3, 1, 3, 3, 1, 3, 1, 3, 2, 1, 3, 3, 2, 3, 1,
       1, 3, 1, 1, 2, 2, 3, 2, 1, 1, 1, 2, 2, 3, 3, 2, 3, 1, 2, 1, 3, 2,
       3, 3, 3, 3, 2, 3, 1, 3, 1, 3, 1, 2, 2, 1, 2, 3, 2, 2, 1, 2, 1, 3,
       3, 3, 3, 2, 2, 2, 3, 2, 2, 2, 2, 2, 1, 1, 2, 3, 2, 2, 3, 1, 2, 2,
       3, 2, 2, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 3, 3, 2, 1, 3, 3, 2, 2, 2,
       3, 1, 3, 3, 3, 3, 1, 3, 1, 2, 3, 3, 1, 1, 1, 2, 3, 1, 1, 3, 2, 3,
       2, 2, 1, 3, 3, 2, 1, 3, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 2, 3, 2,
       3, 2, 3, 2, 1, 1, 3, 1, 2, 3, 2, 1, 3, 1, 2, 1, 1, 2, 3, 2, 3, 2,
       2, 1, 2, 2, 1, 2, 3, 1, 1, 3, 3, 1, 3, 2, 3, 3, 2, 1, 3, 2, 1, 2,
       2, 3, 1, 2, 3, 3, 3, 1, 2, 2], dtype=int64)

In [58]:
# masukkan library yg dibutuhkan

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

CM = confusion_matrix(y_test,pred)

print(classification_report(y_test,pred))

              precision    recall  f1-score   support

           1       0.99      0.96      0.97        78
           2       0.96      0.98      0.97       100
           3       0.99      0.99      0.99        96

    accuracy                           0.98       274
   macro avg       0.98      0.98      0.98       274
weighted avg       0.98      0.98      0.98       274



In [59]:
#Mmenyimpan model
pickle.dump(model_svm,open("model_klasifikasi_svm.sav","wb"))