In [1]:
# Implementasi Normalisasi
def norm_data(data):
    '''
    Min-max normalise a sequence of numbers onto the range [0, 1].

    Each value x is mapped to (x - min) / (max - min). The input is never
    modified; a new list is returned.

    Parameters:
        data (list): Numeric values to normalise. Any iterable of numbers
            is accepted.

    Returns:
        list: Normalised values in the same order as the input. An empty
            input yields an empty list. When every value is identical
            (max == min) the formula is undefined, so 0.0 is returned
            for each element instead of dividing by zero.
    '''

    # Snapshot the input so the caller's list is left untouched.
    values = list(data)
    if not values:
        return []

    data_min = min(values)
    span = max(values) - data_min

    # Constant input: avoid ZeroDivisionError and map everything to 0.0.
    if span == 0:
        return [0.0] * len(values)

    return [(value - data_min) / span for value in values]

# Usage example: normalise a small sample list and show the result
data = [10, 11, 12, 14, 16]
n_data = norm_data(data)
print(n_data)

[0.0, 0.16666666666666666, 0.3333333333333333, 0.6666666666666666, 1.0]


In [6]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Show arrays with up to 6 decimal places and without scientific (e) notation
np.set_printoptions(precision=6, suppress=True)

# Sample data as a 2-D NumPy array: 3 rows, 2 columns with very different scales
data = np.array([
    [100, 0.0001],
    [50, 0.05],
    [30, 0.003],
])
print('Data Asli')
print(data)

# Create the MinMaxScaler, then fit it on the data and rescale in one step
scaler = MinMaxScaler()
scaled = scaler.fit_transform(data)
print('Data Normalisasi')
print(scaled)

Data Asli
[[100.       0.0001]
 [ 50.       0.05  ]
 [ 30.       0.003 ]]
Data Normalisasi
[[1.       0.      ]
 [0.285714 1.      ]
 [0.       0.058116]]


In [7]:
import numpy as np
from sklearn.preprocessing import StandardScaler

# Show arrays with up to 6 decimal places and without scientific (e) notation
np.set_printoptions(precision=6, suppress=True)

# Sample data as a 2-D NumPy array: 3 rows, 2 columns with very different scales
data = np.asarray([
    [100, 0.0001],
    [50, 0.05],
    [30, 0.003],
])
print('Data Asli')
print(data)

# Create the StandardScaler, then fit it on the data and standardise in one step
scaler = StandardScaler()
scaled = scaler.fit_transform(data)
print('Data Standarisasi')
print(scaled)

Data Asli
[[100.       0.0001]
 [ 50.       0.05  ]
 [ 30.       0.003 ]]
Data Standarisasi
[[ 1.358732 -0.76956 ]
 [-0.339683  1.412317]
 [-1.019049 -0.642757]]


In [8]:
from sklearn.preprocessing import OrdinalEncoder

# Category labels in 2-D form: one single-column row per label
data = [
    ['Politeknik Negeri Malang'],
    ['Politeknik Elektronika Negeri Surabaya'],
    ['Politeknik Negeri Jakarta'],
    ['Politeknik Negeri Semarang'],
]

# Create the OrdinalEncoder, then fit it and encode the labels in one step
oe = OrdinalEncoder()
transform_oe = oe.fit_transform(data)

print('Data Asli')
print(data)

# Each label is replaced by a single numeric code
print('Data Transformasi Ordinal Encoder')
print(transform_oe)

Data Asli
[['Politeknik Negeri Malang'], ['Politeknik Elektronika Negeri Surabaya'], ['Politeknik Negeri Jakarta'], ['Politeknik Negeri Semarang']]
Data Transformasi Ordinal Encoder
[[2.]
 [0.]
 [1.]
 [3.]]


In [9]:
from sklearn.preprocessing import OneHotEncoder

# Category labels in 2-D form: one single-column row per label
data = [
    ['Politeknik Negeri Malang'],
    ['Politeknik Elektronika Negeri Surabaya'],
    ['Politeknik Negeri Jakarta'],
    ['Politeknik Negeri Semarang'],
]

# Create the OneHotEncoder, then fit it and encode the labels in one step
ohe = OneHotEncoder()
transform_ohe = ohe.fit_transform(data)

print('Data Asli')
print(data)

# The encoded result is converted with toarray() so it prints as a dense array
print('Data Transformasi One-Hot Encoding')
print(transform_ohe.toarray())

Data Asli
[['Politeknik Negeri Malang'], ['Politeknik Elektronika Negeri Surabaya'], ['Politeknik Negeri Jakarta'], ['Politeknik Negeri Semarang']]
Data Transformasi One-Hot Encoding
[[0. 0. 1. 0.]
 [1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 0. 1.]]


In [10]:
from sklearn.preprocessing import OneHotEncoder

# Category labels in 2-D form: one single-column row per label
data = [
    ['Politeknik Negeri Malang'],
    ['Politeknik Elektronika Negeri Surabaya'],
    ['Politeknik Negeri Jakarta'],
    ['Politeknik Negeri Semarang'],
]

# Create a OneHotEncoder that drops the first category's column,
# then fit it and encode the labels in one step
ohe = OneHotEncoder(drop='first')
transform_ohe = ohe.fit_transform(data)

print('Data Asli')
print(data)

# The encoded result is converted with toarray() so it prints as a dense array
print('Data Transformasi One-Hot Encoding')
print(transform_ohe.toarray())

Data Asli
[['Politeknik Negeri Malang'], ['Politeknik Elektronika Negeri Surabaya'], ['Politeknik Negeri Jakarta'], ['Politeknik Negeri Semarang']]
Data Transformasi One-Hot Encoding
[[0. 1. 0.]
 [0. 0. 0.]
 [1. 0. 0.]
 [0. 0. 1.]]


In [15]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Create the TfidfVectorizer with the built-in English stop-word list
vect = TfidfVectorizer(stop_words='english')

# Five short documents used as the corpus
corpus = [
    'the house had a tiny little mouse',
    'the cat saw the mouse',
    'the mouse ran away from the house',
    'the cat finally ate the mouse',
    'the end of the mouse story',
]

# Fit the vectorizer on the corpus and compute the TF-IDF weights
resp = vect.fit_transform(corpus)

# Print the weights
print('Hasil TF-IDF')
print(resp)

# Show the extracted tokens; the bare last expression is displayed as the cell output
print('Hasil Token')
vect.get_feature_names_out()

Hasil TF-IDF
  (0, 7)	0.2808823162882302
  (0, 6)	0.5894630806320427
  (0, 11)	0.5894630806320427
  (0, 5)	0.47557510189256375
  (1, 9)	0.7297183669435993
  (1, 2)	0.5887321837696324
  (1, 7)	0.3477147117091919
  (2, 1)	0.5894630806320427
  (2, 8)	0.5894630806320427
  (2, 7)	0.2808823162882302
  (2, 5)	0.47557510189256375
  (3, 0)	0.5894630806320427
  (3, 4)	0.5894630806320427
  (3, 2)	0.47557510189256375
  (3, 7)	0.2808823162882302
  (4, 10)	0.6700917930430479
  (4, 3)	0.6700917930430479
  (4, 7)	0.3193023297639811
Hasil Token


array(['ate', 'away', 'cat', 'end', 'finally', 'house', 'little', 'mouse',
       'ran', 'saw', 'story', 'tiny'], dtype=object)

In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Read the corpus from corpus.txt, one document per line.
# The encoding is given explicitly so the result does not depend on the
# platform's default encoding. Whitespace-only lines are skipped so they do
# not turn into empty documents (all-zero rows) in the TF-IDF matrix.
with open('corpus.txt', 'r', encoding='utf-8') as file:
    stripped_lines = (line.strip() for line in file)
    corpus = [document for document in stripped_lines if document]

# Create the TfidfVectorizer with the built-in English stop-word list
vect = TfidfVectorizer(stop_words='english')

# Fit the vectorizer on the corpus and compute the TF-IDF weights
resp = vect.fit_transform(corpus)

# Print the weights
print('Hasil TF-IDF:')
print(resp)

# Print the extracted tokens
print('Hasil Token:')
print(vect.get_feature_names_out())


Hasil TF-IDF:
  (0, 7)	0.2808823162882302
  (0, 6)	0.5894630806320427
  (0, 11)	0.5894630806320427
  (0, 5)	0.47557510189256375
  (1, 9)	0.7297183669435993
  (1, 2)	0.5887321837696324
  (1, 7)	0.3477147117091919
  (2, 1)	0.5894630806320427
  (2, 8)	0.5894630806320427
  (2, 7)	0.2808823162882302
  (2, 5)	0.47557510189256375
  (3, 0)	0.5894630806320427
  (3, 4)	0.5894630806320427
  (3, 2)	0.47557510189256375
  (3, 7)	0.2808823162882302
  (4, 10)	0.6700917930430479
  (4, 3)	0.6700917930430479
  (4, 7)	0.3193023297639811
Hasil Token:
['ate' 'away' 'cat' 'end' 'finally' 'house' 'little' 'mouse' 'ran' 'saw'
 'story' 'tiny']
