In [24]:
import pandas as pd
import re
import nltk
import json
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

nltk.download('stopwords')
stop_words_id = list(set(stopwords.words('indonesian')))



[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Administrator\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
df = pd.read_csv('dataset.csv')
df[['text', 'sentiment']] = df[['text', 'sentiment']].astype(str).fillna('')
df = df[['text', 'sentiment']]
df.head(100)

# **1. Preprocessing Data**

In [None]:
df = df.drop_duplicates(subset='text')
df.duplicated().sum()

Cleaning Data

In [None]:
def clean_data(text):
    text = re.sub(r'@[A-Za-z0-9_]+', '', text)
    text = re.sub(r'#\w+', '', text)
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

df['text'] = df['text'].apply(clean_data)

Case Folding

In [None]:
df['text'] = df['text'].str.lower()
df

Normalization

In [None]:
def normalize(text):
    def load_normalization_dict():
        with open('normalization_dict.json', 'r') as file:
            normalization_dict = json.load(file)
        return normalization_dict
    normalization_dict = load_normalization_dict()
    for word, replacement in normalization_dict.items():
        pattern = r'\b' + re.escape(word) + r'\b'
        text = re.sub(pattern, replacement, text)
    return text

df['text'] = df['text'].astype(str).apply(lambda x: normalize(x))
df

Stopword Removal

In [None]:
import Sastrawi
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory, StopWordRemover, ArrayDictionary
more_stop_words = ['kok', 'cuk','v', 'sih', 'kan', 'loh', 'duh', 'wah', 'yang', 'untuk', 'pada', 'ke', 'para', 'namun', 'menurut', 'antara', 'dia', 'dua', 'ia', 'seperti', 'jika', 'sehingga', 'kembali', 'dan', 'ini', 'karena', 'kepada', 'oleh', 'saat', 'harus', 'setelah', 'kami', 'sekitar', 'bagi', 'serta', 'di', 'dari', 'telah', 'sebagai', 'masih', 'hal', 'ketika', 'adalah', 'itu', 'dalam', 'bisa', 'bahwa', 'atau', 'hanya', 'kita', 'dengan', 'akan', 'juga', 'ada', 'mereka', 'sudah', 'saya', 'terhadap', 'secara', 'agar', 'lain', 'kenapa', 'yaitu', 'yakni', 'daripada', 'itulah', 'lagi', 'maka', 'tentang', 'demi', 'dimana', 'kemana', 'pula', 'sambil', 'supaya', 'guna', 'kah', 'pun', 'sampai', 'sedangkan', 'selagi', 'apakah', 'sebab', 'selain', 'seolah', 'seraya', 'seterusnya', 'tanpa', 'agak', 'boleh', 'dapat', 'dsb', 'dst', 'dll', 'dahulu', 'dulunya', 'anu', 'demikian', 'ingin', 'juga', 'nggak', 'mari', 'nanti', 'melainkan', 'oh', 'ok', 'seharusnya', 'sebetulnya', 'setiap', 'setidaknya', 'sesuatu', 'pasti', 'saja', 'toh', 'ya', 'walau', 'tolong', 'tentu', 'amat', 'apalagi', 'bagaimanapun', 'sekali', 'jadi', 'nya']
stop_words = StopWordRemoverFactory().get_stop_words()
stop_words.extend(more_stop_words)

new_array = ArrayDictionary(stop_words)
stop_words_remover_new = StopWordRemover(new_array)

def stopword_removal(str_text):
    str_text = stop_words_remover_new.remove(str_text)
    return str_text

df['text'] = df['text'].apply(lambda x: stopword_removal(x))
df

Tokenizing

In [None]:
df['tokenized'] = df['text'].astype(str).apply(lambda x:x.split())
df

Stemming

In [None]:
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory

def stemming(text_cleaning):
    factory = StemmerFactory()
    stemmer = factory.create_stemmer()
    return stemmer.stem(text_cleaning)

df['stemmed'] = df['text'].astype(str).apply(stemming)
df[['stemmed', 'sentiment']].to_csv('executed.csv', index=False, encoding='latin1')

In [121]:
df = pd.read_csv('executed.csv', encoding='latin1')
df = df.drop('Unnamed: 0', axis=1)
df

Unnamed: 0,stemmed,sentiment
0,benci bacot masih nonton,-1
1,mantap kontol,-1
2,setan,-1
3,tai,-1
4,titit,-1
...,...,...
10915,yo lah anak pantek ang mah,-1
10916,youtube emang atur buat konten yg baik didik y...,-1
10917,youtuber paling jujur kocak benci buat eriko l...,-1
10918,saya beli anjing,1


Translate

In [122]:
%pip install translate

^C
Note: you may need to restart the kernel to use updated packages.




In [None]:
# Ini yang en ke indo
from translate import Translator

def translate_id(text):
    try:
        translator = Translator(from_lang='en', to_lang="id")
        translation = translator.translate(text)
        return translation
    except Exception as e:
        print(f"Error in translation: {e}")
        return text

df['stemmed'] = df['stemmed'].astype(str).apply(translate_id)
df['stemmed'].to_csv('TranslatedSampleID.csv')

In [None]:
%pip install preprocessor
%pip install textblob
%pip install wordcloud
%pip install nltk

In [22]:
df = pd.read_csv('executed.csv', encoding='latin1')
df = df.drop('Unnamed: 0', axis=1)
df


Unnamed: 0,stemmed,sentiment
0,benci bacot masih nonton,-1
1,mantap kontol,-1
2,setan,-1
3,tai,-1
4,titit,-1
...,...,...
10905,yo lah anak pantek ang mah,-1
10906,youtube emang atur buat konten yg baik didik y...,-1
10907,youtuber paling jujur kocak benci buat eriko l...,-1
10908,saya beli anjing,1


In [25]:
def clean_text(text):
    text = re.sub(r'[^a-zA-Z\s]', '', text) 
    text = text.lower().strip()             
    words = text.split()                    
    words = [word for word in words if word not in stop_words_id]
    return ' '.join(words)

df['stemmed'] = df['stemmed'].apply(clean_text)

# **Model 1** 

(Positif/Netral/Negatif)

 **1. Labeling**

In [82]:
label = {'positif': 1, 'netral': 0, 'negatif': -1}
df['sentiment'] = df['sentiment'].map(label)

In [26]:
X = df['stemmed']
y = df['sentiment']

In [14]:
# print(f"Jumlah sampel di X: {len(X)}")
# print(f"Jumlah sampel di y: {len(y)}")
# print(f"Contoh data di X: {X[:5]}")
# print(f"Contoh data di y: {y[:5]}")

**2. TF-IDF Vectorization**

In [27]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [28]:
vectorizer = TfidfVectorizer(stop_words=stop_words_id)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)




**3. Pembuatan Model**

In [29]:
model = MultinomialNB()
model.fit(X_train_tfidf, y_train)


**Pengecekan Akurasi**

In [30]:
y_pred = model.predict(X_test_tfidf)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.8065994500458296

Classification Report:
               precision    recall  f1-score   support

          -1       0.88      0.48      0.62       718
           1       0.79      0.97      0.87      1464

    accuracy                           0.81      2182
   macro avg       0.84      0.72      0.74      2182
weighted avg       0.82      0.81      0.79      2182



In [31]:
import dill

with open('model_naive_bayes.sav', 'wb') as model_file:
    dill.dump(model, model_file)

with open('tfidf_vectorizer.sav', 'wb') as vectorizer_file:
    dill.dump(vectorizer, vectorizer_file)


**4.Prediksi Kalimat Baru**

In [32]:
def prediksiKalimatBaru(text):
    teksBaru = vectorizer.transform([text])
    prediksi = model.predict(teksBaru)

    if prediksi[0] == 1: 
        sentiment = "positif"
    elif prediksi[0] == 0: 
        sentiment = "netral"
    else: 
        sentiment = "negatif"

    return sentiment

In [132]:
contohKalimat = "saya beli anjing"
print(f"Teks: '{contohKalimat}' => Sentimen: {prediksiKalimatBaru(contohKalimat)}")

Teks: 'saya beli anjing' => Sentimen: positif


# **Model 2**
(kasar/tidak kasar)

**1.Labeling**

In [133]:
with open('kamus_kasar.json', 'r') as file:
    kamus = json.load(file)

def labeling(text, kamus):
    words = set(text.split())
    kata_kasar = set(kamus.keys())  
    if words & kata_kasar:
        return 'kasar'
    else:
        return 'tidak kasar'
    
df['labeling'] = df['stemmed'].astype(str).apply(lambda x: labeling(x, kamus))
df.to_csv('labeled.csv', encoding='latin1')
df

Unnamed: 0,stemmed,sentiment,labeling
0,benci bacot nonton,-1,kasar
1,mantap kontol,-1,kasar
2,setan,-1,kasar
3,tai,-1,kasar
4,titit,-1,kasar
...,...,...,...
10915,yo anak pantek ang mah,-1,kasar
10916,youtube emang atur konten yg didik youtube atu...,-1,kasar
10917,youtuber jujur kocak benci eriko lim kocak nga...,-1,kasar
10918,beli anjing,1,kasar


Pembagian Dataset

In [134]:
from sklearn.model_selection import train_test_split

X = df['stemmed'].astype(str).dropna() 
y = df['labeling']           

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# print(f"Jumlah data latih: {len(X_train)}")
# print(f"Jumlah data uji: {len(X_test)}")

In [None]:
%pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.


**2.TF-IDF Vectorizer**

In [135]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer()

X_train = vectorizer.fit_transform(X_train)
X_test = vectorizer.transform(X_test)

Dimensi X_train_tfidf: (8736, 9545)
Dimensi X_test_tfidf: (2184, 9545)


In [136]:
vectorizer = TfidfVectorizer(
    max_features=5000,
    min_df=2,     
    max_df=0.95,
    stop_words=stop_words_id
)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)



In [137]:
model = MultinomialNB()
model.fit(X_train_tfidf, y_train)

In [138]:
y_pred = model.predict(X_test_tfidf)

print("Akurasi: ",{accuracy_score(y_test, y_pred),":.4f"})
print(classification_report(y_test, y_pred))

Akurasi: 0.9176
Classification Report:
              precision    recall  f1-score   support

       kasar       0.97      0.73      0.83       613
 tidak kasar       0.90      0.99      0.95      1571

    accuracy                           0.92      2184
   macro avg       0.94      0.86      0.89      2184
weighted avg       0.92      0.92      0.91      2184



**3.Filter Kata Kasar**

In [139]:
with open('kamus_kasar.json', 'r') as file:
    kamus = json.load(file)

kata_kasar = set(kamus.keys())

def filterkataKasar(text, kamus_keys):
    words = str(text).split()
    filtered_words = [word for word in words if word in kamus_keys]
    return ' '.join(filtered_words)

df['filtered'] = df['stemmed'].apply(lambda x: filterkataKasar(x, kata_kasar))
# df.to_csv('executed.csv', index=False, encoding='latin1')
# df[['stemmed', 'Filtered_Comments']]
df

Unnamed: 0,stemmed,sentiment,labeling,filtered
0,benci bacot nonton,-1,kasar,bacot
1,mantap kontol,-1,kasar,kontol
2,setan,-1,kasar,setan
3,tai,-1,kasar,tai
4,titit,-1,kasar,titit
...,...,...,...,...
10915,yo anak pantek ang mah,-1,kasar,pantek
10916,youtube emang atur konten yg didik youtube atu...,-1,kasar,tolol
10917,youtuber jujur kocak benci eriko lim kocak nga...,-1,kasar,gila
10918,beli anjing,1,kasar,anjing


**4.Hitung Skor TF-IDF per kata kasar**

In [140]:
from sklearn.feature_extraction.text import TfidfVectorizer

def filterKataKasar(text, kata_kasar):
    words = str(text).split()
    filtered_words = [word for word in words if word in kata_kasar]
    return ' '.join(filtered_words)

vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(df['filtered'])

features = vectorizer.get_feature_names_out()
scores = tfidf_matrix.toarray()

data = []
for doc_idx, doc_scores in enumerate(scores):
    tokens = [features[i] for i in range(len(features)) if doc_scores[i] > 0]
    tfidf_scores = [round(doc_scores[i], 2) for i in range(len(features)) if doc_scores[i] > 0]
    
    for token, score in zip(tokens, tfidf_scores):
        data.append({"Comments": token, "TF-IDF": score})

score_tfidf = pd.DataFrame(data)
score_tfidf = score_tfidf[score_tfidf['Comments'].str.len() > 0].dropna().drop_duplicates(subset='Comments')
score_tfidf.to_csv("skor_tfidf.csv", index=False)
score = pd.read_csv("skor_tfidf.csv", encoding='latin1')
score

Unnamed: 0,Comments,TF-IDF
0,bacot,1.00
1,kontol,1.00
2,setan,1.00
3,tai,1.00
4,titit,1.00
...,...,...
83,gendeng,1.00
84,perek,0.87
85,silit,0.86
86,geblek,1.00


Ambil skor TF-IDF diatas 0.5

In [141]:
data = pd.DataFrame()
data = score_tfidf[score_tfidf['TF-IDF'] > 0.5]
data.to_csv('high_tfidf.csv', index=False)

Ambil kata yang TF-IDF diatas 0.5

In [142]:
data = pd.read_csv('high_tfidf.csv')
# data = data.drop('Unnamed: 0', axis=1)
high_tfidf = set(data['Comments'])

Cek apakah ada kata TF-IDF tinggi dalam kalimat

In [143]:
def cekTFIDF(text, high_tfidf):
    words = set(text.split()) 
    if words & high_tfidf:
        return 'ada'
    else:
        return 'tidak ada'
    
df['cekTF-IDF'] = df['stemmed'].astype(str).apply(lambda x: cekTFIDF(x, high_tfidf))
df = df.to_csv('sebelum_hasil.csv', index=False)

# **3. Word Replacement**

In [144]:
with open('kamus_kasar.json', 'r') as file:
    kamus = json.load(file)

def replace_kata_kasar(text, kamus):
    words = text.split()  
    word_replacement = [kamus[key] if key in kamus else key for key in words]
    return ' '.join(word_replacement)

df = pd.read_csv('executed.csv')
df['text'] = df['stemmed'].astype(str)

df['fixed'] = df.apply(
    lambda row: replace_kata_kasar(row['text'], kamus) if row['sentiment'] != 'positif' else row['text'],
    axis=1)

df[['text', 'fixed']].to_csv('hasil.csv', encoding='latin1', index=False)
dresult = pd.read_csv('hasil.csv')
dresult

Unnamed: 0,text,fixed
0,benci bacot masih nonton,benci banyak bicara masih nonton
1,mantap kontol,mantap alat kelamin pria
2,setan,makhluk jahat
3,tai,kotoran
4,titit,alat kelamin pria
...,...,...
10915,yo lah anak pantek ang mah,yo lah anak sialan ang mah
10916,youtube emang atur buat konten yg baik didik y...,youtube emang atur buat konten yg baik didik y...
10917,youtuber paling jujur kocak benci buat eriko l...,youtuber paling jujur kocak benci buat eriko l...
10918,saya beli anjing,saya beli hewan ternak


Penggunaan index labeling sebagai pengecekan akurasi

In [161]:
df = pd.read_csv('sebelum_hasil.csv')
dresult['labeling'] = df['labeling']
dresult

Unnamed: 0,text,fixed,labeling
0,benci bacot masih nonton,benci banyak bicara masih nonton,kasar
1,mantap kontol,mantap alat kelamin pria,kasar
2,setan,makhluk jahat,kasar
3,tai,kotoran,kasar
4,titit,alat kelamin pria,kasar
...,...,...,...
10915,yo lah anak pantek ang mah,yo lah anak sialan ang mah,kasar
10916,youtube emang atur buat konten yg baik didik y...,youtube emang atur buat konten yg baik didik y...,kasar
10917,youtuber paling jujur kocak benci buat eriko l...,youtuber paling jujur kocak benci buat eriko l...,kasar
10918,saya beli anjing,saya beli hewan ternak,kasar


In [148]:
X = dresult['text'] 
y = dresult['fixed']    

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [149]:
vectorizer = TfidfVectorizer(
    max_features=5000,
    min_df=2,     
    max_df=0.95,
    stop_words=stop_words_id
)
X_train = vectorizer.fit_transform(X_train)
X_test = vectorizer.transform(X_test)



In [150]:
model = MultinomialNB()
model.fit(X_train_tfidf, y_train)

In [151]:
y_pred = model.predict(X_test_tfidf)

print(f"Akurasi: {accuracy_score(y_test, y_pred):.4f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

Akurasi: 0.0027
Classification Report:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      precision    recall  f1-score   support

                                                                                                                                                                                                                         

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# **Eksekusi dalam input**

Testing Function

In [None]:
def execute(word):
    word = clean_data(word)
    word = normalize(word)
    word = stemming(word)
    word = translate_id(word)
    word = replace_kata_kasar(word, kamus)
    return word

contoh = 'Dasar manusia tolol'
execute(contoh)

## **Saving Function**

In [1]:
%pip install dill

Note: you may need to restart the kernel to use updated packages.


In [43]:
import dill
df = pd.DataFrame()

def clean_data(text):
    import re
    text = re.sub(r'@[A-Za-z0-9_]+', '', text)
    text = re.sub(r'#\w+', '', text)
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text
with open('clean_data.sav', 'wb') as file:
    dill.dump(clean_data, file)

def load_normalization_dict():
    with open('normalization_dict.json', 'r') as file:
        normalization_dict = json.load(file)
    return normalization_dict

def normalize(text):
    import re, json
    def load_normalization_dict():
        with open('normalization_dict.json', 'r') as file:
            normalization_dict = json.load(file)
        return normalization_dict
    normalization_dict = load_normalization_dict()
    for word, replacement in normalization_dict.items():
        pattern = r'\b' + re.escape(word) + r'\b'
        text = re.sub(pattern, replacement, text)
    return text
with open('normalize.sav', 'wb') as file:
    dill.dump(normalize, file)

def stemming(text_cleaning):
    from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
    factory = StemmerFactory()
    stemmer = factory.create_stemmer()
    return stemmer.stem(text_cleaning)
with open('stemming.sav', 'wb') as file:
    dill.dump(stemming, file)

def translate_id(text):
    from googletrans import Translator
    try:
        translator = Translator(from_lang='en', to_lang="id" )
        translation = translator.translate(text)
        return translation
    except Exception as e:
        print(f"Error in translation: {e}")
        return text
with open('translate_id.sav', 'wb') as file:
    dill.dump(translate_id, file)

def vectorizer(text):
    from sklearn.feature_extraction.text import TfidfVectorizer
    vectorizer = TfidfVectorizer()
    vectorizer.fit(text)
    return vectorizer
with open('vectorizer.sav', 'wb') as file:
    dill.dump(vectorizer, file)

def prediksiKalimatBaru(text):
    teksBaru = vectorizer.transform([text])
    prediksi = model.predict(teksBaru)
    if prediksi[0] == 1: 
        sentiment = "positif"
    elif prediksi[0] == 0: 
        sentiment = "netral"
    else: 
        sentiment = "negatif"
    return sentiment.join(df['sentiment'])
with open('prediksiKalimatBaru.sav', 'wb') as file:
    dill.dump(prediksiKalimatBaru, file)
    
def replace_kata_kasar(text):
    import json
    with open('kamus_kasar.json', 'r') as file:
        kamus = json.load(file)
    kata_kasar = set(kamus.keys())
    words = text.split()
    word_replacement = [kamus[key] if key in kata_kasar else key for key in words]
    return ' '.join(word_replacement)

with open('replace_kata_kasar.sav', 'wb') as file:
    dill.dump(replace_kata_kasar, file)

In [None]:
import dill

with open('clean_data.sav', 'wb') as file:
    dill.dump(clean_data, file)

with open('normalization_dict.json', 'r') as file:
        normalization_dict = json.load(file)

with open('normalize.sav', 'wb') as file:
    dill.dump(normalize, file)

with open('stemming.sav', 'wb') as file:
    dill.dump(stemming, file)

with open('translate_id.sav', 'wb') as file:
    dill.dump(translate_id, file)

with open('vectorizer.sav', 'wb') as file:
    dill.dump(vectorizer, file)


In [33]:
with open('model_naive_bayes.sav', 'rb') as model_file:
    loaded_model = dill.load(model_file)

with open('tfidf_vectorizer.sav', 'rb') as vectorizer_file:
    loaded_vectorizer = dill.load(vectorizer_file)


In [55]:
def prediksiSentimen(text):
    teksBaru = loaded_vectorizer.transform([text])
    prediksi = loaded_model.predict(teksBaru)
    sentiment = "positif" if prediksi[0] == 1 else "netral" if prediksi[0] == 0 else "negatif"
    return sentiment
with open('prediksiSentimen.sav', 'wb') as file:
    dill.dump(prediksiSentimen, file)

sentiment = prediksiSentimen('bodoh kali kau ini')
print(f"Sentimen: {sentiment}")


Sentimen: negatif


In [52]:
def prediksiKalimatBaru(text_input):
    import dill
    text_input = text_input.astype(str)
    with open('tfidf_vectorizer.sav', 'rb') as vectorizer_file:
        loaded_vectorizer = dill.load(vectorizer_file)
    with open('model_naive_bayes.sav', 'rb') as model_file:
        loaded_model = dill.load(model_file)

    cleaned_text = clean_data(text_input)
    normalized_text = normalize(cleaned_text)
    stemmed_text = stemming(normalized_text)

    teksBaru = loaded_vectorizer.transform([stemmed_text])
    prediksi = loaded_model.predict(teksBaru)
    sentiment = "positif" if prediksi[0] == 1 else "netral" if prediksi[0] == 0 else "negatif"

    if sentiment != "positif":
        with open('replace_kata_kasar.sav', 'rb') as file:
            replace_kata_kasar = dill.load(file)
        final_text = replace_kata_kasar(stemmed_text)
    else:
        final_text = stemmed_text
    print(f"Processed Text: {final_text}")
    print(f"Sentiment: {sentiment}")
    
with open('prediksiKalimatBaru.sav', 'wb') as file:
    dill.dump(prediksiKalimatBaru, file)


In [53]:
def prediksiKalimatBaru(text):
    with open('tfidf_vectorizer.sav', 'rb') as vectorizer_file:
        loaded_vectorizer = dill.load(vectorizer_file)
    with open('model_naive_bayes.sav', 'rb') as model_file:
        loaded_model = dill.load(model_file)
    teksBaru = loaded_vectorizer.transform([text])
    prediksi = loaded_model.predict(teksBaru)
    sentiment = "positif" if prediksi[0] == 1 else "netral" if prediksi[0] == 0 else "negatif"
    return sentiment
    
with open('prediksiKalimatBaru.sav', 'wb') as file:
    dill.dump(prediksiKalimatBaru, file)
