In [16]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score
import pickle
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import nltk
import string
import pandas as pd
import pymysql

In [17]:
def read_mysql_table(table, host='localhost', user='root', password='', database='review'):
    # Establish a connection to the MySQL database
    connection = pymysql.connect(
        host=host,
        user=user,
        password=password,
        database=database
    )
    
    # Create a cursor object to execute SQL queries
    cursor = connection.cursor()
    
    query = f"SELECT * FROM {table}"
    cursor.execute(query)
    result = cursor.fetchall()
    
    # Convert the result to a Pandas DataFrame
    df = pd.DataFrame(result)
    
    # Assign column names based on the cursor description
    df.columns = [column[0] for column in cursor.description]
    
    # Close the cursor and the database connection
    cursor.close()
    connection.close()
    
    return df

In [18]:
table_name = 'input_review'
df = read_mysql_table(table_name)
df.head()

Unnamed: 0,id_review,nama,tanggal,review
0,29,sadam,1999-02-21,gyat


In [19]:
# text preprocessing
def preprocess_text(content):
    import nltk
    import re
    nltk.download('stopwords')
    nltk.download('punkt')

    # filtering

    text = re.sub(r'\W', ' ', str(content))
    text = re.sub(r'\s+[a-zA-Z]\s+', ' ', content)
    text = re.sub(r'\^[a-zA-Z]\s+', ' ', content)
    text = re.sub(r'\s+', ' ', content, flags=re.I)
    text = re.sub(r'^b\s+', '', content)
    text = text.lower() # case folding

    # Tokenisasi
    tokens = word_tokenize(text)

    # Menghapus stopwords
    stop_words = set(stopwords.words('indonesian'))
    tokens = [word for word in tokens if word.lower() not in stop_words]

    # Menggabungkan kembali tokens menjadi kalimat
    preprocessed_text = ' '.join(tokens)

    return preprocessed_text

# Melakukan preprocessing pada semua ulasan
df['preprocessed_text'] = df['review'].apply(preprocess_text)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\zmaul\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\zmaul\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [20]:
ulasan_bersih = df['preprocessed_text'] 

In [6]:
ulasan_bersih

0    gyat
Name: preprocessed_text, dtype: object

In [21]:
# Specify the file path of the pickle file
file_path = 'C:/Users/zmaul/OneDrive/Desktop/flask_input/mengajar-streamlit-main/TFIDF.pkl'

# Read the pickle file
with open(file_path, 'rb') as file:
    data_train = pickle.load(file)


In [22]:
data_train

['in game masuk iklan . gamenya udh diselesain ikalan wajar , tp ditengah2 game iklan .',
 'bikin game mewarnai bikin game penonton iklan ? .. 15 detik muncul iklan . lelang barang nonton iklan kadang iklanya bug nya yg ngulang nontonnya ...',
 'ukurannya iklannya , sampah',
 'elek game jelek game habis jual barang aneh penyimpanan ku cuman mb force close/keluar aplikasi',
 'iklan mulu bbrp detik',
 'iklannya kebangetan banget',
 '1 % game , 99 % iklan game nyebelin yg gw mainin',
 'main game nonton iklan',
 'bahasanya ngerti udah ganti bahasa indo bala tetep aja bahasa',
 'ditidak masuk game ... kecewa rating 1',
 'iklan susah digerakkan',
 'seru iklannya naudzubillah , klik iklan 15 detik , klik lg iklan . main gak nyaman . 10 menit install main lgsg uninstall lg . kecewa iklannya',
 'iklan pake wifi klo pake pulsa/kuota iklan seru susah permainan nya tolong game kasih iklan sedikit² iklan ga cocok anak suka marahan klo aja iklannya deh terimakasih .',
 'game nya sih bagus iklan main

In [23]:
# pembuatan vector kata
vectorizer = TfidfVectorizer()
train_vector = vectorizer.fit_transform(data_train)
reviews2 = ["".join(r) for r in ulasan_bersih]

In [15]:
reviews2

['gyat']

In [25]:
load_model = pickle.load(open('C:/Users/zmaul/OneDrive/Desktop/flask_input/mengajar-streamlit-main/svm_model.pkl','rb'))

result = []

for test in reviews2:
    test_data = [str(test)]
    test_vector = vectorizer.transform(test_data).toarray()  # Convert to dense matrix
    pred = load_model.predict(test_vector)
    result.append(pred[0])

In [26]:
result

[3]

In [27]:
from sklearn.utils.multiclass import unique_labels
unique_labels(result)

array([3], dtype=int64)

In [28]:
df['label'] = result

In [29]:
def delete_all_data_from_table(table, host='localhost', user='root', password='', database='review'):
    # Establish a connection to the MySQL database
    connection = pymysql.connect(
        host=host,
        user=user,
        password=password,
        database=database
    )
    
    # Create a cursor object to execute SQL queries
    cursor = connection.cursor()
    
    # Delete all data from the specified table
    query = f"DELETE FROM {table}"
    cursor.execute(query)
    
    # Commit the changes
    connection.commit()
    
    # Close the cursor and the database connection
    cursor.close()
    connection.close()

In [30]:
delete_all_data_from_table('input_review')

In [31]:
def insert_df_into_hasil_model(df, host='localhost', user='root', password='', database='review'):
    # Establish a connection to the MySQL database
    connection = pymysql.connect(
        host=host,
        user=user,
        password=password,
        database=database
    )

    # Create a cursor object to execute SQL queries
    cursor = connection.cursor()

    # Insert each row from the DataFrame into the 'hasil_model' table
    for index, row in df.iterrows():
        query = "INSERT INTO hasil_model (id_review, nama, tanggal, review, label) VALUES (%s, %s, %s, %s, %s)"
        cursor.execute(query, (row['id_review'], row['nama'], row['tanggal'], row['review'], row['label']))

    # Commit the changes
    connection.commit()

    # Close the cursor and the database connection
    cursor.close()
    connection.close()

In [32]:
insert_df_into_hasil_model(df)

In [None]:
table_name = 'hasil_model'
hasil_df = read_mysql_table(table_name)
hasil_df.to_csv('Data/hasil_model.csv')