In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive so files stored in Drive are reachable from this runtime.
drive.mount('/content/drive')
# Drive subfolder containing the data files. Keep the trailing slash:
# file names are appended directly to this string when the dataset paths are built.
folder_path = '/content/drive/MyDrive/Colab Notebooks/'

Mounted at /content/drive


In [2]:
# Full paths of the two input CSV files inside the Drive data folder.
dataset_path1 = f"{folder_path}cleaned_data.csv"
dataset_path2 = f"{folder_path}one_hot_encoding.csv"

In [3]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the data: `df` comes from dataset_path1 and `one_hot` from dataset_path2,
# read in that order.
df, one_hot = (pd.read_csv(csv_path) for csv_path in (dataset_path1, dataset_path2))

# Prepare the training labels.
# `rating` lists the five bin edges that delimit the four label buckets:
#   [0, 4) -> bad, [4, 6) -> average, [6, 8) -> good, [8, 10] -> favourite
rating = [0, 4, 6, 8, 10]
label_names = ['bad', 'average', 'good', 'favourite']
# Lower bin edge -> label name. Kept under its original name in case later
# cells read it; it is no longer used for the labelling itself.
new_label = dict(zip(rating, label_names))

# Bin the scores instead of looking them up by exact value: an exact-match
# lookup only labels rows whose score is precisely 0, 4, 6 or 8 and turns every
# other score (e.g. 7.3) into NaN, which would then be dropped below.
# Left-closed bins (right=False) keep the labels that exact scores of 0/4/6/8
# received before; the top edge is patched in separately because a left-closed
# last bin would otherwise exclude it.
scores = pd.to_numeric(df['vote_average'], errors='coerce')
df["rating_label"] = (
    pd.cut(scores, bins=rating, labels=label_names, right=False)
    .astype(object)  # plain strings/NaN rather than a categorical column
    .mask(scores == rating[-1], label_names[-1])
)

# Drop entries without a label (missing or out-of-range score), then attach
# the columns of `one_hot` by matching on the title.
labeled = df.dropna(subset=['rating_label'])
merged = pd.merge(labeled, one_hot, on='title')

# Convert the labels to the integer class ids expected by
# sparse_categorical_crossentropy. `.map` gives an integer column directly and
# avoids the implicit-downcast FutureWarning that `.replace` raises on
# pandas >= 2.2.
mapping_label = {'bad': 0, 'average': 1, 'good': 2, 'favourite': 3}
merged['rating_label'] = merged['rating_label'].map(mapping_label).astype('int64')

# Write the labelled, merged table to merged.csv.
merged.to_csv('merged.csv')

# Tokenization and padding.
# Empty CSV fields are read back by pandas as NaN (a float), and the Keras
# Tokenizer calls .lower() on every text, so a single missing overview would
# abort fitting. Treat missing overviews as empty strings instead.
overview_data = merged['overview'].fillna('').astype(str).values
# Keep only the most frequent words; rarer words are dropped from the sequences.
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(overview_data)
overview_sequences = tokenizer.texts_to_sequences(overview_data)
# Pad/truncate every sequence to exactly max_len token ids (pad_sequences
# defaults: padding and truncation both happen at the front).
max_len = 100
overview_sequences = pad_sequences(overview_sequences, maxlen=max_len)

# Split the data into training (80%) and testing (remaining 20%) sets;
# the fixed random_state makes the shuffle repeatable.
X, y = overview_sequences, merged['rating_label']
train_X, test_X, train_y, test_y = train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=42,
)

# Define the model.
# The tokenizer only emits ids below its `num_words` cap, so embedding rows for
# the rest of `word_index` would never be looked up; they would only inflate
# the saved .h5/.tflite files. Cap the embedding table at the ids actually used.
vocab_size = len(tokenizer.word_index) + 1  # +1: id 0 is reserved for padding
if tokenizer.num_words:
    vocab_size = min(vocab_size, tokenizer.num_words)

model = tf.keras.Sequential([
    # One row of max_len token ids per overview.
    tf.keras.layers.Input(shape=(max_len,)),
    tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=100),
    # Average the token embeddings into a single fixed-size vector per text.
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(128, activation='relu'),
    # Four output units, one per rating class (0..3).
    tf.keras.layers.Dense(4, activation='softmax')
])

# Compile the model. The sparse loss takes integer class ids as targets,
# so the labels do not need to be one-hot encoded.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

# Train the model; the held-out split is evaluated after every epoch.
model.fit(
    train_X,
    train_y,
    epochs=10,
    batch_size=32,
    validation_data=(test_X, test_y),
)

# Save the trained Keras model (HDF5 file in the current working directory).
model.save('model_overview.h5')

# Convert the in-memory Keras model to TensorFlow Lite.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the TensorFlow Lite model; convert() returns the serialized bytes,
# hence the binary write mode.
with open('model_overview.tflite', 'wb') as tflite_file:
    tflite_file.write(tflite_model)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  saving_api.save_model(
