Sistem Rekomendasi Improvement Plan

Import Library

In [None]:
pip install Sastrawi

In [None]:
import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import tensorflow as tf
from nltk.tokenize import word_tokenize
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize
from tensorflow.keras.layers import Input, Dense, Embedding, Flatten, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [None]:
# `word_tokenize` relies on the Punkt tokenizer data. Older NLTK releases load
# the 'punkt' resource, newer ones look for 'punkt_tab', so request both; a
# resource that is unknown to the installed NLTK version is simply reported as
# not downloaded instead of raising.
for resource in ('punkt', 'punkt_tab'):
    nltk.download(resource, quiet=True)

Data Wrangling


In [None]:
# Forward slash instead of a backslash: it works on Windows and POSIX alike and
# avoids the invalid "\I" escape sequence the backslash form contained.
df = pd.read_excel('datasets/INI.xlsx', header=1)
df = df.drop(columns=['NO'])

# Forward-fill the grouping columns so every row carries its own value.
# NOTE(review): presumably the sheet only fills these cells on the first row of
# each group (merged cells) - confirm against the workbook.
# Assigning the result back (instead of `fillna(..., inplace=True)` on a column
# selection) keeps working under pandas copy-on-write and avoids the deprecated
# `method=` argument.
ffill_columns = ['Division', 'Project Title', 'Location', 'Department']
df[ffill_columns] = df[ffill_columns].ffill()

# Normalise the casing of location names so spelling variants collapse together.
df['Location'] = df['Location'].str.capitalize()

df

In [None]:
# Summary statistics of the DataFrame produced by the wrangling cell.
df.describe()

In [None]:
# Pie chart of how the improvement records are distributed across locations.
# An explicit figure/axes pair is used so the chart does not depend on pyplot's
# implicit "current figure" state.
fig, ax = plt.subplots(figsize=(10, 8))
df['Location'].value_counts().plot.pie(ax=ax, autopct='%1.1f%%')
ax.set_title('Proporsi Data Improvement Berdasarkan Lokasi')
ax.set_ylabel('')  # blank out the y-axis label
plt.show()

Data Preprocessing

In [None]:
# Indonesian stop words shipped with Sastrawi. They are stored as a set because
# the preprocessing step tests every token for membership, and a set lookup is
# constant-time whereas scanning the original list is linear.
stopword_factory = StopWordRemoverFactory()
stopwords = set(stopword_factory.get_stop_words())

In [None]:
def preprocess_text(text):
    """Normalise one free-text entry before TF-IDF vectorisation.

    The text is lower-cased and tokenised with NLTK's ``word_tokenize``; only
    purely alphabetic tokens that are not in the module-level ``stopwords``
    collection are kept, and the survivors are joined with single spaces.

    Args:
        text: The raw text of one record. Values that are not strings (for
            example the NaN pandas uses for empty cells) are treated as empty.

    Returns:
        str: The cleaned, space-separated tokens; ``''`` when nothing survives
        or when ``text`` is not a string.
    """
    # Empty spreadsheet cells arrive as float NaN, which has no `.lower()`.
    if not isinstance(text, str):
        return ''

    return ' '.join(
        token
        for token in word_tokenize(text.lower())
        if token.isalpha() and token not in stopwords
    )

In [None]:
# Clean every improvement description element-wise; the column is overwritten
# with its preprocessed form.
df['Improve'] = df['Improve'].map(preprocess_text)

In [None]:
# TF-IDF representation of the cleaned improvement texts. The vectorizer already
# scales every row to unit L2 norm (its default, spelled out here), so a second
# `normalize` pass over the matrix would not change the values.
tfidf_vectorizer = TfidfVectorizer(norm='l2')
tfidf_matrix = tfidf_vectorizer.fit_transform(df['Improve'])

# Kept under this name because the model and recommendation cells refer to it.
tfidf_matrix_normalized = tfidf_matrix

Sistem Rekomendasi

In [None]:
# Number of distinct values per categorical feature; a missing value, if any,
# is counted as a value of its own.
num_projects = df['Project Title'].nunique(dropna=False)
num_locations = df['Location'].nunique(dropna=False)

# Two scalar inputs per sample: one for the project, one for the location.
project_input = Input(shape=(1,))
location_input = Input(shape=(1,))

# Each input is looked up in its own 50-dimensional embedding table.
project_embedding = Embedding(input_dim=num_projects, output_dim=50)(project_input)
location_embedding = Embedding(input_dim=num_locations, output_dim=50)(location_input)

# Drop the length-1 sequence axis and join both embeddings into one feature vector.
project_flatten = Flatten()(project_embedding)
location_flatten = Flatten()(location_embedding)
concatenated = Concatenate()([project_flatten, location_flatten])

# One hidden layer, then one sigmoid unit per TF-IDF vocabulary term.
dense1 = Dense(units=128, activation='relu')(concatenated)
output = Dense(
    units=tfidf_matrix_normalized.shape[1],
    activation='sigmoid',
)(dense1)

model = Model(inputs=[project_input, location_input], outputs=output)
model.compile(optimizer=Adam(), loss='binary_crossentropy')

In [None]:
# The model's Embedding layers look rows up by integer id, so the string
# columns are encoded first. Ids follow the sorted order of the distinct values
# (the pandas `cat.codes` convention); a missing value would be encoded as -1.
project_codes = df['Project Title'].astype('category').cat.codes.to_numpy(dtype='int32')
location_codes = df['Location'].astype('category').cat.codes.to_numpy(dtype='int32')
assert (project_codes >= 0).all(), "Project Title contains missing values"
assert (location_codes >= 0).all(), "Location contains missing values"

# The loss is computed on dense tensors, so a sparse TF-IDF matrix is densified
# before it is used as the training target.
tfidf_targets = tfidf_matrix_normalized
if hasattr(tfidf_targets, 'toarray'):
    tfidf_targets = tfidf_targets.toarray()
tfidf_targets = np.asarray(tfidf_targets, dtype='float32')

# NOTE(review): rows are split in file order, not shuffled - projects that only
# occur in the last 20% of the sheet are never seen during training.
split_idx = int(0.8 * len(df))  # 80% training data, 20% validation data
train_data = {
    'Project Title': project_codes[:split_idx],
    'Location': location_codes[:split_idx]
}
train_labels = tfidf_targets[:split_idx]

validation_data = {
    'Project Title': project_codes[split_idx:],
    'Location': location_codes[split_idx:]
}
validation_labels = tfidf_targets[split_idx:]

In [None]:
# Fit the network for 10 epochs with mini-batches of 32 samples, reporting the
# loss on the held-out validation split after every epoch.
model.fit(
    x=[train_data['Project Title'], train_data['Location']],
    y=train_labels,
    batch_size=32,
    epochs=10,
    validation_data=(
        [validation_data['Project Title'], validation_data['Location']],
        validation_labels,
    ),
)


In [None]:
def recommend_improvement(project_title, location, top_n=5):
    """Recommend existing improvement texts for a project/location pair.

    The pair is encoded as integer ids, the model predicts a TF-IDF-like vector
    for it, and the rows of ``df`` whose TF-IDF vectors are most similar
    (cosine similarity) to that prediction are returned.

    Args:
        project_title: Project title exactly as it appears in
            ``df['Project Title']``.
        location: Location name; it is capitalised the same way the
            ``Location`` column was when the data was loaded.
        top_n: Number of recommendations to return (default 5).

    Returns:
        pandas.Series: The ``Improve`` values of the ``top_n`` best-matching
        rows, best match first.

    Raises:
        ValueError: If the project title or the location does not occur in
            ``df``.
    """
    # Ids are positions in the sorted distinct values (pandas `cat.codes`
    # convention). NOTE(review): this assumes the model was trained on ids
    # assigned the same way - confirm against the training cell.
    project_categories = df['Project Title'].astype('category').cat.categories
    location_categories = df['Location'].astype('category').cat.categories

    location = str(location).capitalize()

    if project_title not in project_categories:
        raise ValueError(f"Unknown project title: {project_title!r}")
    if location not in location_categories:
        raise ValueError(f"Unknown location: {location!r}")

    # One sample, shape (1, 1) per input, matching the model's Input(shape=(1,)).
    project_code = np.array([[project_categories.get_loc(project_title)]], dtype='int32')
    location_code = np.array([[location_categories.get_loc(location)]], dtype='int32')

    prediction = model.predict([project_code, location_code], verbose=0)

    # The prediction has one entry per vocabulary term, so it is compared with
    # every row's TF-IDF vector instead of being used as row positions itself.
    similarity = cosine_similarity(prediction, tfidf_matrix_normalized)[0]
    best_rows = np.argsort(similarity)[::-1][:top_n]

    return df.iloc[best_rows]['Improve']

Contoh 

In [None]:
# Ask for a project title and a location, then print the recommended improvements.
project_title_input = input("Masukkan judul proyek: ")
# Stored under its own name so the Keras `location_input` tensor created in the
# model cell is not overwritten by a plain string.
location_name_input = input("Masukkan lokasi proyek: ")

recommended_improvements = recommend_improvement(project_title_input, location_name_input)
print("Rekomendasi Improvement:")
for idx, improvement in enumerate(recommended_improvements, 1):
    print(f"{idx}. {improvement}")