In [52]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Flatten, Dropout
from sklearn.metrics import mean_squared_error
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Load data from Excel
file_path = r'C:\Users\Ardian\Documents\Kuliah Ardian\Semester 7\Bangkit\Data Sistem Rekomendasi.xlsx'
excel_data = pd.read_excel(file_path, header=0)

excel_data

Unnamed: 0,Pengguna,Batik Buketan,Batik Cendrawasih,Batik Ceplok,Batik Corak Insang,Batik Dayak,Batik Gunungan,Batik Ikat Celup,Batik Kawung,Batik Lereng,Batik Mega Mendung,Batik Nitik,Batik Parang,Batik Prada,Batik Sekar,Batik Sidoluhur,Batik Truntum,Batik Tumpal
0,User 1,1,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,1
1,User 2,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1
2,User 3,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,0
3,User 4,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,1,0
4,User 5,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,1,1
5,User 6,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,1
6,User 7,1,1,0,0,0,1,1,0,1,0,0,1,1,1,1,0,1
7,User 8,1,0,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1
8,User 9,0,0,0,1,0,0,1,1,0,1,0,0,0,1,0,0,1
9,User 10,0,1,0,1,1,1,1,0,1,1,1,0,1,1,0,0,0


## Data Loading and Preprocessing

We load the data from an Excel file and preprocess it for further analysis. The data contains user ratings for different motifs. We perform the following steps:

1. **Load Data**: We load the data from an Excel file using `pandas`.
2. **Extract Data**: We extract the user information, motif list, and ratings from the loaded data.
3. **Normalize Ratings**: We normalize the ratings using `MinMaxScaler` to scale the ratings between 0 and 1.
4. **Split Data**: We split the data into training and testing sets using `train_test_split`.

In [None]:
# Extract data
users = excel_data['Pengguna']
motif_list = excel_data.columns[1:].tolist()
ratings = excel_data.iloc[:, 1:].values

# Normalize ratings
scaler = MinMaxScaler()
normalized_ratings = scaler.fit_transform(ratings)

# Split data into train and test
X_train, X_test, y_train, y_test = train_test_split(
    np.arange(len(users)), normalized_ratings, test_size=0.2, random_state=42
)

## Build TensorFlow Sequential Model

We build a TensorFlow Sequential model to learn the features of the user ratings data. The model architecture is as follows:

1. **Input Layer**: The input layer has a shape equal to the number of motifs.
2. **Hidden Layers**: 
    - The first hidden layer has 64 neurons with ReLU activation and a dropout rate of 0.3.
    - The second hidden layer has 32 neurons with ReLU activation and a dropout rate of 0.3.
3. **Output Layer**: The output layer has a number of neurons equal to the number of motifs with sigmoid activation.

The model is compiled using the Adam optimizer and Mean Squared Error (MSE) loss function. The model is trained for 50 epochs with a batch size of 16 and a validation split of 0.2.

After training, we evaluate the model by predicting the ratings for the training and testing sets. The Root Mean Squared Error (RMSE) is calculated for both sets to assess the model's performance.

In [54]:
# Build TensorFlow Sequential model
num_users = len(users)
num_motifs = len(motif_list)
embedding_dim = 32  # Increased embedding dimension for better feature learning

model = Sequential([
    Dense(64, activation='relu', input_shape=(num_motifs,)),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(num_motifs, activation='sigmoid')
])

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])

# Train the model
model.fit(normalized_ratings, normalized_ratings, epochs=50, batch_size=16, verbose=1, validation_split=0.2)

# Evaluate model
train_predictions = model.predict(normalized_ratings[X_train])
test_predictions = model.predict(normalized_ratings[X_test])

train_rmse = np.sqrt(mean_squared_error(y_train, train_predictions))
test_rmse = np.sqrt(mean_squared_error(y_test, test_predictions))

print(f"Train RMSE: {train_rmse:.4f}")
print(f"Test RMSE: {test_rmse:.4f}")

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 195ms/step - loss: 0.2557 - mean_squared_error: 0.2557 - val_loss: 0.2529 - val_mean_squared_error: 0.2529
Epoch 2/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - loss: 0.2507 - mean_squared_error: 0.2507 - val_loss: 0.2513 - val_mean_squared_error: 0.2513
Epoch 3/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step - loss: 0.2528 - mean_squared_error: 0.2528 - val_loss: 0.2500 - val_mean_squared_error: 0.2500
Epoch 4/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step - loss: 0.2465 - mean_squared_error: 0.2465 - val_loss: 0.2488 - val_mean_squared_error: 0.2488
Epoch 5/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - loss: 0.2504 - mean_squared_error: 0.2504 - val_loss: 0.2476 - val_mean_squared_error: 0.2476
Epoch 6/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - loss: 0.2459 - mean_squared_er

## Model Evaluation

The performance of the recommendation model is evaluated using the Root Mean Squared Error (RMSE) metric. RMSE is a commonly used measure to evaluate the accuracy of a model's predictions. It represents the square root of the average squared differences between the predicted and actual values. A lower RMSE value indicates better model performance.

- **Train RMSE**: 0.4079
- **Test RMSE**: 0.4147

The Train RMSE of 0.4079 indicates that the model's predictions on the training data are fairly accurate, with an average error of approximately 0.4079. The Test RMSE of 0.4147 shows that the model generalizes well to unseen data, with a similar level of accuracy as on the training data. The close values of Train RMSE and Test RMSE suggest that the model is not overfitting and performs consistently on both training and testing datasets.

## Testing The Model With New User

We handle the data for a new user and generate motif recommendations based on their preferences. The steps are as follows:

1. **New User Data**: We create an array representing the new user's ratings for different motifs.
2. **Normalize New User Data**: We normalize the new user's ratings using the same scaler that was used for the training data.
3. **Predict Recommendations**: We use the trained model to predict the new user's preferences for each motif.
4. **Decode Predictions**: We decode the predicted scores into a list of motifs with their corresponding scores.
5. **Sort Recommendations**: We sort the motifs based on the predicted scores in descending order.
6. **Display Top 5 Recommendations**: We print the top 5 recommended motifs for the new user.

In [None]:
import numpy as np


# Data pengguna baru
new_user_ratings = np.array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0]).reshape(1, -1)

# Normalisasi data pengguna baru
new_user_normalized = scaler.transform(new_user_ratings)

# Prediksi rekomendasi
predictions = model.predict(new_user_normalized)

# Decoding hasil prediksi
recommended_motifs = [(motif_list[i], predictions[0, i]) for i in range(len(motif_list))]
recommended_motifs = sorted(recommended_motifs, key=lambda x: -x[1])  # Urutkan berdasarkan skor prediksi

# Menampilkan 5 rekomendasi terbaik
print("5 Rekomendasi Motif Batik:")
for motif, score in recommended_motifs[:5]:
    print(f"{motif}: {score:.4f}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
5 Rekomendasi Motif Batik:
Batik Sekar: 0.6920
Batik Corak Insang: 0.6368
Batik Ikat Celup: 0.6065
Batik Prada: 0.6008
Batik Dayak: 0.5647


## Save the Model

After training and evaluating the recommendation model, we save the trained model to a file for future use. This allows us to load the model later without having to retrain it, saving time and computational resources. The model is saved in the HDF5 format, which is a versatile format for storing large amounts of data.

In [58]:
# Save the model
model.save(r'C:\Users\Ardian\Documents\Kuliah Ardian\Semester 7\Bangkit\recommendation_model.h5')

