# Building Recommendation System

## Import the Packages

In [21]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

file_path = 'Data/comments.csv'
data = pd.read_csv(file_path)


# Fill missing values in necessary columns
data['productname'] = data['productname'].fillna("Unknown")
data['rating'] = data['rating'].fillna("Unknown")
data['category'] = data['category'].fillna("Unknown")

# Encode the 'category' column into numerical labels
label_encoder_category = LabelEncoder()
data['category_encoded'] = label_encoder_category.fit_transform(data['category'])

# Encode the 'productname' column into numerical labels
label_encoder_product = LabelEncoder()
data['product_encoded'] = label_encoder_product.fit_transform(data['productname'])

# Optionally, convert ratings to numerical values (e.g., "bintang 5" → 5)
rating_mapping = {
    "bintang 5": 5,
    "bintang 4": 4,
    "bintang 3": 3,
    "bintang 2": 2,
    "bintang 1": 1
}
data['rating_numeric'] = data['rating'].map(rating_mapping)

# Drop rows with missing values in critical columns (if necessary)
data = data.dropna(subset=['rating_numeric'])

# Display the processed data
data.head()

  data = pd.read_csv(file_path)


Unnamed: 0,productid,productname,comment,rating,ratingangka,userid,user_id,category,category_encoded,product_encoded,rating_numeric
0,1,"herbisida obat pembasmi rumput liar, gulma, al...","produknya bagus. maka-nya aku beli lagi, nih....",bintang 5,5,1,Bambang,Pestisida & Obat Pertanian,4,189,5.0
1,1,"herbisida obat pembasmi rumput liar, gulma, al...",AKU SUKA PRODUKNYA. MANJUR...!!!,bintang 5,5,1,Bambang,Pestisida & Obat Pertanian,4,189,5.0
2,1,"herbisida obat pembasmi rumput liar, gulma, al...",Sudah sampai. Belum sempat dicoba. Semoga sesu...,bintang 5,5,1,Bambang,Pestisida & Obat Pertanian,4,189,5.0
3,5,"obat pembasmi pohon dan akar, racun pohon, sta...",Packing rapih dan sesuai deskripsi,bintang 5,5,1,Bambang,Pestisida & Obat Pertanian,4,191,5.0
4,5,"obat pembasmi pohon dan akar, racun pohon, sta...","Barang sudah diterima, kurir nya cepat",bintang 5,5,1,Bambang,Pestisida & Obat Pertanian,4,191,5.0


In [22]:
import tensorflow as tf
from sklearn.model_selection import train_test_split

# Split the data into training and testing sets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Define input dimensions
num_products = data['product_encoded'].max() + 1
num_categories = data['category_encoded'].max() + 1
embedding_dim = 16  # Dimension of the embedding space

# Define input layers
product_input = tf.keras.Input(shape=(1,), name='product_input')
category_input = tf.keras.Input(shape=(1,), name='category_input')

# Define embedding layers
product_embedding = tf.keras.layers.Embedding(num_products, embedding_dim, name='product_embedding')(product_input)
category_embedding = tf.keras.layers.Embedding(num_categories, embedding_dim, name='category_embedding')(category_input)

# Flatten embeddings
product_flatten = tf.keras.layers.Flatten()(product_embedding)
category_flatten = tf.keras.layers.Flatten()(category_embedding)

# Concatenate embeddings
concat = tf.keras.layers.Concatenate()([product_flatten, category_flatten])

# Add dense layers
dense_1 = tf.keras.layers.Dense(128, activation='relu')(concat)
dense_2 = tf.keras.layers.Dense(64, activation='relu')(dense_1)
output = tf.keras.layers.Dense(1, activation='linear', name='rating_output')(dense_2)

# Define the model
model = tf.keras.Model(inputs=[product_input, category_input], outputs=output)
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Prepare the training and testing data
train_x = [train_data['product_encoded'], train_data['category_encoded']]
train_y = train_data['rating_numeric']

test_x = [test_data['product_encoded'], test_data['category_encoded']]
test_y = test_data['rating_numeric']

# Train the model
history = model.fit(train_x, train_y, validation_data=(test_x, test_y), epochs=10, batch_size=32)

# Save the model
model.save('content_based_model.h5')

print("Model training complete and saved!")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model training complete and saved!


In [23]:
import numpy as np

# Load the trained model
model = tf.keras.models.load_model('content_based_model.h5')

# Function to generate recommendations
def generate_recommendations(user_preferred_category, top_n=5):
    # Encode the user's preferred category
    category_encoded = label_encoder_category.transform([user_preferred_category])[0]
    
    # Create a list of all product IDs
    all_products = data['product_encoded'].unique()
    
    # Prepare input data for the model
    category_inputs = np.full_like(all_products, category_encoded)
    
    # Predict ratings for all products in the preferred category
    predicted_ratings = model.predict([all_products, category_inputs])
    
    # Combine products and predicted ratings
    product_scores = list(zip(all_products, predicted_ratings.flatten()))
    
    # Sort products by predicted rating in descending order
    product_scores = sorted(product_scores, key=lambda x: x[1], reverse=True)
    
    # Get the top-N recommended products
    top_products = product_scores[:top_n]
    
    # Decode product names and return the recommendations
    recommended_products = [
        (label_encoder_product.inverse_transform([product_id])[0], score)
        for product_id, score in top_products
    ]
    return recommended_products

# Example: Generate recommendations for a preferred category
user_category = "Pestisida & Obat Pertanian"  # Replace with user-preferred category
recommendations = generate_recommendations(user_category)

# Display recommendations
print(f"Top recommendations for category '{user_category}':")
for product, score in recommendations:
    print(f"{product}: Predicted Rating {score:.2f}")


Top recommendations for category 'Pestisida & Obat Pertanian':
SELANG DRIP IRIGASI 3/4" 180 METER 160 MIKRON LEBAR 4 CM TANI LESTARI: Predicted Rating 5.10
100ml Pestisida Organik Neem Oil /Minyak Mimba dari Biosfer Organik: Predicted Rating 5.08
DRIP TETES BOTOL Water Cone Drip IRIGASI TETES Kran Otomatis Botol - Drip belimbing: Predicted Rating 5.04
Connector Offtake Selang Drip: Predicted Rating 5.03
Wortel Berastagi Baby Konvensional 500 gram Sayurbox: Predicted Rating 4.99
Top recommendations for category 'Pestisida & Obat Pertanian':
SELANG DRIP IRIGASI 3/4" 180 METER 160 MIKRON LEBAR 4 CM TANI LESTARI: Predicted Rating 5.10
100ml Pestisida Organik Neem Oil /Minyak Mimba dari Biosfer Organik: Predicted Rating 5.08
DRIP TETES BOTOL Water Cone Drip IRIGASI TETES Kran Otomatis Botol - Drip belimbing: Predicted Rating 5.04
Connector Offtake Selang Drip: Predicted Rating 5.03
Wortel Berastagi Baby Konvensional 500 gram Sayurbox: Predicted Rating 4.99


In [10]:
import numpy as np

# Load the trained model
model = tf.keras.models.load_model('content_based_model.h5')

# Function to generate recommendations
def generate_recommendations(user_preferred_category, top_n=10):
    # Encode the user's preferred category
    category_encoded = label_encoder_category.transform([user_preferred_category])[0]
    
    # Create a list of all product IDs
    all_products = data['product_encoded'].unique()
    
    # Prepare input data for the model
    category_inputs = np.full_like(all_products, category_encoded)
    
    # Predict ratings for all products in the preferred category
    predicted_ratings = model.predict([all_products, category_inputs])
    
    # Combine products and predicted ratings
    product_scores = list(zip(all_products, predicted_ratings.flatten()))
    
    # Sort products by predicted rating in descending order
    product_scores = sorted(product_scores, key=lambda x: x[1], reverse=True)
    
    # Get the top-N recommended products
    top_products = product_scores[:top_n]
    
    # Decode product names and return the recommendations
    recommended_products = [
        (label_encoder_product.inverse_transform([product_id])[0], score)
        for product_id, score in top_products
    ]
    return recommended_products

# Example: Generate recommendations for a preferred category
user_category = "Produk Pertanian Segar"  # Replace with user-preferred category
recommendations = generate_recommendations(user_category)

# Display recommendations
print(f"Top recommendations for category '{user_category}':")
for product, score in recommendations:
    print(f"{product}: Predicted Rating {score:.2f}")


Top recommendations for category 'Produk Pertanian Segar':
SELANG DRIP IRIGASI 3/4" 180 METER 160 MIKRON LEBAR 4 CM TANI LESTARI: Predicted Rating 4.92
100ml Pestisida Organik Neem Oil /Minyak Mimba dari Biosfer Organik: Predicted Rating 4.88
Dop 3 in PVC / Dop 3" / Tutup Pipa: Predicted Rating 4.84
Connector Offtake Selang Drip: Predicted Rating 4.83
KIRIM INSTAN Paket Sayur Potong Sop Lodeh Capcay Sayur Asem - Sop: Predicted Rating 4.82
BIJI MAHONI 1 KG KERING SUPER REMPAH BU RISMA - 100 gram: Predicted Rating 4.82
DRIP TETES BOTOL Water Cone Drip IRIGASI TETES Kran Otomatis Botol - Drip belimbing: Predicted Rating 4.82
Wortel Berastagi Baby Konvensional 500 gram Sayurbox: Predicted Rating 4.80
Booster Pump Waterplus BR 220 DPA Mesin Pendorong Air Otomatis: Predicted Rating 4.77
Bumbu Rempah Asli Daribumi - Ketumbar Bubuk: Predicted Rating 4.77
