#Studi Kasus: Menerapkan K-NN untuk sistem rekomendasi produk

In [1]:
%pip install pandas numpy scikit-learn

Collecting pandas
  Using cached pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl.metadata (89 kB)
Collecting numpy
  Downloading numpy-2.2.0-cp313-cp313-macosx_14_0_arm64.whl.metadata (62 kB)
Collecting scikit-learn
  Downloading scikit_learn-1.6.0-cp313-cp313-macosx_12_0_arm64.whl.metadata (31 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2024.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2024.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Downloading scipy-1.14.1-cp313-cp313-macosx_14_0_arm64.whl.metadata (60 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Using cached pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl (11.3 MB)
Downloading numpy-2.2.0-cp313-cp313-macosx_14_0_arm64.whl (5.1 MB)
[2K

In [2]:
# Import Libraries
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler


In [4]:
# Dummy Dataset
# Produk dan fitur yang relevan
data = {
    'Product_ID': ['P1', 'P2', 'P3', 'P4', 'P5'],
    'Feature1': [3, 1, 4, 2, 5],
    'Feature2': [5, 1, 3, 4, 2],
    'Feature3': [4, 2, 5, 1, 3],
}

In [5]:
# Preferensi pelanggan (rating dummy)
customer_preferences = {
    'Customer_ID': ['C1', 'C2', 'C3', 'C4'],
    'P1': [5, 3, 4, 2],
    'P2': [3, 4, 2, 5],
    'P3': [4, 5, 3, 1],
    'P4': [1, 2, 5, 4],
    'P5': [2, 1, 4, 5],
}

In [6]:
#Convert to DataFrames
product_df = pd.DataFrame(data)
customer_df = pd.DataFrame(customer_preferences).set_index('Customer_ID')

In [7]:
# Preprocessing
# Scale product features for distance calculations
scaler = StandardScaler()
scaled_features = scaler.fit_transform(product_df.iloc[:, 1:])


In [8]:
# Implementing K-NN on Customers
# Transpose customer ratings for similarity matching
customer_ratings_matrix = customer_df.T

In [9]:
# Train KNN model for customers
knn_model = NearestNeighbors(n_neighbors=2, metric='euclidean')
knn_model.fit(customer_ratings_matrix)

In [23]:
# Transpose customer ratings for similarity matching (training input)
customer_ratings_matrix = customer_df.T

# Train KNN model for customers
knn_model = NearestNeighbors(n_neighbors=2, metric='euclidean')
knn_model.fit(customer_ratings_matrix.values.T)  # Use the transposed ratings as features

# Updated recommend_products function
def recommend_products(customer_id, customer_df, knn_model, n_recommendations=2):
    # Ensure we query using the transposed matrix (as the model expects this format)
    customer_ratings_matrix = customer_df.T
    
    # Find the index of the customer in the matrix
    customer_idx = list(customer_df.index).index(customer_id)
    customer_ratings = customer_ratings_matrix.values.T[customer_idx].reshape(1, -1)
    
    # Find nearest neighbors (most similar customers)
    distances, indices = knn_model.kneighbors(customer_ratings, n_neighbors=n_recommendations+1)  # +1 to exclude self
    
    # Get indices of similar customers excluding the customer itself
    similar_customer_indices = indices[0][1:]  # Skip the first, which is the customer itself
    similar_customers = [list(customer_df.index)[idx] for idx in similar_customer_indices]
    
    # Collect products rated highly by similar customers
    recommended_products = set()
    for similar_customer in similar_customers:
        liked_products = customer_df.loc[similar_customer][customer_df.loc[similar_customer] >= 4].index.tolist()
        recommended_products.update(liked_products)
    
    # Exclude products already highly rated by the customer
    already_liked = customer_df.loc[customer_id][customer_df.loc[customer_id] >= 4].index.tolist()
    recommended_products.difference_update(already_liked)
    
    return list(recommended_products)




In [24]:
# Test the corrected recommendation function
customer_id = 'C1'
recommendations = recommend_products(customer_id, customer_df, knn_model)

print(f"\nRecommended products for Customer {customer_id}: {recommendations}")


Recommended products for Customer C1: ['P5', 'P2', 'P4']
