<a href="https://colab.research.google.com/github/Dewangga027/CNN_5_Classification/blob/main/TA_Surya.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Tahap 1: PreProcessing Data

In [246]:
!pip install ipywidgets --quiet

In [247]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from collections import Counter
from ipywidgets import widgets, VBox, Output, Checkbox, Button, Dropdown, HBox
from IPython.display import display, Markdown

In [248]:
# Load the sales transactions from the Excel export and preview the first rows.
df = pd.read_excel('/content/frozenfood.xlsx')
df.head()

Unnamed: 0,TGL,KODEBARA,NAMABARA,QTY,SATUAN,HARGA,JUMLAH,NAMA
0,2020-04-01,JOF-022,JOFRANS NUGGET AYAM REG. 225 GR X 24 PAK,1,DUS,240000,240000,F - (MITRAKU) TOKO SALMAINI HABABAHAN
1,2020-04-01,JOF-002,JOFRANS NUGGET AYAM REG. 500 GR X 12 PAK,1,DUS,216000,216000,F - (MITRAKU) TOKO SALMAINI HABABAHAN
2,2020-04-01,OKEY-003,OKEY STICK 500 GR X 10 PACK,1,DUS,190000,190000,F - (MITRAKU) TOKO SALMAINI HABABAHAN
3,2020-04-01,OKEY-001,OKEY SOSIS 500 GR X 20 PACK,1,DUS,380000,380000,F - (MITRAKU) TOKO SALMAINI HABABAHAN
4,2020-04-01,BLF-001,BELFOODS SOSIS AYAM 500 GR X 12 PACK,1,DUS,204000,204000,F - (MITRAKU) TOKO SALMAINI HABABAHAN


In [249]:
# Print the column dtypes and non-null counts of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6778 entries, 0 to 6777
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   TGL       6778 non-null   datetime64[ns]
 1   KODEBARA  6778 non-null   object        
 2   NAMABARA  6778 non-null   object        
 3   QTY       6778 non-null   int64         
 4   SATUAN    6778 non-null   object        
 5   HARGA     6778 non-null   int64         
 6   JUMLAH    6778 non-null   int64         
 7   NAMA      6778 non-null   object        
dtypes: datetime64[ns](1), int64(3), object(4)
memory usage: 423.8+ KB


In [250]:
# Count missing values per column.
df.isna().sum()

Unnamed: 0,0
TGL,0
KODEBARA,0
NAMABARA,0
QTY,0
SATUAN,0
HARGA,0
JUMLAH,0
NAMA,0


In [251]:
# Drop every row that contains a missing value; this mutates df in place.
df.dropna(inplace=True)
df['TGL'] = pd.to_datetime(df['TGL']) # convert to a datetime dtype

In [252]:
# Keep an independent copy of the cleaned frame; the feature columns added to df
# in the following cells are not added to this copy.
df_duplicate = df.copy()

In [253]:
# Conversion factor from each sales unit label (SATUAN) to a number of pieces.
# The original inline comment labelled the first entry as an example
# ("1 BAL = 10 pcs").
# NOTE(review): confirm these factors against the real packaging sizes.
satuan_mapping = dict(
    BAL=10,
    BALL=10,
    BKS=5,
    DUS=12,
    JRG=6,
    PAC=5,
    PACK=5,
    PAIL=20,
    PCS=1,
)

In [254]:
# Rows with missing values were already dropped and TGL was already converted in
# the earlier cleaning cell, so those two steps are not repeated here.

# Look up the unit conversion on a normalised label (trimmed, upper-cased) so that
# spelling variants such as ' dus' still match the keys of satuan_mapping.
satuan_normalized = df['SATUAN'].astype(str).str.strip().str.upper()
df['KONVERSI'] = satuan_normalized.map(satuan_mapping)

# Fail loudly on unknown units: an unmapped unit would otherwise become NaN in
# KONVERSI / JUMLAH_UNIT and be silently treated as "no purchase" downstream.
unmapped_units = sorted(satuan_normalized[df['KONVERSI'].isna()].unique())
if unmapped_units:
    raise ValueError(f"SATUAN values missing from satuan_mapping: {unmapped_units}")

# Quantity expressed in pieces, and the brand taken as the first word of the product name.
df['JUMLAH_UNIT'] = df['QTY'] * df['KONVERSI']
df['BRAND'] = df['NAMABARA'].str.split().str[0]

In [255]:
# One encoder per categorical column; each encoder object is kept so the integer
# codes can be mapped back to the original labels later.
le_nama = LabelEncoder()
le_kodebara = LabelEncoder()
le_brand = LabelEncoder()

# Adds NAMA_encoded, KODEBARA_encoded and BRAND_encoded, in that order.
for source_col, encoder in (('NAMA', le_nama), ('KODEBARA', le_kodebara), ('BRAND', le_brand)):
    df[f'{source_col}_encoded'] = encoder.fit_transform(df[source_col])

In [256]:
# Split the transaction rows into train (80%) and test (20%) sets with a fixed seed.
# The split is done per row, so a product can end up only in the test set; a later
# cell counts those products ("Produk hilang dari train").
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

In [257]:
# Extract the calendar month and year of each transaction date.
df_duplicate = df_duplicate.assign(
    Bulan=df_duplicate['TGL'].dt.month,
    Tahun=df_duplicate['TGL'].dt.year,
)

In [258]:
# Display the copy, which now carries the added Bulan and Tahun columns.
df_duplicate

Unnamed: 0,TGL,KODEBARA,NAMABARA,QTY,SATUAN,HARGA,JUMLAH,NAMA,Bulan,Tahun
0,2020-04-01,JOF-022,JOFRANS NUGGET AYAM REG. 225 GR X 24 PAK,1,DUS,240000,240000,F - (MITRAKU) TOKO SALMAINI HABABAHAN,4,2020
1,2020-04-01,JOF-002,JOFRANS NUGGET AYAM REG. 500 GR X 12 PAK,1,DUS,216000,216000,F - (MITRAKU) TOKO SALMAINI HABABAHAN,4,2020
2,2020-04-01,OKEY-003,OKEY STICK 500 GR X 10 PACK,1,DUS,190000,190000,F - (MITRAKU) TOKO SALMAINI HABABAHAN,4,2020
3,2020-04-01,OKEY-001,OKEY SOSIS 500 GR X 20 PACK,1,DUS,380000,380000,F - (MITRAKU) TOKO SALMAINI HABABAHAN,4,2020
4,2020-04-01,BLF-001,BELFOODS SOSIS AYAM 500 GR X 12 PACK,1,DUS,204000,204000,F - (MITRAKU) TOKO SALMAINI HABABAHAN,4,2020
...,...,...,...,...,...,...,...,...,...,...
6773,2020-09-30,OKEY-001,OKEY SOSIS 500 GR X 20 PACK,3,DUS,380000,1140000,F - ACHIKA JAYA,9,2020
6774,2020-09-30,MIN-001,MINAKU OTAK OTAK 500 GR X 20 PACK,1,DUS,360000,360000,F - ACHIKA JAYA,9,2020
6775,2020-09-30,INDO-002,INDOESKRIM 8 LTR VANILLA,30,PAIL,145000,4350000,TOKO CITA RASA BARU,9,2020
6776,2020-09-30,DEL-001,DELMONTE CHILI EXT HOT PCH 1 KG X 10 PCS,3,DUS,175000,525000,TOKO CITA RASA BARU,9,2020


# Tahap 3: Membangun Matriks


In [259]:
# All matrices sum JUMLAH_UNIT per customer (NAMA_encoded) over the training rows;
# combinations with no purchase are filled with 0.
_pivot_kwargs = dict(index='NAMA_encoded', values='JUMLAH_UNIT', aggfunc='sum', fill_value=0)

# Rows: customers, columns: product codes.
user_item_matrix = train_df.pivot_table(columns='KODEBARA_encoded', **_pivot_kwargs)
# Rows: product codes, columns: customers.
item_user_matrix = user_item_matrix.T
# Rows: customers, columns: brands (despite the name, customers are the index).
brand_user_matrix = train_df.pivot_table(columns='BRAND_encoded', **_pivot_kwargs)

In [260]:
# Pairwise cosine similarity between the rows of each matrix.
user_similarity = cosine_similarity(user_item_matrix)  # rows are customers (NAMA_encoded)
item_similarity = cosine_similarity(item_user_matrix)  # rows are products (KODEBARA_encoded)
# NOTE(review): brand_user_matrix is indexed by NAMA_encoded, so this compares
# customers by their per-brand quantities, not brands with each other; confirm intent.
brand_similarity = cosine_similarity(brand_user_matrix)

In [261]:
def _fit_cosine_knn(matrix, k=8):
    """Fit a brute-force cosine NearestNeighbors model on the rows of ``matrix``.

    The neighbour count is capped at the number of rows in ``matrix``.
    """
    return NearestNeighbors(
        metric='cosine',
        algorithm='brute',
        n_neighbors=min(k, len(matrix)),
    ).fit(matrix)


model_knn_users = _fit_cosine_knn(user_item_matrix)
model_knn_items = _fit_cosine_knn(item_user_matrix)
# NOTE(review): brand_user_matrix rows are customers (NAMA_encoded), so this model
# searches neighbours among customers in brand space; confirm intent.
model_knn_brands = _fit_cosine_knn(brand_user_matrix)

In [273]:
# Product codes that occur in the test split but never in the training split.
train_codes = set(train_df['KODEBARA_encoded'])
test_codes = set(test_df['KODEBARA_encoded'])
missing_in_train = test_codes.difference(train_codes)
print("Produk hilang dari train:", len(missing_in_train))


Produk hilang dari train: 14


# Tahap 4: Recommender Functions

In [262]:
def recommend_by_user(selected_items, top_n=5, threshold=0.5):
    """Recommend products bought by users similar to the buyers of ``selected_items``.

    Every user in ``user_item_matrix`` with a positive quantity for at least one
    selected product is used as a seed. For each seed, the neighbours returned by
    ``model_knn_users`` whose ``user_similarity`` to the seed exceeds ``threshold``
    add their purchased quantities to a per-product score.

    Args:
        selected_items: Iterable of ``KODEBARA_encoded`` values. Values that are
            not columns of ``user_item_matrix`` are ignored.
        top_n: Maximum number of distinct product codes to recommend.
        threshold: Similarity a neighbour must strictly exceed to contribute.

    Returns:
        DataFrame with columns ``KODEBARA`` and ``NAMABARA``, ordered from the
        highest to the lowest score; empty when nothing can be recommended.
        Selected products are not filtered out of the result.
    """
    result_columns = ['KODEBARA', 'NAMABARA']

    # Seed users: anyone with a positive quantity for a selected product.
    relevant_users = set()
    for item in selected_items:
        if item in user_item_matrix.columns:
            buyers = user_item_matrix[user_item_matrix[item] > 0].index.tolist()
            relevant_users.update(buyers)
    if not relevant_users:
        return pd.DataFrame(columns=result_columns)

    user_ids = user_item_matrix.index
    recommended_items = Counter()
    for user_id in relevant_users:
        # Row position of the seed; user_similarity and the kNN model are both
        # positional over the rows of user_item_matrix.
        user_pos = user_ids.get_loc(user_id)
        _, indices = model_knn_users.kneighbors(
            user_item_matrix.iloc[user_pos].values.reshape(1, -1)
        )
        for neighbor_pos in indices.flatten():
            # Skip the seed itself by identity: when another user ties at
            # distance 0 the seed is not guaranteed to be the first neighbour.
            if neighbor_pos == user_pos:
                continue
            if user_similarity[user_pos, neighbor_pos] > threshold:
                purchased_items = user_item_matrix.iloc[neighbor_pos]
                for item, value in purchased_items[purchased_items > 0].items():
                    recommended_items[int(item)] += value

    top_items = [item for item, _ in recommended_items.most_common(top_n)]

    # Look up code/name pairs and keep them in ranking order; a plain isin()
    # filter would return them in the row order of df instead.
    rank = {code: position for position, code in enumerate(top_items)}
    catalog = df[['KODEBARA_encoded', 'KODEBARA', 'NAMABARA']].drop_duplicates()
    matches = catalog[catalog['KODEBARA_encoded'].isin(top_items)]
    matches = matches.assign(_rank=matches['KODEBARA_encoded'].map(rank))
    return matches.sort_values('_rank', kind='mergesort')[result_columns]

In [263]:
def recommend_by_item(selected_items, top_n=5):
    """Recommend the products that are nearest neighbours of ``selected_items``.

    For every selected product present in ``item_user_matrix``, the neighbours
    returned by ``model_knn_items`` each receive one vote; products are ranked by
    their number of votes.

    Args:
        selected_items: Iterable of ``KODEBARA_encoded`` values. Values that are
            not in the index of ``item_user_matrix`` are ignored.
        top_n: Maximum number of distinct product codes to recommend.

    Returns:
        DataFrame with columns ``KODEBARA`` and ``NAMABARA``, ordered from the most
        to the least voted product; empty when nothing can be recommended. Other
        selected products are not filtered out of the result.
    """
    result_columns = ['KODEBARA', 'NAMABARA']

    item_ids = item_user_matrix.index
    recommended_items = Counter()
    for item in selected_items:
        if item not in item_ids:
            continue
        item_pos = item_ids.get_loc(item)
        _, indices = model_knn_items.kneighbors(
            item_user_matrix.iloc[item_pos].values.reshape(1, -1)
        )
        for neighbor_pos in indices.flatten():
            # Skip the query product by identity: products with identical
            # purchase vectors tie at distance 0, so the query is not
            # guaranteed to be the first neighbour.
            if neighbor_pos != item_pos:
                recommended_items[int(item_ids[neighbor_pos])] += 1

    top_items = [item for item, _ in recommended_items.most_common(top_n)]

    # Look up code/name pairs and keep them in ranking order; a plain isin()
    # filter would return them in the row order of df instead.
    rank = {code: position for position, code in enumerate(top_items)}
    catalog = df[['KODEBARA_encoded', 'KODEBARA', 'NAMABARA']].drop_duplicates()
    matches = catalog[catalog['KODEBARA_encoded'].isin(top_items)]
    matches = matches.assign(_rank=matches['KODEBARA_encoded'].map(rank))
    return matches.sort_values('_rank', kind='mergesort')[result_columns]

In [264]:
def recommend_by_brand(selected_items, top_n=5):
    """Recommend products bought by customers of the selected products' brands.

    The brands of ``selected_items`` are looked up in ``train_df``. Every user with
    a positive quantity for such a brand in ``brand_user_matrix`` adds all of their
    purchased quantities (from ``user_item_matrix``) to a per-product score. A user
    who matches several selected brands contributes once per matching brand.

    Args:
        selected_items: Iterable of ``KODEBARA_encoded`` values.
        top_n: Maximum number of distinct product codes to recommend.

    Returns:
        DataFrame with columns ``KODEBARA`` and ``NAMABARA``, ordered from the
        highest to the lowest score; empty when nothing can be recommended. The
        result is not restricted to the selected brands and may contain the
        selected products themselves.
    """
    result_columns = ['KODEBARA', 'NAMABARA']

    is_selected = train_df['KODEBARA_encoded'].isin(selected_items)
    selected_brands = train_df.loc[is_selected, 'BRAND_encoded'].unique()
    if len(selected_brands) == 0:
        return pd.DataFrame(columns=result_columns)

    recommended_items = Counter()
    for brand in selected_brands:
        if brand in brand_user_matrix.columns:
            users = brand_user_matrix[brand_user_matrix[brand] > 0].index.tolist()
            for user_id in users:
                purchased_items = user_item_matrix.loc[user_id]
                for item, value in purchased_items[purchased_items > 0].items():
                    recommended_items[int(item)] += value

    top_items = [item for item, _ in recommended_items.most_common(top_n)]

    # Look up code/name pairs and keep them in ranking order; a plain isin()
    # filter would return them in the row order of df instead.
    rank = {code: position for position, code in enumerate(top_items)}
    catalog = df[['KODEBARA_encoded', 'KODEBARA', 'NAMABARA']].drop_duplicates()
    matches = catalog[catalog['KODEBARA_encoded'].isin(top_items)]
    matches = matches.assign(_rank=matches['KODEBARA_encoded'].map(rank))
    return matches.sort_values('_rank', kind='mergesort')[result_columns]


# Tahap 5: Interactive UI


In [265]:
# Render the UI instruction as a Markdown heading (Indonesian for: "Choose a
# preference and at most 5 products to get recommendations").
display(Markdown("### Pilih preferensi dan maksimal 5 produk untuk mendapatkan rekomendasi"))

### Pilih preferensi dan maksimal 5 produk untuk mendapatkan rekomendasi

In [266]:
# Distinct (product name, encoded product code) pairs, sorted by name.
product_options = (
    df[['NAMABARA', 'KODEBARA_encoded']]
    .drop_duplicates()
    .sort_values('NAMABARA')
)

# One unchecked checkbox per product, labelled with the product name.
checkboxes = [
    Checkbox(description=product_name, value=False)
    for product_name in product_options['NAMABARA']
]
# Checkbox label -> encoded product code. If two rows share a name, the later
# row's code replaces the earlier one for that label.
checkbox_map = {
    cb.description: encoded_code
    for cb, encoded_code in zip(checkboxes, product_options['KODEBARA_encoded'])
}

preferensi_dropdown = Dropdown(options=['user', 'item', 'brand'], description="Preferensi:")
output_area = Output()

In [267]:
def on_run_clicked(b):
    """Button callback: show recommendations for the ticked products and score them.

    Reads the ticked checkboxes and the dropdown value, validates that between one
    and five products are selected, calls the matching ``recommend_by_*`` function
    and writes the result into ``output_area``. When at least one user in
    ``test_df`` bought a selected product, the recommendation is also scored with
    ``evaluate_model`` (defined in a later cell, which must have been run before
    the button is clicked).

    Args:
        b: The clicked button instance passed by ipywidgets; not used.
    """
    output_area.clear_output()
    preferensi = preferensi_dropdown.value

    # Encoded product codes of every ticked checkbox.
    selected_encoded = []
    for cb in checkboxes:
        if cb.value:
            selected_encoded.append(checkbox_map[cb.description])

    # Validation: at least one and at most five products.
    n_selected = len(selected_encoded)
    if n_selected == 0 or n_selected > 5:
        with output_area:
            if n_selected == 0:
                print("❗ Silakan pilih minimal satu produk terlebih dahulu.")
            else:
                print("⚠️ Maksimal hanya boleh memilih 5 produk.")
        return

    # Dispatch on the chosen preference; unknown values give an empty result.
    if preferensi == 'user':
        rekom = recommend_by_user(selected_encoded)
    elif preferensi == 'item':
        rekom = recommend_by_item(selected_encoded)
    elif preferensi == 'brand':
        rekom = recommend_by_brand(selected_encoded)
    else:
        rekom = pd.DataFrame()

    with output_area:
        if rekom.empty:
            print("❗ Tidak ada rekomendasi ditemukan.")
            return

        print(f"📦 Rekomendasi berdasarkan preferensi '{preferensi}':")
        display(rekom)

        # Automatic evaluation: the ground truth is everything bought in the test
        # split by users who bought any of the selected products there.
        bought_selected = test_df['KODEBARA_encoded'].isin(selected_encoded)
        true_users = test_df.loc[bought_selected, 'NAMA_encoded'].unique()
        if len(true_users) == 0:
            print("\n📊 Tidak ada user cocok di data test untuk evaluasi.")
            return

        by_true_user = test_df['NAMA_encoded'].isin(true_users)
        true_items = test_df.loc[by_true_user, 'KODEBARA_encoded'].unique()
        is_recommended = df['KODEBARA'].isin(rekom['KODEBARA'])
        predicted_items = df.loc[is_recommended, 'KODEBARA_encoded'].unique()
        print("\n📊 Evaluasi Prediksi:")
        evaluate_model(true_items, predicted_items)

In [268]:
# Button labelled "Tampilkan Rekomendasi"; clicking it invokes on_run_clicked.
run_button = Button(description="Tampilkan Rekomendasi")
run_button.on_click(on_run_clicked)

In [269]:
# Only the first 20 checkboxes (in product-name order) are placed in the UI, so
# products beyond those cannot be ticked from this widget.
product_checkboxes_ui = VBox(checkboxes[:20])  # Limit how many are shown so the UI stays light

In [270]:
# Stack the controls vertically: preference dropdown, product checkboxes,
# run button, then the output area the callback writes into.
display(VBox([preferensi_dropdown, product_checkboxes_ui, run_button, output_area]))

VBox(children=(Dropdown(description='Preferensi:', options=('user', 'item', 'brand'), value='user'), VBox(chil…

# Tahap 6: Evaluation (Precision, Recall, F1)

In [271]:
from sklearn.metrics import precision_score, recall_score, f1_score

In [272]:
def evaluate_model(true_encoded, predicted_encoded):
    """Print set-based precision, recall and F1 score for one recommendation.

    Both inputs are reduced to sets, so repeated codes count once. A metric whose
    denominator would be zero is reported as 0.

    Args:
        true_encoded: Iterable of relevant (ground-truth) product codes.
        predicted_encoded: Iterable of recommended product codes.

    Returns:
        None. The three metrics are printed with two decimals.
    """
    relevant = set(true_encoded)
    recommended = set(predicted_encoded)
    hits = len(relevant.intersection(recommended))

    # hits + false positives is the number of distinct recommendations;
    # hits + false negatives is the number of distinct relevant codes.
    precision = hits / len(recommended) if recommended else 0
    recall = hits / len(relevant) if relevant else 0

    denominator = precision + recall
    f1 = 2 * (precision * recall) / denominator if denominator > 0 else 0

    for label, score in (("Precision", precision), ("Recall", recall), ("F1 Score", f1)):
        print(f"{label}: {score:.2f}")