In [57]:
########################################
# Armut Ürün Tavsiye Sistemi - Association Rule Learning (ARL)
########################################

In [58]:
# Problem Tanımı:
# Türkiye’nin en büyük online hizmet platformu olan Armut’un kullanıcı-hizmet geçmişine dayalı olarak,
# hizmetler arası ilişki kurarak kullanıcıya uygun hizmet önerisinde bulunulması amaçlanmaktadır.

In [59]:
########################################
# Kütüphaneler
########################################

In [60]:
import pandas as pd
pd.set_option('display.max_columns', None)  # Tüm sütunları gösterebilmek için ayar
from mlxtend.frequent_patterns import apriori, association_rules    # ARL algoritmaları için gerekli kütüphane

In [61]:
########################################
# Veri Yükleme ve Ön İşleme
########################################

In [62]:
# Veri seti yüklenir
df_ = pd.read_csv("/content/armut_data.csv")

In [63]:
df = df_.copy()
df.head()

Unnamed: 0,UserId,ServiceId,CategoryId,CreateDate
0,25446,4,5,2017-08-06 16:11:00
1,22948,48,5,2017-08-06 16:12:00
2,10618,0,8,2017-08-06 16:13:00
3,7256,9,4,2017-08-06 16:14:00
4,25446,48,5,2017-08-06 16:16:00


In [64]:
# Eksik değer kontrolü
df.isnull().sum()

Unnamed: 0,0
UserId,0
ServiceId,0
CategoryId,0
CreateDate,0


In [65]:
# Kullanıcıların aldığı her hizmet, ServiceId ve CategoryId birleştirilerek tek bir ID haline getirilir
df["Hizmet"] = df["ServiceId"].astype(str) + "_" + df["CategoryId"].astype(str)
df.head()

Unnamed: 0,UserId,ServiceId,CategoryId,CreateDate,Hizmet
0,25446,4,5,2017-08-06 16:11:00,4_5
1,22948,48,5,2017-08-06 16:12:00,48_5
2,10618,0,8,2017-08-06 16:13:00,0_8
3,7256,9,4,2017-08-06 16:14:00,9_4
4,25446,48,5,2017-08-06 16:16:00,48_5


In [66]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 162523 entries, 0 to 162522
Data columns (total 5 columns):
 #   Column      Non-Null Count   Dtype 
---  ------      --------------   ----- 
 0   UserId      162523 non-null  int64 
 1   ServiceId   162523 non-null  int64 
 2   CategoryId  162523 non-null  int64 
 3   CreateDate  162523 non-null  object
 4   Hizmet      162523 non-null  object
dtypes: int64(3), object(2)
memory usage: 6.2+ MB


In [67]:
# Tarih formatı datetime objesine çevrilir
df["CreateDate"] = pd.to_datetime(df["CreateDate"])

# Ay bazlı zaman periyodu oluşturulur
df["New_Date"] = df["CreateDate"].dt.to_period("M")  # Örn: 2017-08

In [68]:
df.head()

Unnamed: 0,UserId,ServiceId,CategoryId,CreateDate,Hizmet,New_Date
0,25446,4,5,2017-08-06 16:11:00,4_5,2017-08
1,22948,48,5,2017-08-06 16:12:00,48_5,2017-08
2,10618,0,8,2017-08-06 16:13:00,0_8,2017-08
3,7256,9,4,2017-08-06 16:14:00,9_4,2017-08
4,25446,48,5,2017-08-06 16:16:00,48_5,2017-08


In [69]:
# Kullanıcı + Ay kombinasyonu ile sepet tanımı yapılır
df["SepetID"] = df["UserId"].astype(str) + "_" + df["New_Date"].astype(str)
df.head()

Unnamed: 0,UserId,ServiceId,CategoryId,CreateDate,Hizmet,New_Date,SepetID
0,25446,4,5,2017-08-06 16:11:00,4_5,2017-08,25446_2017-08
1,22948,48,5,2017-08-06 16:12:00,48_5,2017-08,22948_2017-08
2,10618,0,8,2017-08-06 16:13:00,0_8,2017-08,10618_2017-08
3,7256,9,4,2017-08-06 16:14:00,9_4,2017-08,7256_2017-08
4,25446,48,5,2017-08-06 16:16:00,48_5,2017-08,25446_2017-08


In [70]:
# Her satır bir sepeti, her sütun bir hizmeti temsil edecek şekilde 0-1 formatında pivot tablo oluşturulur
pivot_df = df.pivot_table(index="SepetID",
                          columns="Hizmet",
                          aggfunc="size",
                          fill_value=0)

# Sayılar 1 ve 0'a çevrilir (var-yok bilgisi)
pivot_df = pivot_df.applymap(lambda x: 1 if x > 0 else 0)

  pivot_df = pivot_df.applymap(lambda x: 1 if x > 0 else 0)


In [71]:
pivot_df.head()

Hizmet,0_8,10_9,11_11,12_7,13_11,14_7,15_1,16_8,17_5,18_4,19_6,1_4,20_5,21_5,22_0,23_10,24_10,25_0,26_7,27_7,28_4,29_0,2_0,30_2,31_6,32_4,33_4,34_6,35_11,36_1,37_0,38_4,39_10,3_5,40_8,41_3,42_1,43_2,44_0,45_6,46_4,47_7,48_5,49_1,4_5,5_11,6_7,7_3,8_5,9_4
SepetID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1
0_2017-08,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0
0_2017-09,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0
0_2018-01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0
0_2018-04,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
10000_2017-08,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0


In [72]:
########################################
# ARL Modeli: Apriori & Association Rules
########################################

In [73]:
# En sık birlikte geçen hizmet setleri apriori algoritması ile bulunur
frequent_itemsets = apriori(pivot_df, min_support=0.01, use_colnames=True)

# Support değerine göre en sık geçen hizmetler sıralanır
frequent_itemsets.sort_values("support", ascending=False).head()



Unnamed: 0,support,itemsets
8,0.238121,(18_4)
19,0.130286,(2_0)
5,0.120963,(15_1)
39,0.067762,(49_1)
28,0.066568,(38_4)


In [74]:
# Association rules çıkarılır (antecedent → consequent)
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

# Lift değerine göre en anlamlı kurallar sıralanır
rules.sort_values("lift", ascending=False).head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
11,(25_0),(22_0),0.042895,0.047515,0.01112,0.259247,5.456141,1.0,0.009082,1.285834,0.853324,0.140251,0.222295,0.246645
10,(22_0),(25_0),0.047515,0.042895,0.01112,0.234043,5.456141,1.0,0.009082,1.249553,0.857462,0.140251,0.199714,0.246645
19,(38_4),(9_4),0.066568,0.041393,0.010067,0.151234,3.653623,1.0,0.007312,1.129413,0.778096,0.10284,0.114584,0.197225
18,(9_4),(38_4),0.041393,0.066568,0.010067,0.243216,3.653623,1.0,0.007312,1.233418,0.757661,0.10284,0.189245,0.197225
5,(15_1),(33_4),0.120963,0.02731,0.011233,0.092861,3.400299,1.0,0.007929,1.072262,0.803047,0.081967,0.067392,0.252086


In [75]:
def arl_recommender(rules_df, product_id, rec_count=1):
    """
    Belirtilen bir hizmete (product_id) dayanarak, ilişki kuralları (association rules) üzerinden
    tavsiye edilen hizmetleri döndürür.

    Parametreler:
    - rules_df: DataFrame, association_rules fonksiyonu çıktısı
    - product_id: str, tavsiye istenen hizmet ID'si (örn: "2_0")
    - rec_count: int, döndürülecek öneri sayısı

    Returns:
    - list: Tavsiye edilen hizmet ID'lerinden oluşan liste
    """

    # Kuralları lift değerine göre azalan şekilde sırala
    # (En güçlü ilişkilere sahip kurallar üstte yer alır)
    sorted_rules = rules_df.sort_values("lift", ascending=False)

    recommendation_list = []  # Tavsiye edilecek ürünleri tutmak için boş bir liste oluştur

    # Her bir kuralın öncül (antecedents) kısmında dolaş
    for i, product in sorted_rules["antecedents"].items():
        # Önceki ürün kümesini (frozenset) listeye çevirerek dolaş
        for j in list(product):
            # Eğer öncüller arasında öneri istenen ürün varsa
            if j == product_id:
                # Bu kurala karşılık gelen sonuç (consequents) ürünleri tavsiye listesine ekle
                recommendation_list.append(list(sorted_rules.iloc[i]["consequents"]))

    # Tekrar eden ürünleri kaldırmak için set yapısını kullan
    # İç içe listeleri düzleştirip eşsiz ürünleri al
    recommendation_list = list({item for item_list in recommendation_list for item in item_list})

    # Belirtilen sayıda öneri döndür (varsayılan 1)
    return recommendation_list[:rec_count]


In [76]:
########################################
# Örnek Kullanım
########################################

In [77]:
# En son '2_0' hizmetini alan bir kullanıcıya 1 hizmet öner
arl_recommender(rules, "2_0", rec_count=1)

['13_11']