In [21]:
from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [22]:
# Department lookup table: maps a department abbreviation (department_code)
# to its department number (department_no). Both columns use the pandas
# string dtype.
dp_no_table = pd.read_csv(
    'dp_code_match_table.csv',
    dtype={"department_code": "string", "department_no": "string"},
)

# Course records used by the functions below; every listed column is read
# with the pandas string dtype.
data = pd.read_csv(
    'data_cleaned.csv',
    dtype=dict.fromkeys(
        ["std_no", "department_no", "class_code", "class_name", "period"],
        "string",
    ),
)

In [23]:
def get_department_records(dep_code):
    """Return the per-student course lists for one department.

    Looks up ``dep_code`` in the module-level ``dp_no_table`` to find the
    matching ``department_no``, selects the rows of the module-level ``data``
    frame that carry that number, and collects each student's ``class_name``
    values into a list.

    Args:
        dep_code: Department abbreviation as stored in
            ``dp_no_table.department_code`` (for example ``'BLM'``).

    Returns:
        DataFrame with the columns ``std_no`` and ``class_name``; one row per
        student, where ``class_name`` holds the list of that student's
        course names.

    Raises:
        KeyError: If ``dep_code`` does not appear in ``dp_no_table``.
    """
    matches = dp_no_table[dp_no_table.department_code == dep_code]
    if matches.empty:
        # Fail with a readable message instead of the bare ``KeyError: 0``
        # that a positional lookup on an empty frame would produce.
        raise KeyError(f"unknown department code: {dep_code!r}")

    # When several rows match, the first one wins.
    dep_no = matches['department_no'].iloc[0]
    dep_rows = data[data.department_no == dep_no]
    return dep_rows.groupby('std_no').class_name.apply(list).reset_index()
def get_class_code_records_as_list(df):
    """Return the values of the ``class_name`` column of ``df`` as a list.

    Note that, despite the function's name, the column that is read is
    ``class_name`` and not ``class_code``.
    """
    course_column = df["class_name"]
    return course_column.tolist()

In [31]:
def create_one_shot_df(dep_str):
    """Build the one-hot encoded course table for the given department.

    The per-student course lists returned by ``get_department_records`` are
    encoded with a ``TransactionEncoder``; the resulting DataFrame has one
    row per course list and one column per item found by the encoder.
    """
    transactions = get_class_code_records_as_list(get_department_records(dep_str))

    encoder = TransactionEncoder()
    encoder.fit(transactions)
    encoded = encoder.transform(transactions)

    return pd.DataFrame(encoded, columns=encoder.columns_)

In [33]:
def create_frequent_itemsets(dep_str, min_support=0.01):
    """Compute the frequent itemsets for the given department.

    Args:
        dep_str: Department abbreviation passed on to ``create_one_shot_df``.
        min_support: Minimum support handed to ``apriori``. Defaults to
            ``0.01``, the value that used to be hard-coded here.

    Returns:
        The DataFrame produced by ``apriori`` with ``use_colnames=True``, so
        itemsets are expressed with the one-hot table's column names.
    """
    dep_oneshot_df = create_one_shot_df(dep_str)
    return apriori(dep_oneshot_df, min_support=min_support, use_colnames=True)

In [50]:
def create_rules(dep_str):
    """Create the single-consequent association rules for a department.

    Rules are generated from the department's frequent itemsets with
    ``metric="lift"`` and ``min_threshold=1``, ordered by descending
    confidence, and restricted to rules whose consequent holds exactly one
    item.

    Args:
        dep_str: Department abbreviation passed on to
            ``create_frequent_itemsets``.

    Returns:
        DataFrame of rules with a fresh 0..n-1 index. The ``antecedents`` and
        ``consequents`` columns hold plain lists of item names (sorted, so
        the output is reproducible) instead of the sets returned by
        ``association_rules``.
    """
    dep_freq_itemsets = create_frequent_itemsets(dep_str)
    rules = association_rules(dep_freq_itemsets, metric="lift", min_threshold=1)
    rules = rules.sort_values('confidence', ascending=False)

    # Filter on the size of the consequent itself rather than on a
    # comma-joined string, so item names that contain a comma are neither
    # dropped nor split apart.
    single_consequent = rules["consequents"].apply(len) == 1

    # reset_index returns a new frame, so the column assignments below do not
    # write into a slice of the unfiltered rules.
    rules = rules[single_consequent].reset_index(drop=True)
    rules["antecedents"] = rules["antecedents"].apply(sorted)
    rules["consequents"] = rules["consequents"].apply(sorted)

    return rules

In [177]:
def rules_top10_without_imput(df):
    """Select rules by searching for a support cut-off that leaves ten rows.

    Starts with a cut-off of 0.5 and a step of 0.25. While the selection does
    not contain exactly ten rows and the step is still above 0.001, the
    cut-off is lowered when there are too few rows and raised when there are
    too many; the step shrinks by a factor of 1.25 after every adjustment.
    The result is whatever selection is current when the loop ends, so it is
    not guaranteed to contain exactly ten rows.

    NOTE(review): a later cell defines another function with this same name;
    once that cell has run, this version is no longer reachable.
    """
    target_rows = 10
    cutoff = 0.5
    step = cutoff / 2

    def above(threshold):
        # Rows whose support is strictly greater than the threshold.
        return df[df['support'] > threshold]

    selected = above(cutoff)
    while len(selected) != target_rows and step > 0.001:
        if len(selected) < target_rows:
            # Too few rows: relax the cut-off.
            cutoff -= step
            step /= 1.25
            selected = above(cutoff)

        # Checked in the same pass on purpose: the relaxation above may have
        # overshot and let too many rows through.
        if len(selected) > target_rows:
            cutoff += step
            step /= 1.25
            selected = above(cutoff)

    return selected

In [190]:
def rules_top10_without_imput(df, top_n=10):
    """Return the ``top_n`` rules with the highest support, best confidence first.

    Args:
        df: Rules DataFrame with at least ``support`` and ``confidence``
            columns. It is not modified.
        top_n: Number of rules to keep. Defaults to 10.

    Returns:
        A new DataFrame holding at most ``top_n`` rows: the rows with the
        largest ``support``, ordered by descending ``confidence``. The
        original index labels are kept.
    """
    strongest = df.sort_values('support', ascending=False).head(top_n)
    return strongest.sort_values('confidence', ascending=False)

In [192]:
def rules_top10(df, antecedents, top_n=10):
    """Return the top rules whose antecedents equal the given set of items.

    Args:
        df: Rules DataFrame with ``antecedents`` (an iterable of items per
            row), ``support`` and ``confidence`` columns. It is not modified.
        antecedents: List of items; a rule is kept only when its antecedents,
            compared as a set, are exactly these items (order is ignored).
        top_n: Number of rules to keep. Defaults to 10.

    Returns:
        A new DataFrame with at most ``top_n`` rows: of the matching rules,
        those with the largest ``support``, ordered by descending
        ``confidence``. Index labels are positions within the matching rules.
    """
    wanted = set(antecedents)

    # Build the filter as a standalone boolean Series instead of a helper
    # column, so the caller's frame is left untouched. The explicit bool
    # dtype keeps the mask usable when ``df`` has no rows.
    matches = pd.Series(
        [set(items) == wanted for items in df["antecedents"]],
        index=df.index,
        dtype=bool,
    )

    matching = df[matches].reset_index(drop=True)
    strongest = matching.sort_values('support', ascending=False).head(top_n)
    return strongest.sort_values('confidence', ascending=False)

In [191]:
# For every department code listed in dp_no_table, build the rules, keep the
# top 10 (no antecedent filter) and write them to one CSV file per department.
output_dir = Path('test_results')
# to_csv does not create missing directories, so make sure the target exists.
output_dir.mkdir(parents=True, exist_ok=True)

for department_code in dp_no_table.department_code:
    df = create_rules(department_code)
    top10 = rules_top10_without_imput(df)
    top10.to_csv(output_dir / f'{department_code}.csv', index=False)

In [205]:
# Example: build the rules for the 'BLM' department, then display the top
# rules whose antecedent set is exactly this single course.
test = create_rules('BLM')
rules_top10(test, ['Veri Madenciliğine Giriş'])

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,[Veri Madenciliğine Giriş],[Büyük Veri İşleme Ve Analizi],0.455927,0.234043,0.145897,0.32,1.367273,0.03919,1.126408
1,[Veri Madenciliğine Giriş],[Biyoenformatiğe Giriş],0.455927,0.246201,0.121581,0.266667,1.083128,0.009331,1.027908
2,[Veri Madenciliğine Giriş],[Mobil Programlamaya Giriş],0.455927,0.240122,0.118541,0.26,1.082785,0.009063,1.026863
3,[Veri Madenciliğine Giriş],[Yapay Zeka],0.455927,0.227964,0.115502,0.253333,1.111289,0.011567,1.033977
4,[Veri Madenciliğine Giriş],[Sayısal İşaret İşleme],0.455927,0.227964,0.106383,0.233333,1.023556,0.002448,1.007004
5,[Veri Madenciliğine Giriş],[Doğal Dil İşlemeye Giriş],0.455927,0.18845,0.106383,0.233333,1.238172,0.020464,1.058544
6,[Veri Madenciliğine Giriş],[Bilgiye Erişim Ve Arama Motorları],0.455927,0.182371,0.103343,0.226667,1.242889,0.020196,1.057279
7,[Veri Madenciliğine Giriş],[Robot Teknolojisine Giriş],0.455927,0.191489,0.097264,0.213333,1.114074,0.009959,1.027768
8,[Veri Madenciliğine Giriş],[İleri Ağ Programlama],0.455927,0.182371,0.088146,0.193333,1.060111,0.004998,1.01359
9,[Veri Madenciliğine Giriş],[Yazılım Kalite ve Test Süreci],0.455927,0.18541,0.088146,0.193333,1.042732,0.003612,1.009822
