In [1]:
import pandas as pd
from itertools import combinations
import warnings
warnings.filterwarnings('ignore')

In [2]:
class Apriori:
    """Mine frequent itemsets and association rules from a transaction table.

    Parameters
    ----------
    data : pandas.DataFrame
        One transaction per row; each non-null cell is an item of that
        transaction. Missing cells (NaN/None) are ignored.
    minS : float, default 0.2
        Minimum support (fraction of transactions) an itemset needs in order
        to be considered frequent.

    Attributes
    ----------
    data : the DataFrame passed in.
    n    : number of transactions (rows of ``data``).
    minS : the support threshold.
    fs   : dict mapping each frequent itemset (a tuple of items) to the number
           of transactions that contain it, ordered by itemset size.
    res  : DataFrame with one row per association rule and the rule's
           support, confidence, lift and leverage.
    """

    # Column labels of the rule table (runtime strings, kept verbatim).
    _COLUMNS = [
        'Правило',
        'Поддержка, S',
        'Достоверность, C',
        'Лифт, L',
        'Левередж, T'
    ]

    def __init__(self, data, minS=0.2):
        self.data = data          # Transactions
        self.n = data.shape[0]    # Number of transactions
        self.minS = minS          # Minimum support of a frequent itemset
        self.fs = {}              # Frequent itemsets -> transaction count
        self.res = pd.DataFrame(columns=self._COLUMNS)  # Association rules

        # Nothing to mine (and supports would divide by zero) without rows.
        if self.n == 0:
            return

        # Convert every row to a set once so that support counting is a cheap
        # subset test instead of a per-candidate DataFrame.apply.
        transactions = [
            frozenset(value for value in row if pd.notna(value))
            for row in self.data.itertuples(index=False, name=None)
        ]
        self.fs = self._frequent_itemsets(transactions)

        # Rules come only from itemsets with at least two items; the largest /
        # latest itemsets are reported first.
        multi_item = [itemset for itemset in self.fs if len(itemset) >= 2]
        records = [
            rule
            for itemset in reversed(multi_item)
            for rule in self._rules_for(itemset)
        ]
        # Build the table in one go: DataFrame.append no longer exists in
        # pandas >= 2.0 and row-by-row growth is quadratic anyway.
        self.res = pd.DataFrame(records, columns=self._COLUMNS)

    def _frequent_itemsets(self, transactions):
        """Return {itemset: count} for all itemsets whose support >= minS."""
        # Items ordered by how often they occur in the table; this fixes the
        # order of items inside every itemset tuple.
        items = list(self.data.stack().value_counts().index)
        frequent = {}
        # Include len(items) itself so the itemset made of all frequent items
        # is also considered.
        for size in range(1, len(items) + 1):
            found_any = False
            for candidate in combinations(items, size):
                # Every subset of a frequent itemset is frequent, so a
                # candidate with an infrequent (size-1)-subset can be skipped.
                if size > 1 and any(
                    subset not in frequent
                    for subset in combinations(candidate, size - 1)
                ):
                    continue
                wanted = frozenset(candidate)
                count = sum(1 for basket in transactions if wanted <= basket)
                # count > 0 keeps never-occurring itemsets out even when
                # minS <= 0 (they would give a zero-division in confidence).
                if count > 0 and count / self.n >= self.minS:
                    frequent[candidate] = count
                    found_any = True
            if not found_any:
                break  # no frequent itemset of this size -> none larger
            if size == 1:
                # Larger candidates only need the frequent single items.
                items = [single[0] for single in frequent]
        return frequent

    def _rules_for(self, itemset):
        """Yield every rule A -> B with A, B a split of ``itemset`` (each once)."""
        size = len(itemset)
        for left_size in range(1, size // 2 + 1):
            for left in combinations(itemset, left_size):
                # When both sides have equal size each split would be visited
                # twice (as `left` and as its complement); keep only the one
                # whose left side holds the first item.
                if 2 * left_size == size and itemset[0] not in left:
                    continue
                right = tuple(item for item in itemset if item not in left)
                yield self._rule(itemset, left, right)
                yield self._rule(itemset, right, left)

    def _rule(self, itemset, antecedent, consequent):
        """Build the table row for the rule ``antecedent -> consequent``."""
        s = self.fs[itemset] / self.n
        c = self.fs[itemset] / self.fs[antecedent]
        return {
            'Правило': '{0} -> {1}'.format(antecedent, consequent),
            'Поддержка, S': s,
            'Достоверность, C': c,
            'Лифт, L': c / (self.fs[consequent] / self.n),
            'Левередж, T': s - ((self.fs[antecedent] / self.n) * (self.fs[consequent] / self.n))
        }

    def run(self):
        """Return the table of association rules computed at construction."""
        return self.res

In [3]:
# Read the transactions from data.csv. header=None makes pandas treat the
# first line as data and number the columns 0..k-1 instead of naming them.
data = pd.read_csv('data.csv', header=None)
# Bare last expression: display the loaded table as the cell output.
data

Unnamed: 0,0,1,2,3,4,5
0,Python,Java,C++,JavaScript,C#,PHP
1,Python,Java,Kotlin,C++,,
2,C++,Python,Java,,,
3,Kotlin,Swift,,,,
4,Python,JavaScript,PHP,,,
5,Python,C++,,,,
6,Java,C#,Kotlin,,,
7,JavaScript,PHP,,,,
8,Kotlin,JavaScript,Swift,,,
9,C#,PHP,JavaScript,,,


In [4]:
# Minimum support threshold passed to Apriori (overrides its default of 0.2).
minS = 0.15
# All mining happens in the constructor; the rules are stored on the instance.
apriori = Apriori(data, minS)

In [5]:
# Show at most the first 50 association rules.
apriori.run().head(50)

Unnamed: 0,Правило,"Поддержка, S","Достоверность, C","Лифт, L","Левередж, T"
0,"('Python',) -> ('JavaScript', 'PHP')",0.176471,0.3,1.02,0.00346
1,"('JavaScript', 'PHP') -> ('Python',)",0.176471,0.6,1.02,0.00346
2,"('JavaScript',) -> ('Python', 'PHP')",0.176471,0.428571,1.821429,0.079585
3,"('Python', 'PHP') -> ('JavaScript',)",0.176471,0.75,1.821429,0.079585
4,"('PHP',) -> ('Python', 'JavaScript')",0.176471,0.5,2.125,0.093426
5,"('Python', 'JavaScript') -> ('PHP',)",0.176471,0.75,2.125,0.093426
6,"('Python',) -> ('C++', 'Java')",0.176471,0.3,1.02,0.00346
7,"('C++', 'Java') -> ('Python',)",0.176471,0.6,1.02,0.00346
8,"('C++',) -> ('Python', 'Java')",0.176471,0.333333,1.888889,0.083045
9,"('Python', 'Java') -> ('C++',)",0.176471,1.0,1.888889,0.083045
