In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# Directory that holds the input files. Kept in one constant so the notebook
# can be pointed at another location by editing a single line.
DATA_DIR = Path(r'C:\Users\megan\PycharmProjects\PatternMining')

# Load the transaction CSV files into DataFrames and drop the 'Unnamed: 0' column
# (presumably a row index written out when the CSVs were saved - verify).

bachelors_df = pd.read_csv(DATA_DIR / 'bachelors_transactions.csv').drop(columns=['Unnamed: 0'])

majors_df = pd.read_csv(DATA_DIR / 'majors_transactions.csv').drop(columns=['Unnamed: 0'])

bachelors_df.head(60) # Bachelors' transactions

Unnamed: 0,0,1,2,3,4,5,6
0,Term1,2013-2016,OES,Finished,HQAP50,FQW4,Retake
1,Term2,2013-2016,OES,Finished,HQAP50,FQW4,Retake
2,Term3,2013-2016,OES,Finished,HQAP-50,FQW4,Retake
3,Term4,2013-2016,OES,Finished,HQAP-50,FQW4,Retake
4,Term5,2013-2016,OES,Finished,HQAP-50,FQW4,Retake
5,Term6,2013-2016,OES,Finished,HQAP-50,FQW4,Retake
6,Term7,2013-2016,OES,Finished,HQAP50,FQW4,Retake
7,Term8,2013-2016,OES,Finished,HQAP75,FQW4,NoRetake
8,Term1,2013-2016,HES,Finished,HQAP85,FQW5,NoRetake
9,Term2,2013-2016,HES,Finished,HQAP85,FQW5,NoRetake


In [2]:
# Convert the transactions into a one-hot (sparse-style) item matrix

def _to_item_matrix(transactions, columns):
    """One-hot encode `columns` of `transactions`, naming each indicator column after its item.

    Passing an empty `prefix` and `prefix_sep` makes `get_dummies` use the bare
    value as the column name. This replaces slicing a fixed two-character
    "<column>_" prefix off afterwards, which only works while every source
    column name is exactly one character long.
    """
    return pd.get_dummies(transactions, columns=columns, prefix='', prefix_sep='')

bachelors_columns = list(bachelors_df.columns)
bachelors_matrix = _to_item_matrix(bachelors_df, bachelors_columns)

majors_columns = list(majors_df.columns)
majors_matrix = _to_item_matrix(majors_df, majors_columns)

bachelors_matrix # One-hot transaction matrix for bachelors

Unnamed: 0,Term1,Term2,Term3,Term4,Term5,Term6,Term7,Term8,2013-2016,2014-2017,...,HQAP100,HQAP50,HQAP75,HQAP85,FQW1,FQW3,FQW4,FQW5,NoRetake,Retake
0,1,0,0,0,0,0,0,0,1,0,...,0,1,0,0,0,0,1,0,0,1
1,0,1,0,0,0,0,0,0,1,0,...,0,1,0,0,0,0,1,0,0,1
2,0,0,1,0,0,0,0,0,1,0,...,0,0,0,0,0,0,1,0,0,1
3,0,0,0,1,0,0,0,0,1,0,...,0,0,0,0,0,0,1,0,0,1
4,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2084,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2085,1,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,1,0
2086,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2087,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [3]:
# Mine frequent itemsets using a minimum-support threshold

bachelors_freaquent_itemsets = apriori(
    bachelors_matrix,
    use_colnames=True,
    min_support=0.25,
)

# Derive association rules, filtered by the chosen metric (lift) and its minimum value

bachelors_rules = association_rules(
    bachelors_freaquent_itemsets,
    min_threshold=1,
    metric='lift',
)

bachelors_rules.head(60) # Association rules for bachelors

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(HES),(Finished),0.393968,0.704643,0.321685,0.816525,1.158778,0.044078,1.609791
1,(Finished),(HES),0.704643,0.393968,0.321685,0.456522,1.158778,0.044078,1.115098
2,(HES),(FQW5),0.393968,0.471039,0.264241,0.670717,1.42391,0.078667,1.606403
3,(FQW5),(HES),0.471039,0.393968,0.264241,0.560976,1.42391,0.078667,1.380405
4,(HES),(NoRetake),0.393968,0.516515,0.282432,0.716889,1.387935,0.078941,1.70776
5,(NoRetake),(HES),0.516515,0.393968,0.282432,0.546803,1.387935,0.078941,1.337235
6,(OES),(Retake),0.606032,0.483485,0.371948,0.613744,1.269417,0.078941,1.337235
7,(Retake),(OES),0.483485,0.606032,0.371948,0.769307,1.269417,0.078941,1.70776
8,(FQW5),(Finished),0.471039,0.704643,0.471039,1.0,1.419158,0.139124,inf
9,(Finished),(FQW5),0.704643,0.471039,0.471039,0.668478,1.419158,0.139124,1.595555


In [4]:
majors_matrix # Sparse (one-hot) transaction matrix for master's students

Unnamed: 0,Term1,Term2,Term3,Term4,2015-2016,2016-2017,2017-2018,2018-2019,2019-2020,2020-2021,...,HQAP100,HQAP50,HQAP75,HQAP85,FQW1,FQW3,FQW4,FQW5,NoRetake,Retake
0,1,0,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,1,1,0
1,0,1,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,1,1,0
2,0,0,1,0,1,0,0,0,0,0,...,1,0,0,0,0,0,0,1,1,0
3,0,0,0,1,1,0,0,0,0,0,...,1,0,0,0,0,0,0,1,1,0
4,1,0,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
414,1,0,0,0,0,0,0,0,1,0,...,0,1,0,0,0,0,0,0,0,1
415,0,1,0,0,0,0,0,0,1,0,...,0,0,1,0,0,0,0,0,1,0
416,1,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,1,0
417,1,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,1


In [5]:
# Mine frequent itemsets for master's students using a minimum-support threshold
majors_freaquent_itemsets = apriori(
    majors_matrix,
    use_colnames=True,
    min_support=0.4,
)
# Derive association rules, filtered by lift with a minimum value of 1
majors_rules = association_rules(
    majors_freaquent_itemsets,
    min_threshold=1,
    metric='lift',
)

majors_rules.head(60) # Association rules for master's students

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(OES),(Finished),0.625298,0.821002,0.544153,0.870229,1.059959,0.030781,1.379335
1,(Finished),(OES),0.821002,0.625298,0.544153,0.662791,1.059959,0.030781,1.111184
2,(HQAP100),(Finished),0.49642,0.821002,0.484487,0.975962,1.188744,0.076925,7.446301
3,(Finished),(HQAP100),0.821002,0.49642,0.484487,0.590116,1.188744,0.076925,1.228592
4,(FQW5),(Finished),0.630072,0.821002,0.630072,1.0,1.218023,0.112781,inf
5,(Finished),(FQW5),0.821002,0.630072,0.630072,0.767442,1.218023,0.112781,1.590692
6,(Finished),(NoRetake),0.821002,0.761337,0.668258,0.813953,1.069111,0.043199,1.282816
7,(NoRetake),(Finished),0.761337,0.821002,0.668258,0.877743,1.069111,0.043199,1.464109
8,(HQAP100),(NoRetake),0.49642,0.761337,0.494033,0.995192,1.307165,0.116091,49.642005
9,(NoRetake),(HQAP100),0.761337,0.49642,0.494033,0.648903,1.307165,0.116091,1.434304


In [6]:
# Load the bachelors' rules table with translated item labels from Excel and
# discard its 'Unnamed: 0' column.
# NOTE(review): the path is absolute and machine-specific.
translated_bachelors_rules = (
    pd.read_excel(r'C:\Users\megan\PycharmProjects\PatternMining\translated_bachelors_rules.xlsx')
    .drop(columns=['Unnamed: 0'])
)
translated_bachelors_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,['Закончил'],['Высокобальник'],0.704643,0.393968,0.321685,0.456522,1.158778,0.044078,1.115098
1,['Высокобальник'],['Закончил'],0.393968,0.704643,0.321685,0.816525,1.158778,0.044078,1.609791
2,['Высокобальник'],['ВКР5'],0.393968,0.471039,0.264241,0.670717,1.42391,0.078667,1.606403
3,['ВКР5'],['Высокобальник'],0.471039,0.393968,0.264241,0.560976,1.42391,0.078667,1.380405
4,['Высокобальник'],['Нет пересдачи за семестр'],0.393968,0.516515,0.282432,0.716889,1.387935,0.078941,1.70776
5,['Нет пересдачи за семестр'],['Высокобальник'],0.516515,0.393968,0.282432,0.546803,1.387935,0.078941,1.337235
6,['Есть пересдача за семестр'],['Не высокобальник'],0.483485,0.606032,0.371948,0.769307,1.269417,0.078941,1.70776
7,['Не высокобальник'],['Есть пересдача за семестр'],0.606032,0.483485,0.371948,0.613744,1.269417,0.078941,1.337235
8,['Закончил'],['ВКР5'],0.704643,0.471039,0.471039,0.668478,1.419158,0.139124,1.595555
9,['ВКР5'],['Закончил'],0.471039,0.704643,0.471039,1.0,1.419158,0.139124,inf


In [9]:
# Load the master's students' rules table with translated item labels from Excel
# and discard its 'Unnamed: 0' column.
# NOTE(review): the path is absolute and machine-specific.
translated_majors_rules = (
    pd.read_excel(r'C:\Users\megan\PycharmProjects\PatternMining\translated_majors_rules.xlsx')
    .drop(columns=['Unnamed: 0'])
)
translated_majors_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,['Закончил'],['Не высокобальник'],0.821002,0.625298,0.544153,0.662791,1.059959,0.030781,1.111184
1,['Не высокобальник'],['Закончил'],0.625298,0.821002,0.544153,0.870229,1.059959,0.030781,1.379335
2,['Закончил'],['Качественная успеваемость 100'],0.821002,0.49642,0.484487,0.590116,1.188744,0.076925,1.228592
3,['Качественная успеваемость 100'],['Закончил'],0.49642,0.821002,0.484487,0.975962,1.188744,0.076925,7.446301
4,['Закончил'],['ВКР5'],0.821002,0.630072,0.630072,0.767442,1.218023,0.112781,1.590692
5,['ВКР5'],['Закончил'],0.630072,0.821002,0.630072,1.0,1.218023,0.112781,inf
6,['Закончил'],['Нет пересдачи за семестр'],0.821002,0.761337,0.668258,0.813953,1.069111,0.043199,1.282816
7,['Нет пересдачи за семестр'],['Закончил'],0.761337,0.821002,0.668258,0.877743,1.069111,0.043199,1.464109
8,['Качественная успеваемость 100'],['Нет пересдачи за семестр'],0.49642,0.761337,0.494033,0.995192,1.307165,0.116091,49.642005
9,['Нет пересдачи за семестр'],['Качественная успеваемость 100'],0.761337,0.49642,0.494033,0.648903,1.307165,0.116091,1.434304
