In [39]:
import logging
import os
import sys
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
import matplotlib.pyplot as plt
from itertools import combinations

sys.path.insert(0, os.getcwd())
from recommender_engine.data_layer.db_connect import return_evidence_log, return_reduced_base_evidence_log, return_reduced_evidence_log


## Create products bought together
## https://towardsdatascience.com/apriori-association-rule-mining-explanation-and-python-implementation-290b42afdfc6

In [40]:
evidence_log_df = return_evidence_log()
reduced_evidence_log_df = evidence_log_df.groupby(['base_article_id','user_id']).original_timestamp.last().reset_index()


### Make sure to run analysis_of_evidence_log first to create the newest version of the reduced evidence_log
#reduced_evidence_log_pandas_df = pd.read_csv('reduced_evidence_log.csv')
#reduced_evidence_log_pandas_df.head()

In [44]:
len(evidence_log_df)

19424

In [46]:
evidence_log_df[evidence_log_df.user_id.isnull()]

Unnamed: 0,slug,original_timestamp,category,user_id,referrer,article_id,base_article_id
49,aleris-hamlet-fertility-disse-personer-moder-d...,2022-02-15 10:23:25+00:00,Din klinik,,home,i18n.064865bb-0cf5-4356-8cb8-152f25fcf3c9.da-DK,064865bb-0cf5-4356-8cb8-152f25fcf3c9
1181,"danielle:-""jeg-foler-mig-anderledes-gravid-end...",2022-02-23 16:33:29+00:00,Personlige historier,,home,i18n.36aa9a6f-1fbf-42c1-a1c7-7d1d78cca568.da-DK,36aa9a6f-1fbf-42c1-a1c7-7d1d78cca568
2053,statistik-hvad-er-sandsynligheden-for-graviditet,2023-01-19 14:35:40.181000+00:00,Fertilitetsbehandling,,home,i18n.66ce674e-25e7-444d-943d-f357a4b376ab.da-DK,66ce674e-25e7-444d-943d-f357a4b376ab
2469,"terapeut:-""det-vigtigste-er-at-i-begge-tager-a...",2022-04-11 17:40:11+00:00,Parforhold og intimitet,,home,i18n.7222146d-329d-4255-8aaa-1385f7f916c5.da-DK,7222146d-329d-4255-8aaa-1385f7f916c5
2525,hvad-er-reglerne-for-fertilitetsbehandling,2023-01-19 14:35:28.037000+00:00,Før behandling,,home,i18n.74f16535-5586-4ee9-b1e0-a53625543a4c.da-DK,74f16535-5586-4ee9-b1e0-a53625543a4c
3443,hvad-sker-der-med-kroppen-under-fertilitetsbeh...,2023-01-19 14:35:58.548000+00:00,Fertilitetsbehandling,,home,i18n.c05759bf-c3e3-44d5-acf1-7e5375261f61.da-DK,c05759bf-c3e3-44d5-acf1-7e5375261f61
4156,ved-du-hvad-aeglosningsfasen-indeholder-iui,2022-03-03 12:03:16+00:00,Fertilitetsbehandling,,content,i18n.001db2b8-07ed-4811-a72e-3e992cada878.da-DK,001db2b8-07ed-4811-a72e-3e992cada878
4324,mentor:-hvordan-pavirker-stress-min-fertilitet,2022-03-10 07:34:35+00:00,Mental sundhed,,content,i18n.08566c97-9877-46df-8b5b-e744e9dd7fcb.da-DK,08566c97-9877-46df-8b5b-e744e9dd7fcb
4355,"""op-pa-hesten-igen""-om-manglende-empati",2022-03-10 07:31:21+00:00,Mental sundhed,,content,i18n.08a06408-4bef-4a3c-ad3e-270bd3568460.da-DK,08a06408-4bef-4a3c-ad3e-270bd3568460
4403,5-ting-om-aegdonorer,2022-03-03 12:03:39+00:00,Fertilitetsbehandling,,content,i18n.0c1e43b4-39a6-40ec-9751-a3143234d459.da-DK,0c1e43b4-39a6-40ec-9751-a3143234d459


In [41]:
reduced_base_evidence_log_df = return_reduced_base_evidence_log()
reduced_base_evidence_log_df.head()


Unnamed: 0,base_article_id,user_id,original_timestamp
0,001db2b8-07ed-4811-a72e-3e992cada878,CB89F8FA-2F97-4F3E-AB87-29501FA9BDD5,2022-02-26 20:53:41+00:00
1,001db2b8-07ed-4811-a72e-3e992cada878,37B9E800-B041-43A9-ADEC-586F24A00894,2022-06-05 15:38:57+00:00
2,001db2b8-07ed-4811-a72e-3e992cada878,7304C0DD-BF39-43DE-BB78-AD81309AEFAC,2022-03-28 11:39:14+00:00
3,001db2b8-07ed-4811-a72e-3e992cada878,54901615-67C9-42FD-BF7C-BF0F0D48E4F7,2022-08-29 11:52:05+00:00
4,001db2b8-07ed-4811-a72e-3e992cada878,E6CFB608-AFF2-452E-BDFE-88B310DF8E74,2022-05-09 14:01:23+00:00


In [42]:
len(reduced_base_evidence_log_df)

13117

In [38]:
reduced_base_evidence_log_df.sort_values(by=['user_id','original_timestamp'])

Unnamed: 0,base_article_id,user_id,original_timestamp
3691,2cc30303-4159-425e-8d8d-1ec6cb1a12a0,00225EC4-A85A-4FDC-B972-A58117379D33,2022-06-17 18:29:32+00:00
8399,900b9188-a0e4-4626-933f-467df9b432f7,00225EC4-A85A-4FDC-B972-A58117379D33,2022-06-17 18:31:37+00:00
7220,7742ae70-de06-4f3c-a901-81156c306870,00225EC4-A85A-4FDC-B972-A58117379D33,2022-06-17 18:32:38+00:00
1004,0f99f813-e56d-4fd5-ace1-b1ebca5602ee,00349F2F-F174-4098-AFD8-6F98FD67A41D,2022-08-07 18:36:06+00:00
8101,8507d74d-81b8-4ff5-b936-ccc92245009f,00349F2F-F174-4098-AFD8-6F98FD67A41D,2022-08-07 18:39:55+00:00
...,...,...,...
9952,b5c07689-1429-4c5d-8325-e34a7ffd2dda,,2022-12-28 13:18:15.589495+00:00
396,07945ff7-49f3-4ef2-a86f-32cd4b9a64cc,,2022-12-28 17:46:35.198000+00:00
7097,74f16535-5586-4ee9-b1e0-a53625543a4c,,2023-01-19 14:35:28.037000+00:00
6160,66ce674e-25e7-444d-943d-f357a4b376ab,,2023-01-19 14:35:40.181000+00:00


In [43]:
len(reduced_evidence_log_df)

13083

In [37]:
reduced_evidence_log_df.sort_values(by=['user_id','original_timestamp'])

Unnamed: 0,base_article_id,user_id,original_timestamp
3677,2cc30303-4159-425e-8d8d-1ec6cb1a12a0,00225EC4-A85A-4FDC-B972-A58117379D33,2022-06-17 18:29:32+00:00
8355,900b9188-a0e4-4626-933f-467df9b432f7,00225EC4-A85A-4FDC-B972-A58117379D33,2022-06-17 18:31:37+00:00
7176,7742ae70-de06-4f3c-a901-81156c306870,00225EC4-A85A-4FDC-B972-A58117379D33,2022-06-17 18:31:43+00:00
969,0f99f813-e56d-4fd5-ace1-b1ebca5602ee,00349F2F-F174-4098-AFD8-6F98FD67A41D,2022-08-07 18:36:06+00:00
8059,8507d74d-81b8-4ff5-b936-ccc92245009f,00349F2F-F174-4098-AFD8-6F98FD67A41D,2022-08-07 18:39:55+00:00
...,...,...,...
8942,998f6880-52e4-49ef-a5e0-7ca538b751d3,fee3530d-b0a2-4e24-8d0a-b2009b8c3232,2022-12-21 21:44:52.677000+00:00
4734,47d20565-03e6-48b5-b8c1-5577de30e6b8,fee3530d-b0a2-4e24-8d0a-b2009b8c3232,2022-12-25 20:21:21.116000+00:00
12945,fc3a395c-5618-4bc6-b271-53ab05a46ea2,fee3530d-b0a2-4e24-8d0a-b2009b8c3232,2022-12-27 17:40:48.929000+00:00
5658,5cf64b0b-af3d-43db-8769-9cd881fd26a6,fee3530d-b0a2-4e24-8d0a-b2009b8c3232,2023-01-03 12:20:53.183000+00:00


In [33]:
reduced_base_evidence_log_df.columns

Index(['base_article_id', 'user_id', 'original_timestamp'], dtype='object')

In [34]:
reduced_evidence_log_df.columns

Index(['base_article_id', 'user_id', 'original_timestamp'], dtype='object')

In [36]:
reduced_base_evidence_log_df.compare(reduced_evidence_log_df)

ValueError: Can only compare identically-labeled DataFrame objects

In [47]:
reduced_evidence_log_df = return_reduced_evidence_log()
reduced_evidence_log_df.head()


Unnamed: 0,base_article_id,user_id,original_timestamp,article_id
0,001db2b8-07ed-4811-a72e-3e992cada878,C1C40183-8651-4CD4-BA89-114B904E8E5C,2022-02-25 16:40:35+00:00,i18n.001db2b8-07ed-4811-a72e-3e992cada878.da-DK
1,001db2b8-07ed-4811-a72e-3e992cada878,2DC2B286-9BF9-41A8-A0A4-38DA493361E0,2022-01-03 15:30:11+00:00,i18n.001db2b8-07ed-4811-a72e-3e992cada878.da-DK
2,001db2b8-07ed-4811-a72e-3e992cada878,0EC4D1D1-19F7-4C65-9384-E900AFB10A20,2022-01-08 15:54:50+00:00,i18n.001db2b8-07ed-4811-a72e-3e992cada878.da-DK
3,001db2b8-07ed-4811-a72e-3e992cada878,0899017E-BDCE-4A39-BF61-469117EF24E7,2022-06-27 09:47:39+00:00,i18n.001db2b8-07ed-4811-a72e-3e992cada878.da-DK
4,001db2b8-07ed-4811-a72e-3e992cada878,E6CFB608-AFF2-452E-BDFE-88B310DF8E74,2022-05-09 14:01:23+00:00,i18n.001db2b8-07ed-4811-a72e-3e992cada878.da-DK


In [48]:
len(reduced_evidence_log_df)

13086