In [1]:
import pandas as pd
import numpy as np
import os
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sklearn.metrics.pairwise import cosine_similarity

In [15]:
load_dotenv()
DB_URL = os.getenv("DB_URL")
engine = create_engine(DB_URL)
query = """
select c.ContactPersoonId, i.CampagneId, i.CampagneNaam, ca.Startdatum, a.Ondernemingsaard, a.Ondernemingstype, a.PrimaireActiviteit, f.Naam as Functie
from Contactfiche c
join Account a on a.AccountId = c.AccountId
join Inschrijving i on i.ContactficheId = c.ContactPersoonId
join Campagne ca on ca.CampagneId = i.CampagneId
join ContactficheFunctie cf on cf.ContactpersoonId = c.ContactPersoonId
join Functie f on f.FunctieId = cf.FunctieId
where i.CampagneId is not null;
"""

df = pd.read_sql(query, engine)
df.set_index('ContactPersoonId', inplace=True)
df["rating"] = 1

df.head()

Unnamed: 0_level_0,CampagneId,CampagneNaam,Startdatum,Ondernemingsaard,Ondernemingstype,PrimaireActiviteit,Functie,rating
ContactPersoonId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
D9303EA2-57E5-EB11-8121-001DD8B72B61,F090DEF9-2A5A-EB11-811A-001DD8B72B62,OV - Kick-Off Community Bouw en Vastgoed,2021-09-27 17:00:00,Diensten,Familiebedrijf,Vastgoed,Medewerker,1
451DF235-4B73-E111-B43A-00505680000A,ED956944-5F6F-EA11-8110-001DD8B72B62,OV-Corona Round Tables - Sales,2020-04-14 20:00:00,Diensten,Bedrijf,Consultancy,Bedrijfsleider,1
4235A499-B670-ED11-9561-6045BD895CDC,317CD023-2B1E-ED11-B83D-000D3AAD783A,OV-NW-Nieuwjaarsreceptie regio Oost-Vlaanderen,2022-12-21 19:00:00,Diensten,Bedrijf,Milieu,Verantwoordelijke Commercieel,1
22678F6B-267C-EB11-811D-001DD8B72B62,4CC0CBF3-6056-EB11-8117-001DD8B72B61,OV-Infosessie Start2Export2 Mexico,2021-04-22 16:00:00,Productie & Diensten,Bedrijf,Overige industrie & diensten,Medewerker Commercieel,1
FF17ED97-0942-E611-80D6-005056B06EC4,96ACAD9A-E7E3-EB11-8124-001DD8B72B62,OV-Start2Export2 Mexico traject,2021-09-14 16:00:00,Diensten,Bedrijf,Voeding,Bedrijfsleider,1


In [3]:
df_pivot = pd.pivot_table(df, index='ContactPersoonId', columns=['Ondernemingsaard', 'Ondernemingstype', 'PrimaireActiviteit', 'Functie'], values='rating', fill_value = 0)
df_pivot

Ondernemingsaard,Diensten,Diensten,Diensten,Diensten,Diensten,Diensten,Diensten,Diensten,Diensten,Diensten,...,Productie & Diensten,Productie & Diensten,Productie & Diensten,Productie & Diensten,Productie & Diensten,Productie & Diensten,Productie & Diensten,Productie & Diensten,Productie & Diensten,Productie & Diensten
Ondernemingstype,Bedrijf,Bedrijf,Bedrijf,Bedrijf,Bedrijf,Bedrijf,Bedrijf,Bedrijf,Bedrijf,Bedrijf,...,Social Profit,Social Profit,Social Profit,Social Profit,Social Profit,Social Profit,Social Profit,Vrije beroepen,Vrije beroepen,Vrije beroepen
PrimaireActiviteit,Accountancy & boekhouding,Accountancy & boekhouding,Accountancy & boekhouding,Accountancy & boekhouding,Accountancy & boekhouding,Accountancy & boekhouding,Accountancy & boekhouding,Accountancy & boekhouding,Accountancy & boekhouding,Accountancy & boekhouding,...,Verenigingen en maatschappelijke organisaties,Zorg,Zorg,Zorg,Zorg,Zorg,Zorg,Accountancy & boekhouding,Accountancy & boekhouding,Farmacie
Functie,Bedrijfsleider,Bestuurder,Contact Lidmaatschap,Directie of kaderlid,Management Assistent,Medewerker,Medewerker Commercieel,Medewerker Communicatie,Medewerker Financieel,Medewerker Juridische dienst,...,Verantwoordelijke Commercieel,Bedrijfsleider,Contact HealthCommunity,Contact Lidmaatschap,Medewerker Financieel,Verantwoordelijke Commercieel,Verantwoordelijke Productie,Medewerker Duurzaamheid / Milieu & Energie,"Verantwoordelijke Duurzaamheid, Milieu & Energie",Verantwoordelijke Financieel
ContactPersoonId,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4
00169619-E322-E911-80FB-001DD8B72B62,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0017416A-2C6E-E111-B43A-00505680000A,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0019C15A-6481-E611-80DE-001DD8B72B61,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00223C8E-467F-E311-BBFD-005056B06EB4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00231824-53EA-ED11-8849-6045BD895420,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
FFF0C643-DAE4-ED11-A7C7-000D3A4AB78E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FFF68074-EB93-E911-80FF-001DD8B72B62,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FFF68536-5DE0-E111-8A53-984BE17C2819,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FFFAE2B6-11D5-EC11-A7B5-000D3ABD1F85,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
df_pivot.iloc[1]

Ondernemingsaard      Ondernemingstype  PrimaireActiviteit         Functie                                         
Diensten              Bedrijf           Accountancy & boekhouding  Bedrijfsleider                                      0.0
                                                                   Bestuurder                                          0.0
                                                                   Contact Lidmaatschap                                0.0
                                                                   Directie of kaderlid                                0.0
                                                                   Management Assistent                                0.0
                                                                                                                      ... 
Productie & Diensten  Social Profit     Zorg                       Verantwoordelijke Commercieel                       0.0
                       

In [6]:
select_contact = ['DA252429-E5A6-ED11-AAD1-6045BD8956C9', 'ECEECDC0-BE17-EC11-8123-001DD8B72B61']
ss = df_pivot.loc[select_contact]
similarity_matrix = cosine_similarity(df_pivot, ss)
similarity_matrix_df = pd.DataFrame(similarity_matrix, index=df_pivot.index, columns=ss.index)
similarity_matrix_df

# Stack the DataFrame to create a MultiIndex
stacked_similarity_df = similarity_matrix_df.stack()

# Create a DataFrame from the stacked series
high_similarity_df = pd.DataFrame(stacked_similarity_df[stacked_similarity_df > 0.75])

# Reset the index to get a DataFrame with the secondary index
# high_similarity_df.reset_index(inplace=True)
high_similarity_df.index.names = ['Similars', 'Selected']
high_similarity_df.reset_index(inplace=True)
high_similarity_df.set_index(['Selected', 'Similars'], inplace=True)
high_similarity_df.sort_index(level='Selected', inplace=True)
high_similarity_df

Unnamed: 0_level_0,Unnamed: 1_level_0,0
Selected,Similars,Unnamed: 2_level_1
DA252429-E5A6-ED11-AAD1-6045BD8956C9,06D904B5-90CE-EB11-8120-001DD8B72B61,1.000000
DA252429-E5A6-ED11-AAD1-6045BD8956C9,37EC055D-4419-E711-80E4-001DD8B72B62,1.000000
DA252429-E5A6-ED11-AAD1-6045BD8956C9,62E7E675-B69C-ED11-AAD1-6045BD8952CE,1.000000
DA252429-E5A6-ED11-AAD1-6045BD8956C9,DA252429-E5A6-ED11-AAD1-6045BD8956C9,1.000000
DA252429-E5A6-ED11-AAD1-6045BD8956C9,F43F1643-495B-ED11-9562-6045BD895BFB,1.000000
...,...,...
ECEECDC0-BE17-EC11-8123-001DD8B72B61,FAD49C68-CE5D-E811-80F0-001DD8B72B62,0.816497
ECEECDC0-BE17-EC11-8123-001DD8B72B61,FB237511-4BEE-E411-90DE-005056B06EC4,1.000000
ECEECDC0-BE17-EC11-8123-001DD8B72B61,FE06E8C2-5E89-E611-80E3-001DD8B72B62,0.816497
ECEECDC0-BE17-EC11-8123-001DD8B72B61,FF5712A0-4A6B-E111-B43A-00505680000A,1.000000


In [7]:
print(select_contact)
done_campaigns = df.loc[select_contact, ["CampagneId"]]
done_campaigns

['DA252429-E5A6-ED11-AAD1-6045BD8956C9', 'ECEECDC0-BE17-EC11-8123-001DD8B72B61']


Unnamed: 0_level_0,CampagneId
ContactPersoonId,Unnamed: 1_level_1
DA252429-E5A6-ED11-AAD1-6045BD8956C9,8F59ADE1-8A92-ED11-AAD1-6045BD895CDC
ECEECDC0-BE17-EC11-8123-001DD8B72B61,7EDEA493-7845-EE11-BE6E-6045BD8956AE
ECEECDC0-BE17-EC11-8123-001DD8B72B61,4785222F-0963-EC11-8F8F-000D3A2E7886
ECEECDC0-BE17-EC11-8123-001DD8B72B61,4785222F-0963-EC11-8F8F-000D3A2E7886
ECEECDC0-BE17-EC11-8123-001DD8B72B61,7EDEA493-7845-EE11-BE6E-6045BD8956AE


In [25]:
results = []
for selected, s_df in high_similarity_df.groupby(level=0):
    t = df[df.index.isin(s_df.reset_index()['Similars'])].replace(0, np.nan).dropna(axis=1, how='all')
    t.sort_values('Startdatum', ascending=False, inplace=True)
    t = t[t['Startdatum'] > '2023-11-16']
    display(t[['CampagneId', 'CampagneNaam', 'Startdatum']])

    results.append((selected, t))

Unnamed: 0_level_0,CampagneId,CampagneNaam,Startdatum
ContactPersoonId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1


Unnamed: 0_level_0,CampagneId,CampagneNaam,Startdatum
ContactPersoonId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ED0E3C10-91B0-EA11-8110-001DD8B72B61,0E4173C9-D14B-EE11-BE6F-6045BD8956C9,OV-NW-Rode Loper Selectie-voetbalmatch - KAA G...,2024-02-24 18:30:00
ED0E3C10-91B0-EA11-8110-001DD8B72B61,0E4173C9-D14B-EE11-BE6F-6045BD8956C9,OV-NW-Rode Loper Selectie-voetbalmatch - KAA G...,2024-02-24 18:30:00
ED0E3C10-91B0-EA11-8110-001DD8B72B61,BFC428BF-8150-EE11-BE6E-6045BD8952CE,OV-Haven-North Sea Port Logistics & Supply Cha...,2023-12-14 15:00:00
ED0E3C10-91B0-EA11-8110-001DD8B72B61,BFC428BF-8150-EE11-BE6E-6045BD8952CE,OV-Haven-North Sea Port Logistics & Supply Cha...,2023-12-14 15:00:00
E84E74D0-C662-E511-895A-005056B06EC4,69AF93B9-CC05-EE11-8F6E-6045BD895420,OV-JO Breakfastclub december 2023: What's New ...,2023-12-06 08:00:00
E84E74D0-C662-E511-895A-005056B06EC4,69AF93B9-CC05-EE11-8F6E-6045BD895420,OV-JO Breakfastclub december 2023: What's New ...,2023-12-06 08:00:00
D50338EE-C637-EC11-8125-001DD8B72B61,08563B4A-6871-ED11-9561-6045BD8952CE,OV-Plato Academy 2023 - Workshop 4,2023-12-04 17:00:00
EFA5849E-4E04-E511-ABE8-005056B06EB4,08563B4A-6871-ED11-9561-6045BD8952CE,OV-Plato Academy 2023 - Workshop 4,2023-12-04 17:00:00
D50338EE-C637-EC11-8125-001DD8B72B61,08563B4A-6871-ED11-9561-6045BD8952CE,OV-Plato Academy 2023 - Workshop 4,2023-12-04 17:00:00
EFA5849E-4E04-E511-ABE8-005056B06EB4,08563B4A-6871-ED11-9561-6045BD8952CE,OV-Plato Academy 2023 - Workshop 4,2023-12-04 17:00:00


In [27]:
for s, x in results:
    similar_campagnes_not_done = x[~x['CampagneId'].isin(done_campaigns.loc[s])][['CampagneId', 'CampagneNaam']]
    similar_campagnes_not_done.drop_duplicates(inplace=True)
    x = similar_campagnes_not_done.reset_index()[['CampagneId', 'CampagneNaam']].set_index('CampagneId')
    display(x)

Unnamed: 0_level_0,CampagneNaam
CampagneId,Unnamed: 1_level_1


Unnamed: 0_level_0,CampagneNaam
CampagneId,Unnamed: 1_level_1
0E4173C9-D14B-EE11-BE6F-6045BD8956C9,OV-NW-Rode Loper Selectie-voetbalmatch - KAA G...
BFC428BF-8150-EE11-BE6E-6045BD8952CE,OV-Haven-North Sea Port Logistics & Supply Cha...
69AF93B9-CC05-EE11-8F6E-6045BD895420,OV-JO Breakfastclub december 2023: What's New ...
08563B4A-6871-ED11-9561-6045BD8952CE,OV-Plato Academy 2023 - Workshop 4
D9834556-240C-EE11-8F6E-6045BD8956C9,OV-JO Community Event Starten NJ 2023
7EDEA493-7845-EE11-BE6E-6045BD8956AE,OV-NW-Vaart Club November 2023
B3865100-DEEF-ED11-8849-6045BD895233,OV-NW-Voka Netwekkers bij Well Played
221A1039-2DEF-ED11-8849-6045BD8F0F71,OV-P-Groep 350-PP.Develop Plato 2023
AFF5A384-09D5-ED11-A7C7-6045BD974EB2,OV-NW-Voka Visit-Ducaju-Erpe-Mere
