In [1]:
import pandas as pd
import numpy as np
import os
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sklearn.metrics.pairwise import cosine_similarity

In [13]:
load_dotenv()
DB_URL = os.getenv("DB_URL")
engine = create_engine(DB_URL)
# select 26 rows from SQL table to insert in dataframe.
query = """
select c.ContactPersoonId, i.CampagneId, i.CampagneNaam, a.Ondernemingsaard, a.Ondernemingstype, a.PrimaireActiviteit, f.Naam as Functie
from Contactfiche c
join Account a on a.AccountId = c.AccountId
join Inschrijving i on i.ContactficheId = c.ContactPersoonId
join ContactficheFunctie cf on cf.ContactpersoonId = c.ContactPersoonId
join Functie f on f.FunctieId = cf.FunctieId
where CampagneId is not null;
"""
df = pd.read_sql(query, engine)
df.set_index('ContactPersoonId', inplace=True)
df["rating"] = 1

df.head()

Unnamed: 0_level_0,CampagneId,CampagneNaam,Ondernemingsaard,Ondernemingstype,PrimaireActiviteit,Functie,rating
ContactPersoonId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
D9303EA2-57E5-EB11-8121-001DD8B72B61,F090DEF9-2A5A-EB11-811A-001DD8B72B62,OV - Kick-Off Community Bouw en Vastgoed,Diensten,Familiebedrijf,Vastgoed,Medewerker,1
451DF235-4B73-E111-B43A-00505680000A,ED956944-5F6F-EA11-8110-001DD8B72B62,OV-Corona Round Tables - Sales,Diensten,Bedrijf,Consultancy,Bedrijfsleider,1
4235A499-B670-ED11-9561-6045BD895CDC,317CD023-2B1E-ED11-B83D-000D3AAD783A,OV-NW-Nieuwjaarsreceptie regio Oost-Vlaanderen,Diensten,Bedrijf,Milieu,Verantwoordelijke Commercieel,1
22678F6B-267C-EB11-811D-001DD8B72B62,4CC0CBF3-6056-EB11-8117-001DD8B72B61,OV-Infosessie Start2Export2 Mexico,Productie & Diensten,Bedrijf,Overige industrie & diensten,Medewerker Commercieel,1
FF17ED97-0942-E611-80D6-005056B06EC4,96ACAD9A-E7E3-EB11-8124-001DD8B72B62,OV-Start2Export2 Mexico traject,Diensten,Bedrijf,Voeding,Bedrijfsleider,1


In [14]:
df_pivot = pd.pivot_table(df, index='ContactPersoonId', columns=['Ondernemingsaard', 'Ondernemingstype', 'PrimaireActiviteit', 'Functie'], values='rating', fill_value = 0)
df_pivot
# df_pivot
# df_pivot[df_pivot[' OV-JO-Breakfastclub December 2019'] == 1]
# df_contacts_features = df[['Ondernemingsaard', 'Ondernemingstype', 'PrimaireActiviteit']]
# df_contacts_features = pd.get_dummies(df_contacts_features)
# df_contacts_features

Ondernemingsaard,Diensten,Diensten,Diensten,Diensten,Diensten,Diensten,Diensten,Diensten,Diensten,Diensten,...,Productie & Diensten,Productie & Diensten,Productie & Diensten,Productie & Diensten,Productie & Diensten,Productie & Diensten,Productie & Diensten,Productie & Diensten,Productie & Diensten,Productie & Diensten
Ondernemingstype,Bedrijf,Bedrijf,Bedrijf,Bedrijf,Bedrijf,Bedrijf,Bedrijf,Bedrijf,Bedrijf,Bedrijf,...,Social Profit,Social Profit,Social Profit,Social Profit,Social Profit,Social Profit,Social Profit,Vrije beroepen,Vrije beroepen,Vrije beroepen
PrimaireActiviteit,Accountancy & boekhouding,Accountancy & boekhouding,Accountancy & boekhouding,Accountancy & boekhouding,Accountancy & boekhouding,Accountancy & boekhouding,Accountancy & boekhouding,Accountancy & boekhouding,Accountancy & boekhouding,Accountancy & boekhouding,...,Verenigingen en maatschappelijke organisaties,Zorg,Zorg,Zorg,Zorg,Zorg,Zorg,Accountancy & boekhouding,Accountancy & boekhouding,Farmacie
Functie,Bedrijfsleider,Bestuurder,Contact Lidmaatschap,Directie of kaderlid,Management Assistent,Medewerker,Medewerker Commercieel,Medewerker Communicatie,Medewerker Financieel,Medewerker Juridische dienst,...,Verantwoordelijke Commercieel,Bedrijfsleider,Contact HealthCommunity,Contact Lidmaatschap,Medewerker Financieel,Verantwoordelijke Commercieel,Verantwoordelijke Productie,Medewerker Duurzaamheid / Milieu & Energie,"Verantwoordelijke Duurzaamheid, Milieu & Energie",Verantwoordelijke Financieel
ContactPersoonId,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4
00169619-E322-E911-80FB-001DD8B72B62,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0017416A-2C6E-E111-B43A-00505680000A,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0019C15A-6481-E611-80DE-001DD8B72B61,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00223C8E-467F-E311-BBFD-005056B06EB4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00231824-53EA-ED11-8849-6045BD895420,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
FFF0C643-DAE4-ED11-A7C7-000D3A4AB78E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FFF68074-EB93-E911-80FF-001DD8B72B62,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FFF68536-5DE0-E111-8A53-984BE17C2819,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FFFAE2B6-11D5-EC11-A7B5-000D3ABD1F85,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
similarity_matrix = cosine_similarity(df_pivot)
similarity_matrix_df = pd.DataFrame(similarity_matrix, index=df_pivot.index, columns=df_pivot.index)
similarity_matrix_df

ContactPersoonId,00169619-E322-E911-80FB-001DD8B72B62,0017416A-2C6E-E111-B43A-00505680000A,0019C15A-6481-E611-80DE-001DD8B72B61,00223C8E-467F-E311-BBFD-005056B06EB4,00231824-53EA-ED11-8849-6045BD895420,0025D44A-C19F-E311-B1AE-005056B06EC4,0033DB6B-815B-E511-8718-005056B06EC4,0036CCCE-B36F-E111-B43A-00505680000A,003740F4-F4A0-EB11-811E-001DD8B72B62,00426A48-F851-EC11-8C62-000D3ABFC672,...,FFC4A1F5-7BA0-EC11-B400-6045BD93C7AE,FFCF2AB4-BA72-E111-B43A-00505680000A,FFD3449C-3A6C-E111-B43A-00505680000A,FFD90821-3A6F-E111-B43A-00505680000A,FFE9091D-AC4D-E211-9B51-005056B06EC4,FFF0C643-DAE4-ED11-A7C7-000D3A4AB78E,FFF68074-EB93-E911-80FF-001DD8B72B62,FFF68536-5DE0-E111-8A53-984BE17C2819,FFFAE2B6-11D5-EC11-A7B5-000D3ABD1F85,FFFEA9CB-ED93-EC11-B400-000D3A2B10EB
ContactPersoonId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00169619-E322-E911-80FB-001DD8B72B62,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0017416A-2C6E-E111-B43A-00505680000A,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0019C15A-6481-E611-80DE-001DD8B72B61,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00223C8E-467F-E311-BBFD-005056B06EB4,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00231824-53EA-ED11-8849-6045BD895420,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
FFF0C643-DAE4-ED11-A7C7-000D3A4AB78E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
FFF68074-EB93-E911-80FF-001DD8B72B62,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
FFF68536-5DE0-E111-8A53-984BE17C2819,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
FFFAE2B6-11D5-EC11-A7B5-000D3ABD1F85,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [16]:
select_contact = "00169619-E322-E911-80FB-001DD8B72B62"
similarities = similarity_matrix_df[select_contact].drop(select_contact)
weights = similarities/similarities.sum()

In [42]:
#num of silimar users
n = 10
#set a threshold for similarity
user_similarity_threshold =  0.5
# Get top n similar users
similar_users = similarity_matrix_df[similarity_matrix_df[select_contact]>user_similarity_threshold][select_contact].sort_values(ascending=False)[:n]

In [43]:
similar_users

ContactPersoonId
00169619-E322-E911-80FB-001DD8B72B62    1.0
29631920-B16A-E111-B43A-00505680000A    1.0
F1862259-775F-E911-80FD-001DD8B72B62    1.0
B9AD6B5C-0473-E111-B43A-00505680000A    1.0
892C77B4-0992-E611-80DE-001DD8B72B61    1.0
01218324-0942-E611-80D6-005056B06EC4    1.0
6BEDA1C3-C08D-ED11-81AD-6045BD895D85    1.0
298B17CE-4358-E311-BBFD-005056B06EB4    1.0
6E9E2D4E-A88E-EA11-810F-001DD8B72B61    1.0
0D4A271E-A66B-E111-B43A-00505680000A    1.0
Name: 00169619-E322-E911-80FB-001DD8B72B62, dtype: float64

In [44]:
done_campaigns = df.loc[df.index == select_contact]['CampagneId']
done_campaigns

ContactPersoonId
00169619-E322-E911-80FB-001DD8B72B62    EA3C6FE0-758E-E811-80F3-001DD8B72B61
00169619-E322-E911-80FB-001DD8B72B62    67B929FF-34F2-E811-80F9-001DD8B72B61
00169619-E322-E911-80FB-001DD8B72B62    EA3C6FE0-758E-E811-80F3-001DD8B72B61
00169619-E322-E911-80FB-001DD8B72B62    67B929FF-34F2-E811-80F9-001DD8B72B61
00169619-E322-E911-80FB-001DD8B72B62    67B929FF-34F2-E811-80F9-001DD8B72B61
00169619-E322-E911-80FB-001DD8B72B62    EA3C6FE0-758E-E811-80F3-001DD8B72B61
00169619-E322-E911-80FB-001DD8B72B62    67B929FF-34F2-E811-80F9-001DD8B72B61
00169619-E322-E911-80FB-001DD8B72B62    EA3C6FE0-758E-E811-80F3-001DD8B72B61
Name: CampagneId, dtype: object

In [48]:
# Movies that similar users watched.
similar_campagnes = df[df.index.isin(similar_users.index)].replace(0, np.nan).dropna(axis=1, how='all')

#similar_user_movies = movies_ratings_pivot[movies_ratings_pivot.index.isin(top similar_users.index)].dropna(axis=1, how='all')
similar_campagnes = similar_campagnes.drop(select_contact, axis=0)
similar_campagnes

Unnamed: 0_level_0,CampagneId,CampagneNaam,Ondernemingsaard,Ondernemingstype,PrimaireActiviteit,Functie,rating
ContactPersoonId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
01218324-0942-E611-80D6-005056B06EC4,AF18B713-6DB5-E711-80EC-001DD8B72B62,OV-NW-Voka connect Aalst 2018,Productie & Diensten,Bedrijf,Technologische industrie & diensten,Bedrijfsleider,1
0D4A271E-A66B-E111-B43A-00505680000A,46C0357E-13E0-ED11-A7C6-6045BD895FE4,OV-NW/BB-Voka Politica Erpe-Mere 2023,Productie & Diensten,Bedrijf,Technologische industrie & diensten,Bedrijfsleider,1
298B17CE-4358-E311-BBFD-005056B06EB4,EA3C6FE0-758E-E811-80F3-001DD8B72B61,OV-NW-Connect Gent 2019,Productie & Diensten,Bedrijf,Technologische industrie & diensten,Bedrijfsleider,1
892C77B4-0992-E611-80DE-001DD8B72B61,A534E7FC-EEF3-E711-80EE-001DD8B72B62,OV-NW-Voka Update: The Big Refresh 4,Productie & Diensten,Bedrijf,Technologische industrie & diensten,Bedrijfsleider,1
01218324-0942-E611-80D6-005056B06EC4,AF18B713-6DB5-E711-80EC-001DD8B72B62,OV-NW-Voka connect Aalst 2018,Productie & Diensten,Bedrijf,Technologische industrie & diensten,Bedrijfsleider,1
...,...,...,...,...,...,...,...
6E9E2D4E-A88E-EA11-810F-001DD8B72B61,BDCD23B8-6712-ED11-B83D-000D3A43E167,OV-NW-220915-Netwekkers avondeditie,Productie & Diensten,Bedrijf,Technologische industrie & diensten,Contact Lidmaatschap,1
6E9E2D4E-A88E-EA11-810F-001DD8B72B61,A2F63005-8EA8-ED11-AAD1-6045BD895D85,OV-NW-Voka Bilan 2023,Productie & Diensten,Bedrijf,Technologische industrie & diensten,Contact Lidmaatschap,1
6E9E2D4E-A88E-EA11-810F-001DD8B72B61,9646B4F7-B51D-EA11-8109-001DD8B72B62,OV-P-GROEP N-StartUp-Bryo 2020,Productie & Diensten,Bedrijf,Technologische industrie & diensten,Contact Lidmaatschap,1
6E9E2D4E-A88E-EA11-810F-001DD8B72B61,3BAA0411-71A0-EC11-B400-6045BD93CA86,OV-NW-Voka Vista - Community event 05/2022,Productie & Diensten,Bedrijf,Technologische industrie & diensten,Contact Lidmaatschap,1


In [49]:
similar_campagnes[['CampagneId', 'CampagneNaam']]


Unnamed: 0_level_0,CampagneId,CampagneNaam
ContactPersoonId,Unnamed: 1_level_1,Unnamed: 2_level_1
01218324-0942-E611-80D6-005056B06EC4,AF18B713-6DB5-E711-80EC-001DD8B72B62,OV-NW-Voka connect Aalst 2018
0D4A271E-A66B-E111-B43A-00505680000A,46C0357E-13E0-ED11-A7C6-6045BD895FE4,OV-NW/BB-Voka Politica Erpe-Mere 2023
298B17CE-4358-E311-BBFD-005056B06EB4,EA3C6FE0-758E-E811-80F3-001DD8B72B61,OV-NW-Connect Gent 2019
892C77B4-0992-E611-80DE-001DD8B72B61,A534E7FC-EEF3-E711-80EE-001DD8B72B62,OV-NW-Voka Update: The Big Refresh 4
01218324-0942-E611-80D6-005056B06EC4,AF18B713-6DB5-E711-80EC-001DD8B72B62,OV-NW-Voka connect Aalst 2018
...,...,...
6E9E2D4E-A88E-EA11-810F-001DD8B72B61,BDCD23B8-6712-ED11-B83D-000D3A43E167,OV-NW-220915-Netwekkers avondeditie
6E9E2D4E-A88E-EA11-810F-001DD8B72B61,A2F63005-8EA8-ED11-AAD1-6045BD895D85,OV-NW-Voka Bilan 2023
6E9E2D4E-A88E-EA11-810F-001DD8B72B61,9646B4F7-B51D-EA11-8109-001DD8B72B62,OV-P-GROEP N-StartUp-Bryo 2020
6E9E2D4E-A88E-EA11-810F-001DD8B72B61,3BAA0411-71A0-EC11-B400-6045BD93CA86,OV-NW-Voka Vista - Community event 05/2022


In [56]:
similar_campagnes_not_done = similar_campagnes[~similar_campagnes['CampagneId'].isin(done_campaigns)][['CampagneId', 'CampagneNaam']]
similar_campagnes_not_done.set_index('CampagneId', inplace=True)
similar_campagnes_not_done

Unnamed: 0_level_0,CampagneNaam
CampagneId,Unnamed: 1_level_1
AF18B713-6DB5-E711-80EC-001DD8B72B62,OV-NW-Voka connect Aalst 2018
46C0357E-13E0-ED11-A7C6-6045BD895FE4,OV-NW/BB-Voka Politica Erpe-Mere 2023
A534E7FC-EEF3-E711-80EE-001DD8B72B62,OV-NW-Voka Update: The Big Refresh 4
AF18B713-6DB5-E711-80EC-001DD8B72B62,OV-NW-Voka connect Aalst 2018
39BBF8F7-6F3A-E911-80FC-001DD8B72B61,OV-P-Groep J2 - StartUp-Bryo2019
...,...
BDCD23B8-6712-ED11-B83D-000D3A43E167,OV-NW-220915-Netwekkers avondeditie
A2F63005-8EA8-ED11-AAD1-6045BD895D85,OV-NW-Voka Bilan 2023
9646B4F7-B51D-EA11-8109-001DD8B72B62,OV-P-GROEP N-StartUp-Bryo 2020
3BAA0411-71A0-EC11-B400-6045BD93CA86,OV-NW-Voka Vista - Community event 05/2022
