In [85]:
import pandas as pd
import numpy as np
import os
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sklearn.metrics.pairwise import cosine_similarity

In [86]:
# Read connection settings from the local .env file / process environment.
load_dotenv()
DB_URL = os.getenv("DB_URL")
if not DB_URL:
    # Fail early with a readable message instead of handing None to create_engine().
    raise RuntimeError("DB_URL is not set; define it in the environment or in a .env file.")
engine = create_engine(DB_URL)

# Load every campaign registration (Inschrijving) of every contact, together with
# the contact's account attributes and function name. The inner joins yield one
# row per matching contact / registration / function combination; the query has
# no row limit.
query = """
select c.ContactPersoonId, i.CampagneId, i.CampagneNaam, a.Ondernemingsaard, a.Ondernemingstype, a.PrimaireActiviteit, f.Naam as Functie
from Contactfiche c
join Account a on a.AccountId = c.AccountId
join Inschrijving i on i.ContactficheId = c.ContactPersoonId
join ContactficheFunctie cf on cf.ContactpersoonId = c.ContactPersoonId
join Functie f on f.FunctieId = cf.FunctieId
where CampagneId is not null;
"""
df = pd.read_sql(query, engine).set_index('ContactPersoonId')
# Constant marker value: every loaded row gets rating 1.
df["rating"] = 1

df.head()

Unnamed: 0_level_0,CampagneId,CampagneNaam,Ondernemingsaard,Ondernemingstype,PrimaireActiviteit,Functie,rating
ContactPersoonId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
D9303EA2-57E5-EB11-8121-001DD8B72B61,F090DEF9-2A5A-EB11-811A-001DD8B72B62,OV - Kick-Off Community Bouw en Vastgoed,Diensten,Familiebedrijf,Vastgoed,Medewerker,1
451DF235-4B73-E111-B43A-00505680000A,ED956944-5F6F-EA11-8110-001DD8B72B62,OV-Corona Round Tables - Sales,Diensten,Bedrijf,Consultancy,Bedrijfsleider,1
4235A499-B670-ED11-9561-6045BD895CDC,317CD023-2B1E-ED11-B83D-000D3AAD783A,OV-NW-Nieuwjaarsreceptie regio Oost-Vlaanderen,Diensten,Bedrijf,Milieu,Verantwoordelijke Commercieel,1
22678F6B-267C-EB11-811D-001DD8B72B62,4CC0CBF3-6056-EB11-8117-001DD8B72B61,OV-Infosessie Start2Export2 Mexico,Productie & Diensten,Bedrijf,Overige industrie & diensten,Medewerker Commercieel,1
FF17ED97-0942-E611-80D6-005056B06EC4,96ACAD9A-E7E3-EB11-8124-001DD8B72B62,OV-Start2Export2 Mexico traject,Diensten,Bedrijf,Voeding,Bedrijfsleider,1


In [87]:
# Contact x feature-combination matrix: one row per ContactPersoonId and one
# column per observed (Ondernemingsaard, Ondernemingstype, PrimaireActiviteit,
# Functie) tuple. Cells hold the aggregated `rating` (pivot_table's default
# aggregation); combinations a contact does not have are filled with 0.
feature_columns = ['Ondernemingsaard', 'Ondernemingstype', 'PrimaireActiviteit', 'Functie']
df_pivot = df.pivot_table(
    index='ContactPersoonId',
    columns=feature_columns,
    values='rating',
    fill_value=0,
)
df_pivot

Ondernemingsaard,Diensten,Diensten,Diensten,Diensten,Diensten,Diensten,Diensten,Diensten,Diensten,Diensten,...,Productie & Diensten,Productie & Diensten,Productie & Diensten,Productie & Diensten,Productie & Diensten,Productie & Diensten,Productie & Diensten,Productie & Diensten,Productie & Diensten,Productie & Diensten
Ondernemingstype,Bedrijf,Bedrijf,Bedrijf,Bedrijf,Bedrijf,Bedrijf,Bedrijf,Bedrijf,Bedrijf,Bedrijf,...,Social Profit,Social Profit,Social Profit,Social Profit,Social Profit,Social Profit,Social Profit,Vrije beroepen,Vrije beroepen,Vrije beroepen
PrimaireActiviteit,Accountancy & boekhouding,Accountancy & boekhouding,Accountancy & boekhouding,Accountancy & boekhouding,Accountancy & boekhouding,Accountancy & boekhouding,Accountancy & boekhouding,Accountancy & boekhouding,Accountancy & boekhouding,Accountancy & boekhouding,...,Verenigingen en maatschappelijke organisaties,Zorg,Zorg,Zorg,Zorg,Zorg,Zorg,Accountancy & boekhouding,Accountancy & boekhouding,Farmacie
Functie,Bedrijfsleider,Bestuurder,Contact Lidmaatschap,Directie of kaderlid,Management Assistent,Medewerker,Medewerker Commercieel,Medewerker Communicatie,Medewerker Financieel,Medewerker Juridische dienst,...,Verantwoordelijke Commercieel,Bedrijfsleider,Contact HealthCommunity,Contact Lidmaatschap,Medewerker Financieel,Verantwoordelijke Commercieel,Verantwoordelijke Productie,Medewerker Duurzaamheid / Milieu & Energie,"Verantwoordelijke Duurzaamheid, Milieu & Energie",Verantwoordelijke Financieel
ContactPersoonId,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4
00169619-E322-E911-80FB-001DD8B72B62,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0017416A-2C6E-E111-B43A-00505680000A,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0019C15A-6481-E611-80DE-001DD8B72B61,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00223C8E-467F-E311-BBFD-005056B06EB4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00231824-53EA-ED11-8849-6045BD895420,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
FFF0C643-DAE4-ED11-A7C7-000D3A4AB78E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FFF68074-EB93-E911-80FF-001DD8B72B62,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FFF68536-5DE0-E111-8A53-984BE17C2819,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FFFAE2B6-11D5-EC11-A7B5-000D3ABD1F85,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [88]:
# Spot-check: show the full feature vector of the contact at positional row 1.
df_pivot.iloc[1, :]

Ondernemingsaard      Ondernemingstype  PrimaireActiviteit         Functie                                         
Diensten              Bedrijf           Accountancy & boekhouding  Bedrijfsleider                                      0.0
                                                                   Bestuurder                                          0.0
                                                                   Contact Lidmaatschap                                0.0
                                                                   Directie of kaderlid                                0.0
                                                                   Management Assistent                                0.0
                                                                                                                      ... 
Productie & Diensten  Social Profit     Zorg                       Verantwoordelijke Commercieel                       0.0
                       

In [89]:
# Disabled experiment: wrap `similarity_matrix` in a DataFrame labelled by the
# contact ids of df_pivot on both axes and export it to CSV. Every line below is
# commented out, so this cell executes nothing.
# NOTE(review): `similarity_matrix` is only assigned in a later cell — confirm
# the intended cell order before re-enabling these lines.
# similarity_matrix_df = pd.DataFrame(similarity_matrix, index=df_pivot.index, columns=df_pivot.index)
# similarity_matrix_df.to_csv('../../simm.csv')
# similarity_matrix_df

array([0., 0., 0., ..., 0., 0., 0.])

In [102]:
# Contact for whom similar contacts are looked up.
select_contact = "9F266F3D-0A62-EA11-810D-001DD8B72B61"
# A contact counts as "similar" when its cosine similarity exceeds this value.
SIMILARITY_THRESHOLD = 0.5

# Cosine similarity between every row of df_pivot and the selected contact's row.
# The result has one column (one reference vector), so it is flattened to 1-D.
similarity_matrix = cosine_similarity(df_pivot, [df_pivot.loc[select_contact]]).ravel()
similarities = pd.DataFrame(
    {'ContactPersoonId': df_pivot.index, 'sim': similarity_matrix}
).set_index('ContactPersoonId')

# The selected contact is itself a row of df_pivot, so it also appears in
# `similarities` and is not removed here.
similar_users = similarities[similarities['sim'] > SIMILARITY_THRESHOLD].sort_values(by='sim', ascending=False)
similar_users

Unnamed: 0_level_0,sim
ContactPersoonId,Unnamed: 1_level_1
9F569D9B-E96A-E111-B43A-00505680000A,1.0
08C51AAB-414C-E711-80E7-001DD8B72B61,0.816497
2B699EE9-656D-E111-B43A-00505680000A,0.816497
7FC88E28-6C70-E111-B43A-00505680000A,0.816497


In [91]:
# Campaign ids found on the rows of df that belong to the selected contact
# (kept as a Series, one entry per matching row).
is_selected_contact = df.index == select_contact
done_campaigns = df.loc[is_selected_contact, 'CampagneId']
done_campaigns

ContactPersoonId
9F266F3D-0A62-EA11-810D-001DD8B72B61    A3E61415-BC1D-EB11-8115-001DD8B72B61
9F266F3D-0A62-EA11-810D-001DD8B72B61    B5DB7D00-F095-EA11-8111-001DD8B72B62
9F266F3D-0A62-EA11-810D-001DD8B72B61    29668F0F-6C86-EB11-811A-001DD8B72B61
Name: CampagneId, dtype: object

In [92]:
# All rows of df (campaign registrations with account/function attributes) that
# belong to the contacts in similar_users.
# df is a long-format table, not a user-item matrix, so no 0 -> NaN replacement
# or pruning of all-empty columns is applied: the columns are kept as loaded so
# CampagneId and CampagneNaam are always available to the following cells.
similar_campagnes = df.loc[df.index.isin(similar_users.index)]
similar_campagnes

Unnamed: 0_level_0,CampagneId,CampagneNaam,Ondernemingsaard,Ondernemingstype,PrimaireActiviteit,Functie,rating
ContactPersoonId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
26CCB813-CB21-EC11-8124-001DD8B72B61,B099F5E1-0B49-EC11-8C62-6045BD8D2834,OV-MATCH-Welkom@Voka-Februari 2022,Diensten,Bedrijf,Telecom & IT,Verantwoordelijke Administratie,1
204B37EF-0487-EB11-811A-001DD8B72B61,720428E4-F3CC-EB11-8120-001DD8B72B61,OV-Groeien door overname,Diensten,Bedrijf,Telecom & IT,Verantwoordelijke Administratie,1
37EC055D-4419-E711-80E4-001DD8B72B62,B9011A48-AE20-EA11-8109-001DD8B72B62,OV-Plato 2020 - Infosessie en webinar,Diensten,Bedrijf,Telecom & IT,Verantwoordelijke Administratie,1
204B37EF-0487-EB11-811A-001DD8B72B61,2DE56318-A0E8-EC11-BB3D-00224880A91F,OV-Digital Finance Lab-2,Diensten,Bedrijf,Telecom & IT,Verantwoordelijke Administratie,1
204B37EF-0487-EB11-811A-001DD8B72B61,FE7FC2C2-F0CC-EB11-8120-001DD8B72B61,"OV-BTW Class: Btw en douane, aandachtspunten b...",Diensten,Bedrijf,Telecom & IT,Verantwoordelijke Administratie,1
193EB48E-C58E-E511-B0F9-005056B06EC4,B3FA469C-6CB5-E711-80EC-001DD8B72B62,OV-NW-Voka connect Vlaamse Ardennen 2018,Diensten,Bedrijf,Telecom & IT,Verantwoordelijke Administratie,1
193EB48E-C58E-E511-B0F9-005056B06EC4,B3FA469C-6CB5-E711-80EC-001DD8B72B62,OV-NW-Voka connect Vlaamse Ardennen 2018,Diensten,Bedrijf,Telecom & IT,Verantwoordelijke Administratie,1
193EB48E-C58E-E511-B0F9-005056B06EC4,B3FA469C-6CB5-E711-80EC-001DD8B72B62,OV-NW-Voka connect Vlaamse Ardennen 2018,Diensten,Bedrijf,Telecom & IT,Verantwoordelijke Administratie,1
193EB48E-C58E-E511-B0F9-005056B06EC4,B3FA469C-6CB5-E711-80EC-001DD8B72B62,OV-NW-Voka connect Vlaamse Ardennen 2018,Diensten,Bedrijf,Telecom & IT,Verantwoordelijke Administratie,1
193EB48E-C58E-E511-B0F9-005056B06EC4,B3FA469C-6CB5-E711-80EC-001DD8B72B62,OV-NW-Voka connect Vlaamse Ardennen 2018,Diensten,Bedrijf,Telecom & IT,Verantwoordelijke Administratie,1


In [93]:
# Show only the campaign id and name of the similar contacts' registrations.
similar_campagnes.loc[:, ['CampagneId', 'CampagneNaam']]


Unnamed: 0_level_0,CampagneId,CampagneNaam
ContactPersoonId,Unnamed: 1_level_1,Unnamed: 2_level_1
26CCB813-CB21-EC11-8124-001DD8B72B61,B099F5E1-0B49-EC11-8C62-6045BD8D2834,OV-MATCH-Welkom@Voka-Februari 2022
204B37EF-0487-EB11-811A-001DD8B72B61,720428E4-F3CC-EB11-8120-001DD8B72B61,OV-Groeien door overname
37EC055D-4419-E711-80E4-001DD8B72B62,B9011A48-AE20-EA11-8109-001DD8B72B62,OV-Plato 2020 - Infosessie en webinar
204B37EF-0487-EB11-811A-001DD8B72B61,2DE56318-A0E8-EC11-BB3D-00224880A91F,OV-Digital Finance Lab-2
204B37EF-0487-EB11-811A-001DD8B72B61,FE7FC2C2-F0CC-EB11-8120-001DD8B72B61,"OV-BTW Class: Btw en douane, aandachtspunten b..."
193EB48E-C58E-E511-B0F9-005056B06EC4,B3FA469C-6CB5-E711-80EC-001DD8B72B62,OV-NW-Voka connect Vlaamse Ardennen 2018
193EB48E-C58E-E511-B0F9-005056B06EC4,B3FA469C-6CB5-E711-80EC-001DD8B72B62,OV-NW-Voka connect Vlaamse Ardennen 2018
193EB48E-C58E-E511-B0F9-005056B06EC4,B3FA469C-6CB5-E711-80EC-001DD8B72B62,OV-NW-Voka connect Vlaamse Ardennen 2018
193EB48E-C58E-E511-B0F9-005056B06EC4,B3FA469C-6CB5-E711-80EC-001DD8B72B62,OV-NW-Voka connect Vlaamse Ardennen 2018
193EB48E-C58E-E511-B0F9-005056B06EC4,B3FA469C-6CB5-E711-80EC-001DD8B72B62,OV-NW-Voka connect Vlaamse Ardennen 2018


In [94]:
# Campaigns of the similar contacts whose id is not among done_campaigns,
# listed once per campaign id.
not_done_mask = ~similar_campagnes['CampagneId'].isin(done_campaigns)
similar_campagnes_not_done = (
    similar_campagnes.loc[not_done_mask, ['CampagneId', 'CampagneNaam']]
    # Deduplicate on the campaign id *before* it becomes the index:
    # drop_duplicates() does not look at the index, so deduplicating afterwards
    # would compare CampagneNaam only and merge distinct campaigns that share a name.
    .drop_duplicates(subset='CampagneId')
    .set_index('CampagneId')
)
similar_campagnes_not_done

Unnamed: 0_level_0,CampagneNaam
CampagneId,Unnamed: 1_level_1
B099F5E1-0B49-EC11-8C62-6045BD8D2834,OV-MATCH-Welkom@Voka-Februari 2022
720428E4-F3CC-EB11-8120-001DD8B72B61,OV-Groeien door overname
B9011A48-AE20-EA11-8109-001DD8B72B62,OV-Plato 2020 - Infosessie en webinar
2DE56318-A0E8-EC11-BB3D-00224880A91F,OV-Digital Finance Lab-2
FE7FC2C2-F0CC-EB11-8120-001DD8B72B61,"OV-BTW Class: Btw en douane, aandachtspunten b..."
B3FA469C-6CB5-E711-80EC-001DD8B72B62,OV-NW-Voka connect Vlaamse Ardennen 2018
1C7F0895-EB6B-ED11-9561-6045BD895B5A,OV-Commercieel denken en doen voor niet commer...
3BA33E76-B8B4-EC11-983F-00224883C04D,OV-Management Assistant Day 2023
210CE445-B4A3-ED11-AAD1-6045BD895D85,OV-NW-Voka Update-Big Refresh-najaar 2023
8F59ADE1-8A92-ED11-AAD1-6045BD895CDC,OV-infosessie Welt - Verzuim
