In [10]:
# scikit-surprise
# https://learn.microsoft.com/en-US/cpp/windows/latest-supported-vc-redist?view=msvc-170#visual-studio-2015-2017-2019-and-2022
# https://stackoverflow.com/questions/61365790/error-could-not-build-wheels-for-scipy-which-use-pep-517-and-cannot-be-installe
# pip install --upgrade pip setuptools wheel / pip3 install scikit-surprise

In [11]:
import os
import sqlalchemy
import pandas as pd
from dotenv import load_dotenv 
from surprise import Reader, Dataset
from sqlalchemy import create_engine, text
from sklearn.preprocessing import OneHotEncoder
from surprise.model_selection import train_test_split
from surprise import KNNBasic

In [12]:
load_dotenv()
DB_URL = os.getenv("DB_URL")
engine = create_engine(DB_URL)

try:
    connection = engine.connect()
    print("Successfully connected to the database")
except Exception as e:
    print(f"Failed to connect to the database: {e}")

print(f"SQLAlchemy version: {sqlalchemy.__version__}")

Successfully connected to the database
SQLAlchemy version: 2.0.21


In [13]:
query = text('SELECT * FROM epic_5')
df = pd.read_sql_query(query, connection)
df.head()

Unnamed: 0,PersoonId,CampagneId,aantal_sessies,aantal_bezoeken,SoortCampagne,TypeCampagne,ThemaDuurzaamheid,ThemaFinancieelFiscaal,ThemaInnovatie,ThemaInternationaalOndernemen,ThemaMobiliteit,ThemaOmgeving,ThemaSalesMarketingCommunicatie,ThemaStrategieEnAlgemeenManagement,ThemaTalent,ThemaWelzijn
0,E03DDB2A-6572-EA11-810E-001DD8B72B61,C6D72260-E451-EC11-8C62-000D3ABFCF4A,3,1,Online,Netwerkevenement,1,1,1,1,0,1,1,1,0,0
1,15D63465-C159-E811-80F0-001DD8B72B62,23FA2D99-768E-E811-80F3-001DD8B72B61,1,0,Offline,Netwerkevenement,0,0,0,0,0,0,0,0,0,0
2,C2856AB5-4C49-E911-80FC-001DD8B72B62,FEEB8AD3-2FD4-EB11-8124-001DD8B72B62,3,0,Offline,Opleiding,0,0,0,0,0,0,0,0,1,0
3,B7CF541A-22B7-E111-A45C-00505680000A,AF7C48A5-1A54-EB11-811A-001DD8B72B62,1,2,Offline,Opleiding,1,0,0,0,0,0,1,0,1,1
4,140102CF-E267-E111-A00F-00505680000A,C826EB93-095A-EA11-810F-001DD8B72B62,1,0,Offline,Opleiding,0,0,0,0,0,0,0,0,0,0


In [14]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48724 entries, 0 to 48723
Data columns (total 16 columns):
 #   Column                              Non-Null Count  Dtype 
---  ------                              --------------  ----- 
 0   PersoonId                           48724 non-null  object
 1   CampagneId                          48724 non-null  object
 2   aantal_sessies                      48724 non-null  int64 
 3   aantal_bezoeken                     48724 non-null  int64 
 4   SoortCampagne                       48724 non-null  object
 5   TypeCampagne                        48724 non-null  object
 6   ThemaDuurzaamheid                   48724 non-null  int64 
 7   ThemaFinancieelFiscaal              48724 non-null  int64 
 8   ThemaInnovatie                      48724 non-null  int64 
 9   ThemaInternationaalOndernemen       48724 non-null  int64 
 10  ThemaMobiliteit                     48724 non-null  int64 
 11  ThemaOmgeving                       48724 non-null  in

In [15]:
encoder = OneHotEncoder(sparse_output=False)

encoded_columns = encoder.fit_transform(df[['SoortCampagne', 'TypeCampagne']])
feature_names = encoder.get_feature_names_out(['SoortCampagne', 'TypeCampagne'])

encoded_df = pd.DataFrame(encoded_columns, columns=feature_names)
encoded_df = encoded_df.astype("int64")

df = pd.concat([df, encoded_df], axis=1)
df = df.drop(['SoortCampagne', 'TypeCampagne'], axis=1)

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48724 entries, 0 to 48723
Data columns (total 23 columns):
 #   Column                              Non-Null Count  Dtype 
---  ------                              --------------  ----- 
 0   PersoonId                           48724 non-null  object
 1   CampagneId                          48724 non-null  object
 2   aantal_sessies                      48724 non-null  int64 
 3   aantal_bezoeken                     48724 non-null  int64 
 4   ThemaDuurzaamheid                   48724 non-null  int64 
 5   ThemaFinancieelFiscaal              48724 non-null  int64 
 6   ThemaInnovatie                      48724 non-null  int64 
 7   ThemaInternationaalOndernemen       48724 non-null  int64 
 8   ThemaMobiliteit                     48724 non-null  int64 
 9   ThemaOmgeving                       48724 non-null  int64 
 10  ThemaSalesMarketingCommunicatie     48724 non-null  int64 
 11  ThemaStrategieEnAlgemeenManagement  48724 non-null  in

In [16]:
reader = Reader(rating_scale=(0, int(df.aantal_sessies.max())))
data = Dataset.load_from_df(df[['PersoonId', 'CampagneId', 'aantal_sessies']], reader)

trainset, testset = train_test_split(data, test_size=.25)

In [17]:
# https://blog.stackademic.com/recommender-systems-with-python-code-examples-8d2ed0995f9a
# hybrid model met content based filtering (cold start oplossing) en collaborative filtering

In [18]:
reader = Reader(rating_scale=(0, int(df.aantal_sessies.max())))
data = Dataset.load_from_df(df[['PersoonId', 'CampagneId', 'aantal_sessies']], reader)

trainset, testset = train_test_split(data, test_size=.25)

# Use user-based KNN
sim_options = {'name': 'pearson', 'user_based': False}
model = KNNBasic(sim_options=sim_options)

# Train the model
model.fit(trainset)

# Make predictions on the test set
predictions = model.test(testset)

def get_top_n_recommendations(model, campaign_id, n=10):
    # Find users who haven't interacted with the given campaign
    anti_testset = trainset.build_anti_testset()
    anti_testset_campaign = filter(lambda x: x[1] == campaign_id, anti_testset)

    # Predict ratings for users on the given campaign
    predictions = model.test(anti_testset_campaign)

    # Get the top N recommendations
    top_n = [pred.uid for pred in sorted(predictions, key=lambda x: x.est, reverse=True)[:n]]

    return top_n

campagne = "08563B4A-6871-ED11-9561-6045BD8952CE"
top_recommendations = get_top_n_recommendations(model, campagne)
top_recommendations

Computing the pearson similarity matrix...
Done computing similarity matrix.


['6CE11D6E-4969-E111-B43A-00505680000A',
 '3AE60425-5269-E111-B43A-00505680000A',
 '36466D32-0E8F-EC11-B400-000D3A263845',
 '8468A5AF-85B7-E911-8104-001DD8B72B62',
 'A72FDF33-85B7-E911-8104-001DD8B72B62',
 '52B4E699-5636-E311-8A47-005056B06EC4',
 'FEC4AEF4-2E8F-EC11-B400-000D3A25AC23',
 'C50BFA06-86B7-E911-8104-001DD8B72B62',
 'E92F31E5-EB67-E111-A00F-00505680000A',
 '05099242-4D69-E111-B43A-00505680000A']