In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

In [4]:
# Load the digital-marketing campaign dataset from the project-relative Data/
# directory and preview the first rows.
# (The original line ended with an unbalanced extra ")" which made the cell a
# SyntaxError.)
df = pd.read_csv("Data/digital_marketing_campaign_dataset.csv")
df.head()

Unnamed: 0,CustomerID,Age,Gender,Income,CampaignChannel,CampaignType,AdSpend,ClickThroughRate,ConversionRate,WebsiteVisits,PagesPerVisit,TimeOnSite,SocialShares,EmailOpens,EmailClicks,PreviousPurchases,LoyaltyPoints,AdvertisingPlatform,AdvertisingTool,Conversion
0,8000,56,Female,136912,Social Media,Awareness,6497.870068,0.043919,0.088031,0,2.399017,7.396803,19,6,9,4,688,IsConfid,ToolConfid,1
1,8001,69,Male,41760,Email,Retention,3898.668606,0.155725,0.182725,42,2.917138,5.352549,5,2,7,2,3459,IsConfid,ToolConfid,1
2,8002,46,Female,88456,PPC,Awareness,1546.429596,0.27749,0.076423,2,8.223619,13.794901,0,11,2,8,2337,IsConfid,ToolConfid,1
3,8003,32,Female,44085,PPC,Conversion,539.525936,0.137611,0.088004,47,4.540939,14.688363,89,2,2,0,2463,IsConfid,ToolConfid,1
4,8004,60,Female,83964,PPC,Conversion,1678.043573,0.252851,0.10994,0,2.046847,13.99337,6,6,6,8,4345,IsConfid,ToolConfid,1


In [12]:
# Columns used as predictors for the `Conversion` label.
features_for_lr = [
    'ClickThroughRate', 'ConversionRate', 'WebsiteVisits', 'PagesPerVisit',
    'TimeOnSite', 'SocialShares', 'EmailOpens', 'EmailClicks',
    'PreviousPurchases', 'LoyaltyPoints',
]
# Note: rows with missing values are not dropped before modelling.

X, y = df[features_for_lr], df['Conversion']

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardisation statistics are learned from the training rows only and then
# applied unchanged to the held-out rows.
scaler_lr = StandardScaler().fit(X_train)
X_train_scaled = scaler_lr.transform(X_train)
X_test_scaled = scaler_lr.transform(X_test)

lr = LogisticRegression(max_iter=1000).fit(X_train_scaled, y_train)

# Score every row (training and held-out alike) with the fitted model and store
# the second column of predict_proba as a new DataFrame column.
X_all_scaled = scaler_lr.transform(X)
df['ConversionProbability'] = lr.predict_proba(X_all_scaled)[:, 1]
df.head()

Unnamed: 0,CustomerID,Age,Gender,Income,CampaignChannel,CampaignType,AdSpend,ClickThroughRate,ConversionRate,WebsiteVisits,...,SocialShares,EmailOpens,EmailClicks,PreviousPurchases,LoyaltyPoints,AdvertisingPlatform,AdvertisingTool,Conversion,EmailEngagementRate,ConversionProbability
0,8000,56,Female,136912,Social Media,Awareness,6497.870068,0.043919,0.088031,0,...,19,6,9,4,688,IsConfid,ToolConfid,1,0.666667,0.677125
1,8001,69,Male,41760,Email,Retention,3898.668606,0.155725,0.182725,42,...,5,2,7,2,3459,IsConfid,ToolConfid,1,0.285714,0.918069
2,8002,46,Female,88456,PPC,Awareness,1546.429596,0.27749,0.076423,2,...,0,11,2,8,2337,IsConfid,ToolConfid,1,5.5,0.976577
3,8003,32,Female,44085,PPC,Conversion,539.525936,0.137611,0.088004,47,...,89,2,2,0,2463,IsConfid,ToolConfid,1,1.0,0.832414
4,8004,60,Female,83964,PPC,Conversion,1678.043573,0.252851,0.10994,0,...,6,6,6,8,4345,IsConfid,ToolConfid,1,1.0,0.974933


In [13]:
# KPI columns that enter the principal-component analysis; the last one is the
# model-derived ConversionProbability column of df.
kpi_columns = [
    'ClickThroughRate', 'ConversionRate', 'WebsiteVisits',
    'PagesPerVisit', 'TimeOnSite', 'SocialShares',
    'EmailOpens', 'EmailClicks', 'ConversionProbability',
]

# Standardise the KPI columns before running PCA on them.
scaler_kpi = StandardScaler()
kpi_data = scaler_kpi.fit_transform(df[kpi_columns])

# Only the first principal component is kept.
pca = PCA(n_components=1).fit(kpi_data)
pc1_loadings = pca.components_[0]

# Turn the loadings into weights: take magnitudes (sign is discarded) and
# normalise them so that they sum to 1.
abs_loadings = np.abs(pc1_loadings)
weights = abs_loadings / abs_loadings.sum()

weights_df = pd.DataFrame(dict(KPI=kpi_columns, Weight=weights))
print("Derived Weights from PC1:")
print(weights_df)

Derived Weights from PC1:
                     KPI    Weight
0       ClickThroughRate  0.099609
1         ConversionRate  0.085913
2          WebsiteVisits  0.064663
3          PagesPerVisit  0.099681
4             TimeOnSite  0.116333
5           SocialShares  0.013445
6             EmailOpens  0.125475
7            EmailClicks  0.119757
8  ConversionProbability  0.275125


In [14]:
# Weighted sum of the standardised KPI columns, one value per row.
composite_scores_std = kpi_data @ weights

# Rescale the composite linearly onto the interval [1, 10]; MinMaxScaler needs
# a 2-D column, so the vector is lifted to shape (n, 1) and flattened again.
scaler_score = MinMaxScaler(feature_range=(1, 10))
composite_scores_scaled = scaler_score.fit_transform(
    composite_scores_std[:, np.newaxis]
).flatten()

df['CampaignSuccessScore'] = composite_scores_scaled

# Preview the score next to the KPI columns it was built from.
print(df[['CampaignSuccessScore', *kpi_columns]].head())

   CampaignSuccessScore  ClickThroughRate  ConversionRate  WebsiteVisits  \
0              5.335735          0.043919        0.088031              0   
1              7.277359          0.155725        0.182725             42   
2              8.137215          0.277490        0.076423              2   
3              6.816588          0.137611        0.088004             47   
4              7.740471          0.252851        0.109940              0   

   PagesPerVisit  TimeOnSite  SocialShares  EmailOpens  EmailClicks  \
0       2.399017    7.396803            19           6            9   
1       2.917138    5.352549             5           2            7   
2       8.223619   13.794901             0          11            2   
3       4.540939   14.688363            89           2            2   
4       2.046847   13.993370             6           6            6   

   ConversionProbability  
0               0.677125  
1               0.918069  
2               0.976577  
3       