In [46]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [47]:
df=pd.read_csv('digital_marketing_campaign_dataset.csv')

In [48]:
df.head(2)

Unnamed: 0,CustomerID,Age,Gender,Income,CampaignChannel,CampaignType,AdSpend,ClickThroughRate,ConversionRate,WebsiteVisits,PagesPerVisit,TimeOnSite,SocialShares,EmailOpens,EmailClicks,PreviousPurchases,LoyaltyPoints,AdvertisingPlatform,AdvertisingTool,Conversion
0,8000,56,Female,136912,Social Media,Awareness,6497.870068,0.043919,0.088031,0,2.399017,7.396803,19,6,9,4,688,IsConfid,ToolConfid,1
1,8001,69,Male,41760,Email,Retention,3898.668606,0.155725,0.182725,42,2.917138,5.352549,5,2,7,2,3459,IsConfid,ToolConfid,1


# About Data set


**CustomerID: Unique identifier for each customer.**

**Age: Age of the customer.**

**Gender: Gender of the customer (Male/Female).**

**Income: Annual income of the customer in USD.**


**CampaignChannel: The channel through which the marketing campaign is delivered (Email, Social Media, SEO, PPC, Referral).**

**CampaignType: Type of the marketing campaign (Awareness, Consideration, Conversion, Retention).**

**AdSpend: Amount spent on the marketing campaign in USD.**

**ClickThroughRate: Rate at which customers click on the marketing content.**

**ConversionRate: Rate at which clicks convert to desired actions (e.g., purchases).**

**AdvertisingPlatform: Confidential.**

**AdvertisingTool: Confidential.**

**PreviousPurchases: Number of previous purchases made by the customer.**

**LoyaltyPoints: Number of loyalty points accumulated by the customer.**

**Conversion: Binary variable indicating whether the customer converted (1) or not (0).**

# 1. Predictive modeling of customer conversion rates.
*in this we will develop a model based on these parameters to predict whether the customer will proceed to make a purchase*

In [53]:
df.columns

Index(['CustomerID', 'Age', 'Gender', 'Income', 'CampaignChannel',
       'CampaignType', 'AdSpend', 'ClickThroughRate', 'ConversionRate',
       'WebsiteVisits', 'PagesPerVisit', 'TimeOnSite', 'SocialShares',
       'EmailOpens', 'EmailClicks', 'PreviousPurchases', 'LoyaltyPoints',
       'AdvertisingPlatform', 'AdvertisingTool', 'Conversion'],
      dtype='object')

In [54]:
df.drop(['AdvertisingPlatform','AdvertisingTool'],axis='columns',inplace=True)

In [55]:
df.head(1)

Unnamed: 0,CustomerID,Age,Gender,Income,CampaignChannel,CampaignType,AdSpend,ClickThroughRate,ConversionRate,WebsiteVisits,PagesPerVisit,TimeOnSite,SocialShares,EmailOpens,EmailClicks,PreviousPurchases,LoyaltyPoints,Conversion
0,8000,56,Female,136912,Social Media,Awareness,6497.870068,0.043919,0.088031,0,2.399017,7.396803,19,6,9,4,688,1


In [56]:
df.Gender.unique()

array(['Female', 'Male'], dtype=object)

In [57]:
df.CampaignChannel.unique()

array(['Social Media', 'Email', 'PPC', 'Referral', 'SEO'], dtype=object)

In [58]:
df.CampaignType.unique()

array(['Awareness', 'Retention', 'Conversion', 'Consideration'],
      dtype=object)

In [59]:
df.head(1)

Unnamed: 0,CustomerID,Age,Gender,Income,CampaignChannel,CampaignType,AdSpend,ClickThroughRate,ConversionRate,WebsiteVisits,PagesPerVisit,TimeOnSite,SocialShares,EmailOpens,EmailClicks,PreviousPurchases,LoyaltyPoints,Conversion
0,8000,56,Female,136912,Social Media,Awareness,6497.870068,0.043919,0.088031,0,2.399017,7.396803,19,6,9,4,688,1


In [60]:
df.shape

(8000, 18)

In [61]:
df1=pd.get_dummies(df,columns=['Gender','CampaignChannel','CampaignType'],drop_first=True).astype(int)

In [62]:
df1.shape

(8000, 23)

In [63]:
df1.head(1)

Unnamed: 0,CustomerID,Age,Income,AdSpend,ClickThroughRate,ConversionRate,WebsiteVisits,PagesPerVisit,TimeOnSite,SocialShares,...,LoyaltyPoints,Conversion,Gender_Male,CampaignChannel_PPC,CampaignChannel_Referral,CampaignChannel_SEO,CampaignChannel_Social Media,CampaignType_Consideration,CampaignType_Conversion,CampaignType_Retention
0,8000,56,136912,6497,0,0,0,2,7,19,...,688,1,0,0,0,0,1,0,0,0


In [64]:
from sklearn.model_selection import train_test_split

In [65]:
from sklearn.linear_model import LinearRegression

In [66]:
from sklearn.linear_model import LogisticRegression

In [67]:
from sklearn.tree import DecisionTreeClassifier

In [68]:
from sklearn.ensemble import RandomForestClassifier

In [69]:
from sklearn.naive_bayes import GaussianNB

In [70]:
from sklearn.svm import SVC

In [71]:
from sklearn.model_selection import KFold

In [72]:
from sklearn.model_selection import cross_val_score

In [73]:
inputs=df1.drop('Conversion',axis='columns')
output=df1.Conversion

In [74]:
X_train,X_test,y_train,y_test=train_test_split(inputs,output,test_size=0.15,random_state=20)

In [75]:
linear_score=cross_val_score(LinearRegression(), X_train,y_train,cv=5)
linear_score.mean()

0.1311716519487668

In [76]:
Decision_score=cross_val_score(DecisionTreeClassifier(), X_train,y_train,cv=5)
Decision_score.mean()

0.8672058823529412

In [77]:
Logistic_score=cross_val_score(LogisticRegression(max_iter=100), X_train,y_train,cv=5)
Logistic_score.mean()

0.8770588235294119

In [78]:
import numpy as np

In [79]:
svc_score=cross_val_score(SVC(), X_train,y_train,cv=5)
svc_score.mean()

0.8755882352941177

In [80]:
Gaussian_score=cross_val_score(GaussianNB(), X_train,y_train,cv=5)
Gaussian_score.mean()

0.879264705882353

In [81]:
Gaussian_score=cross_val_score(GaussianNB(), X_train,y_train,cv=5)
Gaussian_score.mean()

0.879264705882353

In [82]:
from sklearn.neighbors import KNeighborsClassifier

In [83]:
Knn_score=cross_val_score(KNeighborsClassifier(), X_train,y_train,cv=5)
Knn_score.mean()

0.8655882352941177

In [84]:
Random_score=cross_val_score(RandomForestClassifier(), X_train,y_train,cv=5)
Random_score.mean()

0.8873529411764706

In [85]:
RandomForestClassifier

sklearn.ensemble._forest.RandomForestClassifier

In [86]:
Random_score=cross_val_score(RandomForestClassifier(n_estimators=150,criterion='entropy',random_state=10), X_train,y_train,cv=5)
Random_score.mean()

0.8876470588235292

# Finding Best Parameters

In [88]:
from sklearn.model_selection import GridSearchCV

In [89]:
Parameter_Tuning=GridSearchCV(RandomForestClassifier(), {
    'n_estimators':[150,175,200],
    'criterion':['gini','entropy'],
    'random_state':[10,20]
},cv=5,return_train_score=False)

Parameter_Tuning  

In [None]:
Parameter_Tuning.fit(X_train,y_train)

In [None]:
results=Parameter_Tuning.cv_results_

In [None]:
final=pd.DataFrame(results)

In [None]:
final