<a href="https://colab.research.google.com/github/SinnottKayleigh/B2B-Sales-Algos/blob/main/Synthetic_Engagement_Behaviour_(Basic).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

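**Reading the coefficients:** a positive coefficient means that higher values of that feature increase the predicted likelihood that the client has intent; a negative coefficient means higher values decrease it.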

In [4]:
import pandas as pd
import numpy as np

# Fix the legacy global NumPy seed so every run draws the same synthetic clients.
np.random.seed(42)

num_clients = 500
per_client = dict(size=num_clients)  # every column holds one value per client

# Columns are drawn in this exact order; reordering them would change the
# values each column receives from the seeded random stream.
data = dict(
    Client_ID=np.arange(1, num_clients + 1),
    # Visits to FX pages (Poisson distribution)
    Website_Visits=np.random.poisson(lam=10, **per_client),
    # Email open rate (Beta distribution)
    Email_Open_Rate=np.random.beta(a=2, b=5, **per_client),
    # Email clicks (Poisson distribution)
    Email_Clicks=np.random.poisson(lam=3, **per_client),
    # Events attended (Binomial distribution, up to 5 events)
    Event_Attendance=np.random.binomial(n=5, p=0.3, **per_client),
    # Social media mentions (Poisson distribution)
    Social_Media_Mentions=np.random.poisson(lam=5, **per_client),
    # Target variable: 1 with probability 0.3, drawn independently of the features
    Likely_to_Use_Service=np.random.binomial(n=1, p=0.3, **per_client),
)

df = pd.DataFrame(data)

print(df.head())

   Client_ID  Website_Visits  Email_Open_Rate  Email_Clicks  Event_Attendance  \
0          1              12         0.060392             0                 1   
1          2               6         0.217676             2                 0   
2          3              11         0.423391             1                 2   
3          4              14         0.085754             4                 3   
4          5               7         0.241142             5                 0   

   Social_Media_Mentions  Likely_to_Use_Service  
0                      1                      1  
1                      6                      0  
2                      8                      0  
3                      1                      0  
4                      1                      0  


In [7]:
# Quick data-quality snapshot, printed in order:
#   1. number of missing values per column
#   2. dtype of each column
#   3. summary statistics of the numeric columns
for summary in (df.isnull().sum(), df.dtypes, df.describe()):
    print(summary)

Client_ID                0
Website_Visits           0
Email_Open_Rate          0
Email_Clicks             0
Event_Attendance         0
Social_Media_Mentions    0
Likely_to_Use_Service    0
Company_Name             0
dtype: int64
Client_ID                  int64
Website_Visits             int64
Email_Open_Rate          float64
Email_Clicks               int64
Event_Attendance           int64
Social_Media_Mentions      int64
Likely_to_Use_Service      int64
Company_Name              object
dtype: object
        Client_ID  Website_Visits  Email_Open_Rate  Email_Clicks  \
count  500.000000      500.000000       500.000000    500.000000   
mean   250.500000        9.910000         0.285635      2.926000   
std    144.481833        3.228428         0.156134      1.651053   
min      1.000000        1.000000         0.007248      0.000000   
25%    125.750000        8.000000         0.164934      2.000000   
50%    250.500000       10.000000         0.261416      3.000000   
75%    375.250000

In [10]:
import plotly.express as px

def _engagement_histogram(column, axis_label, title):
    """Build a 20-bin histogram of one engagement column of `df`.

    column     -- name of the column in `df` to plot on the x axis
    axis_label -- human-readable label shown in place of the column name
    title      -- figure title
    """
    return px.histogram(
        df,
        x=column,
        nbins=20,
        title=title,
        labels={column: axis_label, 'count': 'Frequency'},
    )


# Website visits.
fig_website = _engagement_histogram(
    'Website_Visits', 'Website Visits', 'Distribution of Website Visits'
)
fig_website.show()

# Email clicks.
fig_email = _engagement_histogram(
    'Email_Clicks', 'Email Clicks', 'Distribution of Email Clicks'
)
fig_email.show()

In [12]:
# Pairwise scatter plots of the count-based engagement features, coloured by
# the target class.
#
# The target is stored as integers (0/1). Plotly Express treats a numeric
# colour column as continuous and draws a colour bar, which is misleading for
# a two-class label. Casting to string on a plotting copy makes the colouring
# discrete (one legend entry per class) without modifying `df` itself.
pair_plot_data = df.assign(
    Likely_to_Use_Service=df['Likely_to_Use_Service'].astype(str)
)

fig_pair_plot = px.scatter_matrix(
    pair_plot_data,
    dimensions=['Website_Visits', 'Email_Clicks', 'Event_Attendance', 'Social_Media_Mentions'],
    color='Likely_to_Use_Service',
    # Fix the class order so legend order and colours do not depend on which
    # class happens to appear first in the data.
    category_orders={'Likely_to_Use_Service': ['0', '1']},
    title='Pair Plot of Engagement Features',
    labels={'Likely_to_Use_Service': 'Likely to Use Service'}
)

fig_pair_plot.show()

In [14]:
from sklearn.linear_model import LogisticRegression

# Feature matrix (engagement metrics) and binary target for the intent model.
X = df[['Website_Visits', 'Email_Open_Rate', 'Email_Clicks', 'Event_Attendance', 'Social_Media_Mentions']]
y = df['Likely_to_Use_Service']

# Model on the raw features. Kept under the original names so any other cell
# that uses `model` together with `X` sees the same fit as before.
model = LogisticRegression()
model.fit(X, y)

# A raw coefficient is the effect of a one-unit change in its feature, and the
# units differ: Email_Open_Rate is a 0-1 rate while the other columns are
# counts. Ranking raw coefficients therefore compares incomparable numbers.
# Fit a second model on standardized features (zero mean, unit variance) so
# each coefficient is the effect of a one-standard-deviation change.
# A constant column has zero spread; dividing by 1 instead avoids NaNs.
feature_spread = X.std(ddof=0).replace(0, 1)
X_standardized = (X - X.mean()) / feature_spread

importance_model = LogisticRegression()
importance_model.fit(X_standardized, y)

# Signed coefficients, largest positive first: positive values push the
# prediction towards class 1, negative values towards class 0.
feature_importance = pd.DataFrame({
    'Feature': X.columns,
    'Importance': importance_model.coef_[0]
}).sort_values(by='Importance', ascending=False)

fig_feature_importance = px.bar(
    feature_importance,
    x='Feature',
    y='Importance',
    title='Feature Importance in Predicting Client Interest',
    labels={'Importance': 'Standardized Coefficient', 'Feature': 'Feature'},
    text='Importance'
)

# Print each coefficient above its bar with two decimals; hide labels that
# would have to shrink below 8pt to fit.
fig_feature_importance.update_traces(texttemplate='%{text:.2f}', textposition='outside')
fig_feature_importance.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')

fig_feature_importance.show()

In [8]:
import random
import string

def generate_company_name(rng=None):
    """Return a synthetic company name of the form "<prefix> <suffix>".

    Parameters
    ----------
    rng : random.Random, optional
        Source of randomness. When omitted, the module-level `random`
        generator is used (the original behaviour). Pass a seeded
        `random.Random` instance to get a reproducible sequence of names
        that is independent of global random state.

    Returns
    -------
    str
        One prefix and one suffix, each picked uniformly, joined by a space.
    """
    prefixes = ('Global', 'Alpha', 'Prime', 'Vertex', 'Summit', 'Pinnacle', 'Apex', 'Crest')
    suffixes = ('Capital', 'Partners', 'Investments', 'Advisors', 'Wealth', 'Management', 'Group', 'Holdings')
    # Compare against None explicitly rather than relying on truthiness of
    # the generator object.
    source = random if rng is None else rng
    return f"{source.choice(prefixes)} {source.choice(suffixes)}"

# generate_company_name() draws from Python's stdlib `random` module, which is
# not affected by np.random.seed(...). Seed it here so the company names (and
# everything that displays them) are the same on every run.
random.seed(42)

# One synthetic company name per client row.
df['Company_Name'] = [generate_company_name() for _ in range(num_clients)]

print(df.head())

   Client_ID  Website_Visits  Email_Open_Rate  Email_Clicks  Event_Attendance  \
0          1              12         0.060392             0                 1   
1          2               6         0.217676             2                 0   
2          3              11         0.423391             1                 2   
3          4              14         0.085754             4                 3   
4          5               7         0.241142             5                 0   

   Social_Media_Mentions  Likely_to_Use_Service       Company_Name  
0                      1                      1   Prime Management  
1                      6                      0    Global Advisors  
2                      8                      0  Pinnacle Advisors  
3                      1                      0       Summit Group  
4                      1                      0  Pinnacle Holdings  


In [16]:
import plotly.express as px

# Fit the model in this cell so the predictions below do not depend on
# whatever state `model` was left in by another cell.
model = LogisticRegression()
model.fit(X, y)

# Column 1 of predict_proba is the probability of the positive class
# (Likely_to_Use_Service == 1). These are in-sample predictions: the model is
# scored on the same rows it was fitted on.
df['Predicted_Probability'] = model.predict_proba(X)[:, 1]

# The target is stored as integers (0/1). Plotly Express treats a numeric
# colour column as continuous and draws a colour bar, which is misleading for
# a two-class label. Casting to string on a plotting copy gives discrete class
# colours without modifying `df` itself.
scatter_data = df.assign(
    Likely_to_Use_Service=df['Likely_to_Use_Service'].astype(str)
)

fig_scatter = px.scatter(
    scatter_data,
    x='Email_Clicks',  # Engagement metric (e.g., email clicks)
    y='Predicted_Probability',  # Likelihood of interest
    color='Likely_to_Use_Service',  # Color by target variable
    # Fix the class order so legend order and colours are stable.
    category_orders={'Likely_to_Use_Service': ['0', '1']},
    hover_data=['Company_Name', 'Website_Visits', 'Event_Attendance', 'Social_Media_Mentions'],  # Additional details
    title='Engagement vs Likelihood of Interest (Interactive Scatter Plot)',
    labels={
        'Email_Clicks': 'Email Clicks',
        'Predicted_Probability': 'Likelihood of Interest',
        'Likely_to_Use_Service': 'Likely to Use Service'
    }
)

# Larger, semi-transparent markers so overlapping points stay visible.
fig_scatter.update_traces(marker=dict(size=10, opacity=0.7))
fig_scatter.update_layout(
    xaxis=dict(title='Email Clicks', showgrid=True),
    yaxis=dict(title='Likelihood of Interest', showgrid=True),
    hovermode='closest'
)

fig_scatter.show()

In [18]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Engagement columns the reader can switch between with the dropdown.
engagement_metrics = ['Email_Clicks', 'Website_Visits', 'Event_Attendance', 'Social_Media_Mentions']
title_prefix = 'Engagement vs Likelihood of Interest'

fig_dropdown = go.Figure()

# One scatter trace per metric, all sharing the same y values. Only the first
# trace starts visible; the dropdown toggles which single trace is shown.
for position, metric in enumerate(engagement_metrics):
    fig_dropdown.add_trace(
        go.Scatter(
            x=df[metric],
            y=df['Predicted_Probability'],
            mode='markers',
            visible=(position == 0),
            marker=dict(size=10, opacity=0.7),
            hovertext=df['Company_Name'],
            name=metric
        )
    )

# One button per metric. With method='update' the first dict is applied to the
# traces and the second to the layout.
# The title is addressed through the nested `title.text` path rather than the
# bare-string `title` shorthand: the shorthand is deprecated and recent
# Plotly.js releases may not honor it (NOTE(review): confirm against the
# installed plotly version). The x-axis label is switched as well, so the axis
# always names the metric actually plotted.
dropdown_buttons = [
    dict(
        label=metric,
        method='update',
        args=[
            {'visible': [candidate == metric for candidate in engagement_metrics]},
            {
                'title.text': f'{title_prefix}: {metric}',
                'xaxis.title.text': metric,
            },
        ],
    )
    for metric in engagement_metrics
]

# Give the figure a title and axis label matching the initially visible trace,
# so it is labelled before any button is clicked.
initial_metric = engagement_metrics[0]

fig_dropdown.update_layout(
    title=dict(text=f'{title_prefix}: {initial_metric}'),
    updatemenus=[{
        'buttons': dropdown_buttons,
        'direction': 'down',
        'showactive': True,
        'x': 0.1,
        'y': 1.2
    }],
    xaxis=dict(title=dict(text=initial_metric)),
    yaxis=dict(title=dict(text='Likelihood of Interest')),
    hovermode='closest'
)

fig_dropdown.show()