# **Campaign Effectiveness Analysis**

1. Compare the effectiveness of different campaigns in terms of lead generation and conversion rate.
2. Evaluate the effectiveness of different marketing sources (Source) in generating qualified leads.


In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Render every column of a DataFrame instead of eliding the middle ones with "…".
pd.options.display.max_columns = None

In [None]:
# Load the prepared deals and ad-spend tables from the working directory.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
deals = pd.read_pickle(filepath_or_buffer='deals_df.pkl')
spend = pd.read_pickle(filepath_or_buffer='spend_df.pkl')

In [None]:
# Preview the first five rows of the deals table.
deals.head(n=5)

Unnamed: 0,Id,Deal Owner Name,Closing Date,Quality,Stage,Lost Reason,Campaign,SLA,Content,Term,Source,Payment Type,Product,Education Type,Created Time,Course duration,Months of study,Initial Amount Paid,Offer Total Amount,Contact Name,City,Level of Deutsch,_open_deal,Payment Category,Offer Category,_SLA_hours
0,5805028000056864695,Ben Hall,NaT,Unknown,New Lead,Unknown,03.07.23women,0 days 00:00:00,v16,women,Facebook Ads,Unknown,Unknown,Unknown,2024-06-21 15:30:00,0,0,,,5805028000056849495,Unknown,Unknown,True,Unknown,Unknown,0.0
1,5805028000056859489,Ulysses Adams,NaT,Unknown,New Lead,Unknown,Unknown,0 days 00:00:00,Unknown,Unknown,Organic,Unknown,Web Developer,Morning,2024-06-21 15:23:00,6,0,0.0,2000.0,5805028000056834471,Unknown,Unknown,True,No Payment,Regular Offer,0.0
2,5805028000056832357,Ulysses Adams,2024-06-21,D - Non Target,Lost,Non target,engwien_AT,0 days 00:26:43,b1-at,21_06_2024,Telegram posts,Unknown,Unknown,Unknown,2024-06-21 14:45:00,0,0,,,5805028000056854421,Unknown,Unknown,False,Unknown,Unknown,0.445278
3,5805028000056824246,Eva Kent,2024-06-21,E - Non Qualified,Lost,Invalid number,04.07.23recentlymoved_DE,0 days 01:00:03.999999999,bloggersvideo14com,recentlymoved,Facebook Ads,Unknown,Unknown,Unknown,2024-06-21 13:32:00,0,0,,,5805028000056889351,Unknown,Unknown,False,Unknown,Unknown,1.001111
4,5805028000056873292,Ben Hall,2024-06-21,D - Non Target,Lost,Non target,discovery_DE,0 days 00:53:12.000000001,website,Unknown,Google Ads,Unknown,Unknown,Unknown,2024-06-21 13:21:00,0,0,,,5805028000056876176,Unknown,Unknown,False,Unknown,Unknown,0.886667


In [None]:
# Preview the first five rows of the ad-spend table.
spend.head(n=5)

Unnamed: 0,Date,Source,Campaign,Impressions,Spend,Clicks,AdGroup,Ad
0,2023-07-03,Google Ads,gen_analyst_DE,6,0.0,0,Unknown,Unknown
1,2023-07-03,Google Ads,performancemax_eng_DE,4,0.01,1,Unknown,Unknown
2,2023-07-03,Facebook Ads,Unknown,0,0.0,0,Unknown,Unknown
3,2023-07-03,Google Ads,Unknown,0,0.0,0,Unknown,Unknown
4,2023-07-03,CRM,Unknown,0,0.0,0,Unknown,Unknown


### **1. Lead Generation & Conversion by Campaigns**

In [None]:
# Parameters for the campaign section
TOP_N = 10                                # how many top campaigns (by lead volume) to chart
EXCLUDE_UNKNOWN = EXCLUDE_OFFLINE = True  # drop rows labelled 'Unknown' / 'Offline'

In [None]:
# Work on copies so the loaded frames stay untouched, then apply the same
# exclusions to deals and spend so both sides describe the same campaigns.
d, s = deals.copy(), spend.copy()

if EXCLUDE_UNKNOWN:
    # Keep a row only when both its campaign and its source are known.
    d = d.loc[d['Campaign'].astype(str).ne('Unknown') & d['Source'].astype(str).ne('Unknown')]
    s = s.loc[s['Campaign'].astype(str).ne('Unknown') & s['Source'].astype(str).ne('Unknown')]

if EXCLUDE_OFFLINE:
    # Drop rows whose source is 'Offline'.
    d = d.loc[d['Source'].astype(str).ne('Offline')]
    s = s.loc[s['Source'].astype(str).ne('Offline')]

In [None]:
# Per-campaign lead funnel: all leads, leads that reached 'Payment Done', and lost leads.
camp_funnel_spec = {
    'total_leads': ('Id', 'count'),
    'completed_leads': ('Stage', lambda stages: (stages == 'Payment Done').sum()),
    'failed_leads': ('Stage', lambda stages: (stages == 'Lost').sum()),
}
camp_deals = d.groupby('Campaign', observed=False).agg(**camp_funnel_spec).reset_index()

# Conversion = paid leads / all leads; campaigns with no leads get NaN instead of a division by zero.
camp_has_leads = camp_deals['total_leads'] > 0
camp_deals['conversion_rate'] = np.where(
    camp_has_leads,
    camp_deals['completed_leads'] / camp_deals['total_leads'],
    np.nan)

In [None]:
# Media totals per campaign (clicks, impressions, spend) — optional, may be useful later.
camp_spend = (
    s.groupby('Campaign', observed=False)
     .agg(**{metric: (metric, 'sum') for metric in ('Clicks', 'Impressions', 'Spend')})
     .reset_index()
)
# Left join: every campaign present in the deals aggregate is kept; campaigns
# without spend rows get NaN media metrics.
camp = pd.merge(camp_deals, camp_spend, on='Campaign', how='left')

In [None]:
# Bubble chart of campaign effectiveness:
#   x = number of leads, y = conversion rate in %, bubble size = successful deals.
bubble_df = camp.copy()
bubble_df['conv_pct'] = (bubble_df['conversion_rate'] * 100).astype(float)

# The TOP_N campaigns by lead volume each get their own colour; all others share
# one grey "Other campaigns" label. TOP_N comes from the parameters cell.
# (The variable keeps its historical name `top10`; its length follows TOP_N.)
top10 = bubble_df.nlargest(TOP_N, 'total_leads')["Campaign"].tolist()

# Cast to object first: if 'Campaign' is categorical (TODO confirm), the extra
# "Other campaigns" label is not one of its categories.
campaign_names = bubble_df['Campaign'].astype(object)
bubble_df['color_label'] = campaign_names.where(campaign_names.isin(top10), "Other campaigns")

modern_colors = [
    "#2E2E2E",  # graphite
    "#6C757D",  # steel grey
    "#ADB5BD",  # silver
    "#FFB400",  # golden accent
    "#F94144",  # red accent
    "#577590",  # cool blue
    "#90BE6D",  # green accent
    "#F3722C",  # warm orange
    "#277DA1",  # turquoise accent
    "#FF61A6"   # pink accent
    ]

# Cycle through the palette so a TOP_N larger than the palette cannot raise IndexError.
color_map = {name: modern_colors[i % len(modern_colors)] for i, name in enumerate(top10)}
color_map["Other campaigns"] = "#E0E0E0"  # light grey for everything outside the top

fig_bubble = px.scatter(
    bubble_df,
    x='total_leads',
    y='conv_pct',
    size='completed_leads',
    color='color_label',
    hover_data=['Campaign','total_leads','completed_leads','failed_leads'],
    size_max=110,
    title="Campaign Effectiveness",
    labels={'total_leads': 'Total Leads', 'conv_pct': 'Conversion Rate (%)'},
    color_discrete_map=color_map,
    category_orders={"color_label": top10 + ["Other campaigns"]})

# Dark outline around every bubble; no text drawn on the markers.
fig_bubble.update_traces(marker=dict(line=dict(width=1.5, color="#2E2E2E"), opacity=0.9), text=None)

fig_bubble.update_layout(
    template="plotly_white",
    legend_title_text=f"Top-{TOP_N} campaigns",
    # Vertical legend placed to the right of the plot area, vertically centred.
    legend=dict(
        orientation="v",
        y=0.5, yanchor="middle",
        x=1.02, xanchor="left"),
    margin=dict(l=70, r=180, t=60, b=60),  # wide right margin leaves room for the legend
    xaxis_title="Total Leads",
    yaxis_title="Conversion Rate (%)")

fig_bubble.show()


Ось X — количество лидов.

Ось Y — коэффициент конверсии (%).

Размер круга — число успешных сделок.

Цвет — кампания из топ-10 по числу лидов (легенда справа); остальные кампании показаны серым.

**Выводы:**

- Кампания performancemax_digitalmarkt_ru_DE — лидер по объёму лидов (>2600), но конверсия низкая (4.2%). Это «лидогенератор», но с большими потерями.

- Кампании youtube_shorts_DE и 12.07.2023wide_DE также показали высокий объём (около 1600 лидов каждая), но конверсия ещё ниже (3–3.2%).

- Есть кампании с малым объёмом, но более высокой конверсией (>5%), например brand_search_eng_DE, что указывает на более точное попадание в аудиторию.

**Рекомендации:**

- Масштабировать кампании с высокой конверсией даже при меньших объёмах (ниша, точное попадание).

- Оптимизировать лидогенераторы с низкой конверсией: улучшить сегментацию, корректировать креативы и работу с лидами.

- Убрать/заморозить кампании с крайне низкой конверсией (<3%) и низким объёмом.

In [None]:
# Rank campaigns by lead volume, largest first, with a fresh 0..n-1 index.
camp_sorted = camp.sort_values(by='total_leads', ascending=False)
camp_sorted = camp_sorted.reset_index(drop=True)

# Conversion in percent with one decimal; a missing rate (campaign without leads) becomes 0.
camp_sorted['conv_pct'] = camp_sorted['conversion_rate'].mul(100).round(1).fillna(0)

In [None]:
# Top-N campaigns: overlaid bars (total leads vs. successful deals) plus a
# conversion-rate line on a secondary axis, all with value labels.

palette = {
    "yellow": "#FFC300",    # bright yellow — Successful Deals bars
    "green": "#2CA02C",     # Conversion Rate line
    "darkgrey": "#3A3A3A",  # graphite grey — Total Leads bars
    "line":   "#B0B0B0",    # bar outlines, marker outlines and axis lines
    "char":   "#2E2E2E",    # text
    "bg":     "rgba(0,0,0,0)"  # transparent background
}

# TOP_N comes from the parameters cell. It is deliberately not reassigned here,
# so changing it in that one place also changes this chart.
top_camp = camp_sorted.head(TOP_N).copy()
# A missing conversion rate (campaign without leads) is plotted as 0 rather than
# producing a 'nan%' label — same convention as the other conv_pct columns.
top_camp['conv_pct'] = (top_camp['conversion_rate'] * 100).round(1).fillna(0)

# Axis ceilings with headroom so the labels drawn above bars/points are not cut off.
y1_max = float(max(top_camp['total_leads'].max(), top_camp['completed_leads'].max())) * 1.15
y2_max = float(max(5.0, top_camp['conv_pct'].max())) * 1.20  # secondary axis spans at least 0–6 %

fig_top = go.Figure()

# 1) Total Leads — faint bars in the background
fig_top.add_trace(go.Bar(
    x=top_camp['Campaign'],
    y=top_camp['total_leads'],
    name='Total Leads',
    marker=dict(color=palette["darkgrey"], line=dict(color=palette["line"], width=1.0)),
    opacity=0.4,
    text=top_camp['total_leads'],
    textposition='outside',
    textfont=dict(size=10, color=palette["char"]),
    cliponaxis=False))

# 2) Successful Deals — drawn on top of the total-leads bars (barmode='overlay' below)
fig_top.add_trace(go.Bar(
    x=top_camp['Campaign'],
    y=top_camp['completed_leads'],
    name='Successful Deals',
    marker=dict(color=palette["yellow"], line=dict(color=palette["line"], width=1.2)),
    opacity=0.95,
    text=top_camp['completed_leads'],
    textposition='outside',
    textfont=dict(size=10, color=palette["char"]),
    cliponaxis=False))

# 3) Conversion Rate (%) — line on the secondary (right-hand) y-axis
fig_top.add_trace(go.Scatter(
    x=top_camp['Campaign'],
    y=top_camp['conv_pct'],
    name='Conversion Rate (%)',
    mode='lines+markers+text',
    line=dict(color=palette["green"], width=2),
    marker=dict(size=8, line=dict(color=palette["line"], width=1)),
    text=top_camp['conv_pct'].astype(str) + '%',
    textposition='top center',
    textfont=dict(size=11, color=palette["char"]),
    yaxis='y2'))

fig_top.update_layout(
    title=dict(text='Top Campaigns: Leads, Successful Deals, Conversion', x=0.0,
               font=dict(size=18, color=palette["char"])),
    barmode='overlay',
    xaxis=dict(
        title='Campaign',
        tickangle=45,
        showgrid=False, zeroline=False,
        linecolor=palette["line"], tickcolor=palette["line"]),
    yaxis=dict(
        title='Leads',
        range=[0, y1_max],
        showgrid=False, zeroline=False,
        linecolor=palette["line"], tickcolor=palette["line"]),
    yaxis2=dict(
        title='Conversion Rate (%)',
        overlaying='y',
        side='right',
        range=[0, y2_max],
        showgrid=False, zeroline=False),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
    font=dict(family="Arial", color=palette["char"]),
    paper_bgcolor=palette["bg"],
    plot_bgcolor=palette["bg"],
    margin=dict(l=70, r=70, t=70, b=90))

fig_top.show()


Серые бары — общее число лидов.

Желтые бары (поверх) — успешные сделки.

Зеленая линия — конверсия (%).

Некоторые крупные кампании работают как генераторы трафика, но почти не конвертируют.

In [None]:
# Auto-insights (printed further below)
# Top-3 campaigns by lead volume; conversion shown in percent with one decimal.
top_by_leads = (
    camp_sorted.loc[:, ['Campaign', 'total_leads', 'completed_leads', 'conversion_rate']]
    .head(3)
    .copy()
)
top_by_leads['conversion_rate'] = top_by_leads['conversion_rate'].mul(100).round(1)

In [None]:
# Top-3 campaigns by conversion, restricted to campaigns with at least
# `min_volume` leads (to filter out random spikes).
min_volume = 100
top_by_conv = (
    camp_sorted
    .loc[lambda frame: frame['total_leads'] >= min_volume]
    .sort_values(by='conversion_rate', ascending=False)
    .iloc[:3]
    .loc[:, ['Campaign', 'total_leads', 'completed_leads', 'conversion_rate']]
    .copy()
)
top_by_conv['conversion_rate'] = top_by_conv['conversion_rate'].mul(100).round(1)

In [None]:
# Lead magnets with low conversion: among the 10 campaigns with the most leads,
# take the 3 with the lowest conversion rate.
lead_magnets_low = (
    camp_sorted
    .iloc[:10]
    .sort_values(by='conversion_rate')
    .iloc[:3]
    .loc[:, ['Campaign', 'total_leads', 'completed_leads', 'conversion_rate']]
    .copy()
)
lead_magnets_low['conversion_rate'] = lead_magnets_low['conversion_rate'].mul(100).round(1)

In [None]:
# Print each insight table under its heading, without the DataFrame index.
print("\n=== Campaigns — Key Insights ===")
for heading, table in (
    ("\nTop-3 by Total Leads:", top_by_leads),
    (f"\nTop-3 by Conversion (volume ≥ {min_volume} leads):", top_by_conv),
    ("\nLead-magnets with low conversion (within Top-10 by leads):", lead_magnets_low),
):
    print(heading)
    print(table.to_string(index=False))


=== Campaigns — Key Insights ===

Top-3 by Total Leads:
                         Campaign  total_leads  completed_leads  conversion_rate
performancemax_digitalmarkt_ru_DE         2653              112              4.2
                youtube_shorts_DE         1635               53              3.2
                12.07.2023wide_DE         1575               48              3.0

Top-3 by Conversion (volume ≥ 100 leads):
           Campaign  total_leads  completed_leads  conversion_rate
brand_search_eng_DE          168               15              8.9
    02.07.23wide_DE          975               52              5.3
     07.07.23LAL_DE          542               28              5.2

Lead-magnets with low conversion (within Top-10 by leads):
              Campaign  total_leads  completed_leads  conversion_rate
     12.07.2023wide_DE         1575               48              3.0
     youtube_shorts_DE         1635               53              3.2
24.09.23retargeting_DE          479   

**Рекомендации:**

- Поддерживать баланс: часть бюджета направлять на широкие кампании ради объёма, часть — на точные ради качества.

- Проводить A/B тесты креативов для крупных кампаний с низкой конверсией.

### **2. Sources — Effectiveness (Leads, Conversion, CTR/CPC)**

In [None]:
# Parameters for the sources section
TOP_M = 10                                   # how many top sources (by lead volume) to chart
EXCLUDE_UNKNOWN, EXCLUDE_OFFLINE = True, True  # drop sources labelled 'Unknown' / 'Offline'

In [None]:
# Start again from fresh copies of the loaded tables; in this section only the
# source (not the campaign) is used for filtering.
d, s = deals.copy(), spend.copy()

if EXCLUDE_UNKNOWN:
    # Drop rows whose source is 'Unknown'.
    d = d.loc[d['Source'].astype(str).ne('Unknown')]
    s = s.loc[s['Source'].astype(str).ne('Unknown')]

if EXCLUDE_OFFLINE:
    # Drop rows whose source is 'Offline'.
    d = d.loc[d['Source'].astype(str).ne('Offline')]
    s = s.loc[s['Source'].astype(str).ne('Offline')]

In [None]:
# 1) Lead flow per source: all leads, leads that reached 'Payment Done', and lost leads.
source_funnel_spec = {
    'total_leads': ('Id', 'count'),
    'completed_leads': ('Stage', lambda stages: (stages == 'Payment Done').sum()),
    'failed_leads': ('Stage', lambda stages: (stages == 'Lost').sum()),
}
source_deals = d.groupby('Source', observed=False).agg(**source_funnel_spec).reset_index()

# Conversion = paid leads / all leads; sources with no leads get NaN instead of a division by zero.
source_has_leads = source_deals['total_leads'] > 0
source_deals['conversion_rate'] = np.where(
    source_has_leads,
    source_deals['completed_leads'] / source_deals['total_leads'],
    np.nan)

In [None]:
# 2) Optional: media totals (impressions, clicks, spend) per source from the spend table.
source_spend = (
    s.groupby('Source', observed=False)
     .agg(**{metric: (metric, 'sum') for metric in ('Impressions', 'Clicks', 'Spend')})
     .reset_index()
)

# Left join: every source present in the deals aggregate is kept; sources
# without spend rows get NaN media metrics.
sources = pd.merge(source_deals, source_spend, on='Source', how='left')

In [None]:
# CTR = clicks per impression, in percent; CPC = spend per click.
# Both are NaN when the denominator is zero or missing.
has_impressions = sources['Impressions'] > 0
has_clicks = sources['Clicks'] > 0
sources['CTR'] = np.where(has_impressions, sources['Clicks'] / sources['Impressions'] * 100, np.nan)
sources['CPC'] = np.where(has_clicks, sources['Spend'] / sources['Clicks'], np.nan)

In [None]:
# Rank sources by lead volume, largest first, with a fresh 0..n-1 index.
sources_sorted = sources.sort_values(by='total_leads', ascending=False)
sources_sorted = sources_sorted.reset_index(drop=True)

In [None]:
# The TOP_M largest sources, as an independent copy used by the chart below.
top_src = sources_sorted.iloc[:TOP_M].copy()
# Conversion in percent with one decimal; a missing rate becomes 0.
top_src['conv_pct'] = top_src['conversion_rate'].mul(100).round(1).fillna(0)

In [None]:
# Colours shared by all elements of the sources chart.
palette = dict(
    darkgrey="#3A3A3A",  # Total Leads bars
    yellow="#FFC300",    # Successful Deals bars
    green="#2CA02C",     # Conversion Rate line
    line="#B0B0B0",      # bar outlines, marker outlines and axis lines
    char="#2E2E2E",      # text
    bg="rgba(0,0,0,0)",  # transparent background
)


def _add_source_bar(figure, column, label, fill, outline_width, alpha):
    """Add one bar series for ``top_src[column]`` with its values printed above the bars."""
    figure.add_trace(go.Bar(
        x=top_src['Source'],
        y=top_src[column],
        name=label,
        marker={'color': fill, 'line': {'color': palette["line"], 'width': outline_width}},
        opacity=alpha,
        text=top_src[column],
        textposition='outside',
        textfont={'size': 10, 'color': palette["char"]},
        cliponaxis=False))


fig_src = go.Figure()

# 1) Total Leads — faint bars in the background
_add_source_bar(fig_src, 'total_leads', 'Total Leads', palette["darkgrey"], 1.0, 0.4)

# 2) Successful Deals — drawn on top of the total-leads bars (barmode='overlay' below)
_add_source_bar(fig_src, 'completed_leads', 'Successful Deals', palette["yellow"], 1.2, 0.95)

# 3) Conversion Rate (%) — line on the secondary axis; the axis spans at least 0–6.25 %
y2_max = max(5.0, float(top_src['conv_pct'].max())) * 1.25
fig_src.add_trace(go.Scatter(
    x=top_src['Source'],
    y=top_src['conv_pct'],
    name='Conversion Rate (%)',
    mode='lines+markers+text',
    line={'color': palette["green"], 'width': 2},
    marker={'size': 7, 'line': {'color': palette["line"], 'width': 1}},
    text=top_src['conv_pct'].astype(str) + '%',
    textposition='top center',
    textfont={'size': 11, 'color': palette["char"]},
    yaxis='y2'))

fig_src.update_layout(
    title={
        'text': 'Top Sources: Leads, Successful Deals, Conversion',
        'x': 0.0,
        'font': {'size': 18, 'color': palette["char"]},
    },
    barmode='overlay',
    xaxis={
        'title': 'Source',
        'tickangle': 45,
        'showgrid': False, 'zeroline': False,
        'linecolor': palette["line"], 'tickcolor': palette["line"],
    },
    yaxis={
        'title': 'Leads',
        'showgrid': False, 'zeroline': False,
        'linecolor': palette["line"], 'tickcolor': palette["line"],
    },
    yaxis2={
        'title': 'Conversion Rate (%)',
        'overlaying': 'y',
        'side': 'right',
        'range': [0, y2_max],
        'showgrid': False, 'zeroline': False,
    },
    legend={'orientation': "h", 'yanchor': "bottom", 'y': 1.02, 'xanchor': "left", 'x': 0},
    font={'family': "Arial", 'color': palette["char"]},
    paper_bgcolor=palette["bg"],
    plot_bgcolor=palette["bg"],
    margin={'l': 70, 'r': 70, 't': 70, 'b': 90})

fig_src.show()


**Выводы:**

- **Facebook Ads** (4850 лидов, 202 успешных, конверсия 4.2%) и **Google Ads** (4226 лидов, 173 успешных, 4.1%) — крупнейшие по объёму.

- **Organic** — меньше лидов (2590), но конверсия выше (5.7%).

- **Webinar** — небольшой объём (379), но самая высокая конверсия (6.9%).

- **SMM** — тоже неплохая конверсия (5.3%) при объёме ~1700 лидов.

**Рекомендации:**

- Сохранять основной бюджет на Facebook/Google, но усиливать **органику и вебинары** — они дают более качественных клиентов.

- Инвестировать в **SMM**, так как у него оптимальный баланс объёма и конверсии.

- Источники с конверсией <3% анализировать и при необходимости отключать.

In [None]:

# Text labels are drawn only for sources with more than `label_threshold` leads;
# every other source gets an empty label.
label_threshold = 300
sources_sorted['label'] = np.where(
    sources_sorted['total_leads'].gt(label_threshold),
    sources_sorted['Source'],
    "")

# Conversion in percent with one decimal; a missing rate is plotted as 0.
sources_sorted['conv_pct'] = sources_sorted['conversion_rate'].mul(100).round(1).fillna(0)

# Continuous colour scale running from yellow to bright orange.
yellow_orange_scale = [
    [0.0, "#FFD93D"],  # bright yellow
    [0.5, "#FFA500"],  # saturated orange
    [1.0, "#FF6B00"],  # dark, vivid orange
]

# x = lead volume, y = conversion %, bubble size = successful deals, colour = conversion %.
fig_bubble_src = px.scatter(
    data_frame=sources_sorted,
    x='total_leads',
    y='conv_pct',
    size='completed_leads',
    color='conv_pct',
    text='label',
    hover_data=['Source', 'total_leads', 'completed_leads', 'failed_leads'],
    color_continuous_scale=yellow_orange_scale,
    size_max=80,
    title='Source Effectiveness (Bubble Plot)',
    labels={'total_leads': 'Total Leads', 'conv_pct': 'Conversion Rate (%)'},
)

# Labels sit above the bubbles; every bubble gets a dark outline.
fig_bubble_src.update_traces(
    textposition='top center',
    textfont={'size': 11, 'color': "#2E2E2E"},
    marker={'line': {'width': 1.5, 'color': "#2E2E2E"}, 'opacity': 0.9},
)

fig_bubble_src.update_layout(
    template='plotly_white',
    paper_bgcolor="rgba(0,0,0,0)",  # transparent backgrounds
    plot_bgcolor="rgba(0,0,0,0)",
    margin={'l': 70, 'r': 40, 't': 60, 'b': 60},
    xaxis_title="Total Leads",
    yaxis_title="Conversion Rate (%)",
)

fig_bubble_src.show()


In [None]:
# Key insights for sources, printed as plain-text tables.

# Minimum number of leads a source needs before its conversion rate is ranked.
# Defined once so the filter and the printed heading cannot drift apart.
MIN_SOURCE_VOLUME = 300
insight_columns = ['Source', 'total_leads', 'completed_leads', 'conversion_rate']

# Top-3 by number of successful deals.
# .copy() makes the selection an independent frame before a column is overwritten
# (same pattern as the campaign insight tables).
top3_success = sources_sorted.nlargest(3, 'completed_leads')[insight_columns].copy()
top3_success['conversion_rate'] = (top3_success['conversion_rate']*100).round(1)

# Top-3 by conversion among sources with volume ≥ MIN_SOURCE_VOLUME leads
mask_vol = sources_sorted['total_leads'] >= MIN_SOURCE_VOLUME
top3_conv = (sources_sorted[mask_vol]
             .sort_values('conversion_rate', ascending=False)
             .head(3)[insight_columns]
             .copy())
top3_conv['conversion_rate'] = (top3_conv['conversion_rate']*100).round(1)

print("=== Sources — Key Insights ===")
print("\nTop-3 by Successful Deals:")
print(top3_success.to_string(index=False))

print(f"\nTop-3 by Conversion (volume ≥ {MIN_SOURCE_VOLUME} leads):")
print(top3_conv.to_string(index=False))


=== Sources — Key Insights ===

Top-3 by Successful Deals:
      Source  total_leads  completed_leads  conversion_rate
Facebook Ads         4850              202              4.2
  Google Ads         4226              173              4.1
     Organic         2590              147              5.7

Top-3 by Conversion (volume ≥ 300 leads):
 Source  total_leads  completed_leads  conversion_rate
Webinar          379               26              6.9
Organic         2590              147              5.7
    SMM         1730               91              5.3


## **Итоговые рекомендации**

**Кампании:**

- Оптимизировать крупные низкоконверсионные кампании.

- Масштабировать точечные кампании с >5% конверсии.

- Постоянно мониторить и пересматривать стратегию «широкие vs нишевые».

**Источники:**

- Основные лидогенераторы — Facebook и Google.

- Лидеры по качеству — Organic, Webinar, SMM.

- Оптимизировать бюджет в пользу более эффективных источников.