In [1]:
!pip show marketing-attribution-models

Name: marketing-attribution-models
Version: 1.0.8
Summary: Metodos de Atribuição de Marketing, forkeado da versão de Andre Tocci (via github da DP6).
Home-page: https://github.com/lgabs/Marketing-Attribution-Models
Author: Luan Fernandes
Author-email: luangabriel70@gmail.com
License: Apache License 2.0
Location: /home/luanfernandes/projects/buser/modelo-de-atribuicao/Marketing-Attribution-Models
Requires: matplotlib, numpy, pandas, seaborn
Required-by: 


In [2]:
import pytest

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from marketing_attribution_models import MAM

In [3]:
# Module-level scratch slot: holds whatever was last passed to get_intermediate.
_df = None


def get_intermediate(df):
    """Store ``df`` in the module-level ``_df`` and return it unchanged.

    Written to be dropped into a ``.pipe(...)`` step so that the frame at
    that point of a method chain can be referenced by name afterwards.
    """
    globals()["_df"] = df
    return df

In [4]:
# Load the test fixture and normalise the column dtypes.
# The callables given to `assign` receive the frame flowing through the chain,
# so each cast is derived from that frame instead of from the module-level
# `_df` scratch variable (which made the result depend on hidden state).
df = (
    pd.read_csv("../data/test_dataset.csv")
    # Retained only so `_df` keeps exposing the raw frame for ad-hoc inspection.
    .pipe(get_intermediate)
    .assign(
        event_time=lambda raw: pd.to_datetime(raw.event_time),
        is_conversion=lambda raw: raw.is_conversion.astype("bool"),
        session_id=lambda raw: raw.session_id.astype("str"),
    )
)

In [5]:
# Display the prepared session-level frame (one row per session).
df

Unnamed: 0,user_pseudo_id,session_id,event_time,user_id,is_conversion,source_medium
0,A,1,2021-07-20 15:00:00,,False,seo
1,A,2,2022-01-20 15:00:00,,False,direct
2,A,3,2022-01-20 16:00:00,,False,google_ads
3,A,4,2022-01-20 17:00:00,A1,True,direct
4,B,5,2022-01-21 17:00:00,,False,seo
5,B,6,2022-01-21 20:00:00,,False,direct
6,C,7,2022-02-21 21:00:00,,False,direct
7,C,8,2022-02-21 22:00:00,,False,crm
8,C,9,2022-02-21 23:00:00,C1,True,direct
9,D,10,2022-02-21 23:01:00,,False,seo


In [6]:
# Check row count, null counts and the dtypes produced by the casts in the load cell.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16 entries, 0 to 15
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   user_pseudo_id  16 non-null     object        
 1   session_id      16 non-null     object        
 2   event_time      16 non-null     datetime64[ns]
 3   user_id         4 non-null      object        
 4   is_conversion   16 non-null     bool          
 5   source_medium   16 non-null     object        
dtypes: bool(1), datetime64[ns](1), object(4)
memory usage: 784.0+ bytes


## Create Model

In [7]:
# Journey-building options, gathered in one mapping and unpacked into MAM.
journey_settings = {
    "attribution_window": 30,
    "channels_colname": "source_medium",
    "group_channels": True,
    "group_channels_by_id_list": ["user_pseudo_id"],
    "group_timestamp_colname": "event_time",
    "journey_with_conv_colname": "is_conversion",
    "create_journey_id_based_on_conversion": True,
}
model = MAM(df, **journey_settings)

# Show the journey-level frame the model built from the session rows.
model.as_pd_dataframe()

Unnamed: 0,journey_id,journey_rnk,user_pseudo_id,channels_agg,sessions_agg,converted_agg,conversion_value,conversion_time,session_id,time_till_conv_agg
0,id:A_J:0,0,A,direct > google_ads > direct,"[2, 3, 4]",True,1,2022-01-20 17:00:00,4.0,2.0 > 1.0 > 0.0
1,id:B_J:0,0,B,seo > direct,"[5, 6]",False,0,NaT,,nan > nan
2,id:C_J:0,0,C,direct > crm > direct,"[7, 8, 9]",True,1,2022-02-21 23:00:00,9.0,2.0 > 1.0 > 0.0
3,id:D_J:0,0,D,seo > direct > direct,"[10, 11, 12]",True,1,2022-02-21 23:21:00,12.0,0.3333 > 0.1667 > 0.0
4,id:D_J:1,1,D,direct > direct > seo > direct,"[13, 14, 15, 16]",True,1,2022-02-27 03:00:00,16.0,122.0 > 2.0 > 1.0 > 0.0


In [8]:
# Session ids grouped into each journey, keyed by the journey's row index.
model.DataFrame.sessions_agg.to_dict()

{0: ['2', '3', '4'],
 1: ['5', '6'],
 2: ['7', '8', '9'],
 3: ['10', '11', '12'],
 4: ['13', '14', '15', '16']}

In [9]:
# First click: element 0 of the result holds one weight list per journey.
# In the output, the first touchpoint of each converted journey gets all the
# credit and the non-converted journey (row 1) gets zeros.
model.attribution_first_click()[0].to_dict()

{0: [1, 0, 0], 1: [0, 0], 2: [1, 0, 0], 3: [1, 0, 0], 4: [1, 0, 0, 0]}

In [10]:
# Last click: element 0 of the result holds one weight list per journey.
# In the output, the final touchpoint of each converted journey gets all the
# credit and the non-converted journey (row 1) gets zeros.
model.attribution_last_click()[0].to_dict()

{0: [0, 0, 1], 1: [0, 0], 2: [0, 0, 1], 3: [0, 0, 1], 4: [0, 0, 0, 1]}

In [11]:
# Last click ignoring "direct": in the output, the credit goes to the last
# touchpoint of each converted journey whose channel is not "direct".
model.attribution_last_click_non(but_not_this_channel="direct")[0].to_dict()

{0: [0, 1, 0], 1: [0, 0], 2: [0, 1, 0], 3: [1, 0, 0], 4: [0, 0, 1, 0]}

In [12]:
# Linear: in the output, credit is split equally across the touchpoints of
# each converted journey; the non-converted journey (row 1) gets zeros.
model.attribution_linear()[0].to_dict()

{0: [0.3333333333333333, 0.3333333333333333, 0.3333333333333333],
 1: [0.0, 0.0],
 2: [0.3333333333333333, 0.3333333333333333, 0.3333333333333333],
 3: [0.3333333333333333, 0.3333333333333333, 0.3333333333333333],
 4: [0.25, 0.25, 0.25, 0.25]}

In [13]:
# Time decay: element 0 of the result is a Series with one weight list per journey.
# NOTE(review): the non-converted journey (row 1) comes back as [nan, nan] here,
# whereas the heuristic models above return zeros for it.
# NOTE(review): the weights of row 4 are consistent with credit being halved
# (decay_over_time=0.5) for every full 24 units of time_till_conv_agg;
# presumably `frequency` is expressed in hours. TODO confirm against the library.
model.attribution_time_decay(decay_over_time=0.5, frequency=24)[0]

0    [0.3333333333333333, 0.3333333333333333, 0.333...
1                                           [nan, nan]
2    [0.3333333333333333, 0.3333333333333333, 0.333...
3    [0.3333333333333333, 0.3333333333333333, 0.333...
4    [0.010309278350515464, 0.32989690721649484, 0....
dtype: object

In [14]:
# Rebuild the model from the same session data and options, then run the
# Markov attribution on that fresh instance.
markov_settings = dict(
    attribution_window=30,
    channels_colname="source_medium",
    group_channels=True,
    group_channels_by_id_list=["user_pseudo_id"],
    group_timestamp_colname="event_time",
    journey_with_conv_colname="is_conversion",
    create_journey_id_based_on_conversion=True,
)
model = MAM(df, **markov_settings)

attribution_markov = model.attribution_markov()

In [15]:
# Element 0 of the Markov result: one list of per-touchpoint weights per journey.
attribution_markov[0].to_dict()

{0: [0.46153846153846134, 0.07692307692307729, 0.46153846153846134],
 1: [0.3333333333333335, 0.6666666666666665],
 2: [0.46153846153846134, 0.07692307692307729, 0.46153846153846134],
 3: [0.2000000000000001, 0.3999999999999999, 0.3999999999999999],
 4: [0.28571428571428564,
  0.28571428571428564,
  0.1428571428571429,
  0.28571428571428564]}

In [16]:
# Element 2 of the Markov result: the state-to-state transition matrix, which
# includes the (inicio), (null) and (conversion) states alongside the channels.
attribution_markov[2]

Unnamed: 0,(inicio),crm,google_ads,seo,direct,(null),(conversion)
(inicio),0.0,0.0,0.0,0.4,0.6,0.0,0.0
crm,0.0,0.0,0.0,0.0,1.0,0.0,0.0
google_ads,0.0,0.0,0.0,0.0,1.0,0.0,0.0
seo,0.0,0.0,0.0,0.0,1.0,0.0,0.0
direct,0.0,0.125,0.125,0.125,0.0,0.125,0.5
(null),0.0,0.0,0.0,0.0,0.0,1.0,0.0
(conversion),0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [17]:
# Element 3 of the Markov result: the removal effect computed for each channel.
attribution_markov[3]

Unnamed: 0,removal_effect
crm,0.166667
google_ads,0.166667
seo,0.5
direct,1.0


In [18]:
# Second, independent model built from the same session data and options.
self_loop_settings = {
    "attribution_window": 30,
    "channels_colname": "source_medium",
    "group_channels": True,
    "group_channels_by_id_list": ["user_pseudo_id"],
    "group_timestamp_colname": "event_time",
    "journey_with_conv_colname": "is_conversion",
    "create_journey_id_based_on_conversion": True,
}
model2 = MAM(df, **self_loop_settings)

In [19]:
# Markov attribution with transition_to_same_state=True; element 0 of the
# result holds one list of per-touchpoint weights per journey.
attribution_markov2 = model2.attribution_markov(transition_to_same_state=True)
attribution_markov2[0].to_dict()

{0: [0.4615384615384619, 0.07692307692307636, 0.4615384615384619],
 1: [0.3333333333333331, 0.666666666666667],
 2: [0.4615384615384619, 0.07692307692307636, 0.4615384615384619],
 3: [0.19999999999999982, 0.40000000000000013, 0.40000000000000013],
 4: [0.28571428571428575,
  0.28571428571428575,
  0.14285714285714268,
  0.28571428571428575]}

In [20]:
# Element 2: the transition matrix for the transition_to_same_state=True run.
# Unlike the default run, the direct -> direct entry is non-zero here.
attribution_markov2[2]

Unnamed: 0,(inicio),crm,google_ads,seo,direct,(null),(conversion)
(inicio),0.0,0.0,0.0,0.4,0.6,0.0,0.0
crm,0.0,0.0,0.0,0.0,1.0,0.0,0.0
google_ads,0.0,0.0,0.0,0.0,1.0,0.0,0.0
seo,0.0,0.0,0.0,0.0,1.0,0.0,0.0
direct,0.0,0.1,0.1,0.1,0.2,0.1,0.4
(null),0.0,0.0,0.0,0.0,0.0,1.0,0.0
(conversion),0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [21]:
# Element 3: per-channel removal effect for the transition_to_same_state=True run.
attribution_markov2[3]

Unnamed: 0,removal_effect
crm,0.166667
google_ads,0.166667
seo,0.5
direct,1.0


In [22]:
# The model's journey frame now carries an `attribution_markov_algorithmic`
# column holding the Markov weights computed on `model`.
model.DataFrame

Unnamed: 0,journey_id,journey_rnk,user_pseudo_id,channels_agg,sessions_agg,converted_agg,conversion_value,conversion_time,session_id,time_till_conv_agg,attribution_markov_algorithmic
0,id:A_J:0,0,A,direct > google_ads > direct,"[2, 3, 4]",True,1,2022-01-20 17:00:00,4.0,2.0 > 1.0 > 0.0,0.4615 > 0.0769 > 0.4615
1,id:B_J:0,0,B,seo > direct,"[5, 6]",False,0,NaT,,nan > nan,0.3333 > 0.6667
2,id:C_J:0,0,C,direct > crm > direct,"[7, 8, 9]",True,1,2022-02-21 23:00:00,9.0,2.0 > 1.0 > 0.0,0.4615 > 0.0769 > 0.4615
3,id:D_J:0,0,D,seo > direct > direct,"[10, 11, 12]",True,1,2022-02-21 23:21:00,12.0,0.3333 > 0.1667 > 0.0,0.2 > 0.4 > 0.4
4,id:D_J:1,1,D,direct > direct > seo > direct,"[13, 14, 15, 16]",True,1,2022-02-27 03:00:00,16.0,122.0 > 2.0 > 1.0 > 0.0,0.2857 > 0.2857 > 0.1429 > 0.2857
