In [1]:
import ast
from collections import defaultdict

import numpy as np
import pandas as pd

In [2]:
# Raw journey export; the path is relative to the notebook's working directory.
JOURNEYS_CSV = 'journeys.csv'
df = pd.read_csv(JOURNEYS_CSV)

In [3]:
# The 'Journeys' column is read from CSV as text. ast.literal_eval only accepts
# Python literals, whereas eval would execute any expression found in the file.
# Values that are no longer strings are passed through unchanged, so re-running
# this cell on an already-parsed column is harmless.
df['Journeys'] = df['Journeys'].apply(
    lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

In [4]:
# Aggregate figures used later to scale the attribution results.
total_conversions = df['Conversion_value'].sum()
list_of_paths = df['Journeys']
n_paths = len(list_of_paths)
# Conversion value per journey, taken over every row of the input.
base_conversion_rate = total_conversions / n_paths

In [5]:
def transition_states(list_of_paths):
    """Count observed state-to-state transitions across all user paths.

    Parameters
    ----------
    list_of_paths : iterable of sequences of str
        Each element is one journey: an ordered, sliceable sequence of state
        names. The iterable is traversed twice.

    Returns
    -------
    dict
        Maps 'origin>destination' to the number of times that step occurs.
        Every ordered pair of observed states is present as a key (0 when the
        step was never seen). Steps whose origin is 'Conversion' or 'Null'
        are not counted.
    """
    # dict.fromkeys keeps first-seen order, so the key order is reproducible;
    # iterating a set of strings follows hash order, which varies per process.
    unique_channels = list(dict.fromkeys(x for path in list_of_paths for x in path))
    transition_counts = {x + '>' + y: 0 for x in unique_channels for y in unique_channels}

    for user_path in list_of_paths:
        # Pair every state with its successor. The final state has no successor,
        # so a path that ends in a non-absorbing state is simply not extended
        # (indexing position + 1 would raise IndexError there).
        for origin, destination in zip(user_path, user_path[1:]):
            # Compare whole names: a substring test would count a step twice
            # when one channel name is contained in another.
            if origin not in ('Conversion', 'Null'):
                transition_counts[origin + '>' + destination] += 1

    return transition_counts


# Count every observed state-to-state step once, then show the counts as the cell output.
trans_states = transition_states(list_of_paths)
trans_states

{'Start>Start': 0,
 'Start>Facebook ads': 3317,
 'Start>Instagram influencers': 3378,
 'Start>Youtube ads': 3305,
 'Start>Conversion': 0,
 'Start>Null': 0,
 'Facebook ads>Start': 0,
 'Facebook ads>Facebook ads': 0,
 'Facebook ads>Instagram influencers': 5355,
 'Facebook ads>Youtube ads': 4811,
 'Facebook ads>Conversion': 138,
 'Facebook ads>Null': 2455,
 'Instagram influencers>Start': 0,
 'Instagram influencers>Facebook ads': 4824,
 'Instagram influencers>Instagram influencers': 0,
 'Instagram influencers>Youtube ads': 4871,
 'Instagram influencers>Conversion': 541,
 'Instagram influencers>Null': 3705,
 'Youtube ads>Start': 0,
 'Youtube ads>Facebook ads': 4618,
 'Youtube ads>Instagram influencers': 5208,
 'Youtube ads>Youtube ads': 0,
 'Youtube ads>Conversion': 145,
 'Youtube ads>Null': 3016,
 'Conversion>Start': 0,
 'Conversion>Facebook ads': 0,
 'Conversion>Instagram influencers': 0,
 'Conversion>Youtube ads': 0,
 'Conversion>Conversion': 0,
 'Conversion>Null': 0,
 'Null>Start': 0,
 

In [6]:
def transition_prob(trans_dict):
    """Convert transition counts into per-origin transition probabilities.

    Parameters
    ----------
    trans_dict : dict
        Maps 'origin>destination' to an observed count.

    Returns
    -------
    collections.defaultdict
        Maps 'origin>destination' to count / (sum of positive counts leaving
        that origin). Only transitions with a positive count are included and
        the 'Conversion' and 'Null' origins are skipped. The container stays a
        defaultdict(dict) so the return type is unchanged for callers; note
        that looking up a missing key therefore inserts an empty dict.
    """
    absorbing = ('Conversion', 'Null')

    # Origins are taken from the keys of trans_dict itself, so the function
    # depends only on its argument. The origin is everything before the first
    # '>', compared as a whole name rather than as a substring of the key.
    outgoing_totals = {}
    for key, count in trans_dict.items():
        origin = key.partition('>')[0]
        if origin not in absorbing and count > 0:
            outgoing_totals[origin] = outgoing_totals.get(origin, 0) + count

    trans_prob = defaultdict(dict)
    for key, count in trans_dict.items():
        origin = key.partition('>')[0]
        if origin not in absorbing and count > 0:
            trans_prob[key] = float(count) / float(outgoing_totals[origin])

    return trans_prob


# Turn the transition counts into probabilities and show them as the cell output.
trans_prob = transition_prob(trans_states)
trans_prob

defaultdict(dict,
            {'Start>Facebook ads': 0.3317,
             'Start>Instagram influencers': 0.3378,
             'Start>Youtube ads': 0.3305,
             'Facebook ads>Instagram influencers': 0.4197037385375029,
             'Facebook ads>Youtube ads': 0.37706716827337566,
             'Facebook ads>Conversion': 0.010815894662591112,
             'Facebook ads>Null': 0.1924131985265303,
             'Instagram influencers>Facebook ads': 0.3460296965784377,
             'Instagram influencers>Youtube ads': 0.34940104727064053,
             'Instagram influencers>Conversion': 0.03880639839322861,
             'Instagram influencers>Null': 0.26576285775769315,
             'Youtube ads>Facebook ads': 0.3555863555863556,
             'Youtube ads>Instagram influencers': 0.401016401016401,
             'Youtube ads>Conversion': 0.011165011165011165,
             'Youtube ads>Null': 0.23223223223223224})

In [7]:
def transition_matrix(list_of_paths, transition_probabilities):
    """Build a square transition-probability matrix over all observed states.

    Parameters
    ----------
    list_of_paths : iterable of sequences of str
        Journeys whose distinct state names become the row and column labels.
    transition_probabilities : dict
        Maps 'origin>destination' to a probability.

    Returns
    -------
    pandas.DataFrame
        Rows are origins and columns are destinations. Cells default to 0.0,
        'Conversion' and 'Null' get 1.0 on their own diagonal, and every entry
        of transition_probabilities is then written on top.
    """
    # First-seen order gives the same row/column layout on every run.
    channels = list(dict.fromkeys(x for path in list_of_paths for x in path))

    # Allocate the whole frame at once instead of growing it label by label.
    trans_matrix = pd.DataFrame(0.0, index=channels, columns=channels)

    for channel in channels:
        if channel in ('Conversion', 'Null'):
            # Single-step .at write: a chained `.loc[row][col] = value` assigns
            # to a temporary under pandas Copy-on-Write and is silently lost.
            trans_matrix.at[channel, channel] = 1.0

    for key, value in transition_probabilities.items():
        origin, destination = key.split('>')
        trans_matrix.at[origin, destination] = value

    return trans_matrix


# Arrange the probabilities as an origin-by-destination matrix and show it as the cell output.
trans_matrix = transition_matrix(list_of_paths, trans_prob)
trans_matrix

Unnamed: 0,Start,Facebook ads,Instagram influencers,Youtube ads,Conversion,Null
Start,0.0,0.3317,0.3378,0.3305,0.0,0.0
Facebook ads,0.0,0.0,0.419704,0.377067,0.010816,0.192413
Instagram influencers,0.0,0.34603,0.0,0.349401,0.038806,0.265763
Youtube ads,0.0,0.355586,0.401016,0.0,0.011165,0.232232
Conversion,0.0,0.0,0.0,0.0,1.0,0.0
Null,0.0,0.0,0.0,0.0,0.0,1.0


In [8]:
def removal_effects(df, conversion_rate):
    """Compute, per channel, the relative drop in conversion when it is removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Square transition matrix (rows = origin, columns = destination) whose
        labels include 'Start', 'Null' and 'Conversion'. It is not modified.
    conversion_rate : float
        Baseline conversion rate the reduced-chain result is compared against.

    Returns
    -------
    dict
        Maps each channel (every column except 'Start', 'Null', 'Conversion')
        to 1 - removal_cvr / conversion_rate, where removal_cvr is the
        probability of reaching 'Conversion' from 'Start' once the channel's
        row and column have been dropped.
    """
    absorbing = ['Null', 'Conversion']
    channels = [channel for channel in df.columns
                if channel not in ['Start', 'Null', 'Conversion']]

    removal_effects_dict = {}
    for channel in channels:
        # drop() returns new frames, so the caller's matrix is left untouched.
        removal_df = df.drop(channel, axis=1).drop(channel, axis=0)

        # Probability that used to flow into the removed channel is redirected
        # to 'Null'. It is added to the existing 'Null' share so each row sums
        # to 1 again, and written with a single indexing step rather than a
        # chained `.loc[row]['Null'] = ...`, which pandas Copy-on-Write discards.
        lost_probability = 1.0 - removal_df.sum(axis=1)
        removal_df['Null'] = removal_df['Null'] + lost_probability
        removal_df.loc['Null', 'Null'] = 1.0

        # Q: transitions among non-absorbing states; R: non-absorbing -> absorbing.
        removal_to_non_conv = removal_df.drop(absorbing, axis=1).drop(absorbing, axis=0)
        removal_to_conv = removal_df.loc[removal_to_non_conv.index, absorbing]

        # inv(I - Q) @ R gives, per starting state, the probability of ending
        # in each absorbing state.
        removal_inv_diff = np.linalg.inv(
            np.identity(len(removal_to_non_conv.columns))
            - np.asarray(removal_to_non_conv, dtype=float))
        absorption = pd.DataFrame(
            np.dot(removal_inv_diff, np.asarray(removal_to_conv, dtype=float)),
            index=removal_to_conv.index,
            columns=absorbing)

        # Label-based lookup replaces the former positional column pick.
        removal_cvr = absorption.loc['Start', 'Conversion']
        removal_effects_dict[channel] = 1 - removal_cvr / conversion_rate

    return removal_effects_dict

# Removal effect of each channel, measured against the baseline conversion rate.
removal_effects_dict = removal_effects(trans_matrix, base_conversion_rate)


In [9]:
def markov_chain_allocations(removal_effects, total_conversions):
    """Split total_conversions across channels in proportion to removal effect.

    Parameters
    ----------
    removal_effects : dict
        Maps channel name to its removal effect.
    total_conversions : number
        Amount to distribute.

    Returns
    -------
    dict
        Maps each channel to (effect / sum of all effects) * total_conversions.
    """
    effect_total = np.sum(list(removal_effects.values()))

    allocations = {}
    for channel, effect in removal_effects.items():
        share = effect / effect_total
        allocations[channel] = share * total_conversions
    return allocations


# Distribute the total conversions across channels in proportion to their removal effects.
attributions = markov_chain_allocations(removal_effects_dict, total_conversions)

In [10]:
# Show the per-channel allocation as the cell output.
attributions

{'Facebook ads': 251.51855382370894,
 'Instagram influencers': 322.24748804832024,
 'Youtube ads': 250.2339581279709}