In [31]:
import pandas as pd 
import numpy as np
import math
import random

In [32]:
channels = np.array(["channel1", "channel2", "channel3", "channel4"])

In [33]:
users = pd.DataFrame({
    "user_id": [], 
    "session_id": [], 
    "channel_id": [],
    "revenue": []
})
for user in range(1000):
    user_id = "user" + str(user)
    iterations = random.randint(1, 10)
    for i in range(iterations):
        _index = random.randint(0, len(channels) - 1)
        users = pd.concat([users, pd.DataFrame({
            "user_id": [user_id], 
            "session_id": ["session" + str(i)],
            "channel_id": [channels[_index]],
            "revenue": [0]
        })], axis =  0)

conversion = np.array([0, 0, 0, 0, 0, 0, 1, 0, 0, 1])
for i in range(len(users["revenue"])):
    _index = random.randint(0, len(conversion) - 1)
    users.iloc[i, 3] = conversion[_index] * random.randint(100, 4000)

In [34]:
users.index = [x for x in range(len(users["user_id"]))]

In [35]:
users["Conversion"] = 0
users.loc[users[users["revenue"] != 0].index, "Conversion"] = 1
users["window"] = users.groupby("user_id")["Conversion"].transform((lambda x: x.expanding().sum()))
#users["lead"] = users.groupby("user_id")["Conversion"].transform((lambda x: x.shift(1)))

In [36]:
users.loc[users[users["Conversion"] == 1].index, "window"] = users.loc[users[users["Conversion"] == 1].index, "window"].apply((lambda x: x - 1))

In [37]:
users[users["user_id"].isin(users[users["Conversion"] != 0]["user_id"].unique())]

Unnamed: 0,user_id,session_id,channel_id,revenue,Conversion,window
19,user3,session0,channel1,0.0,0,0.0
20,user3,session1,channel4,0.0,0,0.0
21,user3,session2,channel1,0.0,0,0.0
22,user3,session3,channel2,0.0,0,0.0
23,user3,session4,channel3,0.0,0,0.0
...,...,...,...,...,...,...
5608,user998,session0,channel3,0.0,0,0.0
5609,user998,session1,channel1,0.0,0,0.0
5610,user998,session2,channel3,2330.0,1,0.0
5611,user998,session3,channel3,0.0,0,1.0


In [38]:
channels = users.groupby(["user_id", "window"])["channel_id"].agg('->'.join).reset_index()
revenue = users.groupby(["user_id", "window"])["revenue"].agg('sum').reset_index()
conversions = users.groupby(["user_id", "window"])["Conversion"].agg('sum').reset_index()

processed = pd.concat([channels, revenue, conversions], axis = 1)

In [39]:
processed.drop(["user_id", "window"], axis = 1, inplace = True)

In [40]:
processed["Null"] = 0
processed.loc[processed[processed["Conversion"] == 0].index, "Null"] = 1

In [41]:
final = processed.groupby(["channel_id"]).agg({"revenue": "sum",
                                    "Conversion": "sum", 
                                    "Null": "sum"})

In [42]:
final.sort_values(["Null"], ascending = [False])

Unnamed: 0_level_0,revenue,Conversion,Null
channel_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
channel1,169057.0,89,60
channel3,200087.0,91,53
channel2,240740.0,112,52
channel4,154740.0,74,50
channel2->channel4,27427.0,15,16
...,...,...,...
channel1->channel1->channel1->channel3,1078.0,1,0
channel1->channel1->channel1->channel4,1401.0,1,0
channel1->channel1->channel1->channel4->channel1->channel1->channel3,2155.0,1,0
channel1->channel1->channel2,6165.0,3,0


In [25]:
data = final.copy()

In [43]:
#Calculate base conversion rates
base_conversion_rate = data["Conversion"].sum()/(data["Conversion"].sum() + data["Null"].sum())

In [46]:
processed = data.copy().reset_index()
processed.columns = ['channel_id', 'Revenue', 'Conversion', 'Null']

In [47]:
# Markov Channel Attribution Model
# Step 1: Prepare data for Markov model
# We need to extract all transitions from the channel sequences

# Extract all transitions with their outcomes
# We need to build separate paths for conversions and nulls
all_transitions = []

for idx, row in processed.iterrows():
    sequence = row['channel_id']
    conversion_count = row['Conversion']
    null_count = row['Null']
    
    if pd.isna(sequence) or sequence == '':
        continue
        
    channels = sequence.split('->')
    
    # Build conversion path
    if conversion_count > 0:
        # Start -> first channel
        if len(channels) > 0:
            all_transitions.append({
                'from': '(start)',
                'to': channels[0],
                'count': conversion_count
            })
        # Channel to channel transitions
        for i in range(len(channels) - 1):
            all_transitions.append({
                'from': channels[i],
                'to': channels[i+1],
                'count': conversion_count
            })
        # Last channel -> conversion
        if len(channels) > 0:
            all_transitions.append({
                'from': channels[-1],
                'to': '(conversion)',
                'count': conversion_count
            })
    
    # Build null path
    if null_count > 0:
        # Start -> first channel
        if len(channels) > 0:
            all_transitions.append({
                'from': '(start)',
                'to': channels[0],
                'count': null_count
            })
        # Channel to channel transitions
        for i in range(len(channels) - 1):
            all_transitions.append({
                'from': channels[i],
                'to': channels[i+1],
                'count': null_count
            })
        # Last channel -> null
        if len(channels) > 0:
            all_transitions.append({
                'from': channels[-1],
                'to': '(null)',
                'count': null_count
            })

transitions_df = pd.DataFrame(all_transitions)
print("Transitions DataFrame (first 10 rows):")
print(transitions_df.head(10))
print(f"\nTotal transitions: {len(transitions_df)}")


Transitions DataFrame (first 10 rows):
       from            to  count
0   (start)      channel1   1310
1  channel1  (conversion)   1310
2   (start)      channel1    758
3  channel1        (null)    758
4   (start)      channel1    227
5  channel1      channel1    227
6  channel1  (conversion)    227
7   (start)      channel1    135
8  channel1      channel1    135
9  channel1        (null)    135

Total transitions: 37113


In [48]:
# Step 2: Build transition probability matrix
# Aggregate transitions by from->to pairs
transition_matrix = transitions_df.groupby(['from', 'to']).agg({
    'count': 'sum'
}).reset_index()

# Calculate probabilities
transition_probs = transition_matrix.pivot_table(
    index='from', 
    columns='to', 
    values='count', 
    fill_value=0
)

# Normalize rows to get probabilities
transition_probs_norm = transition_probs.div(transition_probs.sum(axis=1), axis=0).fillna(0)

print("Transition Probability Matrix:")
print(transition_probs_norm)


Transition Probability Matrix:
to        (conversion)    (null)  channel1  channel2  channel3  channel4
from                                                                    
(start)       0.000000  0.000000  0.252353  0.250645  0.250076  0.246926
channel1      0.197939  0.140815  0.166606  0.168108  0.165053  0.161479
channel2      0.191176  0.147677  0.162990  0.162107  0.169530  0.166520
channel3      0.192531  0.148710  0.163731  0.162385  0.164767  0.167875
channel4      0.203788  0.143643  0.162999  0.163778  0.159938  0.165854


In [49]:
# Step 3: Calculate conversion probabilities using Markov chain
# We'll use the transition matrix to calculate the probability of reaching conversion

def calculate_conversion_probability(transition_matrix, max_iterations=100, tolerance=1e-6):
    """
    Calculate conversion probability for each state using iterative method
    """
    # Get all unique states (from index)
    states = list(transition_matrix.index)
    all_states = set(states) | set(transition_matrix.columns)
    
    conversion_states = ['(conversion)']
    null_states = ['(null)']
    
    # Initialize probabilities
    prob_conversion = {state: 0.0 for state in all_states}
    prob_conversion['(conversion)'] = 1.0
    prob_conversion['(null)'] = 0.0
    
    # Iterative calculation
    for iteration in range(max_iterations):
        prob_conversion_new = prob_conversion.copy()
        
        for state in states:
            if state in conversion_states or state in null_states:
                continue
                
            # Calculate probability of conversion from this state
            total_prob = 0.0
            for next_state in transition_matrix.columns:
                if state in transition_matrix.index:
                    transition_prob = transition_matrix.loc[state, next_state]
                    
                    if next_state == '(conversion)':
                        total_prob += transition_prob
                    elif next_state in all_states:
                        total_prob += transition_prob * prob_conversion[next_state]
            
            prob_conversion_new[state] = total_prob
        
        # Check convergence
        non_terminal_states = [s for s in states if s not in conversion_states + null_states]
        if non_terminal_states:
            max_diff = max(abs(prob_conversion_new[state] - prob_conversion[state]) 
                          for state in non_terminal_states)
            
            if max_diff < tolerance:
                break
        
        prob_conversion = prob_conversion_new
    
    return prob_conversion

# Calculate base conversion probability
base_conversion_prob = calculate_conversion_probability(transition_probs_norm)
print("\nBase Conversion Probabilities:")
for state, prob in base_conversion_prob.items():
    if state not in ['(conversion)', '(null)']:
        print(f"{state}: {prob:.4f}")



Base Conversion Probabilities:
channel2: 0.5712
channel3: 0.5713
channel4: 0.5790
(start): 0.5749
channel1: 0.5781


In [None]:
# Step 4: Calculate Removal Effects (Markov Attribution)
# For each channel, remove it and calculate the new conversion probability
# Removal effect = (base_prob - prob_without_channel) / base_prob

def remove_channel_from_matrix(transition_matrix, channel_to_remove):
    """
    Remove a channel from the transition matrix by redirecting transitions
    """
    matrix_copy = transition_matrix.copy()
    
    # Find all transitions involving the channel
    # Remove transitions from the channel
    if channel_to_remove in matrix_copy.index:
        matrix_copy = matrix_copy.drop(index=channel_to_remove)
    
    # Remove transitions to the channel (redirect to next state or skip)
    if channel_to_remove in matrix_copy.columns:
        # For states that transition to removed channel, redistribute probabilities
        for state in matrix_copy.index:
            if matrix_copy.loc[state, channel_to_remove] > 0:
                # Get the probability that was going to removed channel
                removed_prob = matrix_copy.loc[state, channel_to_remove]
                # Redistribute to other states proportionally
                row_sum = matrix_copy.loc[state].sum() - removed_prob
                if row_sum > 0:
                    matrix_copy.loc[state] = matrix_copy.loc[state] * (matrix_copy.loc[state].sum() / row_sum)
                matrix_copy.loc[state, channel_to_remove] = 0
        
        matrix_copy = matrix_copy.drop(columns=channel_to_remove)
    
    # Renormalize rows
    row_sums = matrix_copy.sum(axis=1)
    matrix_copy = matrix_copy.div(row_sums, axis=0).fillna(0)
    
    return matrix_copy

# Get all channels (excluding start, conversion, null)
all_channels = [ch for ch in transition_probs_norm.index 
                if ch not in ['(start)', '(conversion)', '(null)']]

# Calculate base conversion probability from start
base_prob_from_start = base_conversion_prob.get('(start)', 0)

print(f"\nBase conversion probability from start: {base_prob_from_start:.4f}")

# Calculate removal effects for each channel
removal_effects = {}
conversion_probs_without = {}

for channel in all_channels:
    # Create matrix without this channel
    matrix_without = remove_channel_from_matrix(transition_probs_norm, channel)
    
    # Calculate new conversion probability
    prob_without = calculate_conversion_probability(matrix_without)
    prob_from_start_without = prob_without.get('(start)', 0)
    
    conversion_probs_without[channel] = prob_from_start_without
    
    # Calculate removal effect
    if base_prob_from_start > 0:
        removal_effect = (base_prob_from_start - prob_from_start_without) / base_prob_from_start
    else:
        removal_effect = 0
    
    removal_effects[channel] = removal_effect
    print(f"\nChannel: {channel}")
    print(f"  Conversion prob without channel: {prob_from_start_without:.4f}")
    print(f"  Removal effect: {removal_effect:.4f}")



Base conversion probability from start: 0.5749

Channel: channel1
  Conversion prob without channel: 0.5718
  Removal effect: 0.0054

Channel: channel2
  Conversion prob without channel: 0.5784
  Removal effect: -0.0062

Channel: channel3
  Conversion prob without channel: 0.5784
  Removal effect: -0.0061

Channel: channel4
  Conversion prob without channel: 0.5709
  Removal effect: 0.0070


In [53]:
# Step 5: Attribute conversions and revenue based on removal effects
# Total conversions and revenue
total_conversions = processed['Conversion'].sum()
total_revenue = processed['Revenue'].sum()

# Normalize removal effects to get attribution weights
# Use absolute values for negative removal effects (channels that hurt conversion)
# In standard Markov attribution, we use absolute removal effects
absolute_removal_effects = {ch: abs(removal_effects[ch]) for ch in all_channels}
total_removal_effect = sum(absolute_removal_effects.values())

if total_removal_effect > 0:
    attribution_weights = {ch: absolute_removal_effects[ch] / total_removal_effect 
                          for ch in all_channels}
else:
    # If no removal effects, distribute equally
    attribution_weights = {ch: 1.0 / len(all_channels) for ch in all_channels}

# Create attribution results
markov_attribution = pd.DataFrame({
    'Channel': all_channels,
    'Removal_Effect': [removal_effects[ch] for ch in all_channels],
    'Attribution_Weight': [attribution_weights[ch] for ch in all_channels],
    'Attributed_Conversions': [attribution_weights[ch] * total_conversions for ch in all_channels],
    'Attributed_Revenue': [attribution_weights[ch] * total_revenue for ch in all_channels]
})

markov_attribution = markov_attribution.sort_values('Attributed_Revenue', ascending=False)

print("\n" + "="*80)
print("MARKOV CHANNEL ATTRIBUTION RESULTS")
print("="*80)
print(markov_attribution.to_string(index=False))
print("\nTotal Conversions:", total_conversions)
print("Total Revenue:", total_revenue)



MARKOV CHANNEL ATTRIBUTION RESULTS
 Channel  Removal_Effect  Attribution_Weight  Attributed_Conversions  Attributed_Revenue
channel4        0.006986            0.283620             4296.554784        8.795738e+06
channel2       -0.006195            0.251501             3809.991285        7.799665e+06
channel3       -0.006075            0.246636             3736.284137        7.648774e+06
channel1        0.005375            0.218243             3306.169794        6.768261e+06

Total Conversions: 15149
Total Revenue: 31012438.0


In [54]:
# Step 6: Compare with actual channel performance
# Get actual performance from final dataframe
actual_performance = final.copy()
actual_performance = actual_performance.reset_index()
actual_performance.columns = ['Channel_Sequence', 'Revenue', 'Conversions', 'Null']

# For comparison, let's also see per-channel actuals (sum across all sequences containing the channel)
channel_actuals = {}
for channel in all_channels:
    channel_revenue = 0
    channel_conversions = 0
    channel_null = 0
    
    for idx, row in actual_performance.iterrows():
        sequence = row['Channel_Sequence']
        if channel in str(sequence):
            channel_revenue += row['Revenue']
            channel_conversions += row['Conversions']
            channel_null += row['Null']
    
    channel_actuals[channel] = {
        'Revenue': channel_revenue,
        'Conversions': channel_conversions,
        'Null': channel_null
    }

# Create comparison dataframe more efficiently
markov_dict = markov_attribution.set_index('Channel').to_dict('index')

comparison_df = pd.DataFrame({
    'Channel': all_channels,
    'Markov_Attributed_Revenue': [markov_dict[ch]['Attributed_Revenue'] for ch in all_channels],
    'Markov_Attributed_Conversions': [markov_dict[ch]['Attributed_Conversions'] for ch in all_channels],
    'Actual_Revenue': [channel_actuals[ch]['Revenue'] for ch in all_channels],
    'Actual_Conversions': [channel_actuals[ch]['Conversions'] for ch in all_channels],
    'Actual_Null': [channel_actuals[ch]['Null'] for ch in all_channels]
})

comparison_df = comparison_df.sort_values('Markov_Attributed_Revenue', ascending=False)

print("\n" + "="*80)
print("MARKOV ATTRIBUTION vs ACTUAL PERFORMANCE COMPARISON")
print("="*80)
print(comparison_df.to_string(index=False))



MARKOV ATTRIBUTION vs ACTUAL PERFORMANCE COMPARISON
 Channel  Markov_Attributed_Revenue  Markov_Attributed_Conversions  Actual_Revenue  Actual_Conversions  Actual_Null
channel4               8.795738e+06                    4296.554784       1135274.0                 535          418
channel2               7.799665e+06                    3809.991285       1228688.0                 577          414
channel3               7.648774e+06                    3736.284137       1137158.0                 546          423
channel1               6.768261e+06                    3306.169794       1052648.0                 526          410
