In [1]:
import pandas as pd

# Read the raw marketing-campaign CSV into `df` and preview its first rows.
dataset_path = '../Datasets/Marketing campaign dataset.csv'
df = pd.read_csv(dataset_path)
preview = df.head()
print(preview)

   campaign_item_id  no_of_days        time  ext_service_id ext_service_name  \
0              2733           7  2022-05-01             128     Facebook Ads   
1              2733           8  2022-05-02              16            DV360   
2              2733           9  2022-05-03             128     Facebook Ads   
3              2733          10  2022-05-04             128     Facebook Ads   
4              2733          11  2022-05-05               4       Google Ads   

   creative_id  creative_width  creative_height  \
0         1000           300.0            250.0   
1         1000           300.0            250.0   
2         1000           300.0            250.0   
3         1000           300.0            250.0   
4         1000           300.0            250.0   

                          search_tags  template_id  ... exchange_rate  \
0                     #The Power of X         90.0  ...             1   
1                      #Be Bold. Be X         90.0  ...           

#### Building the graph structure

In [None]:
import torch
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv

# Assigning relevant features to node features.
#
# Column names follow the dataset preview printed by the loading cell:
# `campaign_item_id`, `search_tags` and `creative_width`/`creative_height`
# are the spellings that actually appear there.
# NOTE(review): `time_zone`, `advertiser_*`, `channel_*`, `network_id`,
# `landing_page` and the `keyword_*` embedding columns are hidden by the
# truncated preview -- confirm they exist in the loaded frame.
def _select_columns(frame, columns):
    """Return ``frame[columns]``; raise a KeyError naming every missing column."""
    missing = [name for name in columns if name not in frame.columns]
    if missing:
        raise KeyError(f"Columns missing from the dataset: {missing}")
    return frame[columns]


def _encode_numeric(frame):
    """Return a float32 copy of ``frame`` in which every column is numeric.

    Non-numeric columns (names, tags, dates) are replaced by sorted integer
    category codes so they can live in a float tensor. Missing categories get
    code -1 and missing numeric values become 0.
    """
    encoded = frame.copy()
    for name in encoded.columns:
        if not pd.api.types.is_numeric_dtype(encoded[name]):
            encoded[name] = pd.factorize(encoded[name], sort=True)[0]
    return encoded.fillna(0).astype('float32')


def _row_tensor(features):
    """Encode ``features`` and return one float tensor row per dataset row."""
    return torch.tensor(_encode_numeric(features).to_numpy(), dtype=torch.float)


def _node_tensor(features, id_column):
    """Encode ``features`` and keep the first row seen for each distinct id.

    Rows come out in first-appearance order of the id, which is the same
    order `_index_map` uses, so row ``i`` describes the node with index ``i``.
    """
    ids = features[id_column]
    first_seen = (ids.notna() & ~ids.duplicated()).to_numpy()
    return torch.tensor(_encode_numeric(features).to_numpy()[first_seen], dtype=torch.float)


def _index_map(ids):
    """Map each distinct non-missing id to a 0-based index (first-appearance order)."""
    return {value: position for position, value in enumerate(ids.dropna().unique())}


campaign_features = _select_columns(df, ['campaign_item_id', 'time_zone', 'search_tags'])
platform_features = _select_columns(df, ['ext_service_id', 'ext_service_name'])
advertiser_features = _select_columns(df, ['advertiser_id', 'advertiser_name'])
creative_features = _select_columns(df, ['creative_id', 'creative_width', 'creative_height'])
template_features = _select_columns(df, ['template_id'])
channel_features = _select_columns(df, ['channel_id', 'channel_name'])
time_features = _select_columns(df, ['time'])
network_features = _select_columns(df, ['network_id'])
landing_page_features = _select_columns(df, ['landing_page'])

# Get the embedded key words
keyword_embedding_columns = [f"keyword_{i}" for i in range(50)]
keyword_features = _select_columns(df, keyword_embedding_columns).values  # Get only the embedding columns

# Creating the Nodes: one feature row per distinct entity id.
campaign = _node_tensor(campaign_features, 'campaign_item_id')   # node features for Campaign
platform = _node_tensor(platform_features, 'ext_service_id')     # node features for Platform
advertiser = _node_tensor(advertiser_features, 'advertiser_id')  # node features for Advertiser
creative = _node_tensor(creative_features, 'creative_id')        # node features for Creative
template = _node_tensor(template_features, 'template_id')        # node features for Template
channel = _node_tensor(channel_features, 'channel_id')           # node features for Channel
network = _node_tensor(network_features, 'network_id')           # node features for Network

# Time, keyword and landing-page features have no id -> index mapping yet, so
# they stay as per-row tensors and are NOT part of the graph built below.
# TODO: give them node ids and edges (e.g. Platform -> Time) to wire them in.
time = _row_tensor(time_features)
keyword = torch.tensor(keyword_features, dtype=torch.float)
landing_page = _row_tensor(landing_page_features)

# MAKING CONNECTION BETWEEN NODES
# Mapping IDs to indices for each node type (indices are local to the type).
campaign_id_to_index = _index_map(campaign_features['campaign_item_id'])
platform_id_to_index = _index_map(platform_features['ext_service_id'])
advertiser_id_to_index = _index_map(advertiser_features['advertiser_id'])
network_id_to_index = _index_map(network_features['network_id'])
creative_id_to_index = _index_map(creative_features['creative_id'])
template_id_to_index = _index_map(template_features['template_id'])
channel_id_to_index = _index_map(channel_features['channel_id'])

# All node types share one homogeneous graph, so each type gets a contiguous
# slice of the global node index space; `node_offsets` holds where it starts.
node_blocks = {
    'campaign': campaign,
    'platform': platform,
    'advertiser': advertiser,
    'network': network,
    'creative': creative,
    'template': template,
    'channel': channel,
}
node_index_maps = {
    'campaign': campaign_id_to_index,
    'platform': platform_id_to_index,
    'advertiser': advertiser_id_to_index,
    'network': network_id_to_index,
    'creative': creative_id_to_index,
    'template': template_id_to_index,
    'channel': channel_id_to_index,
}
node_offsets = {}
next_offset = 0
for node_type, block in node_blocks.items():
    assert block.shape[0] == len(node_index_maps[node_type]), node_type
    node_offsets[node_type] = next_offset
    next_offset += block.shape[0]

# Every edge comes from one dataset row; that row's value in this column is
# used as the edge attribute, so all relations share one scale.
EDGE_ATTR_COLUMN = 'no_of_days'


def _build_edges(frame, source_type, source_column, target_type, target_column):
    """Build one relation's edges from the rows of ``frame``.

    Returns ``(edge_index, edge_attr)`` where ``edge_index`` is a ``[2, E]``
    long tensor of GLOBAL node indices (local index + type offset) and
    ``edge_attr`` is the matching ``[E, 1]`` float tensor. Rows whose source
    or target id is missing are skipped instead of being cast from NaN.
    """
    source = frame[source_column].map(node_index_maps[source_type])
    target = frame[target_column].map(node_index_maps[target_type])
    keep = (source.notna() & target.notna()).to_numpy()
    index = torch.stack([
        torch.as_tensor(source.to_numpy(dtype='float64')[keep], dtype=torch.long) + node_offsets[source_type],
        torch.as_tensor(target.to_numpy(dtype='float64')[keep], dtype=torch.long) + node_offsets[target_type],
    ])
    weights = frame[EDGE_ATTR_COLUMN].fillna(0).to_numpy()[keep]
    attr = torch.as_tensor(weights, dtype=torch.float).unsqueeze(1)
    return index, attr


# Campaign -> Platform
edge_index_campaign_platform, edge_attr_campaign_platform = _build_edges(
    df, 'campaign', 'campaign_item_id', 'platform', 'ext_service_id')

# Campaign -> Advertiser
edge_index_campaign_advertiser, edge_attr_campaign_advertiser = _build_edges(
    df, 'campaign', 'campaign_item_id', 'advertiser', 'advertiser_id')

# Campaign -> Network
edge_index_campaign_network, edge_attr_campaign_network = _build_edges(
    df, 'campaign', 'campaign_item_id', 'network', 'network_id')

# Campaign -> Channel
edge_index_campaign_channel, edge_attr_campaign_channel = _build_edges(
    df, 'campaign', 'campaign_item_id', 'channel', 'channel_id')

# Platform -> Channel
edge_index_platform_channel, edge_attr_platform_channel = _build_edges(
    df, 'platform', 'ext_service_id', 'channel', 'channel_id')

# Creative -> Campaign
edge_index_creative_campaign, edge_attr_creative_campaign = _build_edges(
    df, 'creative', 'creative_id', 'campaign', 'campaign_item_id')

# Creative -> Template
edge_index_creative_template, edge_attr_creative_template = _build_edges(
    df, 'creative', 'creative_id', 'template', 'template_id')

# Node features: node types have different attribute counts, so each block is
# laid out as [one-hot node type | encoded attributes | zero padding] to give
# every node the same feature width before stacking along dim 0.
attribute_width = max(block.shape[1] for block in node_blocks.values())
padded_blocks = []
for type_position, block in enumerate(node_blocks.values()):
    type_one_hot = torch.zeros(block.shape[0], len(node_blocks))
    type_one_hot[:, type_position] = 1.0
    padding = torch.zeros(block.shape[0], attribute_width - block.shape[1])
    padded_blocks.append(torch.cat([type_one_hot, block, padding], dim=1))
node_features = torch.cat(padded_blocks, dim=0)

# Combine all edge indices and features (same relation order in both lists).
edge_index = torch.cat([
    edge_index_campaign_platform,
    edge_index_campaign_advertiser,
    edge_index_campaign_network,
    edge_index_campaign_channel,
    edge_index_platform_channel,
    edge_index_creative_campaign,
    edge_index_creative_template,
], dim=1)
edge_attr = torch.cat([
    edge_attr_campaign_platform,
    edge_attr_campaign_advertiser,
    edge_attr_campaign_network,
    edge_attr_campaign_channel,
    edge_attr_platform_channel,
    edge_attr_creative_campaign,
    edge_attr_creative_template,
], dim=0)

# Sanity checks: one attribute row per edge, and every edge endpoint is a real node.
assert edge_index.shape[1] == edge_attr.shape[0]
assert edge_index.numel() == 0 or int(edge_index.max()) < node_features.shape[0]

# Create the graph data object in PyTorch Geometric format
data = Data(x=node_features, edge_index=edge_index, edge_attr=edge_attr)

# Now, we have a graph where:
# - `x` is the node features ([num_nodes, num_node_types + attribute_width])
# - `edge_index` is the relationship between nodes (global node indices, [2, num_edges])
# - `edge_attr` is the relationship's feature (`no_of_days` of the originating row, [num_edges, 1])

print(data)


In [None]:
class GNNModel(torch.nn.Module):
    """Two-layer GCN producing ``out_channels`` values per node.

    Args:
        in_channels: Number of input features per node.
        out_channels: Number of output values per node.
        hidden_channels: Width of the hidden layer between the two GCN
            layers (defaults to 64, the previously hard-coded value).
    """

    def __init__(self, in_channels, out_channels, hidden_channels=64):
        super(GNNModel, self).__init__()
        # Define two GCN layers
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    @staticmethod
    def _as_edge_weight(edge_attr):
        """Convert ``edge_attr`` into the 1-D per-edge weight GCNConv accepts.

        GCNConv's third argument is a scalar weight per edge, so a ``[E, 1]``
        attribute matrix is flattened to ``[E]``. Multi-column attributes
        cannot be expressed as a single weight and are rejected explicitly.
        """
        if edge_attr is None or edge_attr.dim() == 1:
            return edge_attr
        if edge_attr.dim() == 2 and edge_attr.size(1) == 1:
            return edge_attr.squeeze(1)
        raise ValueError(
            "GCNConv supports a single scalar weight per edge; got edge_attr "
            f"with shape {tuple(edge_attr.shape)}"
        )

    def forward(self, x, edge_index, edge_attr=None):
        """Run both GCN layers with a ReLU in between.

        Args:
            x: Node feature matrix passed to the first layer.
            edge_index: Edge list passed to both layers.
            edge_attr: Optional per-edge scalar weights, shaped ``[E]`` or
                ``[E, 1]``; ``None`` runs the layers unweighted.

        Returns:
            The output of the second GCN layer.

        Raises:
            ValueError: If ``edge_attr`` has more than one value per edge.
        """
        edge_weight = self._as_edge_weight(edge_attr)

        # First GCN layer
        x = self.conv1(x, edge_index, edge_weight)
        x = torch.relu(x)

        # Second GCN layer
        x = self.conv2(x, edge_index, edge_weight)
        return x

# Layer sizes: the input width is the number of columns in the node-feature
# matrix (num_nodes x num_features); the output is one value per node
# (e.g. a predicted number of clicks).
in_channels = node_features.shape[1]
out_channels = 1

# Build the network from those sizes.
model = GNNModel(in_channels=in_channels, out_channels=out_channels)

# Placeholder training objects: Adam over all model parameters and a
# mean-squared-error loss, suitable for a continuous target such as clicks.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)
loss_fn = torch.nn.MSELoss()

# Example forward pass over the whole graph, then show the raw predictions.
graph_inputs = (data.x, data.edge_index, data.edge_attr)
output = model(*graph_inputs)
print(output)
