In [None]:
import io
import os

import pandas as pd
import requests
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from PIL import Image
from sentence_transformers import SentenceTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader
from torchvision.models import resnet18

# Facebook Graph API configuration
# The token is read from the FB_ACCESS_TOKEN environment variable so a real
# credential never has to be saved inside the notebook; the original literal
# placeholder is kept as the fallback value for backward compatibility.
ACCESS_TOKEN = os.environ.get("FB_ACCESS_TOKEN", "ACCESS_TOKEN")
AD_ACCOUNT_ID = "2116029228431929"
# Graph API version segment of the URL.
# NOTE(review): the original note read "current v13.0 or v22.0" - confirm which version to target.
API_VERSION = "v18.0"
# Ads endpoint for the account above; fetch_ads() sends its GET request here.
BASE_URL = f"https://graph.facebook.com/{API_VERSION}/act_{AD_ACCOUNT_ID}/ads"

In [None]:
# Load pre-trained text embedding model
# Module-level instance shared by the notebook: AdDataset.__getitem__ calls
# text_model.encode(...) on every caption it serves.
# NOTE(review): AdClassifier.text_fc takes 384 input features - presumably this
# model's embedding size; confirm if the model name is changed.
text_model = SentenceTransformer('all-MiniLM-L6-v2')

# Fetch Ads Data from Facebook API
def fetch_ads():
    """Fetch advertisement data from the Facebook Graph API.

    Sends a single GET request to ``BASE_URL`` authenticated with
    ``ACCESS_TOKEN`` and returns the ``"data"`` list of the JSON response.
    Only that one request is made; any paging information in the response is
    not followed.

    Returns:
        list: The ads found under the ``"data"`` key, or ``[]`` when the
        request fails, the status code is not 200, the body is not valid
        JSON, or the response contains no ads. Failures are reported with
        ``print`` instead of being raised.
    """
    params = {
        "fields": "id,creative{image_url,body},insights{cpm,age,gender,location}",
        "access_token": ACCESS_TOKEN
    }

    try:
        # A timeout keeps the notebook from hanging forever on a stalled connection.
        response = requests.get(BASE_URL, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"Error: request to Facebook API failed - {exc}")
        return []

    if response.status_code != 200:
        print(f"Error: {response.status_code} - {response.text}")
        return []

    try:
        # The original never assigned `data`, so every successful response
        # ended in a NameError on the check below.
        data = response.json()
    except ValueError as exc:
        print(f"Error: could not decode API response as JSON - {exc}")
        return []

    if not isinstance(data, dict) or not data.get("data"):
        print("No data fetched from API.")
        return []

    return data["data"]  # Return the list of ads

# Process API Data into a DataFrame
def process_data(raw_data, cpm_threshold=70, output_path="ads_data.csv"):
    """Process raw advertisement data and extract relevant fields.

    Each ad contributes one row built from its id, its creative
    (``image_url``, ``body``) and the first entry of its ``insights.data``
    list (``cpm``, ``age``, ``gender``, ``location``). Ads without an image
    URL, without a caption, or with a missing/zero/unparseable CPM are skipped.

    Args:
        raw_data: Iterable of ad dictionaries as returned by ``fetch_ads``.
        cpm_threshold: Ads whose CPM is less than or equal to this value are
            labelled ``"Okay"``; all others ``"Not Okay"``. Defaults to 70.
        output_path: Where the resulting frame is written as CSV. Pass
            ``None`` (or an empty string) to skip writing the file.

    Returns:
        pandas.DataFrame: Columns ``ad_id``, ``image_url``, ``caption``,
        ``cpm``, ``age_range``, ``gender``, ``location`` and ``label``; rows
        containing missing values are dropped and the index is renumbered
        from 0.
    """
    columns = ["ad_id", "image_url", "caption", "cpm", "age_range", "gender", "location", "label"]
    ads = []
    for ad in raw_data:
        ad_id = ad.get("id", "")

        # `or {}` also covers keys that are present but set to None.
        creative = ad.get("creative") or {}
        image_url = creative.get("image_url", "")
        caption = creative.get("body", "")

        # An empty `data` list used to raise IndexError on `[0]`; treat it
        # the same as missing insights.
        insight_rows = (ad.get("insights") or {}).get("data") or [{}]
        insights = insight_rows[0] or {}

        try:
            cpm = float(insights.get("cpm", 0))
        except (TypeError, ValueError):
            cpm = 0.0  # unparseable CPM is handled like a missing one (row skipped below)

        age_range = insights.get("age", "unknown")
        gender = insights.get("gender", "unknown")
        location = insights.get("location", "unknown")

        if not image_url or not caption or cpm == 0:
            continue

        label = "Okay" if cpm <= cpm_threshold else "Not Okay"
        ads.append([ad_id, image_url, caption, cpm, age_range, gender, location, label])

    df = pd.DataFrame(ads, columns=columns).dropna().reset_index(drop=True)
    if output_path:
        df.to_csv(output_path, index=False)
    print("Processed Data Head:")
    print(df.head())
    return df

In [None]:
# Custom Dataset Class
class AdDataset(Dataset):
    """Dataset yielding ``(image, text_embedding, cpm, label)`` for each ad row.

    Args:
        df: DataFrame with ``image_url``, ``caption``, ``cpm`` and ``label``
            columns. It is copied with a fresh 0..n-1 index, so the caller's
            frame is not modified.
        transform: Optional callable applied to the loaded PIL image.
        label_encoder: Optional already-fitted ``LabelEncoder``. When given,
            it is used to transform the labels instead of fitting a new one,
            which lets a test split share the label-to-integer mapping learned
            on the training split. When omitted, a new encoder is fitted on
            this frame's labels (the original behaviour).
    """

    def __init__(self, df, transform=None, label_encoder=None):
        # reset_index returns a new frame: adding a column below neither
        # touches the caller's data nor triggers SettingWithCopyWarning on
        # slices produced by train_test_split.
        self.df = df.reset_index(drop=True)
        self.transform = transform
        if label_encoder is None:
            self.label_encoder = LabelEncoder()
            self.df["label_encoded"] = self.label_encoder.fit_transform(self.df["label"])
        else:
            self.label_encoder = label_encoder
            self.df["label_encoded"] = self.label_encoder.transform(self.df["label"])

    def __len__(self):
        """Return the number of ads in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the sample at positional index ``idx``.

        Returns:
            tuple: ``(image, text_embedding, cpm, label)`` where ``image`` is
            the (optionally transformed) ad image, ``text_embedding`` is a
            float32 tensor produced by ``text_model.encode`` on the caption,
            ``cpm`` is a float32 tensor of shape ``(1,)`` and ``label`` is a
            scalar int64 tensor.
        """
        row = self.df.iloc[idx]
        image = self.load_image_from_url(row["image_url"])
        if self.transform:
            image = self.transform(image)

        text_embedding = torch.tensor(text_model.encode(row["caption"]), dtype=torch.float32)
        # Read the label from the positionally selected row. The original used
        # `self.df.loc[idx, ...]`, a label-based lookup that fails (or returns
        # another row) whenever the frame's index is not exactly 0..n-1.
        label = torch.tensor(int(row["label_encoded"]), dtype=torch.long)
        cpm = torch.tensor([float(row["cpm"])], dtype=torch.float32)

        return image, text_embedding, cpm, label

    def load_image_from_url(self, url):
        """Load image from URL or return a blank image if failed."""
        try:
            response = requests.get(url, timeout=10)
            if response.status_code == 200:
                return Image.open(io.BytesIO(response.content)).convert("RGB")
        except (requests.RequestException, OSError):
            # Network errors and undecodable image bytes fall through to the
            # blank placeholder, as the docstring promises.
            pass
        return Image.new("RGB", (224, 224), (255, 255, 255))  # Default blank image

In [None]:
# Image Transformations
# Resize to the 224x224 input used throughout the notebook, convert to a
# float tensor, then normalize with the ImageNet channel statistics: the
# classifiers below start from an ImageNet-pretrained ResNet-18, whose weights
# were trained on inputs normalized this way.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Neural Network Model
class AdClassifier(nn.Module):
    """Combine image, caption-embedding and CPM features into two class logits.

    Three branches each project their input with one linear layer
    (image -> 128 via a ResNet-18 backbone, text 384 -> 128, CPM 1 -> 32);
    the concatenated 288 features go through a final linear layer with
    2 outputs.
    """

    def __init__(self):
        super().__init__()
        # Image branch: pretrained ResNet-18 with its last layer replaced by a
        # 512 -> 128 projection.
        backbone = resnet18(pretrained=True)
        backbone.fc = nn.Linear(512, 128)
        self.cnn = backbone
        # Text and CPM branches.
        self.text_fc = nn.Linear(384, 128)
        self.cpm_fc = nn.Linear(1, 32)
        # Classification head over the concatenated branch outputs.
        self.fc = nn.Linear(128 + 128 + 32, 2)

    def forward(self, image, text_embedding, cpm):
        """Return the class logits for a batch of (image, text_embedding, cpm)."""
        branch_outputs = [
            self.cnn(image),
            self.text_fc(text_embedding),
            self.cpm_fc(cpm),
        ]
        fused = torch.cat(branch_outputs, dim=1)
        return self.fc(fused)



In [None]:
# Train-Test Split
def split_data(df):
    """Split ``df`` into train and test parts (20% test, fixed seed 42).

    Returns whatever ``train_test_split`` returns for a single input: the
    train part followed by the test part.
    """
    split_options = {"test_size": 0.2, "random_state": 42}
    return train_test_split(df, **split_options)

# Training Function
def train_model(model, train_loader, criterion, optimizer, epochs=10):
    """Train ``model`` for ``epochs`` passes over ``train_loader``.

    Args:
        model: Module called as ``model(images, text_embeddings, cpm)``.
        train_loader: Iterable of ``(images, text_embeddings, cpm, labels)``
            batches.
        criterion: Loss called as ``criterion(outputs, labels.long())``.
        optimizer: Optimizer updating the model's parameters.
        epochs: Number of passes over the loader. Defaults to 10.

    Prints the mean batch loss after every epoch. If the loader yields no
    batches, a message is printed and training stops instead of dividing by
    zero.
    """
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        num_batches = 0
        for images, text_embeddings, cpm, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images, text_embeddings, cpm)
            loss = criterion(outputs, labels.long())
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            # Nothing to average; the original raised ZeroDivisionError here.
            print("No training batches available. Skipping training.")
            return

        print(f"Epoch {epoch+1}, Loss: {running_loss/num_batches}")



In [None]:
# Evaluation Function
def evaluate_model(model, test_loader):
    """Print the classification accuracy of ``model`` on ``test_loader``.

    Args:
        model: Module called as ``model(images, text_embeddings, cpm)``; it is
            switched to eval mode and left in that mode.
        test_loader: Iterable of ``(images, text_embeddings, cpm, labels)``
            batches.

    The predicted class is the arg-max over dimension 1 of the model output.
    If the loader yields no samples, a message is printed instead of dividing
    by zero.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, text_embeddings, cpm, labels in test_loader:
            outputs = model(images, text_embeddings, cpm)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        # The original raised ZeroDivisionError on an empty test set.
        print("No samples to evaluate.")
        return

    print(f"Accuracy: {100 * correct / total:.2f}%")



In [None]:
 # Main Execution
def main():
    """Run the full pipeline: fetch ads, build datasets, train, evaluate, save.

    Exits early (after printing a message) when the API returns no ads or
    when fewer than two usable ads remain after processing, since a train/test
    split needs at least one row on each side. On success the trained weights
    are written to ``ad_classifier.pth``.
    """
    raw_data = fetch_ads()
    if not raw_data:
        print("No data available. Exiting.")
        return

    df = process_data(raw_data)
    if len(df) < 2:
        # process_data may filter out every ad; splitting fewer than 2 rows
        # would make train_test_split raise.
        print("Not enough usable ads to train on. Exiting.")
        return

    # Train-Test Split
    train_df, test_df = split_data(df)
    train_dataset = AdDataset(train_df, transform=transform)
    test_dataset = AdDataset(test_df, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32)

    # Model, Loss, Optimizer
    model = AdClassifier()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train and Evaluate
    train_model(model, train_loader, criterion, optimizer)
    evaluate_model(model, test_loader)

    # Save Model
    torch.save(model.state_dict(), "ad_classifier.pth")
    print("Model saved!")

# Run the pipeline only when this code is executed directly (not when imported).
if __name__ == "__main__":
    main()

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torchvision.models import resnet18
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load Actual Dataset
# Read from the working directory; process_data() writes a CSV with this name.
# The code below uses the 'age_range', 'gender', 'location', 'cpm' and 'label' columns.
df = pd.read_csv("ads_data.csv")  # this contains 'age_range', 'gender', 'location', 'cpm', and 'label'

# Define Dataset Class (Extended for Demographics)
class AdDatasetWithDemographics(Dataset):
    """Dataset yielding image/text placeholders plus CPM, demographics and label.

    Args:
        df: DataFrame with ``age_range``, ``gender``, ``location``, ``cpm``
            and ``label`` columns. It is copied with a fresh 0..n-1 index, so
            the caller's frame (or slice) is not modified.
        transform: Stored on the instance; not applied anywhere in this class
            because the image is a random placeholder.

    Labels that are already numeric are used as class indices unchanged;
    non-numeric labels (for example ``"Okay"`` / ``"Not Okay"``) are converted
    to integers with a ``LabelEncoder``.

    NOTE: every encoder is fitted on the frame given to this instance, so two
    instances built from different splits only share a mapping when both
    splits contain the same set of values.
    """

    def __init__(self, df, transform=None):
        # reset_index returns a new frame, so the column assignments below do
        # not write into the caller's data.
        self.df = df.reset_index(drop=True)
        self.transform = transform

        # Encode demographic features
        self.age_encoder = LabelEncoder()
        self.gender_encoder = LabelEncoder()
        self.location_encoder = LabelEncoder()
        self.label_encoder = LabelEncoder()

        self.df["age_encoded"] = self.age_encoder.fit_transform(self.df["age_range"])
        self.df["gender_encoded"] = self.gender_encoder.fit_transform(self.df["gender"])
        self.df["location_encoded"] = self.location_encoder.fit_transform(self.df["location"])  # Uses actual locations

        # String labels cannot be turned into a long tensor directly, which is
        # what the original __getitem__ attempted.
        if pd.api.types.is_numeric_dtype(self.df["label"]):
            self.df["label_encoded"] = self.df["label"].astype("int64")
        else:
            self.df["label_encoded"] = self.label_encoder.fit_transform(self.df["label"])

    def __len__(self):
        """Return the number of rows in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the sample at positional index ``idx``.

        Returns:
            tuple: ``(image, text_embedding, cpm, age, gender, location,
            label)``. ``image`` and ``text_embedding`` are random placeholder
            tensors of shape ``(3, 224, 224)`` and ``(384,)``; ``cpm``,
            ``age``, ``gender`` and ``location`` are float32 tensors of shape
            ``(1,)``; ``label`` is a scalar int64 tensor.
        """
        row = self.df.iloc[idx]

        image = torch.randn(3, 224, 224)  # Placeholder for image tensor
        text_embedding = torch.randn(384)  # Placeholder for text embedding
        cpm = torch.tensor([row["cpm"]], dtype=torch.float32)

        # Convert demographic features into tensors
        age = torch.tensor([row["age_encoded"]], dtype=torch.float32)
        gender = torch.tensor([row["gender_encoded"]], dtype=torch.float32)
        location = torch.tensor([row["location_encoded"]], dtype=torch.float32)

        label = torch.tensor(int(row["label_encoded"]), dtype=torch.long)

        return image, text_embedding, cpm, age, gender, location, label

# Define Model with Demographics
class AdClassifierWithDemographics(nn.Module):
    """Two-class classifier over image, text, CPM and demographic inputs.

    Each input group is projected by its own linear layer (image -> 128 via a
    ResNet-18 backbone, text 384 -> 128, CPM 1 -> 32, demographics 3 -> 32)
    and the concatenated 320 features feed a final linear layer with
    2 outputs.
    """

    def __init__(self):
        super().__init__()
        # Image branch: pretrained ResNet-18 whose last layer becomes a
        # 512 -> 128 projection.
        backbone = resnet18(pretrained=True)
        backbone.fc = nn.Linear(512, 128)
        self.cnn = backbone
        self.text_fc = nn.Linear(384, 128)
        self.cpm_fc = nn.Linear(1, 32)
        # Age, gender and location arrive as one value each (3 inputs total).
        self.demographics_fc = nn.Linear(3, 32)
        self.fc = nn.Linear(128 + 128 + 32 + 32, 2)

    def forward(self, image, text_embedding, cpm, age, gender, location):
        """Return the class logits for one batch of inputs."""
        image_part = self.cnn(image)
        text_part = self.text_fc(text_embedding)
        cpm_part = self.cpm_fc(cpm)
        # The three demographic columns are joined side by side before projection.
        demographics = torch.cat([age, gender, location], dim=1)
        demographics_part = self.demographics_fc(demographics)
        fused = torch.cat([image_part, text_part, cpm_part, demographics_part], dim=1)
        return self.fc(fused)

# Function to Train Model
def train_model(model, train_loader, criterion, optimizer, epochs=5):
    """Train the demographics model for ``epochs`` passes over ``train_loader``.

    Args:
        model: Module called as
            ``model(images, text_embeddings, cpm, age, gender, location)``.
        train_loader: Iterable of ``(images, text_embeddings, cpm, age,
            gender, location, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer updating the model's parameters.
        epochs: Number of passes over the loader. Defaults to 5.

    Prints the mean batch loss after every epoch. If the loader yields no
    batches, a message is printed and training stops instead of dividing by
    zero.
    """
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        num_batches = 0
        for images, text_embeddings, cpm, age, gender, location, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images, text_embeddings, cpm, age, gender, location)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            # Nothing to average; the original raised ZeroDivisionError here.
            print("No training batches available. Skipping training.")
            return

        print(f"Epoch {epoch+1}, Loss: {running_loss/num_batches:.4f}")

# Function to Evaluate Model
def evaluate_model(model, test_loader):
    """Print the classification accuracy of the demographics model.

    Args:
        model: Module called as
            ``model(images, text_embeddings, cpm, age, gender, location)``;
            it is switched to eval mode and left in that mode.
        test_loader: Iterable of ``(images, text_embeddings, cpm, age,
            gender, location, labels)`` batches.

    The predicted class is the arg-max over dimension 1 of the model output.
    If the loader yields no samples, a message is printed instead of dividing
    by zero.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, text_embeddings, cpm, age, gender, location, labels in test_loader:
            outputs = model(images, text_embeddings, cpm, age, gender, location)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        # The original raised ZeroDivisionError on an empty test set.
        print("No samples to evaluate.")
        return

    print(f"Accuracy: {100 * correct / total:.2f}%")

# Train-Test Split (Using Real Data)
if len(df) < 2:
    raise ValueError("ads_data.csv must contain at least 2 rows to build train and test splits")

# Keep the original 75-row training split whenever it leaves a non-empty test
# split; for smaller datasets fall back to an 80/20 split so evaluation always
# has data. The splits are copied so the dataset classes never write into
# views of `df`.
split_at = 75 if len(df) > 75 else int(len(df) * 0.8)
train_df, test_df = df.iloc[:split_at].copy(), df.iloc[split_at:].copy()
train_dataset = AdDatasetWithDemographics(train_df)
test_dataset = AdDatasetWithDemographics(test_df)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8)

# Initialize Model and Optimizer
model = AdClassifierWithDemographics()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train and Evaluate
print("Training model...")
train_model(model, train_loader, criterion, optimizer)
evaluate_model(model, test_loader)

# Save Model
torch.save(model.state_dict(), "ad_classifier_with_demographics.pth")
print("Model saved!")
