In [40]:
from concurrent.futures import ThreadPoolExecutor
from faker import Faker
import pandas as pd

In [41]:
fake_nl = Faker('nl_NL')

In [42]:
fake_en = Faker('en_US')

In [43]:
# List of common email domains in the Netherlands
email_domains = ['gmail.com', 'hotmail.com', 'yahoo.com', 'outlook.com', 'ziggo.nl', 'mns.nl', 'live.com', 'icloud.com']


In [44]:
# Number of rows (for testing purposes, adjust as needed)
num_rows = 3000000


In [45]:
import csv

In [46]:
# Function to generate synthetic data for a single row
def generate_synthetic_email():
    prefix = fake_nl.user_name()
    domain = fake_nl.random_element(elements=email_domains)
    synthetic_email = f"{prefix}@{domain}"
    return synthetic_email

def generate_data(_):
    sender_info = {
        "Sender ID": fake_nl.uuid4(),
        "Sender Name": "UITDELEN NGO",
        "Sender Email": "uitdelenngo@daily.nl",
    }

    recipient_info = {
        "Recipient ID": fake_nl.uuid4(),
        "Recipient Name": fake_nl.name(),
        "Recipient Email": generate_synthetic_email(),
        "Recipient Segmentation": fake_nl.random_element(elements=['Donor', 'Volunteer', 'Sponsor', 'Partner']),
    }
    if recipient_info["Recipient Segmentation"] == 'Donor':
        content_details = {
            "Subject Line": "Support UITDELEN NGO: Your Contribution Matters!",
            "Email Body": f"Dear {recipient_info['Recipient Name']},\n\n"
                          f"Thank you for your continued support as a valued donor of UITDELEN NGO. "
                          f"Your generosity helps us provide meals to children in Eastern Europe every day. "
                          f"We appreciate your commitment to making a positive impact on their lives.\n\n"
                          f"Sincerely,\n"
                          f"{sender_info['Sender Name']}",
        }
    elif recipient_info["Recipient Segmentation"] == 'Volunteer':
        content_details = {
            "Subject Line": "Join UITDELEN NGO: Volunteer Opportunities Await!",
            "Email Body": f"Dear {recipient_info['Recipient Name']},\n\n"
                          f"We invite you to join UITDELEN NGO as a volunteer and be a part of our mission "
                          f"to ensure that every child in Eastern Europe has a meal every day. Your dedication "
                          f"can make a meaningful difference in their lives.\n\n"
                          f"Best regards,\n"
                          f"{sender_info['Sender Name']}",
        }
    elif recipient_info["Recipient Segmentation"] == 'Sponsor':
        content_details = {
            "Subject Line": "UITDELEN NGO Sponsorship: Together We Can Make a Difference!",
            "Email Body": f"Dear {recipient_info['Recipient Name']},\n\n"
                          f"At UITDELEN NGO, we believe in the power of partnerships. Your sponsorship "
                          f"can contribute to our mission of providing meals to children in Eastern Europe. "
                          f"Let's make a positive impact together!\n\n"
                          f"Thank you for considering sponsorship.\n\n"
                          f"Sincerely,\n"
                          f"{sender_info['Sender Name']}",
        }
    elif recipient_info["Recipient Segmentation"] == 'Partner':
        content_details = {
            "Subject Line": "UITDELEN NGO Partnership: Let's Collaborate for a Cause!",
            "Email Body": f"Dear {recipient_info['Recipient Name']},\n\n"
                          f"We are reaching out to explore potential partnerships with organizations "
                          f"that share our vision. Together, we can make a lasting impact on the lives of children "
                          f"in Eastern Europe. Let's collaborate for a cause!\n\n"
                          f"Looking forward to the possibility of partnership.\n\n"
                          f"Best regards,\n"
                          f"{sender_info['Sender Name']}",
        }   

        
    timestamps = {
        "Date Sent": fake_nl.date_this_decade(),
        "Time Sent": fake_nl.time(),
        "Day of Week": fake_nl.random_element(elements=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']),
        "Month": fake_en.month_name(),
        "Year": fake_nl.date_this_decade().year,
    }

    engagement_metrics = {
        "Open Rate": fake_nl.random_int(0, 100),
        "Click-Through Rate": fake_nl.random_int(0, 100),
        "Conversion Rate": fake_nl.random_int(0, 100),
        "Bounce Rate": fake_nl.random_int(0, 10),
        "Unsubscribe Rate": fake_nl.random_int(0, 5),
    }

    # Add other dictionaries as needed

    # Combine all dictionaries into one
    row_data = {**sender_info, **recipient_info, **content_details, **timestamps, **engagement_metrics}

    return row_data


def generate_campaign_data(num_campaigns, num_recipients_per_campaign):
    campaign_data = []

    for campaign_id in range(num_campaigns):
        recipients = [fake_nl.uuid4() for _ in range(num_recipients_per_campaign)]
        for recipient_id in recipients:
            campaign_data.append(generate_data(recipient_id))

    return campaign_data

def main():
    num_campaigns = 100  # Adjust the number of campaigns
    num_recipients_per_campaign = 2000 # Adjust the number of recipients per campaign

    # Specify the CSV file path
    csv_file_path = 'email_campaign_data.csv'

    campaign_data = generate_campaign_data(num_campaigns, num_recipients_per_campaign)

    with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
        fieldnames = campaign_data[0].keys()
        csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

        # Write header
        csv_writer.writeheader()

        # Write data rows
        csv_writer.writerows(campaign_data)

    print(f"Synthetic data has been generated and saved to {csv_file_path}.")

if __name__ == "__main__":
    main()

# Use ThreadPoolExecutor for parallel processing
with ThreadPoolExecutor(max_workers=4) as executor:  # Adjust max_workers as needed
    # Generate synthetic data using parallel processing
    data = list(executor.map(generate_data, range(num_rows)))


Synthetic data has been generated and saved to email_campaign_data.csv.


In [47]:
# Create a DataFrame
df = pd.DataFrame(data)

# Save to CSV
df.to_csv("synthetic_uitdelenngo_email_campaign_dataset_small.csv3", index=False)

print("Synthetic UITDELEN NGO email campaign dataset created and saved to synthetic_uitdelenngo_email_campaign_dataset_small.csv")


Synthetic UITDELEN NGO email campaign dataset created and saved to synthetic_uitdelenngo_email_campaign_dataset_small.csv


In [48]:
email_data = pd.read_csv("/Users/susanndinoshinge/Downloads/Data_Analytics/synthetic_uitdelenngo_email_campaign_dataset_small.csv3")

In [49]:
email_data.head(10)

Unnamed: 0,Sender ID,Sender Name,Sender Email,Recipient ID,Recipient Name,Recipient Email,Recipient Segmentation,Subject Line,Email Body,Date Sent,Time Sent,Day of Week,Month,Year,Open Rate,Click-Through Rate,Conversion Rate,Bounce Rate,Unsubscribe Rate
0,109f224b-0799-4c7e-b8cc-748c5d5d2ced,UITDELEN NGO,uitdelenngo@daily.nl,4494dbd0-08b5-47bd-8748-531f93cef443,Noor Brouwer-van der Pluijm,stoffelelize@hotmail.com,Donor,Support UITDELEN NGO: Your Contribution Matters!,"Dear Noor Brouwer-van der Pluijm,\n\nThank you...",2023-10-06,05:43:01,Thursday,April,2022,91,58,4,8,5
1,271f9284-3615-4875-bf8b-5c8948d1a2ba,UITDELEN NGO,uitdelenngo@daily.nl,f8ed6a77-b58b-4648-9af2-c938dc21dc57,David Moensendijk-Eijkelboom,sam74@gmail.com,Donor,Support UITDELEN NGO: Your Contribution Matters!,"Dear David Moensendijk-Eijkelboom,\n\nThank yo...",2023-03-26,00:54:14,Friday,August,2023,53,12,34,0,1
2,4678bd3e-9a25-4916-a90c-c21e4e63cef5,UITDELEN NGO,uitdelenngo@daily.nl,9b889d69-ad89-4fb3-9b95-39ceb7eb180b,Elisabeth van Straaten,elize97@hotmail.com,Sponsor,UITDELEN NGO Sponsorship: Together We Can Make...,"Dear Elisabeth van Straaten,\n\nAt UITDELEN NG...",2021-09-22,15:05:36,Tuesday,November,2020,15,60,69,1,1
3,f40a93f4-0952-4d9d-b632-e3f09931cebb,UITDELEN NGO,uitdelenngo@daily.nl,dc8e445b-1e33-4bf2-b1f7-d85febf994b6,Brian Hanegraaff,xavi00@hotmail.com,Volunteer,Join UITDELEN NGO: Volunteer Opportunities Await!,"Dear Brian Hanegraaff,\n\nWe invite you to joi...",2022-03-30,05:11:15,Friday,August,2020,33,89,91,3,5
4,1b79f136-6062-42b0-b804-9af1c87a2b24,UITDELEN NGO,uitdelenngo@daily.nl,8bdfdb0c-1695-4c9f-8a38-9377d2a60163,Zoë Takkelenburg,ashleyvan-den-brink@yahoo.com,Volunteer,Join UITDELEN NGO: Volunteer Opportunities Await!,"Dear Zoë Takkelenburg,\n\nWe invite you to joi...",2020-08-02,06:17:39,Tuesday,November,2021,24,36,68,8,4
5,e9da8deb-35d6-4285-a0a6-5e2583c6f7de,UITDELEN NGO,uitdelenngo@daily.nl,c6d9badd-d6d2-4ec0-a8bd-67fec83a1c41,Lot Verhaar,psalet@live.com,Sponsor,UITDELEN NGO Sponsorship: Together We Can Make...,"Dear Lot Verhaar,\n\nAt UITDELEN NGO, we belie...",2023-06-21,19:59:04,Thursday,June,2023,95,53,2,5,1
6,be882851-896e-4df7-a50d-5215388475c1,UITDELEN NGO,uitdelenngo@daily.nl,59dca733-91c1-4917-8a0a-fee2af367d57,Sten van Beeck Beeckmans,alyssavan-den-nuwenhijsen@mns.nl,Volunteer,Join UITDELEN NGO: Volunteer Opportunities Await!,"Dear Sten van Beeck Beeckmans,\n\nWe invite yo...",2021-10-27,17:25:47,Thursday,October,2020,84,82,10,9,3
7,46c14f36-5c31-4e87-b7c7-392dd600c572,UITDELEN NGO,uitdelenngo@daily.nl,2969ed63-3818-4631-aa3f-d4268819ecc1,Joëlle Zijlemans,van-der-walquinten@yahoo.com,Sponsor,UITDELEN NGO Sponsorship: Together We Can Make...,"Dear Joëlle Zijlemans,\n\nAt UITDELEN NGO, we ...",2023-08-09,08:17:47,Wednesday,May,2022,42,26,5,1,0
8,1c203f4f-a602-4447-a226-d0d08a1e47d1,UITDELEN NGO,uitdelenngo@daily.nl,f33195a9-1ffe-4ff4-af14-c59f2fe8273f,Yassin Claassen,acoreth-von-und-zu-coredo-und-starkenberg@live...,Sponsor,UITDELEN NGO Sponsorship: Together We Can Make...,"Dear Yassin Claassen,\n\nAt UITDELEN NGO, we b...",2024-01-06,11:39:03,Wednesday,April,2024,45,2,46,6,2
9,bb2c49c0-4cc4-403c-8316-452c72edf3a4,UITDELEN NGO,uitdelenngo@daily.nl,4d8c5b73-e4b3-42d3-8297-3b872dc28e16,Iris Guit,jens37@hotmail.com,Partner,UITDELEN NGO Partnership: Let's Collaborate fo...,"Dear Iris Guit,\n\nWe are reaching out to expl...",2023-04-10,22:00:17,Wednesday,January,2021,96,82,79,0,1
