In [1]:
%run ../common.ipynb

In [2]:
import boto3

# Initialize DynamoDB resource
dynamodb = boto3.resource('dynamodb')

# Define table name
table_name = "CustomerSupport"

# Create the table. The call is guarded so that re-running this cell
# (e.g. Restart Kernel -> Run All) does not fail when the table is already there.
try:
    table = dynamodb.create_table(
        TableName=table_name,
        KeySchema=[
            {"AttributeName": "TicketID", "KeyType": "HASH"},  # Partition key
            {"AttributeName": "CustomerID", "KeyType": "RANGE"}  # Sort key
        ],
        AttributeDefinitions=[
            {"AttributeName": "TicketID", "AttributeType": "S"},  # String type
            {"AttributeName": "CustomerID", "AttributeType": "S"}  # String type
        ],
        ProvisionedThroughput={
            "ReadCapacityUnits": 1,
            "WriteCapacityUnits": 1
        }
    )
except dynamodb.meta.client.exceptions.ResourceInUseException:
    # The table already exists (or is still being created): reuse it instead of failing.
    table = dynamodb.Table(table_name)
    table.wait_until_exists()
    print(f"DynamoDB Table '{table_name}' already exists; skipping creation.")
else:
    # Wait for the table to be created
    table.wait_until_exists()
    print(f"DynamoDB Table '{table_name}' created successfully!")


DynamoDB Table 'CustomerSupport' created successfully!


In [3]:
import boto3

# Initialize DynamoDB resource
dynamodb = boto3.resource('dynamodb')

# Define table name
table_name = "EnterpriseCampaigns"

# Create the table. The call is guarded so that re-running this cell
# (e.g. Restart Kernel -> Run All) does not fail when the table is already there.
try:
    table = dynamodb.create_table(
        TableName=table_name,
        KeySchema=[
            {"AttributeName": "CampaignID", "KeyType": "HASH"},  # Partition key
            {"AttributeName": "CustomerID", "KeyType": "RANGE"}  # Sort key
        ],
        AttributeDefinitions=[
            {"AttributeName": "CampaignID", "AttributeType": "S"},  # String type
            {"AttributeName": "CustomerID", "AttributeType": "S"}  # String type
        ],
        ProvisionedThroughput={
            "ReadCapacityUnits": 1,
            "WriteCapacityUnits": 1
        }
    )
except dynamodb.meta.client.exceptions.ResourceInUseException:
    # The table already exists (or is still being created): reuse it instead of failing.
    table = dynamodb.Table(table_name)
    table.wait_until_exists()
    print(f"DynamoDB Table '{table_name}' already exists; skipping creation.")
else:
    # Wait for the table to be created
    table.wait_until_exists()
    print(f"DynamoDB Table '{table_name}' created successfully!")


DynamoDB Table 'EnterpriseCampaigns' created successfully!


In [4]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, lit, expr, rand, when
import random

# Build the Spark session; the MySQL JDBC connector is requested via spark.jars.packages
spark = (
    SparkSession.builder
    .appName("Load_dynamodb_table")
    .config("spark.jars.packages", "mysql:mysql-connector-java:8.0.33")
    .getOrCreate()
)


SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/home/glue_user/spark/jars/log4j-slf4j-impl-2.17.2.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/home/glue_user/spark/jars/slf4j-reload4j-1.7.36.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/home/glue_user/aws-glue-libs/jars/log4j-slf4j-impl-2.17.2.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/home/glue_user/aws-glue-libs/jars/slf4j-reload4j-1.7.36.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]


:: loading settings :: url = jar:file:/home/glue_user/spark/jars/ivy-2.5.0.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /home/glue_user/.ivy2/cache
The jars for the packages stored in: /home/glue_user/.ivy2/jars
mysql#mysql-connector-java added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-661c2a8e-11a2-4548-af11-99b1e6b85d5b;1.0
	confs: [default]
	found mysql#mysql-connector-java;8.0.33 in central
	found com.mysql#mysql-connector-j;8.0.33 in central
	found com.google.protobuf#protobuf-java;3.21.9 in central
:: resolution report :: resolve 325ms :: artifacts dl 11ms
	:: modules in use:
	com.google.protobuf#protobuf-java;3.21.9 from central in [default]
	com.mysql#mysql-connector-j;8.0.33 from central in [default]
	mysql#mysql-connector-java;8.0.33 from central in [default]
	---------------------------------------------------------------------
	|                  |            modules            ||   artifacts   |
	|       conf       | number| search|dwnlded|evicted|| number|dwnlded|
	------------------------------------------------------------

In [7]:

import uuid
import random
from decimal import Decimal
from datetime import datetime


# Load the Customers table from MySQL over JDBC
customers_df = spark.read.jdbc(
    url=USER_MYSQL_URL,
    table="Customers",
    properties=MYSQL_PROPERTIES,
)
# Collect only the id column to the driver and flatten it into a plain list
customer_ids = [
    record["customer_id"]
    for record in customers_df.select("customer_id").collect()
]

# DynamoDB resource handle
dynamodb = boto3.resource('dynamodb')

# Target table for the campaign items (it must already exist in DynamoDB)
TABLE_NAME = "EnterpriseCampaigns"
table = dynamodb.Table(TABLE_NAME)

# Generate complex campaign data with nested structures, using MySQL customer IDs
def generate_complex_campaign_data(id_pool=None):
    """Build one synthetic campaign item with nested attributes.

    Args:
        id_pool: Optional sequence of customer ids to sample from. When omitted,
            the module-level ``customer_ids`` list is used, so the original
            zero-argument call keeps working.

    Returns:
        dict: An item with string ``CampaignID`` and ``CustomerID`` keys plus
        nested ``Customer``, ``Engagement``, ``CampaignDetails`` and
        ``Interactions`` data and an ISO-formatted ``Timestamp``. Fractional
        values are wrapped in ``Decimal``, the numeric type boto3's DynamoDB
        serializer accepts in place of ``float``.
    """
    pool = customer_ids if id_pool is None else id_pool
    # Fall back to a random UUID when there is no customer to pick from.
    # The id is converted to a string once so both copies below always agree.
    customer_id = str(random.choice(pool)) if pool else str(uuid.uuid4())
    return {
        'CampaignID': str(uuid.uuid4()),
        'CustomerID': customer_id,
        'Customer': {
            'CustomerID': customer_id,
            'Email': f'user{random.randint(1000, 9999)}@enterprise.com'
        },
        'Engagement': {
            'Clicks': random.randint(0, 500),
            'PromotionsReceived': random.randint(1, 20),
            'Opens': random.randint(0, 100),
            'ConversionRate': Decimal(str(round(random.uniform(0, 1), 2))),
            'RevenueGenerated': Decimal(str(round(random.uniform(10, 1000), 2)))
        },
        'CampaignDetails': {
            'Type': random.choice(['Email', 'SMS', 'Push Notification', 'Social Media']),
            'Region': random.choice(['North America', 'Europe', 'Asia', 'South America', 'Africa']),
            'DeviceType': random.choice(['Mobile', 'Desktop', 'Tablet'])
        },
        # Between one and five interaction records per campaign
        'Interactions': [
            {
                'InteractionID': str(uuid.uuid4()),
                'Timestamp': datetime.utcnow().isoformat(),
                'Channel': random.choice(['Email', 'SMS', 'Web', 'App']),
                'Action': random.choice(['Opened', 'Clicked', 'Ignored'])
            } for _ in range(random.randint(1, 5))
        ],
        'Timestamp': datetime.utcnow().isoformat()
    }

# Insert data into DynamoDB
def insert_campaign_data():
    """Generate one campaign item and write it to the module-level ``table``.

    Returns:
        The response returned by ``table.put_item``.
    """
    return table.put_item(Item=generate_complex_campaign_data())

# Insert multiple records
def insert_bulk_data(count=10):
    """Write ``count`` freshly generated campaign items to the module-level ``table``.

    Items go through ``table.batch_writer()``, which buffers the puts and sends
    them as batch requests, instead of one ``put_item`` round trip per record.

    Args:
        count: Number of items to generate and write (default 10). A value of
            zero or less writes nothing.
    """
    with table.batch_writer() as batch:
        for _ in range(count):
            batch.put_item(Item=generate_complex_campaign_data())

# Load 100 campaign items when this cell runs as the main module
if __name__ == "__main__":
    insert_bulk_data(count=100)


In [8]:


# Load the Customers table from MySQL over JDBC
customers_df = spark.read.jdbc(
    url=USER_MYSQL_URL,
    table="Customers",
    properties=MYSQL_PROPERTIES,
)
# Collect only the id column to the driver and flatten it into a plain list
customer_ids = [
    record["customer_id"]
    for record in customers_df.select("customer_id").collect()
]

# DynamoDB resource handle
dynamodb = boto3.resource('dynamodb')

# Target table for the support tickets (it must already exist in DynamoDB).
# Note: this rebinds the module-level `table` name used by the insert helpers.
TABLE_NAME = "CustomerSupport"
table = dynamodb.Table(TABLE_NAME)

# Generate complex customer service data using MySQL customer IDs
def generate_support_ticket(id_pool=None):
    """Build one synthetic support-ticket item with nested attributes.

    Args:
        id_pool: Optional sequence of customer ids to sample from. When omitted,
            the module-level ``customer_ids`` list is used, so the original
            zero-argument call keeps working.

    Returns:
        dict: An item with string ``TicketID`` and ``CustomerID`` keys plus
        nested ``Customer``, ``Issue``, ``Interactions`` and ``Resolution``
        data and an ISO-formatted ``Timestamp``.
    """
    pool = customer_ids if id_pool is None else id_pool
    # Fall back to a random UUID when there is no customer to pick from.
    # The id is converted to a string once so the top-level key and the nested
    # Customer.CustomerID are always stored with the same type and value.
    customer_id = str(random.choice(pool)) if pool else str(uuid.uuid4())
    return {
        'TicketID': str(uuid.uuid4()),
        'CustomerID': customer_id,
        'Customer': {
            'CustomerID': customer_id,
            'Email': f'user{random.randint(1000, 9999)}@support.com'
        },
        'Issue': {
            'Category': random.choice(['Billing', 'Technical', 'General Inquiry', 'Account', 'Other']),
            'Priority': random.choice(['Low', 'Medium', 'High', 'Critical']),
            'Status': random.choice(['Open', 'In Progress', 'Resolved', 'Closed'])
        },
        # Between one and five interaction records per ticket
        'Interactions': [
            {
                'InteractionID': str(uuid.uuid4()),
                'Timestamp': datetime.utcnow().isoformat(),
                'Type': random.choice(['Chatbot', 'Agent', 'Email', 'Phone']),
                'Notes': random.choice(['Resolved in first contact', 'Escalated to level 2', 'Pending customer response'])
            } for _ in range(random.randint(1, 5))
        ],
        'Resolution': {
            'TimeHours': random.randint(1, 72),
            'CustomerSatisfaction': random.choice(['Very Satisfied', 'Satisfied', 'Neutral', 'Dissatisfied', 'Very Dissatisfied'])
        },
        'Timestamp': datetime.utcnow().isoformat()
    }

# Insert data into DynamoDB
def insert_support_ticket():
    """Generate one support ticket and write it to the module-level ``table``.

    Returns:
        The response returned by ``table.put_item``.
    """
    return table.put_item(Item=generate_support_ticket())

# Insert multiple records
def insert_bulk_tickets(count=10):
    """Write ``count`` freshly generated support tickets to the module-level ``table``.

    Items go through ``table.batch_writer()``, which buffers the puts and sends
    them as batch requests, instead of one ``put_item`` round trip per record.

    Args:
        count: Number of tickets to generate and write (default 10). A value of
            zero or less writes nothing.
    """
    with table.batch_writer() as batch:
        for _ in range(count):
            batch.put_item(Item=generate_support_ticket())

# Load 500 support tickets when this cell runs as the main module
if __name__ == "__main__":
    insert_bulk_tickets(count=500)
