In [1]:
import pandas as pd
import numpy as np
import csv
import random
import datetime
import boto3
import schedule
import time
import import_ipynb
import nbimporter
from faker import Faker
from statistics import mean

In [2]:
# Shared Faker instance; generate_conversation_data reads this module-level
# name to produce IDs, timestamps, sentences and words.
faker = Faker()

def generate_conversation_data(num_records, fake=None):
    """Generate synthetic call-centre conversation records.

    Args:
        num_records: Number of records to create. Zero or a negative value
            yields an empty list.
        fake: Optional Faker-like provider exposing ``uuid4()``,
            ``date_time_this_year()``, ``sentence()`` and ``word()``.
            Defaults to the module-level ``faker`` instance.

    Returns:
        A list of dicts, one per call, with the keys ``customer_id``,
        ``conversation_id``, ``agent_id``, ``call_start``, ``call_end``,
        ``call_duration_sec``, ``call_status``, ``transcript``,
        ``sentiment_score``, ``keywords``, ``created_at`` and ``updated_at``.
    """
    provider = faker if fake is None else fake
    call_statuses = ['Answered', 'Missed', 'Rejected', 'Hangup', 'Voicemail']

    data = []
    for _ in range(num_records):
        call_start = provider.date_time_this_year()
        call_duration_sec = random.randint(30, 600)
        # Derive the end time from start + duration so the three call fields
        # agree; drawing call_end independently could place it before
        # call_start and made the duration meaningless.
        call_end = call_start + datetime.timedelta(seconds=call_duration_sec)
        # A single timestamp keeps created_at and updated_at identical for a
        # freshly generated record.
        generated_at = datetime.datetime.now()

        data.append({
            'customer_id': provider.uuid4(),
            'conversation_id': provider.uuid4(),
            'agent_id': provider.uuid4(),
            'call_start': call_start,
            'call_end': call_end,
            'call_duration_sec': call_duration_sec,
            'call_status': random.choice(call_statuses),
            'transcript': provider.sentence(),
            'sentiment_score': random.uniform(0, 1),
            'keywords': [provider.word() for _ in range(random.randint(1, 5))],
            'created_at': generated_at,
            'updated_at': generated_at
        })
    return data

# Initial 1000-record synthetic dataset; the metrics cell below reads it.
synthetic_data = generate_conversation_data(num_records=1000)

In [3]:
def calculate_metrics(data):
    """Compute per-agent call-length statistics.

    Args:
        data: Iterable of record dicts, each providing ``'agent_id'`` and
            ``'call_duration_sec'``.

    Returns:
        A dict mapping each agent id to a dict with
        ``'average_call_length'`` (arithmetic mean of that agent's durations)
        and ``'90th_percentile_call_length'`` (``np.percentile(..., 90)``).
        Agents appear in the order they are first seen in ``data``; an empty
        input yields an empty dict.
    """
    # Group durations in a single pass instead of rescanning every record once
    # per agent. A dict also keeps first-appearance order, so the result (and
    # any CSV written from it) is stable between runs, unlike iterating a set.
    durations_by_agent = {}
    for record in data:
        durations_by_agent.setdefault(record['agent_id'], []).append(
            record['call_duration_sec']
        )

    return {
        agent: {
            'average_call_length': mean(durations),
            '90th_percentile_call_length': np.percentile(durations, 90)
        }
        for agent, durations in durations_by_agent.items()
    }

# Per-agent statistics for the dataset generated above; written to CSV in the next cell.
metrics = calculate_metrics(synthetic_data)

In [4]:
def write_metrics_to_csv(metrics, csv_filename):
    """Write per-agent metrics to a CSV file, replacing any existing file.

    Args:
        metrics: Mapping of agent id to a dict holding
            ``'average_call_length'`` and ``'90th_percentile_call_length'``.
        csv_filename: Path of the CSV file to create or overwrite.

    The file starts with a header row, followed by one row per agent in the
    mapping's iteration order.
    """
    header = ['Agent ID', 'Average Call Length', '90th Percentile Call Length']
    # Lazy row generator: rows are built while the file is open, one at a time.
    rows = (
        [agent_id, stats['average_call_length'], stats['90th_percentile_call_length']]
        for agent_id, stats in metrics.items()
    )
    # newline='' lets the csv module control line endings itself.
    with open(csv_filename, 'w', newline='') as out_file:
        out = csv.writer(out_file)
        out.writerow(header)
        out.writerows(rows)

# Output path is relative to the kernel's working directory; an existing file
# is overwritten because the writer opens it in 'w' mode.
csv_filename = 'metrics.csv'
write_metrics_to_csv(metrics, csv_filename)

In [5]:
# Import `upload_csv_to_s3` from the sibling `s3_upload` notebook. Importing a
# notebook as a module relies on the notebook-import hooks loaded in the first
# cell (import_ipynb / nbimporter).
from s3_upload import upload_csv_to_s3

# Upload parameters: the CSV written by the previous cell and the target bucket.
csv_filename = 'metrics.csv'
bucket_name = 'conversationcustomerdata'

# Push the metrics CSV to the bucket. The upload logic lives in s3_upload.ipynb
# and is not visible here — presumably it needs AWS credentials to be
# configured in the environment; TODO confirm.
upload_csv_to_s3(csv_filename, bucket_name)

importing Jupyter notebook from s3_upload.ipynb


In [6]:
import datetime

def run_daily_task(num_records=1000, csv_filename='metrics.csv',
                   bucket_name='conversationcustomerdata'):
    """Run the full pipeline once: generate data, compute metrics, write CSV, upload.

    Args:
        num_records: Number of synthetic conversation records to generate.
        csv_filename: Path of the metrics CSV to write and then upload.
        bucket_name: Name of the bucket passed to ``upload_csv_to_s3``.

    Each stage prints a progress message when it completes. Calling the
    function with no arguments behaves exactly as before the parameters were
    added.
    """
    synthetic_data = generate_conversation_data(num_records=num_records)
    print("Synthetic data generated.")

    metrics = calculate_metrics(synthetic_data)
    print("Metrics calculated.")

    write_metrics_to_csv(metrics, csv_filename)
    print("Metrics written to CSV file.")

    upload_csv_to_s3(csv_filename, bucket_name)
    print("CSV file uploaded to S3.")

# One-shot time gate: evaluated once, when this cell is executed. Nothing in
# this cell re-checks the clock or repeats the task later.
# Both values are zero-padded 24-hour "HH:MM" strings, so the plain string
# comparison below orders them chronologically.
current_time = datetime.datetime.now().strftime("%H:%M")
scheduled_time = "16:15"  # Earliest local time of day at which the task may run

# Runs the task whenever the cell executes at or after the scheduled time,
# including on every re-run of the cell later the same day.
if current_time >= scheduled_time:
    print("Executing daily task...")
    run_daily_task()
else:
    print("It's not yet time to execute the daily task.")

Executing daily task...
Synthetic data generated.
Metrics calculated.
Metrics written to CSV file.
CSV file uploaded to S3.
