# Load Email Addresses

In [1]:
# Experiment whose rows are selected by the queries below (bound there as :experiment_id).
experiment_id = 4

In [2]:
%%query [lms] (experiment_id=experiment_id) -> users
SELECT
    exp.user_id,
    exp.course_id,
    exp.value AS json_data,
    au.email AS user_email,
    /* Fall back to the username when the profile name is NULL or blank;
       a bare COALESCE keeps empty-string names. */
    COALESCE(NULLIF(TRIM(aup.name), ''), au.username) AS student_name,
    sce.mode AS enrollment_mode,
    cm.sku
FROM
(
    SELECT
        user_id,
        /* Keeps the key from its 26th character onwards; presumably strips a
           fixed-length prefix in front of the course id (TODO confirm). */
        SUBSTRING(`key` FROM 26) AS course_id,
        value
    FROM
        experiments_experimentdata
    WHERE
        experiment_id = :experiment_id
) AS exp
JOIN
    auth_user au ON au.id = exp.user_id
JOIN
    auth_userprofile aup ON aup.user_id = exp.user_id
/* Anti-join, any opt-out row for the user excludes that user entirely. */
LEFT JOIN
    bulk_email_optout AS eo ON eo.user_id = exp.user_id
/* Only active enrollments are wanted, so this is an inner join. The previous
   LEFT JOIN plus WHERE sce.is_active = 1 returned exactly the same rows. */
JOIN
    student_courseenrollment AS sce
        ON  sce.user_id = exp.user_id
        AND sce.course_id = exp.course_id
        AND sce.is_active = 1
/* Optional, sku is NULL for courses without a verified mode. */
LEFT JOIN
    course_modes_coursemode AS cm ON cm.course_id = exp.course_id AND cm.mode_slug = 'verified'
WHERE
    eo.user_id IS NULL

In [3]:
import ciso8601
import json
import datetime
import math

# Current UTC calendar date (timezone-aware replacement for the deprecated utcnow()).
today_utc = datetime.datetime.now(datetime.timezone.utc).date()

# Decode every JSON payload once and derive all columns from the decoded dicts,
# instead of calling json.loads three times per row.
experiment_data = users['json_data'].map(json.loads)

users['segment'] = experiment_data.map(lambda data: data['segment'])
users['cohort_availability'] = experiment_data.map(
    lambda data: ciso8601.parse_datetime(data['cohort_availability']).date()
)
# Rows without an explicit reason are labelled 'visit'.
users['segmentation_reason'] = experiment_data.map(lambda data: data.get('segmentation_reason', 'visit'))
# Whole weeks elapsed since the cohort date (0 during the first seven days).
users['week_number'] = users['cohort_availability'].apply(lambda d: (today_utc - d).days // 7)
# Upgrade deadline is 20 days after the cohort date.
users['verified_upgrade_deadline'] = users['cohort_availability'].apply(lambda d: d + datetime.timedelta(days=20))

In [4]:
%%vertica -> course_titles
SELECT
    dc.course_id,
    /* Catalog title, exposed under the name the email code expects. */
    dc.catalog_course_title AS course_name
FROM
    production.d_course AS dc

In [5]:
course_titles.head()

Unnamed: 0,course_id,course_name
0,AdelaideX/HumBio101x/1T2015,Essential Human Biology: Cells and Tissues
1,ak/akSState/1T2014,TestAK
2,ANUx/ANU-ASTRO1x/1T2014,Greatest Unsolved Mysteries of the Universe
3,ANUx/ANU-ASTRO2x/2T2014,Exoplanets
4,ANUx/ANU-ASTRO3x/4T2014,The Violent Universe


In [6]:
import pandas as pd

# Treatment segment only (segment == 1), with the course title attached to every row.
treatment_segment = users[users.segment == 1]
treatment_users = treatment_segment.merge(course_titles, how='inner', on='course_id')

In [7]:
# Joining course titles must neither drop treatment users (course without a title)
# nor duplicate them (course listed more than once).
expected_treatment_count = len(users[users.segment == 1])
assert expected_treatment_count == len(treatment_users), (
    f'Expected {expected_treatment_count} treatment users after joining course titles, '
    f'got {len(treatment_users)}'
)

We expect two cohorts of users before sending the email each week (except the first week)

In [8]:
treatment_users.groupby('week_number').user_email.count()

week_number
4    2593
5    2481
Name: user_email, dtype: int64

In [9]:
treatment_users.groupby(['week_number', 'cohort_availability']).user_email.count()

week_number  cohort_availability
4            2017-07-26             2593
5            2017-07-19             2481
Name: user_email, dtype: int64

In [10]:
treatment_users.groupby(['course_id', 'week_number']).user_email.count()

course_id                               week_number
course-v1:AdelaideX+Project101x+1T2017  4              343
                                        5              606
course-v1:ColumbiaX+DS101X+1T2017       4              231
                                        5              212
course-v1:DelftX+LfE101x+2T2017         4              500
                                        5              154
course-v1:HarvardX+PH525.1x+2T2017      4              501
                                        5              477
course-v1:IIMBx+AC103x+2T2017           4              149
                                        5              127
course-v1:MichiganX+UX501x+3T2016       4              121
                                        5              120
course-v1:PennX+SD1x+2T2017             4              188
                                        5              260
course-v1:PennX+SD2x+2T2017             4              110
                                        5              159
cour

# Load the email contents

In [11]:
%%query [lms] (experiment_id) -> email_content
SELECT
    kv.`key`,
    kv.value AS email_content
FROM
    experiments_experimentkeyvalue AS kv
WHERE
    kv.experiment_id = :experiment_id

In [12]:
email_content.head()

Unnamed: 0,key,email_content
0,content.course-v1:AdelaideX+Project101x+1T2017.0,"<ul class=""ret-pacing-weekly-content"" data-wee..."
1,content.course-v1:AdelaideX+Project101x+1T2017.1,"<ul class=""ret-pacing-weekly-content"" data-wee..."
2,content.course-v1:AdelaideX+Project101x+1T2017.2,"<ul class=""ret-pacing-weekly-content"" data-wee..."
3,content.course-v1:AdelaideX+Project101x+1T2017.3,"<ul class=""ret-pacing-weekly-content"" data-wee..."
4,content.course-v1:AdelaideX+Project101x+1T2017.4,"<ul class=""ret-pacing-weekly-content"" data-wee..."


In [13]:
def join_email_content(email_df, content_df=None):
    """Attach the weekly email content to every row of ``email_df``.

    A lookup key of the form ``content.<course_id>.<week_number>`` is built for
    each row and matched against the ``key`` column of the content frame.

    Args:
        email_df: DataFrame with ``course_id`` and ``week_number`` columns.
            It is not modified.
        content_df: DataFrame with a ``key`` column (plus the content columns
            to attach). Defaults to the notebook-level ``email_content``.

    Returns:
        A new DataFrame with the columns of ``email_df``, an ``email_key``
        column and the columns of the content frame. The merge is an inner
        join, so rows without matching content are left out.
    """
    if content_df is None:
        content_df = email_content

    # Vectorised key construction; unlike a row-wise apply this also works on
    # an empty frame.
    email_keys = (
        'content.'
        + email_df['course_id'].astype(str)
        + '.'
        + email_df['week_number'].astype(str)
    )
    # assign() returns a copy, so the caller's frame does not gain a column.
    keyed = email_df.assign(email_key=email_keys)
    return keyed.merge(content_df, left_on='email_key', right_on='key')

In [14]:
# join_email_content merges on the content key with pandas' default inner join,
# so rows without a matching content entry do not appear in week_emails.
week_emails = join_email_content(treatment_users)

In [15]:
len(week_emails)

2893

# Set up Sailthru Infrastructure

In [16]:
import getpass
import os


def _env_or_prompt(env_var, prompt):
    """Return the value of ``env_var``, asking interactively when it is unset."""
    value = os.getenv(env_var)
    if value is None:
        value = getpass.getpass(prompt=prompt)
    return value


# Credentials come from the environment when available, otherwise from a hidden prompt.
sailthru_api_key = _env_or_prompt('OPS_SAILTHRU_API_KEY', 'sailthru_api_key')
sailthru_api_secret = _env_or_prompt('OPS_SAILTHRU_API_SECRET', 'sailthru_api_secret')

In [17]:
# Name of the Sailthru template passed to multi_send for every batch.
TEMPLATE_NAME = 'RET Pacing'
# Upper bound on recipients per multi_send call; send_to_users asserts that
# its batch_size never exceeds it.
MAX_BATCH_SIZE = 100

# Send email kill switch

When `SEND_EMAIL` is set to `False` no emails will be sent.

In [18]:
from sailthru.sailthru_client import SailthruClient

# Kill switch read by SailthruEmail._send_batch: while it is False, batches are
# built and validated but nothing is handed to Sailthru. Defined here with the
# safe value so the name always exists before any send is attempted.
SEND_EMAIL = False

# One shared client, available under both names used later in the notebook.
sc = sailthru_client = SailthruClient(sailthru_api_key, sailthru_api_secret)

Vars in the template - the email will not be sent (fail silently) if these are not set!

* ret_pacing_body
* ret_pacing_course_id
* ret_pacing_subject_line
* ret_pacing_preview_text
* ret_pacing_from_name
* ret_pacing_upgrade_url

In [19]:
import itertools
import time
from pprint import pprint as pp
import numpy as np
from functools import partial
import json
from urllib.parse import quote_plus
import textwrap
import ipywidgets as widgets
from IPython.display import display

# Link that adds the course's verified seat (identified by its SKU) to the basket.
BASKET_URL_FORMAT = 'https://ecommerce.edx.org/basket/add/?sku={sku}'
# HTML body of the email; the placeholders are filled from a row of the email DataFrame.
BODY_TEMPLATE = textwrap.dedent("""\
<p>
    Dear {student_name},
</p>
<br>
<p>
    Welcome to week {week} of our {course_name} course!  We hope you are enjoying learning with
    us so far. If you have fallen behind, don't worry. You can still catch up.
</p>
<br>
<p>
    Here is what you can look forward to learning this week:
    {email_content}
</p>
<br>
<p>
    Your focused attention to the course will pay off in the end.  Keep up
    the good work!
</p>
<br>
<p>
    With Gratitude,
    <br>
    {course_name} Course Team
</p>
""")


def _is_missing(value):
    """Return True for None, NaN and empty values.

    pandas represents missing cells as NaN, which is truthy, so a plain
    truthiness check lets missing values through.
    """
    if value is None:
        return True
    if isinstance(value, float) and value != value:  # NaN is the only float unequal to itself
        return True
    return not value


def as_template_vars(row):
    """Build the Sailthru template variables for one recipient.

    Args:
        row: a named tuple produced by ``DataFrame.itertuples()`` with at least
            ``week_number``, ``email_content``, ``student_name``,
            ``course_name`` and ``course_id``. ``sku``, ``enrollment_mode`` and
            ``verified_upgrade_deadline`` are used when present.

    Returns:
        dict of template variable name -> value. The upgrade URL and deadline
        are only included for audit learners in the first three weeks of a
        course that has a SKU.

    Raises:
        AssertionError: if a required value is missing (None, NaN or empty).
    """
    row_vals = row._asdict()
    # week_number is zero-based; the email shows a one-based week.
    row_vals['week'] = row.week_number + 1

    for required in ('email_content', 'student_name', 'course_name', 'course_id'):
        assert not _is_missing(row_vals.get(required)), f'{required} is empty for {row_vals}'

    template_vars = {
        'ret_pacing_body': BODY_TEMPLATE.format(**row_vals),
        'ret_pacing_course_id': row.course_id,
        'ret_pacing_subject_line': "{course_name} - Welcome to Week {week}".format(**row_vals),
        'ret_pacing_preview_text': '',
        'ret_pacing_from_name': row.course_name,
    }

    sku = row_vals.get('sku')
    offer_upgrade = (
        not _is_missing(sku)
        and row_vals['week'] < 4
        and row_vals.get('enrollment_mode') == 'audit'
    )
    if offer_upgrade:
        deadline = row_vals.get('verified_upgrade_deadline')
        assert not _is_missing(deadline), f'verified_upgrade_deadline is empty in {row_vals}'
        # The SKU becomes a query-string value, so it is URL-encoded.
        template_vars['ret_pacing_upgrade_url'] = BASKET_URL_FORMAT.format(sku=quote_plus(str(sku)))
        template_vars['ret_pacing_verified_upgrade_deadline'] = deadline.strftime('%B %d, %Y')

    return template_vars

class SailthruEmail(object):
    """Batch sender for the Sailthru template named by ``TEMPLATE_NAME``.

    Rows of a DataFrame are turned into per-recipient template variables with
    ``as_template_vars`` and submitted with ``multi_send``. Nothing is
    submitted unless the notebook-level ``SEND_EMAIL`` flag is truthy.
    """

    def __init__(self, sailthru_client, template_vars=None):
        """
        Args:
            sailthru_client: client used both for sending and for reading
                rate-limit information. ``None`` builds a new client from the
                notebook-level credentials.
            template_vars: optional dict of template variables applied to every
                recipient; values computed per row take precedence.
        """
        if sailthru_client is None:
            sailthru_client = SailthruClient(sailthru_api_key, sailthru_api_secret)
        # Use the client that was passed in. Sending and rate-limit checks must
        # go through the same instance, otherwise the rate-limit information
        # read in wait_for_rate_limit never reflects the requests that were made.
        self.sailthru_client = sailthru_client
        self.template_vars = template_vars or {}

    def send_to_users(self, var_overrides, batch_size=MAX_BATCH_SIZE, is_test=False, debug=False):
        """Send one email per row of ``var_overrides``, in batches.

        Rows are grouped by ``(week_number, course_id)`` and each group is split
        into chunks of at most ``batch_size`` rows.

        Args:
            var_overrides: DataFrame with one row per recipient.
            batch_size: rows per ``multi_send`` call, between 1 and ``MAX_BATCH_SIZE``.
            is_test: passed on to ``_send_batch`` (sets the ``test`` send option).
            debug: passed on to ``_send_batch`` (prints recipients and variables).

        Returns:
            The batch generator. If sending stopped on an error it still holds
            the batches that were not attempted; the batch that raised has
            already been consumed and is not part of it.
        """
        # This is the max batch size that Sailthru supports
        assert 0 < batch_size <= MAX_BATCH_SIZE

        total = len(var_overrides)
        print(f'Sending {total} emails')

        progress = widgets.IntProgress(min=0, max=total)
        display(progress)
        succeeded = widgets.IntText(value=0, description='Sent: ')
        display(succeeded)

        def batch_generator():
            for _, group in var_overrides.groupby(['week_number', 'course_id']):
                # Consecutive chunk numbers 0, 0, ..., 1, 1, ... split the group.
                chunk_ids = np.arange(len(group)) // batch_size
                for _, batch in group.groupby(chunk_ids):
                    yield batch

        batch_iterator = batch_generator()

        try:
            for batch in batch_iterator:
                succeeded.value += self._send_batch(batch, is_test=is_test, debug=debug)
                progress.value += len(batch)
        except Exception as exc:
            # Best effort: report the problem and hand back the remaining batches.
            print(exc)
            print(f'Stopped after processing {progress.value} of {total} rows')

        return batch_iterator

    def _send_batch(self, batch, is_test=False, debug=False):
        """Validate and submit a single batch.

        Args:
            batch: DataFrame slice with the recipients of this batch.
            is_test: when True the ``test`` option is set on the send.
            debug: when True the recipients and their variables are printed.

        Returns:
            Number of emails counted as sent: the ``sent_count`` reported by
            Sailthru (1 if absent), 0 on an error response, or the batch size
            when ``SEND_EMAIL`` is falsy and nothing was submitted.
        """
        self.wait_for_rate_limit()

        rows = list(batch.itertuples())
        emails = [row.user_email for row in rows]

        # Build the template vars for every user in the batch
        batch_vars = {
            row.user_email: {
                **self.template_vars,
                **as_template_vars(row)
            }
            for row in rows
        }

        for email, override in batch_vars.items():
            assert(email)
            assert(override['ret_pacing_body'])
            assert(override['ret_pacing_course_id'])
            assert(override['ret_pacing_subject_line'])
            assert(override['ret_pacing_from_name'])

        batch_size = len(batch)
        if debug:
            print(
               f'Sending an email{" [TEST]" if is_test else ""}'
               f' using the template "{TEMPLATE_NAME}" to {batch_size} users'
            )
            print(json.dumps(emails, indent=4))
            print(json.dumps(batch_vars, indent=4))

        if not SEND_EMAIL:
            # Dry run: everything was validated, nothing is submitted.
            return batch_size

        options = {}
        if is_test:
            options['test'] = 1

        response = self.sailthru_client.multi_send(
            TEMPLATE_NAME,
            emails,
            evars=batch_vars,
            options=options
        )
        if response.is_ok():
            body = response.get_body()
            return body.get("sent_count", 1)

        error = response.get_error()
        print("Error: " + error.get_message())
        print("Status Code: " + str(response.get_status_code()))
        print("Error Code: " + str(error.get_error_code()))
        return 0

    def wait_for_rate_limit(self):
        """Sleep until the rate-limit window resets when no calls remain.

        Uses the rate-limit information the client recorded for POST requests
        to the ``send`` endpoint; does nothing when none is available.
        """
        rate_limit_info = self.sailthru_client.get_last_rate_limit_info('send', 'POST')
        if rate_limit_info is None:
            return

        remaining = int(rate_limit_info['remaining'])
        reset_timestamp = int(rate_limit_info['reset'])
        seconds_till_reset = reset_timestamp - time.time()
        if remaining <= 0 and seconds_till_reset > 0:
            print(f'Rate limit exceeded, sleeping for {seconds_till_reset} seconds')
            # One extra second so the window has definitely rolled over.
            time.sleep(seconds_till_reset + 1)

In [20]:
# Sender used by all cells below; no template variables are shared across recipients.
email = SailthruEmail(sailthru_client=sailthru_client, template_vars={})

# Sending emails

1. First send to RET
2. Send to edX employees
3. Set SEND_EMAIL=False
4. Call the method to send to all learners - this will validate all of the template vars
5. Set SEND_EMAIL=True
6. Send the email to all learners

In [26]:
# Internal recipients of the first test send: email address -> name that is
# substituted as student_name in the sample emails.
ret_emails = {
    'gabe@edx.org': 'Gabe Mulley',
    'cblackburn@edx.org': 'Clinton Blackburn',
    'cale@edx.org': 'Calen Pennington',
    'nasthagiri@edx.org': 'Nimisha Asthagiri',
    'aboehm@edx.org': 'Alyssa Boehm',
    'ddomingos@edx.org': 'Darren Domingos',
    'kwillemin@edx.org': 'Katy Willemin',
#     'jzheng@edx.org': 'Jeannie Zheng',
}

# Courses for which a sample email is generated for every internal recipient.
course_ids = (
    'course-v1:IIMBx+AC103x+2T2017',
    'course-v1:AdelaideX+Project101x+1T2017',
    'course-v1:DelftX+LfE101x+2T2017',
    'course-v1:ColumbiaX+DS101X+1T2017',
    'course-v1:PennX+SD2x+2T2017',
    'course-v1:PennX+SD1x+2T2017',
    'course-v1:HarvardX+PH525.1x+2T2017',
    'course-v1:MichiganX+UX501x+3T2016',
    'course-v1:UBCx+Marketing1x+3T2015',
)

In [27]:
# Distinct cohort_availability values present in week_emails; used below to
# pick one sample row per course and cohort date.
unique_cohort_availability_dates = week_emails.cohort_availability.unique()

In [28]:
import random

# Build the internal test send: one sample email per (course, cohort date) for
# every internal recipient.

# The sample row for a (course, cohort date) pair does not depend on the
# recipient, so look each one up once.
sample_rows = []
for course_id in course_ids:
    for cohort_availability_date in unique_cohort_availability_dates:
        matches = week_emails[
            (week_emails.course_id == course_id)
            & (week_emails.cohort_availability == cohort_availability_date)
        ]
        if len(matches) > 0:
            sample_rows.append(matches.head(n=1))

# Re-address every sample row to each internal recipient. Frames are collected
# in a list and concatenated once; DataFrame.append in a loop is quadratic and
# no longer exists in pandas 2.
test_frames = [
    sample.assign(user_email=email_address, student_name=name)
    for email_address, name in ret_emails.items()
    for sample in sample_rows
]

if test_frames:
    result = pd.concat(test_frames)
else:
    result = pd.DataFrame(columns=week_emails.columns)

In [33]:
# Kill switch read by SailthruEmail._send_batch: while False, batches are built
# and validated but nothing is passed to Sailthru.
SEND_EMAIL = False

In [30]:
email.send_to_users(result, is_test=True, debug=True)

Sending 63 emails


Sending an email [TEST] using the template "RET Pacing" to 7 users
[
    "gabe@edx.org",
    "cblackburn@edx.org",
    "cale@edx.org",
    "nasthagiri@edx.org",
    "aboehm@edx.org",
    "ddomingos@edx.org",
    "kwillemin@edx.org"
]
{
    "gabe@edx.org": {
        "ret_pacing_body": "<p>\n    Dear Gabe Mulley,\n</p>\n<br>\n<p>\n    Welcome to week 5 of our Introduction to Project Management course!  We hope you are enjoying learning with\n    us so far. If you have fallen behind, don't worry. You can still catch up.\n</p>\n<br>\n<p>\n    Here is what you can look forward to learning this week:\n    <ul class=\"ret-pacing-weekly-content\" data-week-number=\"4\">\n  <li>Learn about project teams and communication</li>\n  <li>Learn how to engage and manage your project stakeholders and how to resource your project team</li>\n  <li>Find out how to use your communication tools effectively</li>\n</ul>\n</p>\n<br>\n<p>\n    Your focused attention to the course will pay off in the end.  Keep 

Sending an email [TEST] using the template "RET Pacing" to 7 users
[
    "gabe@edx.org",
    "cblackburn@edx.org",
    "cale@edx.org",
    "nasthagiri@edx.org",
    "aboehm@edx.org",
    "ddomingos@edx.org",
    "kwillemin@edx.org"
]
{
    "gabe@edx.org": {
        "ret_pacing_body": "<p>\n    Dear Gabe Mulley,\n</p>\n<br>\n<p>\n    Welcome to week 5 of our Statistical Thinking for Data Science and Analytics course!  We hope you are enjoying learning with\n    us so far. If you have fallen behind, don't worry. You can still catch up.\n</p>\n<br>\n<p>\n    Here is what you can look forward to learning this week:\n    <ul class=\"ret-pacing-weekly-content\" data-week-number=\"4\">\n  <li>In this module, we will introduce the philosophy of Bayesian inference and Bayesian modeling techniques using illustrative case studies.</li>\n  <li>Several examples will illustrate Bayesian modeling in action.</li>\n  <li>A variety of exercises on Bayesian modeling, as well as an opportunity to discuss 

Sending an email [TEST] using the template "RET Pacing" to 7 users
[
    "gabe@edx.org",
    "cblackburn@edx.org",
    "cale@edx.org",
    "nasthagiri@edx.org",
    "aboehm@edx.org",
    "ddomingos@edx.org",
    "kwillemin@edx.org"
]
{
    "gabe@edx.org": {
        "ret_pacing_body": "<p>\n    Dear Gabe Mulley,\n</p>\n<br>\n<p>\n    Welcome to week 5 of our Leadership for Engineers course!  We hope you are enjoying learning with\n    us so far. If you have fallen behind, don't worry. You can still catch up.\n</p>\n<br>\n<p>\n    Here is what you can look forward to learning this week:\n    <ul class=\"ret-pacing-weekly-content\" data-week-number=\"4\">\n  <li>Working on a framework for your career</li>\n  <li>Why asking for support is essential</li>\n  <li>Defining your search strategy</li>\n</ul>\n</p>\n<br>\n<p>\n    Your focused attention to the course will pay off in the end.  Keep up\n    the good work!\n</p>\n<br>\n<p>\n    With Gratitude,\n    <br>\n    Leadership for Engineers 

Sending an email [TEST] using the template "RET Pacing" to 7 users
[
    "gabe@edx.org",
    "cblackburn@edx.org",
    "cale@edx.org",
    "nasthagiri@edx.org",
    "aboehm@edx.org",
    "ddomingos@edx.org",
    "kwillemin@edx.org"
]
{
    "gabe@edx.org": {
        "ret_pacing_body": "<p>\n    Dear Gabe Mulley,\n</p>\n<br>\n<p>\n    Welcome to week 5 of our Introduction to User Experience course!  We hope you are enjoying learning with\n    us so far. If you have fallen behind, don't worry. You can still catch up.\n</p>\n<br>\n<p>\n    Here is what you can look forward to learning this week:\n    <ul class=\"ret-pacing-weekly-content\" data-week-number=\"4\">\n  <li>How UX Design operates in the business world through an interview with a current UX Designer.</li>\n  <li>An (incomplete) history of UX and its rise with the era of personal computing.</li>\n  <li>You will practice the UX Design process by iterating on earlier sketch solutions.</li>\n</ul>\n</p>\n<br>\n<p>\n    Your focused

Sending an email [TEST] using the template "RET Pacing" to 7 users
[
    "gabe@edx.org",
    "cblackburn@edx.org",
    "cale@edx.org",
    "nasthagiri@edx.org",
    "aboehm@edx.org",
    "ddomingos@edx.org",
    "kwillemin@edx.org"
]
{
    "gabe@edx.org": {
        "ret_pacing_body": "<p>\n    Dear Gabe Mulley,\n</p>\n<br>\n<p>\n    Welcome to week 6 of our Introduction to Project Management course!  We hope you are enjoying learning with\n    us so far. If you have fallen behind, don't worry. You can still catch up.\n</p>\n<br>\n<p>\n    Here is what you can look forward to learning this week:\n    <ul class=\"ret-pacing-weekly-content\" data-week-number=\"5\">\n  <li>It's time for project closure and handover</li>\n  <li>Learn the importance of measuring success and how to evaluate your project</li>\n  <li>Find out about the key steps you need to take as part of your project closure</li>\n</ul>\n</p>\n<br>\n<p>\n    Your focused attention to the course will pay off in the end.  Keep 

Sending an email [TEST] using the template "RET Pacing" to 7 users
[
    "gabe@edx.org",
    "cblackburn@edx.org",
    "cale@edx.org",
    "nasthagiri@edx.org",
    "aboehm@edx.org",
    "ddomingos@edx.org",
    "kwillemin@edx.org"
]
{
    "gabe@edx.org": {
        "ret_pacing_body": "<p>\n    Dear Gabe Mulley,\n</p>\n<br>\n<p>\n    Welcome to week 6 of our Introduction to Marketing course!  We hope you are enjoying learning with\n    us so far. If you have fallen behind, don't worry. You can still catch up.\n</p>\n<br>\n<p>\n    Here is what you can look forward to learning this week:\n    <ul class=\"ret-pacing-weekly-content\" data-week-number=\"5\">\n  <li>You will learn how to monitor and facilitate the online conversations that are going on around your brand, and how to identify and leverage marketing opportunities that social media offer</li>\n  <li>You will also be introduced to some of the data, metrics, and measures that are used in marketing. Such measurements are used to ev

<generator object SailthruEmail.send_to_users.<locals>.batch_generator at 0x7fc847972fc0>

In [94]:
# Recipients whose address ends in @edx.org. A substring test would also match
# addresses such as someone@edx.org.example.com; missing addresses count as no match.
edx_emails = week_emails[week_emails.user_email.str.lower().str.endswith('@edx.org', na=False)]

In [41]:
# ptr = email.send_to_users(edx_emails, is_test=True)

In [31]:
len(week_emails)

2893

In [32]:
ptr = email.send_to_users(week_emails)

Sending 2893 emails


Save a CSV with the data used to send the emails... we might want this later? This would be our only record of what emails were sent to which users.

In [45]:
#week_emails.to_csv('week_2_sent_emails.csv')

Insert into the email_experiment table the set of users included in this experiment. Note that this assumes a single cohort. If we run a second cohort, then we would need to rejigger things here a bit.

In [None]:
# %%vertica

# INSERT INTO ret.email_experiment
# SELECT
#     4 AS experiment_id,
#     'Simulated Pacing' AS experiment_name,
#     du.user_email AS email,
#     du.user_id,
#     du.user_username AS student_name,
#     segments.course_id AS course_id,
#     c.course_name,
#     segment_id AS group_id,
#     CASE segment_id WHEN 0 THEN 'Control'
#                     WHEN 1 THEN 'Treatment'
#     END AS group_name,
#     FLOOR(DATEDIFF('day', content_availability, CURRENT_DATE()) / 7) :: INT AS week_number,
#     content_availability
# FROM
#     ret.exp4_enrollments AS segments
# LEFT JOIN
#     lms_read_replica.bulk_email_optout AS eo
# ON
#     eo.user_id = segments.user_id
# LEFT JOIN
#     production.d_user_course duc
# ON
#     duc.user_id = segments.user_id AND duc.course_id = segments.course_id
# JOIN
#     production.d_user AS du
# ON
#     du.user_id = segments.user_id
# JOIN
#     business_intelligence.course_master AS c
# ON
#     c.course_id = segments.course_id
# WHERE
#         segment_id = 1
#     AND eo.user_id IS NULL
#     AND duc.current_enrollment_is_active
# ORDER BY
#     1, 2, 3

In [None]:
#%vertica COMMIT