# Load Email Addresses

In [1]:
# Identifier of the experiment whose rows are selected from
# experiments_experimentdata by the LMS query in the next cells.
experiment_id = 8

In [2]:
# Course runs left out of this send: the LMS query filters them out with
# `exp.course_id NOT IN :exclude_course_ids`.
# NOTE(review): the reason these particular runs are excluded is not recorded
# in this notebook — worth documenting next to the list.
exclude_course_ids = (
    'course-v1:IIMBx+AC103x+2T2017',
    'course-v1:AdelaideX+Project101x+1T2017',
    'course-v1:DelftX+LfE101x+2T2017',
    'course-v1:ColumbiaX+DS101X+1T2017',
    'course-v1:PennX+SD2x+2T2017',
    'course-v1:PennX+SD1x+2T2017',
    'course-v1:HarvardX+PH525.1x+2T2017',
    'course-v1:MichiganX+UX501x+3T2016',
    'course-v1:UBCx+Marketing1x+3T2015',
    'course-v1:DelftX+CTB3365DWx+2T2017',
)

In [3]:
%%query [lms] (experiment_id, exclude_course_ids) -> users
-- One row per experiment record whose learner has an active audit enrollment
-- in that course, has no bulk email opt-out row, and whose course is not in
-- the exclusion list.
SELECT
    exp.user_id,
    exp.course_id,
    exp.value AS json_data,
    au.email AS user_email,
    -- fall back to the username when the profile name is NULL or empty
    COALESCE(NULLIF(aup.name, ''), au.username) AS student_name,
    sce.mode AS enrollment_mode,
    cm.sku,
    cm.min_price AS price
FROM
(
    -- course_id is the key with its first 25 characters dropped
    -- (presumably a fixed prefix, to be confirmed)
    SELECT
        user_id,
        SUBSTRING(`key` FROM 26) AS course_id,
        value
    FROM
        experiments_experimentdata
    WHERE
        experiment_id = :experiment_id
) AS exp
JOIN auth_user AS au
    ON au.id = exp.user_id
JOIN auth_userprofile AS aup
    ON aup.user_id = exp.user_id
-- only active audit enrollments qualify, so this is an inner join
JOIN student_courseenrollment AS sce
    ON sce.user_id = exp.user_id
   AND sce.course_id = exp.course_id
   AND sce.is_active = 1
   AND sce.mode = 'audit'
-- sku and price come from the verified mode of the course, when one exists
LEFT JOIN course_modes_coursemode AS cm
    ON cm.course_id = exp.course_id
   AND cm.mode_slug = 'verified'
-- anti-join, keeps only learners without any opt-out row
LEFT JOIN bulk_email_optout AS eo
    ON eo.user_id = exp.user_id
WHERE
        eo.user_id IS NULL
    AND exp.course_id NOT IN :exclude_course_ids

In [4]:
import ciso8601
import json
import datetime
import math

# Current UTC date; kept as a module-level name although nothing in this cell reads it.
today_utc = datetime.datetime.utcnow().date()

# Decode every row's experiment payload a single time, then derive each
# column from the decoded dicts.
experiment_payloads = users['json_data'].apply(json.loads)

users['segment'] = experiment_payloads.apply(lambda payload: payload['segment'])
users['cohort_availability'] = experiment_payloads.apply(
    lambda payload: ciso8601.parse_datetime(payload['cohort_availability']).date()
)
# Payloads without an explicit reason are treated as 'visit'.
users['segmentation_reason'] = experiment_payloads.apply(
    lambda payload: payload.get('segmentation_reason', 'visit')
)

# The upgrade deadline is set 20 days after the cohort's availability date.
upgrade_window = datetime.timedelta(days=20)
users['verified_upgrade_deadline'] = users['cohort_availability'].apply(
    lambda available_on: available_on + upgrade_window
)

In [5]:
%%vertica -> course_titles
-- Course id, catalog title and partner code for every row of
-- production.d_course (no filter). Merged onto the selected learners by
-- course_id in a later cell.
SELECT
    course_id,
    catalog_course_title AS course_name,
    partner_short_code
FROM
    production.d_course

In [6]:
course_titles.head()

Unnamed: 0,course_id,course_name,partner_short_code
0,AdelaideX/HumBio101x/1T2015,Essential Human Biology: Cells and Tissues,edx
1,ak/akSState/1T2014,TestAK,edx
2,ANUx/ANU-ASTRO1x/1T2014,Greatest Unsolved Mysteries of the Universe,edx
3,ANUx/ANU-ASTRO2x/2T2014,Exoplanets,edx
4,ANUx/ANU-ASTRO3x/4T2014,The Violent Universe,edx


In [7]:
# Keep learners with segment > 0 whose cohort became available on 2017-08-02.
# NOTE(review): segment 0 appears to be the control group, judging by the
# group-name list in the commented-out insert cell at the end of this
# notebook — confirm.
user_filter = (users.segment > 0) & (users.cohort_availability == datetime.date(2017, 8, 2))

In [8]:
import pandas as pd

# Attach course_name and partner_short_code to every selected learner. The
# inner join keeps only learners whose course_id is present in course_titles.
treatment_users = users[user_filter].merge(course_titles, how='inner', on='course_id')

In [9]:
# The merge must not change the number of rows: an inner join drops learners
# whose course_id has no match in course_titles and duplicates learners whose
# course_id matches more than one row.
assert len(users[user_filter]) == len(treatment_users)

In [10]:
len(treatment_users)

59608

In [11]:
treatment_users.groupby('cohort_availability').user_id.count()

cohort_availability
2017-08-02    59608
Name: user_id, dtype: int64

In [12]:
# Recipients of this send: treatment learners in courses whose
# partner_short_code is 'edx'.
week_emails = treatment_users[treatment_users.partner_short_code == 'edx']

In [13]:
len(week_emails)

59608

# Set up Sailthru Infrastructure

In [14]:
import os
import getpass

# Each credential is read from the environment; when the variable is not set
# at all, it is requested interactively without echoing the input.
sailthru_api_key = os.getenv('OPS_SAILTHRU_API_KEY')
if sailthru_api_key is None:
    sailthru_api_key = getpass.getpass(prompt='sailthru_api_key')

sailthru_api_secret = os.getenv('OPS_SAILTHRU_API_SECRET')
if sailthru_api_secret is None:
    sailthru_api_secret = getpass.getpass(prompt='sailthru_api_secret')

In [15]:
# Name of the Sailthru template passed to multi_send for every batch.
TEMPLATE_NAME = 'RET Self-Paced Verification Reminder'
# Upper bound on recipients per multi_send call; send_to_users asserts that
# its batch_size does not exceed it (noted there as the maximum Sailthru supports).
MAX_BATCH_SIZE = 100

In [16]:
from sailthru.sailthru_client import SailthruClient

# `sc` and `sailthru_client` are two names bound to the same client instance.
sc = sailthru_client = SailthruClient(sailthru_api_key, sailthru_api_secret)

In [17]:
import itertools
import time
from pprint import pprint as pp
import numpy as np
from functools import partial
import json
from urllib.parse import quote_plus
import textwrap
import ipywidgets as widgets
from IPython.display import display

# Basket link that adds the course's verified SKU to the learner's cart.
BASKET_URL_FORMAT = 'https://ecommerce.edx.org/basket/add/?sku={sku}'

def as_template_vars(row):
    """Build the per-recipient Sailthru template variables for one row.

    Args:
        row: A namedtuple-like row (as produced by ``DataFrame.itertuples()``)
            exposing ``student_name``, ``course_name``, ``course_id``, ``sku``,
            ``price`` and ``verified_upgrade_deadline`` (an object with
            ``strftime``).

    Returns:
        dict mapping ``ret_pacing_*`` variable names to their string values.

    Raises:
        AssertionError: if any required field is missing or empty, or if the
            price is missing (None or NaN).
    """
    row_vals = row._asdict()

    assert row_vals.get('student_name'), f'student_name is empty for {row_vals}'
    assert row_vals.get('course_name'), f'course_name is empty for {row_vals}'
    assert row_vals.get('course_id'), f'course_id is empty for {row_vals}'
    assert row_vals.get('sku'), f'sku is empty for {row_vals}'
    assert row_vals.get('verified_upgrade_deadline'), f'verified upgrade deadline is empty for {row_vals}'

    price = row_vals.get('price')
    # `price == price` is False only for NaN, which is how a NULL price shows
    # up in a numeric pandas column.
    assert price is not None and price == price, f'price is empty for {row_vals}'

    template_vars = {
        'ret_pacing_course_name': row.course_name,
        'ret_pacing_course_id': row.course_id,
        'ret_pacing_subject_line': 'Highlight your success in {course_name}'.format(**row_vals),
        'ret_pacing_preview_text': '',
        'ret_pacing_from_name': row.course_name,
        'ret_pacing_student_name': row.student_name,
        # Escape the SKU so it is always a valid query-string value.
        'ret_pacing_upgrade_url': BASKET_URL_FORMAT.format(sku=quote_plus(str(row.sku))),
        'ret_pacing_verified_upgrade_deadline': row.verified_upgrade_deadline.strftime('%B %d, %Y'),
        # Two-decimal formatting gives "$49.00" for both 49 and 49.0; plain
        # interpolation followed by ".00" would render a float as "$49.0.00".
        'ret_pacing_price': f'${float(price):.2f}',
    }

    return template_vars

class SailthruEmail(object):
    """Batch sender for the Sailthru template named by ``TEMPLATE_NAME``.

    Relies on module-level names defined in other cells: ``TEMPLATE_NAME``,
    ``MAX_BATCH_SIZE``, ``SEND_EMAIL`` (read each time a batch is sent) and
    ``as_template_vars``.
    """

    def __init__(self, sailthru_client, template_vars=None):
        """Store the API client and the template vars shared by all recipients.

        Args:
            sailthru_client: Client used for sending and for rate-limit
                lookups. When ``None``, a client is built from the module-level
                ``sailthru_api_key`` / ``sailthru_api_secret``.
            template_vars: Optional dict of vars applied to every recipient;
                the per-row vars from ``as_template_vars`` take precedence.
        """
        if sailthru_client is None:
            sailthru_client = SailthruClient(sailthru_api_key, sailthru_api_secret)
        # Sending and rate-limit lookups must share one client: the rate-limit
        # info is read back from the client that made the last request.
        self.sailthru_client = sailthru_client
        self.template_vars = template_vars or {}

    def send_to_users(self, var_overrides, batch_size=MAX_BATCH_SIZE, is_test=False):
        """Send the template to every row of ``var_overrides`` in batches.

        Progress and the number of emails sent are shown in notebook widgets.
        Any exception raised while sending is printed and stops the loop.

        Args:
            var_overrides: DataFrame with one row per recipient; must provide
                ``user_email`` and the columns read by ``as_template_vars``.
            batch_size: Rows per API call, between 1 and ``MAX_BATCH_SIZE``.
            is_test: When true, the send is flagged as a test to Sailthru.

        Returns:
            The batch iterator. After an early stop it yields the batches that
            were not attempted; the batch that failed has already been consumed.
        """
        # This is the max batch size that Sailthru supports
        assert 0 < batch_size <= MAX_BATCH_SIZE

        progress = widgets.IntProgress(min=0, max=len(var_overrides))
        display(progress)
        succeeded = widgets.IntText(value=0, description='Sent: ')
        display(succeeded)
        # Group rows positionally into consecutive chunks of `batch_size`.
        batch_iterator = iter(var_overrides.groupby(np.arange(len(var_overrides)) // batch_size))
        try:
            for _, batch in batch_iterator:
                sent_count = self._send_batch(batch, is_test=is_test)
                progress.value += len(batch)
                # Count only what was actually sent (or validated in a dry run).
                succeeded.value += sent_count
        except Exception as exc:
            # Best effort: report and stop so the caller can resume from the
            # returned iterator. Include the type, since a bare assert has no message.
            print(f'{type(exc).__name__}: {exc}')
        return batch_iterator

    def _send_batch(self, batch, is_test=False):
        """Validate one batch and, if ``SEND_EMAIL`` is true, send it.

        Args:
            batch: DataFrame slice holding the rows of this batch.
            is_test: When true, ``options['test'] = 1`` is passed to Sailthru.

        Returns:
            Number of emails sent: the API's ``sent_count`` on success, 0 when
            the API reports an error, and ``len(batch)`` for a dry run
            (``SEND_EMAIL`` false), where the rows were only validated.

        Raises:
            AssertionError: if a row is missing a required template value.
        """
        self.wait_for_rate_limit()

        # Build the template vars for every user in the batch.
        # NOTE(review): the dict is keyed by address, so if one address occurs
        # on several rows of a batch only the last row's vars are kept —
        # confirm whether that is acceptable.
        batch_vars = {
            row.user_email: {
                **self.template_vars,
                **as_template_vars(row)
            }
            for row in batch.itertuples()
        }

        for email, override in batch_vars.items():
            assert email
            assert override['ret_pacing_course_id']
            assert override['ret_pacing_subject_line']
            assert override['ret_pacing_from_name']

        if not SEND_EMAIL:
            # Dry run: the template vars above were validated, nothing is sent.
            return len(batch)

        options = {'test': 1} if is_test else {}
        emails = [row.user_email for row in batch.itertuples()]

        response = self.sailthru_client.multi_send(
            TEMPLATE_NAME,
            emails,
            evars=batch_vars,
            options=options
        )
        if response.is_ok():
            body = response.get_body()
            sent_count = body.get("sent_count", 1)
            print(f'Send successful to {sent_count} users')
            try:
                return int(sent_count)
            except (TypeError, ValueError):
                # Unexpected payload shape; fall back to the batch size rather
                # than aborting a send that succeeded.
                return len(batch)

        error = response.get_error()
        print("Error: " + error.get_message())
        print("Status Code: " + str(response.get_status_code()))
        print("Error Code: " + str(error.get_error_code()))
        return 0

    def wait_for_rate_limit(self):
        """Sleep until the rate-limit window resets if no 'send' calls remain.

        Does nothing when the client has no rate-limit info yet for POST 'send'.
        """
        rate_limit_info = self.sailthru_client.get_last_rate_limit_info('send', 'POST')
        if rate_limit_info is not None:
            remaining = int(rate_limit_info['remaining'])
            reset_timestamp = int(rate_limit_info['reset'])
            seconds_till_reset = reset_timestamp - time.time()
            if remaining <= 0 and seconds_till_reset > 0:
                print(f'Rate limit exceeded, sleeping for {seconds_till_reset} seconds')
                # One extra second so the window has definitely rolled over.
                time.sleep(seconds_till_reset + 1)

In [18]:
# Sender used for every send in this notebook; no template vars are shared
# across recipients, so each row supplies all of its own.
email = SailthruEmail(sailthru_client=sailthru_client, template_vars={})

# Sending emails

1. First send to RET
2. Send to edX employees
3. Set SEND_EMAIL=False
4. Call the method to send to all learners - this will validate all of template vars
5. Set SEND_EMAIL=True
6. Send the email to all learners

In [31]:
# Recipients of the first test send, mapping address -> display name. The
# name replaces student_name on the test rows built two cells below.
# Commented-out entries are skipped.
ret_emails = {
    'gabe@edx.org': 'Gabe Mulley',
#     'cblackburn@edx.org': 'Clinton Blackburn',
#     'cale@edx.org': 'Calen Pennington',
#     'nasthagiri@edx.org': 'Nimisha Asthagiri',
#     'aboehm@edx.org': 'Alyssa Boehm',
#     'ddomingos@edx.org': 'Darren Domingos',
#     'kwillemin@edx.org': 'Katy Willemin',
    #'jzheng@edx.org': 'Jeannie Zheng',
}

In [32]:
# Take one real recipient row from the Microsoft DEV204.1x run to serve as
# the template row for the test send (its course, sku, price and deadline are reused).
template = week_emails[week_emails.course_id=='course-v1:Microsoft+DEV204.1x+2T2017'].head(1)

In [33]:
import pandas as pd
# One copy of the template row per test recipient, with the address and name
# swapped in. The rows are built first and concatenated once:
# DataFrame.append was removed in pandas 2.0, and appending onto an empty
# frame inside a loop is quadratic and degrades the column dtypes.
test_rows = [
    template.assign(user_email=email_address, student_name=name)
    for email_address, name in ret_emails.items()
]
if test_rows:
    result = pd.concat(test_rows, ignore_index=True)
else:
    # No recipients configured: an empty frame with the template's columns.
    result = template.iloc[:0].copy()

In [34]:
# Global switch read by SailthruEmail._send_batch on every batch: when False
# the template vars are still built and validated, but nothing is sent.
SEND_EMAIL = True

In [35]:
# Test send (is_test=True) to the recipients listed in ret_emails.
email.send_to_users(result, is_test=True)

Send successful to 1 users


<generator object BaseGrouper.get_iterator at 0x7f03fee0ef10>

In [126]:
# Recipients with an @edx.org address. The domain is matched as a
# case-insensitive suffix, so an address that merely contains '@edx.org'
# (e.g. 'a@edx.org.example.com') is not selected; rows with a missing
# address are excluded instead of raising.
edx_emails = week_emails[
    week_emails.user_email.str.lower().str.endswith('@edx.org', na=False)
]

In [127]:
#email.send_to_users(edx_emails, is_test=True)

Send successful to 21 users


In [36]:
# Full send. `ptr` is the batch iterator returned by send_to_users; if the
# send stopped early on an exception, it still holds the batches that were
# not attempted.
ptr = email.send_to_users(week_emails)

Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 99 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 99 users
Send successful to 99 users
Send successful to 100 users
Send successful to 99 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 99 users
Send successful to 

Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 99 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 99 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful 

Send successful to 100 users
Send successful to 99 users
Send successful to 98 users
Send successful to 100 users
Send successful to 100 users
Send successful to 93 users
Send successful to 90 users
Send successful to 100 users
Send successful to 100 users
Send successful to 98 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 99 users
Send successful to 89 users
Send successful to 98 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 100 users
Send successful to 99 users
Send successful to 7 users


Save a CSV with the data used to send the emails... we might want this later? This would be our only record of what emails were sent to which users.

In [178]:
# week_emails.to_csv('8_first_send_sent_emails.csv')

Insert into the email_experiment table the set of users included in this experiment. Note that this assumes a single cohort. If we run a second cohort, then we would need to rejigger things here a bit.

In [3]:
# import pandas as pd
# week_emails = pd.read_csv('8_first_send_sent_emails.csv')

In [4]:
# %%vertica -> usernames
# SELECT
#     user_id,
#     user_username
# FROM production.d_user

In [6]:
# merged = pd.merge(
#     week_emails,
#     usernames,
#     on='user_id'
# )

In [81]:
# import ipywidgets as widgets
# from IPython.display import display

# progress = widgets.IntProgress(min=0, max=len(merged))
# display(progress)
# succeeded = widgets.IntText(value=0, description='Inserted: ')
# display(succeeded)
# skipped = widgets.IntText(value=0, description='Skipped: ')
# display(skipped)

# print(len(merged))
# %vertica BEGIN
# for row in merged.itertuples():
#     try:
#         group_name = ['Control', 'Hard Deadline', 'Soft Deadline'][row.segment]
#         user_email = row.user_email
#         user_id = int(row.user_id)
#         user_username = row.user_username
#         course_id = row.course_id
#         if course_id in exclude_course_ids:
#             skipped.value += 1
#             continue
#         course_name = row.course_name.encode('ascii', 'replace').decode('ascii')
#         segment = int(row.segment)
#         try:
#             %vertica (experiment_id, group_name, user_email, user_id, user_username, course_id, course_name, segment) INSERT INTO ret.email_experiment (experiment_id, experiment_name, email, user_id, username, course_id, course_title, group_id, group_name) VALUES(:experiment_id, 'Self-paced Upgrade Deadline', :user_email, :user_id, :user_username, :course_id, :course_name, :segment, :group_name)
#         except:
#             print(row)
#             raise
#         succeeded.value += 1
#     finally:
#         progress.value += 1


59904
