In [102]:
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
from uuid import uuid4

# Survey Funnel
This data describes users completing a survey on WarbyParker.com.  Users will "give up" at different points in the survey.  Students will be asked to analyze how many users move on from Question 1 to Question 2, etc.

## Question 1: What are you looking for?
100% of users answer this question

In [103]:
# Question 1 is answered by every simulated user, so q1 has one row per user.
n = 500
q1_choices = ["Men's Styles", "Women's Styles", "I'm not sure. Let's skip it."]
q1_user_ids = [str(uuid4()) for _ in range(n)]  # a fresh random ID per user
q1_answers = np.random.choice(q1_choices, size=n, p=[0.45, 0.45, 0.1])

# Columns are declared directly in their final order.
q1 = pd.DataFrame({
    'question_text': "1. What are you looking for?",
    'user_id': q1_user_ids,
    'response': q1_answers,
})

## Question 2: What's your fit?
95% of users answer this question

In [104]:
# 95% of the Question 1 respondents continue on to Question 2.
n = int(len(q1) * 0.95)

# Pick the continuing users without repeats, then draw one answer for each.
q2_rows = np.random.choice(q1.index.values, size=n, replace=False)
q2_answers = np.random.choice(
    ["Narrow", "Medium", "Wide", "I'm not sure. Let's skip it."],
    size=n, p=[0.4, 0.3, 0.2, 0.1])

# Built directly in the final column order with a fresh 0..n-1 index.
q2 = pd.DataFrame({
    'question_text': "2. What's your fit?",
    'user_id': q1.loc[q2_rows, 'user_id'].values,
    'response': q2_answers,
})

## Question 3: What shapes do you like?
80% of the users who answered Question 2 answer this question

In [105]:
# 80% of the Question 2 respondents continue on to Question 3.
n = int(len(q2) * 0.8)

# Sample the continuing users without repeats; reset_index() gives a fresh
# 0..n-1 index (the old index column is dropped by the column selection below).
q3 = q2.loc[np.random.choice(q2.index.values, size=n, replace=False), 'user_id'].reset_index()
q3['response'] = np.random.choice(
    ["Round", "Rectangular", "Square", "No Preference"],
    p=[0.2, 0.4, 0.3, 0.1], size=n)

# The question text must match the shape options above; it previously read
# "3. How old are you?", which does not correspond to these responses.
q3['question_text'] = "3. What shapes do you like?"
q3 = q3[['question_text', 'user_id', 'response']]

## Question 4: Which colors do you like?
95% of the users who answered Question 3 answer this question

In [106]:
# 95% of the Question 3 respondents continue on to Question 4.
n = int(len(q3) * 0.95)

# Pick the continuing users without repeats, then draw one answer for each.
q4_rows = np.random.choice(q3.index.values, size=n, replace=False)
q4_answers = np.random.choice(
    ['Neutral', 'Black', 'Tortoise', 'Crystal', 'Two-Tone'],
    size=n, p=[0.1, 0.3, 0.3, 0.2, 0.1])

# Built directly in the final column order with a fresh 0..n-1 index.
q4 = pd.DataFrame({
    'question_text': "4. Which colors do you like?",
    'user_id': q3.loc[q4_rows, 'user_id'].values,
    'response': q4_answers,
})

## Question 5: When was your last eye exam?
75% of the users who answered Question 4 answer this question

In [107]:
# 75% of the Question 4 respondents continue on to Question 5.
n = int(len(q4) * 0.75)

# Pick the continuing users without repeats, then draw one answer for each.
q5_rows = np.random.choice(q4.index.values, size=n, replace=False)
q5_answers = np.random.choice(
    ["<1 Year", "1-3 Years", "3+ Years", "Not Sure. Let's Skip It"],
    size=n, p=[0.5, 0.2, 0.15, 0.15])

# Built directly in the final column order with a fresh 0..n-1 index.
q5 = pd.DataFrame({
    'question_text': "5. When was your last eye exam?",
    'user_id': q4.loc[q5_rows, 'user_id'].values,
    'response': q5_answers,
})

## Combine Questions

In [1]:
# Stack the five per-question tables into one long table and write it out.
# Rows are ordered by user and then by question text (which begins with the
# question number), so each user's answers appear in question order instead of
# whatever order a single-key, non-stable sort leaves among rows that tie.
responses = pd.concat([q1, q2, q3, q4, q5]).sort_values(by=['user_id', 'question_text'])
responses.to_csv('warby_parker_survey.csv', index=False)

NameError: name 'pd' is not defined

In [2]:
# Preview the first five rows of the combined survey responses.
responses.head(n=5)

NameError: name 'responses' is not defined

# Home Try-On Funnel
This project will track users through three steps to making a purchase at WarbyParker.com:
1. Take a Style Quiz
2. Select Glasses to Try on at Home
3. Purchase the Perfect Pair of Glasses

During the Home Trial stage, we will be conducting an A/B Test:
- 50% of users will get 3 pairs to try on
- 50% of users will get 5 pairs to try on

The conclusion will be that users who get more pairs to try on will be more likely to make a purchase.

Data will be distributed across three tables:
- `quiz_results`
- `home_trial`
- `purchases`

## Style Quiz
This table will have the following schema:

|user_id|style|fit|shape|color|
|-|-|-|-|-|
|...|...|...|...|...|

In [110]:
n = 1000

# For each quiz column: the possible answers and the probability of each one.
quiz_choices = {
    'style': (["Men's Styles", "Women's Styles", "I'm not sure. Let's skip it."],
              [0.45, 0.45, 0.1]),
    'fit': (["Narrow", "Medium", "Wide", "I'm not sure. Let's skip it."],
            [0.4, 0.3, 0.2, 0.1]),
    'shape': (["Round", "Rectangular", "Square", "No Preference"],
              [0.2, 0.4, 0.3, 0.1]),
    'color': (['Neutral', 'Black', 'Tortoise', 'Crystal', 'Two-Tone'],
              [0.1, 0.3, 0.3, 0.2, 0.1]),
}

# One row per quiz taker: a random ID followed by an independent draw for each
# quiz column, in the order declared above.
quiz_results = pd.DataFrame({
    'user_id': [str(uuid4()) for _ in range(n)],
    **{column: np.random.choice(answers, size=n, p=weights)
       for column, (answers, weights) in quiz_choices.items()},
})

quiz_results.to_csv('warby_parker_quiz.csv', index=False)

## Home Try On
About 75% of users who took the style quiz will do an at-home try on.

This table will have the following schema:
|user_id|number_of_pairs|address|
|-|-|-|
|...|...|...|

In [111]:
# Street names sampled when building synthetic `address` values for the home
# try-on table.  Repeated names are kept exactly as listed.  The literal starts
# with a newline, so it is stripped before splitting; otherwise the first list
# element would be an empty string and some addresses would have no street name.
street_names = '''
Abingdon Square
Abraham Kazan St
Abraham Kazan St
Abraham Pl
Absecon Rd
Academy St
Adam Clayton Powell Jr Blvd
Adrian Ave
Alan St
Albany St
Allen St
Amsterdam Ave
Amsterdam Ave
Andes Rd
Ann St
Arden St
Asser Levy Pl
Astor Pl
Attorney St
Audubon Ave
Avenue A
Avenue B
Avenue C
Avenue D
Avenue Of The Finest
Avenue Of The Strongest
Bank St
Barclay St
Barrow St
Barrow St
Barry Rd
Baruch Dr
Baruch Pl
Battery Park Underpass
Battery Pl
Baxter St
Bayard St
Beach St
Beak St
Bear Rd
Beaver St
Bedford St
Beekman Pl
Beekman St
Bennett Ave
Benson St
Bernard Baruch Way
Bethune St
Bialystoker Pl
Bialystoker St
Bleecker St
Bloomfield St
Bogardus Pl
Bond St
Bradhurst Ave
Bridge St
Broad St
Broadway
Broadway Terrace
Brooklyn Battery Tunnel
Brooklyn Bridge
Broome St
Cabrini Blvd
Canal St
Cannon St
Carder Rd
Cardinal Hayes Pl
Carlisle St
Carmine St
Cartigan Rd
Cathedral Pkwy
Catherine Ln
Catherine Slip
Catherine St
Cedar St
Center Dr
Central Park Driveway
Central Park N
Central Park S
Central Park West
Central Rd
Centre Market Pl
Centre St
Centre St
Chambers St
Charles Ln
Charles St
Charlton St
Chatham Square
Cherokee Pl
Cherry St
Chincoteague Rd
Chisum Pl
Chittenden Ave
Christopher St
Chrystie St
Church St
Claremont Ave
Clarkson St
Clayton Rd
Cleveland Pl
Cliff St
Clinton St
Coenties Alley
Coenties Slip
Collister St
Colonel Robert Magaw Pl
Columbia St
Columbus Ave
Columbus Cir
Comfort Rd
Commerce St
Convent Ave
Cooper Square
Cooper St
Cornelia St
Cortlandt Alley
Cortlandt St
Craig Rd N
Craig Rd S
Crosby St
Ctrl Park Powell Blvd Appr
Cumming St
De Peyster St
Delancey St
Delancey St S
Desbrosses St
Dey St
Division Rd
Division St
Dominick St
Dongan Pl
Donnellon Square
Doris C Freedman Pl
Dover St
Downing St
Doyers St
Duane St
Duke Ellington Blvd
Dutch St
Dyckman St
Dyer Ave
W Broadway
W Houston St
W Rd
W Thames St
W Washington Pl
Wadsworth Ave
Wadsworth Terrace
Walker St
Wall St
Wards Is Rd
Warren St
Warren St
Warren St
Warren St
Washington Bridge
Washington Mews
Washington Pl
Washington Square E
Washington Square N
Washington Square S
Washington Square W
Washington St
Washington St
Washington St
Washington Terrace
Water St
Watts St
Waverly Pl
Weehawken St
West End Ave
West St
West St
West St
Wheeler Ave
White St
Whitehall St
Willett St
William St
Williamsburg Bridge
Willis Ave Bridge
Wooster St
Worth St
Wyckoff Ave
Yeaton Rd
York Ave
Federal Plaza
Ferry Line Rd
Fletcher St
Forsyth St
Fort Charles Pl
Fort George Ave
Fort George Hill
Fort Washington Ave
Frankfort St
Franklin D. Roosevelt East River Dr
Franklin Pl
Franklin St
Frawley Cir
Fred Douglass Cir
Frederick Douglass Blvd
Frederick Douglass Cir
Freedom Pl
Freeman Alley
Front St
Ft Tryon Pl
Gansevoort St
Gay St
George Washington Bridge
Gold St
Gouverneur Ln
Gouverneur Slip E
Gouverneur Slip W
Gouverneur St
Gouvernour St
Governors Is
Gracie Square
Gracie Terrace
Gramercy Park E
Gramercy Park N
Gramercy Park S
Gramercy Park W
Grand Army Plaza
Grand St
Great Jones Alley
Great Jones St
Greene Ave
Greene St
Greenwich Ave
Greenwich Ct
Greenwich St
Gresham Rd
Grove Ct
Grove St
Grove St
Gustave L. Levy Pl
Half Moon Rd
Hamilton Pl
Hamilton Pl
Hamilton Terrace
Hancock Pl
Hanover Square
Hanover St
Harlem River Dr
Harrison St
Haven Ave
Hay Rd
Hayes Pl
Henderson Pl
Henry Browne Block
Henry Hudson Pkwy
Henry St
Henry St
Henshaw St
Hester St
Hillside Ave
Hogan Pl
Holland Tunnel
Horatio St
Howard St
Hubert St
Hudson St
Hwy 9
Hwy 9
Icarus Rd
Indian Rd
Interstate 495
Interstate 678
Interstate 78
Irving Pl
Isham St
Jackson St
Jacobus Pl
James St
Jane St
Jay St
Jefferson Ave
Jefferson St
Jersey St
Joe Dimaggio Hwy
John Jay Byrne Bridge
John St
Jones Alley
Jones St
Joseph P Ward St
Jumel Pl
Jumel Terrace
Kenmare St
Kimball Rd - Governors Island National Monument
Kimmel Rd
King Ave
King St
La Salle St
Lafayette Ct
Lafayette St
Laguardia Pl
Laight St
Laurel Hill Terrace
Legion Memorial Square
Lenox Ave
Lenox Terrace Pl
Leonard St
Leroy St
Lewis St
Lexington Ave
Liberty Pl
Liberty St
Lillian Wald Dr
Lincoln Center Plaza
Lincoln Tunnel
Lispenard St
Little W St
Little West 12th St
Lloyd E Dickens Pl
Loisaida Ave
Ludlow St
Luis Munoz Marin Blvd
Mac Dougal Alley
Macdougal St
Macombs Dam Bridge
Macombs Pl
Madison Ave
Madison Ave
Madison Ave Bridge
Madison Square N
Madison St
Magaw Pl
Maiden Ln
Main St
Malcolm X Blvd
Mangin St
Manhattan
Manhattan Ave
Marble Hill Ave
Marble Hill Ln
Margaret Corbin Dr
Marginal St
Market Slip
Market St
Marketfield St
Martin Luther King Blvd
Mc Kenna Square
Mc Nally Plaza
Mercer St
Meyer Square
Mill Ln
Milligan Pl
Minetta Ln
Minetta St
Mitchell Pl
Monroe St
Monroe St
Montgomery St
Moore St
Morningside Ave
Morningside Dr
Morris St
Morton St
Mosco St
Mott St
Mt Carmel Pl
Mt Morris Park W
Mulberry St
Murray St
N Moore St
Nagle Ave
Nassau St
Nathan D Perlman Pl
New York 9a
New York Plaza
Norfolk St
North End Ave
Odell Clark Pl
Old Broadway
Old Slip
Oliver St
Orchard St
Overlook Terrace
Owasco Rd
Paladino Ave
Park Ave
Park Ave
Park Ave S
Park Pl
Park Row
Park Terrace E
Park Terrace W
Parks Ave
Patchin Pl
Payson Ave
Pearl St
Pell St
Penn Plaza
Perry St
Peter Cooper Rd
Pike Slip
Pike St
Pine St
Pinehurst Ave
Pitt St
Platt St
Plaza Lafayette
Pleasant Ave
Pomander Walk
Post Ave
Prince St
Queens Midtown Tunnel
Queensboro Bridge
Randalls Is Rd
Reade St
Rector Pl
Rector St
Renwick St
Ridge St
River Rd
River Terrace
Riverside Blvd
Riverside Dr
Riverside Dr
Riverside Dr W
Riverview Terrace
Rivington St
Robert F Wagner Sr Pl
Rockefeller Plaza
Roosevelt Island Bridge
Rose St
Rutgers Slip
Rutgers St
Rutherford Pl
S Pinehurst Ave
S Street Viaduct
S William St
Samuel Dickstein Plaza
Seaman Ave
Seminary Row
Sheridan Square
Sheriff St
Sherman Ave
Short Ave
Shubert Alley
Sickles St
Sniffen Ct
South End Ave
South St
South St
Spring St
Spruce St
St Andrews Plaza
St Clair Pl
St James Pl
St Johns Ln
St Lukes Pl
St Marks Pl
St Nicholas Ave
St Nicholas Pl
St Nicholas Terrace
Staff St
Stanton St
Staple St
State St
Stone St
Stonewall Pl
Stuyvesant Oval
Stuyvesant St
Stuyvesant Walk
Suffolk St
Suffolk Street Community Garden
Sullivan St
Sullivan St
Sutton Pl S
Sutton Square
Sylvan Ct
Sylvan Pl
Sylvan Terrace
Szold Pl
Tampa Rd'''.strip().split('\n')

In [112]:
# 75% of quiz takers go on to a home try-on.  Each of them is assigned to the
# 3-pair or 5-pair arm of the A/B test with equal probability.
n = int(0.75 * len(quiz_results))

# Ignore blank entries in the street list so that every generated address
# includes a street name.
valid_streets = [name for name in street_names if name.strip()]

home_trial = pd.DataFrame({
    # Sampled without replacement, so each user appears at most once.
    'user_id': np.random.choice(quiz_results.user_id.values,
                                replace=False,
                                size=n),
    'number_of_pairs': np.random.choice(['3 pairs', '5 pairs'],
                                       p=[0.5, 0.5],
                                       size=n),
    # House numbers run from 1 to 499 (randint's upper bound is exclusive);
    # starting at 1 avoids producing a house number of 0.
    'address': ['%d %s' % (np.random.randint(1, 500),
                           np.random.choice(valid_streets))
                for _ in range(n)]
})[['user_id', 'number_of_pairs', 'address']]

home_trial.to_csv('warby_parker_home_trial.csv', index=False)

## Purchase
- Of users who tried on 3 pairs, 60% will purchase Warby Parker glasses.
- Of users who tried on 5 pairs, 85% will purchase Warby Parker glasses.

Table will have the following schema:

|user_id|product_id|style|model_name|color|price|
|-|-|-|-|-|-|
|...|...|...|...|...|...|

In [113]:
# Product catalog that purchases are drawn from.
# Each entry is (model_name, color, style, price); product_id is the entry's
# 1-based position in the list.
mens_style, womens_style = "Men's Styles", "Women's Styles"
catalog = [
    ('Brady', 'Layered Tortoise Matte', mens_style, 95),
    ('Brady', 'Sea Glass Gray', mens_style, 95),
    ('Dawes', 'Driftwood Fade', mens_style, 150),
    ('Dawes', 'Jet Black', mens_style, 150),
    ('Monocle', 'Endangered Tortoise', mens_style, 50),
    ('Olive', 'Pearled Tortoise', womens_style, 95),
    ('Lucy', 'Elderflower Crystal', womens_style, 150),
    ('Lucy', 'Jet Black', womens_style, 150),
    ('Eugene Narrow', 'Rose Crystal', womens_style, 95),
    ('Eugene Narrow', 'Rosewood Tortoise', womens_style, 95),
]
items = pd.DataFrame(
    [(product_id, *details) for product_id, details in enumerate(catalog, start=1)],
    columns=['product_id', 'model_name', 'color', 'style', 'price'])


In [114]:
# Users in each arm of the home try-on A/B test.
tried_3_pair = home_trial[home_trial.number_of_pairs == '3 pairs'].user_id.values
tried_5_pair = home_trial[home_trial.number_of_pairs == '5 pairs'].user_id.values

# Buyers: 60% of the 3-pair arm and 85% of the 5-pair arm, sampled without
# replacement so that nobody purchases twice.
purchase = pd.DataFrame({
    'user_id': np.concatenate([
        np.random.choice(tried_3_pair,
                         size=int(len(tried_3_pair) * 0.60),
                         replace=False),
        np.random.choice(tried_5_pair,
                         size=int(len(tried_5_pair) * 0.85),
                         replace=False)
    ])
})

# Attach each buyer's quiz style; it is only used to decide which product pool
# the buyer picks from.
purchase = purchase.merge(quiz_results[['user_id', 'style']], on='user_id')

# Product pools are derived from the catalog rather than hard-coded id lists.
womens_ids = items.loc[items['style'] == "Women's Styles", 'product_id'].values
mens_ids = items.loc[items['style'] == "Men's Styles", 'product_id'].values

# Buyers with "Women's Styles" choose a women's product; everyone else
# (including users who skipped the style question) chooses a men's product.
# NOTE(review): sending undecided users to the men's pool mirrors the original
# logic -- confirm this is the intended behavior.
purchase['product_id'] = [
    np.random.choice(womens_ids if quiz_style == "Women's Styles" else mens_ids)
    for quiz_style in purchase['style']
]

# Join to the catalog on product_id ONLY.  Joining on every shared column would
# also match on 'style', and buyers whose quiz style is "I'm not sure. Let's
# skip it." would find no catalog row and be dropped, lowering the purchase
# rates.  The 'style' column written out is therefore the product's style.
purchase = purchase[['user_id', 'product_id']].merge(items, on='product_id')\
    [['user_id', 'product_id', 'style', 'model_name', 'color', 'price']]\
    .sort_values(by='user_id')
purchase.to_csv('warby_parker_purchase.csv', index=False)