# Understand the life cycle of floorplans
- DS: Nhan Le
- PM: Sean Corriel

**Objective:**

    - how often are 3D floor plan users creating new floor plans over time

    - what frequency buckets do customers fall into? 

    - how is this distributed across customer base

    - how does this behavior differ between free trial / paid pros

    - how can we distinguish (via dashboards) the number of floor plans created by free trial vs paid customers

 

In future phases, we can enrich this understanding with engagement events (for example, editing properties within a floor plan) to gain more resolution into the lifecycle of floor plans:

adding a 3D product

modifying a 3D object (wall, or product) 

Sharing a 3D Floor Plan

Floor Plan Creation on mobile or web

Floor Plan Duplication - duplicating a floor plan and applying ‘edits’ to the duplicated floor plan

Needed for access to all other apps. It would be a good idea to install ASAP.

In [1]:
import warnings

# NOTE(review): this binds the top-level `matplotlib` package to the name
# `plt`; the conventional `plt` alias is `matplotlib.pyplot`. Left unchanged
# because code outside this cell may rely on the current binding -- confirm.
import matplotlib as plt
import pandas as pd
from pyhive import presto

# Suppress FutureWarning and UserWarning messages for the rest of the session.
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)

# Presto connection handed to the pd.read_sql calls in this notebook.
conn = presto.connect(host='presto-alpha-backend.data.houzz.net', port=8086)

In [2]:
def validate_floorplans(min_elements=9):
    """Return the SQL ``with`` clause that defines the ``valid_floorplan`` CTE.

    The returned text contains only CTE definitions (``fp`` and
    ``valid_floorplan``); the caller appends the final ``select``, e.g.
    ``validate_floorplans() + 'select * from valid_floorplan limit 5'``.

    ``valid_floorplan`` keeps one row per ``floorplan_id`` from
    ``gaia.dim_floorplan_elements`` whose element count is strictly greater
    than ``min_elements``.

    Notes:
        * ``row_number()`` is partitioned by ``floorplan_id`` without an
          ``order by``, so which element row supplies ``company_user_id`` and
          ``created_dt`` for a floorplan is not specified by the query.
        * Because the ``where`` clause already requires
          ``fp_elements > min_elements``, ``fp_status`` is always ``'valid'``
          in the rows that are returned.

    Args:
        min_elements: Number of elements a floorplan must exceed to be kept.
            Defaults to 9, the threshold the query originally hard-coded.
            The value is passed through ``int()`` before being placed in the
            SQL text, so only an integer literal can reach the query.

    Returns:
        The SQL text as a ``str``.

    Raises:
        TypeError, ValueError: If ``min_elements`` cannot be converted by
            ``int()``.
    """
    # sample data from floorplan elements
    # extract floorplans with company_user_id, created_dt
    # every floorplan has 9 (distinct) elements; must have at least one more to be considered "real"
    threshold = int(min_elements)
    valid_fp_sql = '''
    with fp as (
        select floorplan_id, company_user_id, created_dt
            , count(floorplan_element_id) over (partition by floorplan_id) as fp_elements
            , min(date(created_dt)) over (partition by floorplan_id) as fp_created_dt 
            -- get 1 row per flooplan id
            , row_number() over (partition by floorplan_id) as row_num
        from gaia.dim_floorplan_elements 
        )
    , valid_floorplan as (select floorplan_id, company_user_id, created_dt, fp_created_dt, fp_elements
        , case when fp_elements > {threshold} then 'valid' else 'invalid' end as fp_status
    from fp
    where fp_elements > {threshold}
    and row_num = 1
    order by floorplan_id, fp_created_dt
    )
    '''.format(threshold=threshold)
    return valid_fp_sql
# Preview: append a final select to the CTE text and fetch five valid floorplans.
pd.read_sql(
    validate_floorplans() + 'select * from valid_floorplan limit 5',
    conn,
)

Unnamed: 0,floorplan_id,company_user_id,created_dt,fp_created_dt,fp_elements,fp_status
0,152469592,210249,2020-01-16,2019-12-10,46,valid
1,152498473,57045505,2019-12-11,2019-12-11,20,valid
2,152500949,57045505,2019-12-11,2019-12-11,89,valid
3,152501711,57045505,2019-12-17,2019-12-11,63,valid
4,152736076,16821960,2019-12-17,2019-12-17,30,valid


In [4]:
def get_dim_companies(time_frame: str = '2020-01-01') -> str: 
    """Return the SQL ``with`` clause that builds the per-company CTEs.

    The returned text contains only CTE definitions; the caller appends the
    final ``select`` (e.g. ``get_dim_companies() + 'select * from first'``).

    CTEs defined, in order:
        * ``dim_comp``  - per company, the earliest ``start_dt`` among
          ``free_or_paying = 'paying'`` rows, exposed as ``join_date`` and
          kept only when ``date(join_date) >= date(time_frame)``.
        * ``ft``        - per company, the earliest row (by ``start_dt``) of
          ``gaia.dim_company_user_sku_ranges`` regardless of paying status,
          with ``country`` bucketed into a fixed list or ``'Other'``,
          ``ivy_company_type`` renamed to ``pro_type``, plus ``zip`` and
          ``metro_area``.
        * ``live``      - per company, ``is_still_live`` and ``sku`` (as
          ``most_recent_sku``) from the first row when ordered by ``end_dt``
          descending.
        * ``last_live`` - per company, ``end_dt`` (as ``last_live_date``) of
          the first paying row when ordered by ``end_dt`` descending.
        * ``first``     - ``ft`` left-joined to the three CTEs above, adding
          ``first_status``, ``pre_activation_days`` and ``survived_days``.
          Because ``dim_comp`` is left-joined, companies without a qualifying
          ``join_date`` keep null join-derived columns.

    ``dim_comp`` and ``ft`` both exclude rows where ``is_likely_houzzer`` is
    true or ``ivy_company_type = 'Not in ivy'``.

    Args:
        time_frame: Inclusive lower bound for ``join_date``. It is inserted
            verbatim into ``date('...')`` with ``str.format``; no escaping or
            validation is applied here.

    Returns:
        The SQL text as a ``str``.
    """
    # obtain country, pro_type, zip, metro area from dim_company_users
    # obtain sku, join_date from dim_company_user_sku_ranges
    # join_date is the first day of the month when the company first became paying customers
    dim_comp_sql = '''
    with dim_comp as (
        select company_user_id, join_date 
        from (
            select company_user_id, start_dt as join_date 
            from (
                -- dim_company_user_sku_ranges provides start_dt, country, sku, and free_or_paying
                -- dim_company_users has ivy_company_type and is_likely_houzzer 
                select  s.company_user_id
                    , s.start_dt 
                    , account_created_date
                    , row_number() over(PARTITION BY s.company_user_id
                                        ORDER BY start_dt ASC) AS row_num
                from gaia.dim_company_user_sku_ranges s 
                join gaia.dim_company_users c 
                on s.company_user_id = c.company_user_id
                where free_or_paying = 'paying'
                    and NOT c.is_likely_houzzer 
                    and c.ivy_company_type <> 'Not in ivy'
                )a
            where row_num = 1
            )b
        where date(join_date) >= date('{}')
    )

    -- get ft_date as first day in free trial status
    , ft as ( 
        select company_user_id, sku, country, pro_type, first_start_date, zip, metro_area, free_or_paying 
        from (
            select *,  start_dt as first_start_date
            from (
                select  s.*, zip, metro_area
                    --s.company_user_id, 
                    --start_dt,
                    --sku,
                    , case when country = 'AU' then 'AU'
                        when country = 'CA' then 'CA'
                        when country = 'DE' then 'DE'
                        when country = 'ES' then 'ES'
                        when country = 'FR' then 'FR'
                        when country = 'GB' then 'GB'
                        when country = 'IE' then 'IE'
                        when country = 'IT' then 'IT'
                        when country = 'JP' then 'JP'
                        when country = 'NZ' then 'NZ'
                        when country = 'RU' then 'RU'
                        when country = 'US' then 'US'
                        else 'Other' end as country

                    , ivy_company_type as pro_type 
                    , row_number() over(PARTITION BY s.company_user_id
                                        ORDER BY start_dt ASC) AS row_num
                            
                from gaia.dim_company_user_sku_ranges s 
                join gaia.dim_company_users c 
                on s.company_user_id = c.company_user_id
                where --free_or_paying <> 'paying' and 
                        NOT c.is_likely_houzzer 
                        and c.ivy_company_type <> 'Not in ivy'
                )a
        where row_num = 1
        )b
    )

    -- most recent is_still_live and sku status
    , live as (
        select company_user_id, is_still_live, sku as most_recent_sku
        from (  
            select *
            , row_number() over(PARTITION BY company_user_id ORDER BY end_dt DESC) AS row_num
            from gaia.dim_company_user_sku_ranges
            )a
        where row_num = 1
        )

    -- if paying customers are no longer live, get their last live date
    , last_live as (
        select company_user_id, end_dt as last_live_date
        from (  
            select *
            , row_number() over(PARTITION BY company_user_id ORDER BY end_dt DESC) AS row_num
            from gaia.dim_company_user_sku_ranges
            where free_or_paying = 'paying'
            )a
        where row_num = 1
        )        
    -- distinguish between companies who ever did free try
    ,first as (
        select ft.company_user_id, ft.country, ft.first_start_date, comp.join_date, ft.pro_type, ft.zip, ft.metro_area
            , case when first_start_date >= join_date then 'joined' else free_or_paying end as first_status
            , is_still_live, last_live_date, most_recent_sku

            -- pre activation days
            , date_diff('day', date(first_start_date), date(join_date)) as pre_activation_days
            -- activated days: use last live date if not live, othwerwise use today
            , date_diff('day', date(join_date),
                case when is_still_live = 1 then current_date
                else date(last_live_date) end) as survived_days
        from ft -- include both ft and never-joined companies
        left join dim_comp comp 
        on comp.company_user_id = ft.company_user_id
        left join live
        on live.company_user_id = ft.company_user_id
        left join last_live
        on last_live.company_user_id = ft.company_user_id
        
    )    

    
    '''.format(time_frame)  # substitutes the single '{}' in the dim_comp filter
    return dim_comp_sql


In [5]:
# Preview: fetch up to 20 rows of the `first` CTE built by get_dim_companies.
pd.read_sql(
    get_dim_companies() + 'select * from first  limit 20',
    conn,
)

Unnamed: 0,company_user_id,country,first_start_date,join_date,pro_type,zip,metro_area,first_status,is_still_live,last_live_date,most_recent_sku,pre_activation_days,survived_days
0,10873522,CA,2021-08-26,,Others,,~other metros,freemium,0,,Freemium 1.0,,
1,10988636,US,2021-08-26,,Builders,21220,baltimore,freemium,0,,Freemium 1.0,,
2,17450903,RU,2021-08-26,,Others,+7,,freemium,0,,Freemium 1.0,,
3,17695955,US,2021-08-26,,Others,33315,miami,freemium,0,,Freemium 1.0,,
4,18076905,US,2021-08-26,,Others,02459,boston,freemium,0,,Freemium 1.0,,
5,24668766,US,2021-08-26,,Snail Cage,60608,chicago,freemium,0,,Freemium 1.0,,
6,25078835,US,2021-08-26,,Others,85266,phoenix,freemium,0,,Freemium 1.0,,
7,26674571,GB,2021-08-26,,Others,ML12 6FE,~other metros,freemium,0,,Paying Ent. Without SFDC Sku,,
8,30052966,RU,2021-08-26,,Others,664007,~other metros,freemium,0,,Freemium 1.0,,
9,31084049,US,2021-08-26,,Builders,66503,~other metros,freemium,0,,Freemium 1.0,,
