## percentage of customers with a subscription who also purchase from the shop


In [None]:
"""
input: purchased items and customer reports from Glew
output: dataset of inactive vs. active subscribers
"""

import pandas as pd
import numpy as np

# Input CSV exports: purchased items and the customer report.
PATH = 'Purchased_items_1-13.csv'
PATH2 = 'Customer_report_1-13.csv'

# Both files are comma-separated; load them in one pass.
RC_PI, RC_CR = (pd.read_csv(csv_file, sep=',') for csv_file in (PATH, PATH2))

In [None]:
# Inspect the purchased-items frame (RC_PI, loaded from PATH): column
# names, dtypes and non-null counts.
# NOTE(review): the header docstring says the inputs come from Glew while
# this cell was originally labelled "recharge" -- confirm the actual source.
RC_PI.info()

In [None]:
# describe() summary statistics for the purchased-items frame.
RC_PI.describe()

In [None]:
# describe() summary statistics for the customer report (RC_CR, loaded
# from PATH2).
# NOTE(review): this cell was originally labelled "customers from shopify",
# but the Shopify customer export is read separately further down
# (all_customers_shopify_1-13.csv) -- confirm which system RC_CR comes from.
RC_CR.describe()

In [None]:
# Partition the customer report on its 'status' column. The comparison is
# an exact, case-sensitive match, so any other status value lands in
# neither frame.
RC_CR_active = RC_CR.loc[RC_CR['status'].eq('Active')]
RC_CR_inactive = RC_CR.loc[RC_CR['status'].eq('Inactive')]


In [None]:
# Preview the first rows of the customers whose status is 'Active'.
RC_CR_active.head()

In [None]:
# Preview the first rows of the customers whose status is 'Inactive'.
RC_CR_inactive.head()


In [None]:
# Row count, dtypes and non-null counts for the active subset.
RC_CR_active.info()

In [None]:
# Row count, dtypes and non-null counts for the inactive subset.
RC_CR_inactive.info()

In [None]:
# Load the full Shopify customer export (comma-separated) and preview it.
shopify_customers = pd.read_csv(
    'all_customers_shopify_1-13.csv',
    sep=',',
)
shopify_customers.head()

In [None]:
# Give the e-mail column the same name the customer report uses, so both
# frames can be joined on 'customer email'.
shopify_customers = shopify_customers.rename(columns={'Email': 'customer email'})
shopify_customers.head()

In [None]:
# Active subscribers enriched with their Shopify customer record.
# The inner join keeps only e-mails present in both frames.
merged_df_active = pd.merge(
    shopify_customers,
    RC_CR_active,
    how='inner',
    on='customer email',
)
merged_df_active.head()

In [None]:
# Trim the merged frame down to the customer-level fields used later on.
merged_df_active = merged_df_active.loc[:, [
    # identity
    'First Name',
    'Last Name',
    'customer email',
    'customer recharge id',
    'Shopify customer id',
    # subscription details
    'created at',
    'First Charge Processed At',
    'status',
    'Number Active Subscriptions',
    'Number Subscriptions',
    'portal_url',
    # spend / order counters
    'Total Spent',
    'Total Orders',
    'Past Orders Count',
    # tags, address, marketing consent
    'Tags',
    'Address1',
    'City',
    'Province Code',
    'Country',
    'Accepts Marketing',
]]

In [None]:
# Row count, dtypes and non-null counts of the trimmed active-customer frame.
merged_df_active.info()

In [None]:
# describe() summary statistics for the active-customer frame.
merged_df_active.describe()

In [None]:
# Preview the first rows after the column selection.
merged_df_active.head()

In [None]:
# Inactive subscribers enriched with their Shopify customer record:
# inner join on e-mail, then the same customer-level column selection
# that is applied to the active frame.
merged_df_inactive = pd.merge(
    shopify_customers,
    RC_CR_inactive,
    how='inner',
    on='customer email',
)
merged_df_inactive = merged_df_inactive.loc[:, [
    # identity
    'First Name',
    'Last Name',
    'customer email',
    'customer recharge id',
    'Shopify customer id',
    # subscription details
    'created at',
    'First Charge Processed At',
    'status',
    'Number Active Subscriptions',
    'Number Subscriptions',
    'portal_url',
    # spend / order counters
    'Total Spent',
    'Total Orders',
    'Past Orders Count',
    # tags, address, marketing consent
    'Tags',
    'Address1',
    'City',
    'Province Code',
    'Country',
    'Accepts Marketing',
]]
merged_df_inactive.head()

In [None]:
# Row count, dtypes and non-null counts of the trimmed inactive-customer frame.
merged_df_inactive.info()

In [None]:
# The Shopify order history comes as several CSV exports. Stack them into
# one frame so it can be joined against the active / inactive customer
# frames (join key: shopify customer id or customer email).
all_files = ['orders_export_1.csv', 'orders_export_2.csv', 'orders_export_3.csv']

# Lazy iterator of per-file frames; concat consumes it and renumbers the
# rows 0..n-1.
df_from_each_file = map(pd.read_csv, all_files)
all_orders = pd.concat(df_from_each_file, ignore_index=True)

In [None]:
# Row count, dtypes and non-null counts of the combined order history.
all_orders.info()

In [None]:
# Orders placed by customers with an active subscription.
# The rename gives all_orders the 'customer email' join key; the inactive
# merge further down relies on it having happened here.
all_orders = all_orders.rename(columns={'Email': 'customer email'})
all_orders_active = pd.merge(
    all_orders,
    merged_df_active,
    how='inner',
    on='customer email',
)
all_orders_active.info()

In [None]:
# Column template applied to the orders-x-customers frames.
# Names ending in _x / _y are pandas' default merge suffixes for columns
# that exist on both sides: _x is the left frame (orders), _y the right
# frame (customers).
columns = [
    # who
    'First Name',
    'Last Name',
    'customer email',
    'customer recharge id',
    'Shopify customer id',
    # subscription
    'created at',
    'First Charge Processed At',
    'status',
    'Number Active Subscriptions',
    'Number Subscriptions',
    'portal_url',
    'Accepts Marketing_x',
    # customer-level totals
    'Total Spent',
    'Total Orders',
    'Past Orders Count',
    # order line
    'Paid at',
    'Lineitem name',
    'Lineitem sku',
    'Lineitem quantity',
    'Lineitem price',
    'Subtotal',
    'Total',
    # tags and address
    'Tags_x',
    'Tags_y',
    'Address1',
    'City',
    'Province Code',
    'Country',
]

In [None]:
# Reduce the active orders frame to the template columns, in template order.
all_orders_active = all_orders_active.loc[:, columns]
all_orders_active.info()

In [None]:
# Orders placed by customers whose subscription is inactive: inner join on
# e-mail, then reduce to the template columns.
all_orders_inactive = pd.merge(
    all_orders,
    merged_df_inactive,
    how='inner',
    on='customer email',
)
all_orders_inactive = all_orders_inactive.loc[:, columns]
all_orders_inactive.info()

#### analysis on active

In [None]:
# Preview the active orders frame before 'Total Spent' is de-duplicated.
all_orders_active.head()

In [None]:
# 'Total Spent' is a customer-level value that the merge repeats on every
# order row. Keep it on the first row of each e-mail and blank it on the
# rest (presumably so the column can be summed without double counting).
# NOTE(review): this edits all_orders_active in place, so the keyword
# filtering later in the notebook works on the blanked column. A customer
# whose first row is filtered out there keeps no 'Total Spent' value --
# confirm that is intended.
all_orders_active.loc[
    all_orders_active.duplicated(subset=['customer email'], keep='first'),
    ['Total Spent']
] = np.nan
all_orders_active.to_csv(path_or_buf="all_orders_active.csv")

#### analysis on inactive

In [None]:
# Preview the inactive orders frame before 'Total Spent' is de-duplicated.
all_orders_inactive.head()

In [None]:
# 'Total Spent' is a customer-level value that the merge repeats on every
# order row. Keep it on the first row of each e-mail and blank it on the
# rest (presumably so the column can be summed without double counting).
# NOTE(review): this edits all_orders_inactive in place, so the keyword
# filtering later in the notebook works on the blanked column. A customer
# whose first row is filtered out there keeps no 'Total Spent' value --
# confirm that is intended.
all_orders_inactive.loc[
    all_orders_inactive.duplicated(subset=['customer email'], keep='first'),
    ['Total Spent']
] = np.nan
all_orders_inactive.to_csv(path_or_buf="all_orders_inactive.csv")


#### inactive

In [None]:
# Preview the inactive orders frame that the keyword filter below runs on.
all_orders_inactive.head()

In [None]:
# Keywords identifying the line items to exclude (presumably the
# subscription products -- confirm against the product catalogue).
words = ['Seasonal', 'Annual', 'Daebak Box', 'Deluxe', 'Original']

# True where 'Lineitem name' contains any keyword as a whole word
# (case-sensitive). na=False makes rows with a missing line-item name
# evaluate to False instead of NaN; without it the mask becomes an object
# Series and the later `~mask` raises TypeError.
mask = all_orders_inactive['Lineitem name'].str.contains(
    r'\b(?:{})\b'.format('|'.join(words)),
    regex=True,
    na=False,
)

In [None]:
# Display the keyword mask (True = line item matches one of `words`).
mask

In [None]:
# Keep only the order rows whose line item did NOT match a keyword.
inactive_orders_filtered = all_orders_inactive[~mask]

In [None]:
# Preview the remaining (non-matching) order rows.
inactive_orders_filtered.head()

In [None]:
# Distinct customer e-mails among all orders of inactive subscribers.
all_orders_inactive['customer email'].nunique()

In [None]:
# Distinct customer e-mails with at least one non-matching order row.
# Presumably this count over the previous one is the percentage named in
# the notebook title -- the ratio itself is not computed here.
inactive_orders_filtered['customer email'].nunique()

In [None]:
# Export the filtered rows (the DataFrame index is written as the first column).
inactive_orders_filtered.to_csv('inactive_orders_filtered.csv')

#### active

In [None]:
# Preview the active orders frame that the keyword filter below runs on.
all_orders_active.head()

In [None]:
# Keywords identifying the line items to exclude (presumably the
# subscription products -- confirm against the product catalogue).
words = ['Seasonal', 'Annual', 'Daebak Box', 'Deluxe', 'Original']

# True where 'Lineitem name' contains any keyword as a whole word
# (case-sensitive). na=False makes rows with a missing line-item name
# evaluate to False instead of NaN; without it the mask becomes an object
# Series and the later `~mask` raises TypeError.
mask = all_orders_active['Lineitem name'].str.contains(
    r'\b(?:{})\b'.format('|'.join(words)),
    regex=True,
    na=False,
)

In [None]:
# Display the keyword mask (True = line item matches one of `words`).
# The name `mask` is reused from the inactive section, so this must be the
# mask built from all_orders_active in the cell directly above.
mask

In [None]:
# Keep only the order rows whose line item did NOT match a keyword.
active_orders_filtered = all_orders_active[~mask]

In [None]:
# Preview the remaining (non-matching) order rows.
active_orders_filtered.head()

In [None]:
# Export the filtered rows (the DataFrame index is written as the first column).
active_orders_filtered.to_csv('active_orders_filtered.csv')

In [None]:
# Distinct customer e-mails among all orders of active subscribers.
all_orders_active['customer email'].nunique()

In [None]:
# Distinct customer e-mails with at least one non-matching order row.
# Presumably this count over the previous one is the percentage named in
# the notebook title -- the ratio itself is not computed here.
active_orders_filtered['customer email'].nunique()
