**Index**
+ [x] version 0 : basic
+ [x] version 1 : Query text
+ [x] version 2 : Load Query text

In [1]:
import cx_Oracle as oci

from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials

import numpy as np
import pandas as pd

from datetime import datetime

In [2]:
def get_service(api_name, api_version, scopes, key_file_location):
    credentials = ServiceAccountCredentials.from_json_keyfile_name(key_file_location, scopes=scopes)
    service = build(api_name, api_version, credentials=credentials)
    return service
    
# Define the auth scopes to request.
scope = 'https://www.googleapis.com/auth/analytics.readonly'
key_file_location = './vision API-06a448b64428.json'

# Authenticate and construct service.
service = get_service(api_name='analytics', api_version='v3', scopes=[scope], key_file_location=key_file_location)

# Get a list of all Google Analytics accounts for this user
accounts = service.management().accounts().list().execute()
if accounts.get('items'):
    # Get the first Google Analytics account.
    account = accounts.get('items')[0].get('id')
    # Get a list of all the properties for the first account.
    properties = service.management().webproperties().list(accountId=account).execute()

In [18]:
# 조건1 : 일자
start = "2021-01-01"
end = datetime.today().strftime('%Y-%m-%d')
start_date_info = [d.strftime('%Y-%m-%d') for d in pd.date_range(start, end, freq='MS')]
end_date_info = [d.strftime('%Y-%m-%d') for d in pd.date_range(start, end, freq='M')]
if len(start_date_info) != len(end_date_info):
    end_date_info.append(end)

In [17]:
end_date_info

['2021-01-31',
 '2021-02-28',
 '2021-03-31',
 '2021-04-30',
 '2021-05-31',
 '2021-06-30',
 '2021-07-26']

## 1.Acquisition

In [25]:

# 조건2 : 필터링 항목(dimensions)
dimensions_value = 'ga:channelGrouping'

# 조건3 : 측정 단위(metrics)
metrics_value = 'ga:sessions'

# 수집내용 정의
column_name = (dimensions_value +','+ metrics_value).replace('ga:', '').split(",")
df2_ga = pd.DataFrame( columns =column_name)
df2_ga['date'] = ''

# 결과 수집+정리
for i in range(0, len(start_date_info)):
    try:
        result = service.data().ga().get(ids='ga:236972743',
                                         start_date=start_date_info[i], end_date=end_date_info[i], 
                                         dimensions=dimensions_value, metrics=metrics_value).execute()
        df_temp = pd.DataFrame(result['rows'], columns =column_name)
        df_temp['date'] = start_date_info[i]
        df2_ga = pd.concat([df2_ga, df_temp])
    except:
        pass

# 자료의 상위속성 추가
df2_ga['gubun'] = 'ALL Activation'

# header name 변경
df2_ga.rename(columns = {'channelGrouping' : 'gubun2', 'sessions' : 'value'}, inplace = True)
df2_ga = df2_ga.reset_index(drop = True)

df2_ga['gubun2'][df2_ga['gubun2']=='Display'] = 'Display Ad'
df2_ga['gubun2'][df2_ga['gubun2']=='Social'] = 'Organic Social'
df2_ga['gubun2'][df2_ga['gubun2']=='CRM'] = 'Owned Channel'
df2_ga['gubun2'][df2_ga['gubun2']=='Paid Search'] = 'Search Ad'
df2_ga['gubun2'][df2_ga['gubun2']=='Referral'] = 'Site Referral'

# 월 정렬
df2_ga['date'] = df2_ga['date'].str[:7]
df2_MTLY = pd.pivot_table(df2_ga, index = ['gubun2'], values = 'value', columns = 'date', aggfunc = np.sum).fillna(0)

# # # total값 추가가공
df2_MTLY = df2_MTLY.T
df2_MTLY['ALL activation : Total'] = df2_MTLY['(Other)'].astype(int) + df2_MTLY['Direct'].astype(int)+ df2_MTLY['Display Ad'].astype(int)+ df2_MTLY['Organic Search'].astype(int)+ df2_MTLY['Search Ad'].astype(int)+ df2_MTLY['Site Referral'].astype(int)+ df2_MTLY['Organic Social'].astype(int)+ df2_MTLY['Owned Channel'].astype(int)
df2_MTLY = df2_MTLY.T

# # # 기준 테이블 작성/order 수정
df_order = pd.DataFrame()
df_order['gubun2'] = ['ALL activation : Total', 'Search Ad', 'Display Ad', 'Video Ad', 'Owned Channel', 'Organic Search','Organic Social','Site Referral', 'Direct', '(Other)']
df2_MTLY= pd.merge(df_order, df2_MTLY, on ='gubun2', how ='left').fillna(0)
df2_MTLY.set_index(df2_MTLY.gubun2,inplace = True)
del df2_MTLY['gubun2']

# df2_MTLY = df2_MTLY.reset_index(drop = False)

In [29]:
# 조건2 : 필터링 항목(dimensions)
dimensions_value = 'ga:browser'

# 조건3 : 측정 단위(metrics)
metrics_value = 'ga:session'

# 수집내용 정의
column_name = (dimensions_value +','+ metrics_value).replace('ga:', '').split(",")
df1_ga = pd.DataFrame( columns =column_name)
df1_ga['date'] = ''

# 결과 수집+정리
for i in range(0, len(start_date_info)):
    try:
        result = service.data().ga().get(ids='ga:236972743',
                                         start_date=start_date_info[i], end_date=end_date_info[i], 
                                         dimensions=dimensions_value, metrics=metrics_value).execute()
        df_temp = pd.DataFrame(result['rows'], columns =column_name)
        df_temp['date'] = start_date_info[i]
        df1_ga = pd.concat([df1_ga, df_temp])
    except:
        pass

# 자료의 상위속성 추가
df1_ga['gubun'] = 'APP activation'

# 임시자료 추가(2021년 1월 미수집에 따른)
df_temp = pd.DataFrame({"browser":['Android Webview'], "users":[0], "date":['2021-01-01'], "gubun":['APP activation']})
df1_ga = df1_ga.append(df_temp, ignore_index = True)

# 필요한 자료만 필터링
df1_ga = df1_ga[(df1_ga.browser == 'Android Webview' )|(df1_ga.browser == 'Safari (in-app)')]

# header name 변경
df1_ga.rename(columns = {'browser' : 'gubun2', 'users' : 'value'}, inplace = True)
df1_ga = df1_ga.reset_index(drop = True)

# 월 정렬
df1_ga['date'] = df1_ga['date'].str[:7]

# pivot table
df_MTLY = pd.pivot_table(df1_ga, index = ['gubun2'], values = 'value', columns = 'date', aggfunc = np.sum).fillna(0)

# total값 추가가공
df_MTLY = df_MTLY.T
df_MTLY['APP activation : total'] = df_MTLY['Android Webview'].astype(int)+ df_MTLY['Safari (in-app)'].astype(int)
df_MTLY.rename(columns = {'Android Webview' : 'Android', 'Safari (in-app)' : 'iOS'}, inplace = True)
df_MTLY = df_MTLY.T

# 기준 테이블 작성/order 수정
df_order = pd.DataFrame()
df_order['gubun2'] = ['APP activation : total', 'Android', 'iOS']

df_MTLY= pd.merge(df_order, df_MTLY, on ='gubun2', how ='left')

df_MTLY.set_index(df_MTLY.gubun2,inplace = True)
del df_MTLY['gubun2']

KeyError: 'Safari (in-app)'

In [26]:
df2_MTLY

Unnamed: 0_level_0,2021-02,2021-03,2021-04,2021-05,2021-06,2021-07
gubun2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ALL activation : Total,52242,45195,171801,260373,414592,220259
Search Ad,316,17,203,843,22354,31059
Display Ad,0,9251,9362,14119,259587,122895
Video Ad,0,0,0,0,0,0
Owned Channel,0,0,70,313,5190,8416
Organic Search,2715,5343,10133,24337,13596,11131
Organic Social,2185,2899,4475,6770,2229,1294
Site Referral,264,478,1244,759,35103,2056
Direct,46761,25348,24156,21605,22631,19574
(Other),1,1859,122158,191627,53902,23834


In [None]:
df_MTLY

In [31]:
##########
#1.Acquisition.txt
####

# query 열기
query = "C:/Users/MAEIL/Desktop/셀렉스몰/18. query(AARRR)/AARRR_query_v2/monthly/1.Acquisition_v2.3.txt"

# DB접속 : 접속정보 읽기
con_text = open("C:/Users/MAEIL/conn_SELEX.txt", 'r', encoding='utf8')
con_text = con_text.read()

# DB 접속 : 실제 접속
conn = oci.connect(con_text[1:], encoding='UTF-8', nencoding='UTF-8')

f = open(query, 'r')
line = f.read()
f.close()

## query 실행( SQL문 실행 메모리 영역에서 진행)
cursor = conn.cursor()
cursor.execute(line)
df1 = cursor.fetchall()[1:]

df1 = pd.DataFrame(df1, columns = ['MONTH', 'App install', 'App install : paid'])
df1 = df1.set_index('MONTH').T.fillna(0)

In [32]:
# 조건2 : 필터링 항목(dimensions)
dimensions_value = 'ga:browser'

# 조건3 : 측정 단위(metrics)
metrics_value = 'ga:users'

# 수집내용 정의
column_name = (dimensions_value +','+ metrics_value).replace('ga:', '').split(",")
df1_ga = pd.DataFrame( columns =column_name)
df1_ga['date'] = ''

# 결과 수집+정리
for i in range(0, len(start_date_info)):
    try:
        result = service.data().ga().get(ids='ga:236972743',
                                         start_date=start_date_info[i], end_date=end_date_info[i], 
                                         dimensions=dimensions_value, metrics=metrics_value).execute()
        df_temp = pd.DataFrame(result['rows'], columns =column_name)
        df_temp['date'] = start_date_info[i]
        df1_ga = pd.concat([df1_ga, df_temp])
    except:
        pass

# 자료의 상위속성 추가
df1_ga['gubun'] = 'APP activation'

# 임시자료 추가(2021년 1월 미수집에 따른)
df_temp = pd.DataFrame({"browser":['Android Webview'], "users":[0], "date":['2021-01-01'], "gubun":['APP activation']})
df1_ga = df1_ga.append(df_temp, ignore_index = True)

# 필요한 자료만 필터링
df1_ga = df1_ga[(df1_ga.browser == 'Android Webview' )|(df1_ga.browser == 'Safari (in-app)')]

# header name 변경
df1_ga.rename(columns = {'browser' : 'gubun2', 'users' : 'value'}, inplace = True)
df1_ga = df1_ga.reset_index(drop = True)

# 월 정렬
df1_ga['date'] = df1_ga['date'].str[:7]

# pivot table
df_MTLY = pd.pivot_table(df1_ga, index = ['gubun2'], values = 'value', columns = 'date', aggfunc = np.sum).fillna(0)

# total값 추가가공
df_MTLY = df_MTLY.T
df_MTLY['APP activation : total'] = df_MTLY['Android Webview'].astype(int)+ df_MTLY['Safari (in-app)'].astype(int)
df_MTLY.rename(columns = {'Android Webview' : 'Android', 'Safari (in-app)' : 'iOS'}, inplace = True)
df_MTLY = df_MTLY.T

# 기준 테이블 작성/order 수정
df_order = pd.DataFrame()
df_order['gubun2'] = ['APP activation : total', 'Android', 'iOS']

df_MTLY= pd.merge(df_order, df_MTLY, on ='gubun2', how ='left')

df_MTLY.set_index(df_MTLY.gubun2,inplace = True)
del df_MTLY['gubun2']

In [33]:

# 조건2 : 필터링 항목(dimensions)
dimensions_value = 'ga:channelGrouping'

# 조건3 : 측정 단위(metrics)
metrics_value = 'ga:sessions'

# 수집내용 정의
column_name = (dimensions_value +','+ metrics_value).replace('ga:', '').split(",")
df2_ga = pd.DataFrame( columns =column_name)
df2_ga['date'] = ''

# 결과 수집+정리
for i in range(0, len(start_date_info)):
    try:
        result = service.data().ga().get(ids='ga:236972743',
                                         start_date=start_date_info[i], end_date=end_date_info[i], 
                                         dimensions=dimensions_value, metrics=metrics_value).execute()
        df_temp = pd.DataFrame(result['rows'], columns =column_name)
        df_temp['date'] = start_date_info[i]
        df2_ga = pd.concat([df2_ga, df_temp])
    except:
        pass

# 자료의 상위속성 추가
df2_ga['gubun'] = 'ALL Activation'

# header name 변경
df2_ga.rename(columns = {'channelGrouping' : 'gubun2', 'sessions' : 'value'}, inplace = True)
df2_ga = df2_ga.reset_index(drop = True)

df2_ga['gubun2'][df2_ga['gubun2']=='Display'] = 'Display Ad'
df2_ga['gubun2'][df2_ga['gubun2']=='Social'] = 'Organic Social'
df2_ga['gubun2'][df2_ga['gubun2']=='CRM'] = 'Owned Channel'
df2_ga['gubun2'][df2_ga['gubun2']=='Paid Search'] = 'Search Ad'
df2_ga['gubun2'][df2_ga['gubun2']=='Referral'] = 'Site Referral'

# 월 정렬
df2_ga['date'] = df2_ga['date'].str[:7]
df2_MTLY = pd.pivot_table(df2_ga, index = ['gubun2'], values = 'value', columns = 'date', aggfunc = np.sum).fillna(0)

# # # total값 추가가공
df2_MTLY = df2_MTLY.T
df2_MTLY['ALL activation : Total'] = df2_MTLY['(Other)'].astype(int) + df2_MTLY['Direct'].astype(int)+ df2_MTLY['Display Ad'].astype(int)+ df2_MTLY['Organic Search'].astype(int)+ df2_MTLY['Search Ad'].astype(int)+ df2_MTLY['Site Referral'].astype(int)+ df2_MTLY['Organic Social'].astype(int)+ df2_MTLY['Owned Channel'].astype(int)
df2_MTLY = df2_MTLY.T

# # # 기준 테이블 작성/order 수정
df_order = pd.DataFrame()
df_order['gubun2'] = ['ALL activation : Total', 'Search Ad', 'Display Ad', 'Video Ad', 'Owned Channel', 'Organic Search','Organic Social','Site Referral', 'Direct', '(Other)']
df2_MTLY= pd.merge(df_order, df2_MTLY, on ='gubun2', how ='left').fillna(0)
df2_MTLY.set_index(df2_MTLY.gubun2,inplace = True)
del df2_MTLY['gubun2']

# df2_MTLY = df2_MTLY.reset_index(drop = False)

In [34]:
Acquisition = pd.concat([df1, df_MTLY, df2_MTLY]).fillna(0)
Acquisition = Acquisition.apply(pd.to_numeric)
Acquisition['증가율(%)'] = round((Acquisition[Acquisition.columns[-1]] - Acquisition[Acquisition.columns[-2]])/Acquisition[Acquisition.columns[-2]]*100,2)

In [35]:
Acquisition['증가율(%)'].mean()

-26.241428571428575

In [36]:
Acquisition

Unnamed: 0,2021-01,2021-02,2021-03,2021-04,2021-05,2021-06,2021-07,증가율(%)
App install,837,18281,1180,1478,1443,1336,787,-41.09
App install : paid,357,12811,729,1017,908,813,647,-20.42
APP activation : total,0,15689,13017,58668,90070,65613,53435,-18.56
Android,0,11427,10217,53451,75311,56633,49174,-13.17
iOS,0,4262,2800,5217,14759,8980,4261,-52.55
ALL activation : Total,0,52242,45195,171801,260373,414592,220358,-46.85
Search Ad,0,316,17,203,843,22354,31071,39.0
Display Ad,0,0,9251,9362,14119,259587,122946,-52.64
Video Ad,0,0,0,0,0,0,0,
Owned Channel,0,0,0,70,313,5190,8422,62.27


In [30]:
before = Acquisition['2021-04'][5]-Acquisition['2021-04'][7]-Acquisition['2021-04'][-1]
after  = Acquisition['2021-05'][5]-Acquisition['2021-05'][7]-Acquisition['2021-05'][-1]
ratio = round((after - before)/before*100,1)
cnt = after- before
Acquisition_summary_text1 = '광고로 인한 고객 유입은 전월대비 {}% 증가함(인원 : {:}명/월)'.format(ratio, cnt)
Acquisition_summary_text1

'광고로 인한 고객 유입은 전월대비 35.6% 증가함(인원 : 14346명/월)'

## 2.Activation

In [37]:
##########
# 2.Activation : DAU
####

# 조건3 : 측정 단위(metrics)
metrics_value = 'ga:users'

# 수집내용 정의
column_name = (metrics_value).replace('ga:', '').split(",")
df3_1_ga = pd.DataFrame( columns =column_name)
df3_1_ga['date'] = ''

# 결과 수집+정리
for i in range(0, len(start_date_info)):
    try:
        result = service.data().ga().get(ids='ga:236972743',
                                         start_date=start_date_info[i], end_date=end_date_info[i], 
                                         metrics=metrics_value).execute()
        df_temp = pd.DataFrame(result['rows'], columns =column_name)
        df_temp['date'] = start_date_info[i]
        df3_1_ga = pd.concat([df3_1_ga, df_temp])
    except:
        pass

# 자료의 상위속성 추가
df3_1_ga['gubun'] = 'MAU_Total'
df3_1_ga['gubun2'] = 'user'

# header name 변경
df3_1_ga.rename(columns = { 'users' : 'value'}, inplace = True)
df3_1_ga = df3_1_ga.reset_index(drop = True)

# 월 정렬
df3_1_ga['date'] = df3_1_ga['date'].str[:7]

# 조건3 : 측정 단위(metrics)
metrics_value = 'ga:Newusers'

# 수집내용 정의
column_name = (metrics_value).replace('ga:', '').split(",")
df3_2_ga = pd.DataFrame( columns =column_name)
df3_2_ga['date'] = ''

# 결과 수집+정리
for i in range(0, len(start_date_info)):
    try:
        result = service.data().ga().get(ids='ga:236972743',
                                         start_date=start_date_info[i], end_date=end_date_info[i], 
                                         metrics=metrics_value).execute()
        df_temp = pd.DataFrame(result['rows'], columns =column_name)
        df_temp['date'] = start_date_info[i]
        df3_2_ga = pd.concat([df3_2_ga, df_temp])
    except:
        pass

# 자료의 상위속성 추가
df3_2_ga['gubun'] = 'MAU_Total'
df3_2_ga['gubun2'] = 'Newuser'

# 임시자료 추가(2021년 1월 미수집에 따른)
df_temp = pd.DataFrame({"gubun2":['Newuser'], "Newuser":[0], "date":['2021-01'], "gubun":['MAU_Total']})
df3_2_ga = df3_2_ga.append(df_temp, ignore_index = True)

# header name 변경
df3_2_ga.rename(columns = { 'Newusers' : 'value'}, inplace = True)
df3_2_ga = df3_2_ga.reset_index(drop = True)

# 월 정렬
df3_2_ga['date'] = df3_2_ga['date'].str[:7]
df3_ga = pd.concat([df3_1_ga, df3_2_ga])

df3_MTLY = pd.pivot_table(df3_ga, index = ['gubun2'], values = 'value', columns = 'date', aggfunc = np.sum).fillna(0)

# diff값 추가가공
df3_MTLY = df3_MTLY.T
df3_MTLY['Revisituser'] = df3_MTLY['user'].astype(int) - df3_MTLY['Newuser'].astype(int)
df3_MTLY = df3_MTLY.T

# 기준 테이블 작성/order 수정
df_order = pd.DataFrame()
df_order['gubun2'] = ['user', 'Newuser', 'Revisituser']
df3_MTLY= pd.merge(df_order, df3_MTLY, on ='gubun2', how ='left')
df3_MTLY.set_index(df3_MTLY.gubun2,inplace = True)
del df3_MTLY['gubun2']

In [38]:
# 조건3 : 측정 단위(metrics)
metrics_value = 'ga:users'

# 수집내용 정의
column_name = ( metrics_value).replace('ga:', '').split(",")
df4_1_ga = pd.DataFrame( columns =column_name)
df4_1_ga['date'] = ''

for i in range(0, len(start_date_info)):
    # 결과 출력
    try:
        result = service.data().ga().get(ids='ga:236972743',
                                         start_date=start_date_info[i], end_date=end_date_info[i], 
                                         metrics=metrics_value).execute()
        day_diff = (datetime.strptime(end_date_info[i], "%Y-%m-%d") - datetime.strptime(start_date_info[i], "%Y-%m-%d")).days
        df_temp = pd.DataFrame(result['rows'], columns =column_name).astype(int)/day_diff
        df_temp['date'] = start_date_info[i]
        df4_1_ga = pd.concat([df4_1_ga, df_temp])
    except:
        pass

# 자료의 상위속성 추가
df4_1_ga['gubun'] = 'DAU_Total'
df4_1_ga['gubun2'] = 'user'

# header name 변경
df4_1_ga.rename(columns = {'channelGrouping' : 'gubun2', 'users' : 'value'}, inplace = True)
df4_1_ga = df4_1_ga.reset_index(drop = True)

# 월 정렬
df4_1_ga['date'] = df4_1_ga['date'].str[:7]

# 조건3 : 측정 단위(metrics)
metrics_value = 'ga:Newusers'

# 수집내용 정의
column_name = (metrics_value).replace('ga:', '').split(",")
df4_2_ga = pd.DataFrame( columns =column_name)
df4_2_ga['date'] = ''

# 결과 수집+정리
for i in range(0, len(start_date_info)):
    # 결과 출력
    try:
        result = service.data().ga().get(ids='ga:236972743',
                                         start_date=start_date_info[i], end_date=end_date_info[i], 
                                         metrics=metrics_value).execute()
        day_diff = (datetime.strptime(end_date_info[i], "%Y-%m-%d") - datetime.strptime(start_date_info[i], "%Y-%m-%d")).days
        df_temp = pd.DataFrame(result['rows'], columns =column_name).astype(int)/day_diff
        df_temp['date'] = start_date_info[i]
        df4_2_ga = pd.concat([df4_2_ga, df_temp])
    except:
        pass


# 자료의 상위속성 추가
df4_2_ga['gubun'] = 'DAU_Total'
df4_2_ga['gubun2'] = 'Newuser'


# 임시자료 추가(2021년 1월 미수집에 따른)
df_temp = pd.DataFrame({"gubun2":['Newuser'], "Newuser":[0], "date":['2021-01'], "gubun":['DAU_Total']})
df4_2_ga = df4_2_ga.append(df_temp, ignore_index = True)


# header name 변경
df4_2_ga.rename(columns = {'channelGrouping' : 'gubun2', 'Newusers' : 'value'}, inplace = True)
df4_2_ga = df4_2_ga.reset_index(drop = True)

# 월 정렬
df4_2_ga['date'] = df4_2_ga['date'].str[:7]
df4_ga = pd.concat([df4_1_ga, df4_2_ga])

df4_MTLY = pd.pivot_table(df4_ga, index = ['gubun2'], values = 'value', columns = 'date', aggfunc = np.sum).fillna(0)

# # diff값 추가가공
df4_MTLY = df4_MTLY.T
df4_MTLY['Revisituser'] = df4_MTLY['user'] - df4_MTLY['Newuser']
df4_MTLY = df4_MTLY.T

# # 기준 테이블 작성/order 수정
df_order = pd.DataFrame()
df_order['gubun2'] = ['user', 'Newuser', 'Revisituser']
df4_MTLY= pd.merge(df_order, df4_MTLY, on ='gubun2', how ='left')
df4_MTLY.set_index(df4_MTLY.gubun2,inplace = True)
del df4_MTLY['gubun2']

In [39]:
##########
# 2.Activation
####

# query 열기
query = "C:/Users/MAEIL/Desktop/셀렉스몰/18. query(AARRR)/AARRR_query_v2/monthly/2.Activation_v2.3.txt"

# DB접속 : 접속정보 읽기
con_text = open("C:/Users/MAEIL/conn_SELEX.txt", 'r', encoding='utf8')
con_text = con_text.read()

# DB 접속 : 실제 접속
conn = oci.connect(con_text[1:], encoding='UTF-8', nencoding='UTF-8')

f = open(query, 'r')
line = f.read()
f.close()

## query 실행( SQL문 실행 메모리 영역에서 진행)
cursor = conn.cursor()
cursor.execute(line)
df2 = cursor.fetchall()[1:]
# df1 = pd.DataFrame(df1)
df2 = pd.DataFrame(df2, columns = ['MONTH', '신규가입', '신규가입-첫구매완료', '신규가입-신규대비(%)', '누적-회원수','누적-구매',
                                   '수신동의', '수신동의-SMS동의', '수신동의-PUSH동의', '수신동의-EMIL동의'])
df2 = df2.set_index('MONTH').T.fillna(0)

In [40]:
Activation = pd.concat([df4_MTLY, df3_MTLY, df2]).fillna(0)
Activation = Activation.apply(pd.to_numeric)
Activation['증가율(%)'] = round((Activation[Activation.columns[-1]] - Activation[Activation.columns[-2]])/Activation[Activation.columns[-2]]*100,2)

In [41]:
Activation

Unnamed: 0,2021-01,2021-02,2021-03,2021-04,2021-05,2021-06,2021-07,증가율(%)
user,0.0,730.703704,804.433333,4172.413793,5676.3,6015.724138,4187.72,-30.39
Newuser,0.0,736.074074,674.366667,4025.034483,5123.866667,5226.689655,3486.4,-33.3
Revisituser,0.0,-5.37037,130.066667,147.37931,552.433333,789.034483,701.32,-11.12
user,0.0,19729.0,24133.0,121000.0,170289.0,174456.0,104693.0,-39.99
Newuser,0.0,19874.0,20231.0,116726.0,153716.0,151574.0,87160.0,-42.5
Revisituser,0.0,-145.0,3902.0,4274.0,16573.0,22882.0,17533.0,-23.38
신규가입,814.0,18464.0,1719.0,3186.0,5575.0,4715.0,3996.0,-15.25
신규가입-첫구매완료,20.0,13881.0,1266.0,1963.0,3308.0,2795.0,2249.0,-19.53
신규가입-신규대비(%),0.0246,0.7518,0.7365,0.6161,0.5934,0.5928,0.5628,-5.06
누적-회원수,817.0,19281.0,21000.0,24186.0,29761.0,34476.0,38472.0,11.59


In [42]:
newuser_MAU  = Activation['2021-05'][4] - Activation['2021-04'][4]
newuser_ratio_MAU = round(  newuser_MAU / Activation['2021-04'][4] *100,1)

revisit_MAU = Activation['2021-05'][5] - Activation['2021-04'][5]
revisit_ratio_MAU = round( revisit_MAU / Activation['2021-04'][5] *100,1)

Activation_summary_text1 = 'MAU(회원)관점에서는 신규회원은 {}% 증가(인원 : {:,}명/월) / 재방문은 {}% 증가({:}명/월)'.format( newuser_ratio_MAU, newuser_MAU, revisit_ratio_MAU, revisit_MAU ) 

In [43]:
Activation_summary_text1

'MAU(회원)관점에서는 신규회원은 31.7% 증가(인원 : 36,990.0명/월) / 재방문은 287.8% 증가(12299.0명/월)'

## 3.Retention-Daily

In [44]:
##########
# 3.Retention-Daily
####

# query 열기
query = "C:/Users/MAEIL/Desktop/셀렉스몰/18. query(AARRR)/AARRR_query_v2/monthly/3.Retention-Daily_v2.3.txt"

# DB접속 : 접속정보 읽기
con_text = open("C:/Users/MAEIL/conn_SELEX.txt", 'r', encoding='utf8')
con_text = con_text.read()

# DB 접속 : 실제 접속
conn = oci.connect(con_text[1:], encoding='UTF-8', nencoding='UTF-8')

f = open(query, 'r')
line = f.read()
f.close()

## query 실행( SQL문 실행 메모리 영역에서 진행)
cursor = conn.cursor()
cursor.execute(line)
df3 = cursor.fetchall()[1:]
df3 = pd.DataFrame(df3, columns = ['MONTH', ('Daily PU', ''), ('Daily PU', 'NPU'),  ('Daily PU', 'RPU'),  ('Daily PU', '비회원'), 
                                   'Daily 결제건수(Total)', 'Daily 결제건수(NPU)', 'Daily 결제건수(RPU)',
                                   'Daily PU_쿠폰사용(Total)', 'Daily PU_쿠폰사용(NPU)', 'Daily PU_쿠폰사용(RPU)',
                                   'Daily 결제건수_쿠폰사용(Total)', 'Daily 결제건수_쿠폰사용(NPU)', 'Daily 결제건수_쿠폰사용(RPU)'])
df3_1 = df3.set_index('MONTH').T.fillna(0)

In [45]:
df3_1

MONTH,2021-01,2021-02,2021-03,2021-04,2021-05,2021-06,2021-07
"(Daily PU, )",1.4,527.8,84.2,119.5,164.6,159.4,168.9
"(Daily PU, NPU)",1.4,496.6,50.3,80.2,117.2,94.7,100.3
"(Daily PU, RPU)",1.0,36.7,35.0,41.3,50.4,67.2,71.3
"(Daily PU, 비회원)",1.0,1.0,1.1,1.5,2.8,2.9,2.5
Daily 결제건수(Total),1.5,534.6,86.4,123.1,170.3,164.3,174.6
Daily 결제건수(NPU),1.4,496.6,50.3,80.2,117.2,94.7,100.3
Daily 결제건수(RPU),0.1,38.0,36.1,42.9,53.1,69.6,74.3
Daily PU_쿠폰사용(Total),1.0,25.2,34.7,25.0,29.0,84.9,54.7
Daily PU_쿠폰사용(NPU),1.0,25.0,29.5,12.3,11.7,21.8,14.8
Daily PU_쿠폰사용(RPU),0.0,1.3,5.9,5.5,3.7,8.5,13.7


In [46]:
##########
# 3.Retention-Daily
####

# query 열기
query = "C:/Users/MAEIL/Desktop/셀렉스몰/18. query(AARRR)/AARRR_query_v2/monthly/3.Retention-Monthly_v2.3.txt"

# DB접속 : 접속정보 읽기
con_text = open("C:/Users/MAEIL/conn_SELEX.txt", 'r', encoding='utf8')
con_text = con_text.read()

# DB 접속 : 실제 접속
conn = oci.connect(con_text[1:], encoding='UTF-8', nencoding='UTF-8')

f = open(query, 'r')
line = f.read()
f.close()

## query 실행( SQL문 실행 메모리 영역에서 진행)
cursor = conn.cursor()
cursor.execute(line)
df3 = cursor.fetchall()[1:]
df3_temp = pd.DataFrame(df3, columns = ['MONTH', ('Monthly PU', ''), ('Monthly PU', 'NPU'),  ('Monthly PU', 'RPU'),  ('Monthly PU', '비회원'), 
                                   'Monthly 결제건수(Total)', 'Monthly 결제건수(NPU)', 'Monthly 결제건수(RPU)',
                                   'Monthly PU_쿠폰사용(Total)', 'Monthly PU_쿠폰사용(NPU)', 'Monthly PU_쿠폰사용(RPU)',
                                   'Monthly 결제건수_쿠폰사용(Total)', 'Monthly 결제건수_쿠폰사용(NPU)', 'Monthly 결제건수_쿠폰사용(RPU)'])
df3_2 = df3_temp.set_index('MONTH').T.fillna(0)

In [47]:
##########
# 4.Retention-coupon
####

# query 열기
query = "C:/Users/MAEIL/Desktop/셀렉스몰/18. query(AARRR)/AARRR_query_v2/monthly/3.Retention-Coupon_v2.3.txt"

# DB접속 : 접속정보 읽기
con_text = open("C:/Users/MAEIL/conn_SELEX.txt", 'r', encoding='utf8')
con_text = con_text.read()

# DB 접속 : 실제 접속
conn = oci.connect(con_text[1:], encoding='UTF-8', nencoding='UTF-8')

f = open(query, 'r')
line = f.read()
f.close()

## query 실행( SQL문 실행 메모리 영역에서 진행)
cursor = conn.cursor()
cursor.execute(line)
df3 = cursor.fetchall()[1:]
# df3 = pd.DataFrame(df3)
df3 = pd.DataFrame(df3, columns = ['MONTH', '매출', '쿠폰사용 매출', '쿠폰사용 매출 증가율', '쿠폰사용 매출 비중'])
df3_3 = df3.set_index('MONTH').T.fillna(0)

In [48]:
##########
# 4.Retention-Freq
####

# query 열기
query = "C:/Users/MAEIL/Desktop/셀렉스몰/18. query(AARRR)/AARRR_query_v2/monthly/3.Retention-Freq_v2.3.txt"

# DB접속 : 접속정보 읽기
con_text = open("C:/Users/MAEIL/conn_SELEX.txt", 'r', encoding='utf8')
con_text = con_text.read()

# DB 접속 : 실제 접속
conn = oci.connect(con_text[1:], encoding='UTF-8', nencoding='UTF-8')

f = open(query, 'r')
line = f.read()
f.close()

## query 실행( SQL문 실행 메모리 영역에서 진행)
cursor = conn.cursor()
cursor.execute(line)
df3 = cursor.fetchall()[1:]
# df4 = pd.DataFrame(df4)
df3 = pd.DataFrame(df3, columns = ['MONTH', '구매빈도(합계)', '구매빈도(신규)', '구매빈도(유보)', '구매빈도(시도)', '구매빈도(정착)',
                                   '구매빈도(이탈)', '구매빈도(휴면)'])

df3_4 = df3.set_index('MONTH').T.fillna(0)
df3_4 = df3_4.T
df3_4['구매빈도(미구매)'] = 0
df3_4 = df3_4.T


In [49]:
##########
# 3.Retention-Freq
####

# query 열기
query = "C:/Users/MAEIL/Desktop/셀렉스몰/18. query(AARRR)/AARRR_query_v2/monthly/3.Retention-timediff_v2.3.txt"

# DB접속 : 접속정보 읽기
con_text = open("C:/Users/MAEIL/conn_SELEX.txt", 'r', encoding='utf8')
con_text = con_text.read()

# DB 접속 : 실제 접속
conn = oci.connect(con_text[1:], encoding='UTF-8', nencoding='UTF-8')

f = open(query, 'r')
line = f.read()
f.close()

## query 실행( SQL문 실행 메모리 영역에서 진행)
cursor = conn.cursor()
cursor.execute(line)
df3 = cursor.fetchall()[1:]
# df3 = pd.DataFrame(df3)
df3 = pd.DataFrame(df3, columns = ['MONTH', '구매빈도v3(누계)', '구매빈도v3(월별)'])
df3_5 = df3.set_index('MONTH').T.fillna(0)

In [50]:
Retention = pd.concat([df3_1, df3_2, df3_3, df3_4, df3_5])
Retention = Retention.apply(pd.to_numeric)
Retention['증가율(%)'] = round((Retention[Retention.columns[-1]] - Retention[Retention.columns[-2]])/Retention[Retention.columns[-2]]*100,2)

In [51]:
round(Retention['증가율(%)'].mean(),2)

10.64

In [52]:
Retention

MONTH,2021-01,2021-02,2021-03,2021-04,2021-05,2021-06,2021-07,증가율(%)
"(Daily PU, )",1.4,527.8,84.2,119.5,164.6,159.4,168.9,5.96
"(Daily PU, NPU)",1.4,496.6,50.3,80.2,117.2,94.7,100.3,5.91
"(Daily PU, RPU)",1.0,36.7,35.0,41.3,50.4,67.2,71.3,6.1
"(Daily PU, 비회원)",1.0,1.0,1.1,1.5,2.8,2.9,2.5,-13.79
Daily 결제건수(Total),1.5,534.6,86.4,123.1,170.3,164.3,174.6,6.27
Daily 결제건수(NPU),1.4,496.6,50.3,80.2,117.2,94.7,100.3,5.91
Daily 결제건수(RPU),0.1,38.0,36.1,42.9,53.1,69.6,74.3,6.75
Daily PU_쿠폰사용(Total),1.0,25.2,34.7,25.0,29.0,84.9,54.7,-35.57
Daily PU_쿠폰사용(NPU),1.0,25.0,29.5,12.3,11.7,21.8,14.8,-32.11
Daily PU_쿠폰사용(RPU),0.0,1.3,5.9,5.5,3.7,8.5,13.7,61.18


In [53]:
occurrence_new  = Retention['2021-05'][-9] - Retention['2021-04'][-9]
occurrence_new_ratio = Retention['증가율(%)'][-9]

Retention_summary_text1 = '신규 구매자는 전월대비 {}건 증가 (전월대비 {}%)하였으나, 유보자(=이탈 예상 인원)는 변동이 없는 상황임'.format(occurrence_new, occurrence_new_ratio) 
Retention_summary_text2 = '유보자 미활동에 따라, 재구매 기간이 {}일(전 기간)에서 {}일(당월 구매자 평균)으로 늘어나는 상황임'.format(Retention['2021-05'][-2] ,Retention['2021-05'][-1] ) 

In [54]:
Retention_summary_text1

'신규 구매자는 전월대비 0.0건 증가 (전월대비 407.29%)하였으나, 유보자(=이탈 예상 인원)는 변동이 없는 상황임'

In [55]:
Retention_summary_text2

'유보자 미활동에 따라, 재구매 기간이 33.93일(전 기간)에서 46.62일(당월 구매자 평균)으로 늘어나는 상황임'

## 4.Revenue

In [56]:
##########
# 4.Revenue-GMV
####

# query 열기
query = "C:/Users/MAEIL/Desktop/셀렉스몰/18. query(AARRR)/AARRR_query_v2/monthly/4.Revenue-GMV_v2.3.txt"

# DB접속 : 접속정보 읽기
con_text = open("C:/Users/MAEIL/conn_SELEX.txt", 'r', encoding='utf8')
con_text = con_text.read()

# DB 접속 : 실제 접속
conn = oci.connect(con_text[1:], encoding='UTF-8', nencoding='UTF-8')

f = open(query, 'r')
line = f.read()
f.close()

## query 실행( SQL문 실행 메모리 영역에서 진행)
cursor = conn.cursor()
cursor.execute(line)
df4 = cursor.fetchall()[1:]
# df4 = pd.DataFrame(df4)
df4 = pd.DataFrame(df4, columns = ['MONTH', '주문_Total(A+B+C)'  , '주문_Total_NPU',       '주문_Total_RPU',
                                            '결제금액_Total(A)'  , '결제금액_Total_NPU',   '결제금액_Total_RPU', 
                                            '제품할인_Total(B)'  , '제품할인_Total_NPU',   '제품할인_Total_RPU', 
                                            '포인트사용_Total(C)', '포인트사용_Total_NPU', '포인트사용_Total_RPU', 
                                            '포인트적립_Total'   , '포인트적립_Total_NPU', '포인트적립_Total_RPU' ])

df4['AOV_Total'] = df4['주문_Total(A+B+C)']/df3_temp['Monthly 결제건수(Total)']
df4['AOV_Total_NPU'] = df4['주문_Total_NPU']/df3_temp['Monthly 결제건수(NPU)']
df4['AOV_Total_RPU'] = df4['주문_Total_RPU']/df3_temp['Monthly 결제건수(RPU)']

df4_1 = df4.set_index('MONTH').T.fillna(0)

In [57]:
##########
# 4.Revenue-기여효과
####

# query 열기
query = "C:/Users/MAEIL/Desktop/셀렉스몰/18. query(AARRR)/AARRR_query_v2/monthly/4.Revenue-GMV_기여효과_v2.3.txt"

# DB접속 : 접속정보 읽기
con_text = open("C:/Users/MAEIL/conn_SELEX.txt", 'r', encoding='utf8')
con_text = con_text.read()

# DB 접속 : 실제 접속
conn = oci.connect(con_text[1:], encoding='UTF-8', nencoding='UTF-8')

f = open(query, 'r')
line = f.read()
f.close()

## query 실행( SQL문 실행 메모리 영역에서 진행)
cursor = conn.cursor()
cursor.execute(line)
df4 = cursor.fetchall()[1:]
# df4 = pd.DataFrame(df4)
df4 = pd.DataFrame(df4, columns = ['MONTH', 'GMV_ALL',      'GMV_쿠폰',      'GMV_포인트',      'GMV_쿠N포',
                                            '결제건수_ALL', '결제건수_쿠폰', '결제건수_포인트', '결제건수_쿠N포',
                                            'AOV_ALL', 'AOV_쿠폰', 'AOV_포인트', 'AOV_쿠N포'
                                  ])
df4_2 = df4.set_index('MONTH').T.fillna(0)


In [58]:
##########
# 4.Revenue-MC
####

# query 열기
query = "C:/Users/MAEIL/Desktop/셀렉스몰/18. query(AARRR)/AARRR_query_v2/monthly/4.Revenue-MC_v2.3.txt"

# DB접속 : 접속정보 읽기
con_text = open("C:/Users/MAEIL/conn_SELEX.txt", 'r', encoding='utf8')
con_text = con_text.read()

# DB 접속 : 실제 접속
conn = oci.connect(con_text[1:], encoding='UTF-8', nencoding='UTF-8')

f = open(query, 'r')
line = f.read()
f.close()

## query 실행( SQL문 실행 메모리 영역에서 진행)
cursor = conn.cursor()
cursor.execute(line)
df4 = cursor.fetchall()[1:]
# df4 = pd.DataFrame(df4)
df4 = pd.DataFrame(df4, columns = ['MONTH', 'Cost(Total)'  , 'Cost(ratio)'])
df4_3 = df4.set_index('MONTH').T.fillna(0)

In [59]:
Revenue = pd.concat([df4_1, df4_2, df4_3])
Revenue = Revenue.apply(pd.to_numeric)
Revenue['증가율(%)'] = round((Revenue[Revenue.columns[-2]] - Revenue[Revenue.columns[-3]])/Revenue[Revenue.columns[-3]]*100,2)

In [60]:
Revenue

MONTH,2021-01,2021-02,2021-03,2021-04,2021-05,2021-06,2021-07,증가율(%)
주문_Total(A+B+C),41703.0,1261305.0,249733.0,355000.0,521799.0,590875.0,511912.0,13.24
주문_Total_NPU,27624.0,1132011.0,130944.0,209359.0,303744.0,289989.0,256856.0,-4.53
주문_Total_RPU,14079.0,129294.0,118789.0,145641.0,218055.0,300886.0,255056.0,37.99
결제금액_Total(A),20038.0,251167.0,126870.0,168587.0,301321.0,353059.0,294748.0,17.17
결제금액_Total_NPU,12396.0,209735.0,69012.0,97210.0,166538.0,164606.0,141692.0,-1.16
결제금액_Total_RPU,7642.0,41432.0,57858.0,71377.0,134783.0,188453.0,153056.0,39.82
제품할인_Total(B),1043.0,227858.0,111028.0,149243.0,242557.0,266783.0,229184.0,9.99
제품할인_Total_NPU,973.0,197664.0,65555.0,94317.0,163920.0,154407.0,133734.0,-5.8
제품할인_Total_RPU,70.0,30194.0,45473.0,54926.0,78637.0,112376.0,95450.0,42.9
포인트사용_Total(C),4764.0,215198.0,14120.0,18724.0,24652.0,11338.0,9391.0,-54.01


In [61]:
occurrence_new  = Retention['2021-05'][-9] - Retention['2021-04'][-9]
occurrence_new_ratio = Retention['증가율(%)'][-10]

occurrence_new
# Revenue_summary_text1 = 'NPU vs RPU : 건당 평균 구매금액  {} 천 원으로, 전월대비 {}% 증가하였으나, RPU(기존 고객)와 NPU(신규고객)간의 차이가 줄어들고 있는 상황임'.format( occurrence_new_ratio) 
# Revenue_summary_text2 = '쿠폰 vs 포인트 : 쿠폰에 의한 결제금액(기여, {}%)가, 포인트 기반 결제금액(기여, {}%)보다 크게 상승함'.format(Retention['2021-04'][-2] ,Retention['2021-04'][-1] ) 

0.0

In [62]:
Retention['2021-05'][-9] 

0.0

## 5.Referral

In [63]:
##########
# 5.Referral
####

# query 열기
query = "C:/Users/MAEIL/Desktop/셀렉스몰/18. query(AARRR)/AARRR_query_v2/monthly/5.Referral_v2.3.txt"

# DB접속 : 접속정보 읽기
con_text = open("C:/Users/MAEIL/conn_SELEX.txt", 'r', encoding='utf8')
con_text = con_text.read()

# DB 접속 : 실제 접속
conn = oci.connect(con_text[1:], encoding='UTF-8', nencoding='UTF-8')

f = open(query, 'r')
line = f.read()
f.close()

## query 실행( SQL문 실행 메모리 영역에서 진행)
cursor = conn.cursor()
cursor.execute(line)
df5 = cursor.fetchall()[1:]
# df5 = pd.DataFrame(df5)
df5 = pd.DataFrame(df5, columns = ['MONTH', '추천인_유입수', '추천자_발생수', '추천자_GMV', '추천자_평균매출','리뷰수', '제품평점'])
df5 = df5.set_index('MONTH').T.fillna(0)

In [64]:
Referral = df5
Referral = Referral.apply(pd.to_numeric)
Referral['증가율(%)'] = round((Referral[Referral.columns[-2]] - Referral[Referral.columns[-3]])/Referral[Referral.columns[-3]]*100,2)

In [65]:
Referral

MONTH,2021-01,2021-02,2021-03,2021-04,2021-05,2021-06,2021-07,증가율(%)
추천인_유입수,9.0,4899.0,0.0,1.0,0.0,0.0,0.0,
추천자_발생수,12.0,18176.0,0.0,1.0,0.0,0.0,0.0,
추천자_GMV,0.0,1134769.0,0.0,320.0,0.0,0.0,0.0,
추천자_평균매출,0.0,62.43,0.0,320.0,0.0,0.0,0.0,
리뷰수,1.0,2779.0,1102.0,1182.0,970.0,1116.0,975.0,15.05
제품평점,5.0,4.98,4.95,4.96,4.96,4.94,4.95,-0.4


In [66]:
Referral_summary_text1 = '[리뷰수]는 전월대비 {}% 감소함( {}건/월)'.format(Referral['증가율(%)'][-2], Referral['2021-05'][-2] - Referral['2021-04'][-2])

In [67]:
Referral_summary_text1

'[리뷰수]는 전월대비 15.05% 감소함( -212.0건/월)'