In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from tqdm.notebook import tqdm
tqdm.pandas()

  from pandas import Panel


In [None]:
# Standard plotly imports
import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
# Using plotly + cufflinks in offline mode
import cufflinks
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)

In [None]:
# Load the two raw input tables from CSV files in the working directory.
# Later cells read 'start_date', 'end_date' and 'id' from cycle_df and
# 'created_at' from settlement_df.
cycle_df = pd.read_csv('cycle_table.csv')
settlement_df = pd.read_csv('settlement_table.csv')

In [None]:
# Convert the raw date strings to pandas datetimes.
#
# The source columns use day/month/two-digit-year plus hour:minute. Parsing is
# done with the vectorised pd.to_datetime instead of a per-row strptime, which
# also makes this cell safe to re-run: columns that are already datetime64 are
# passed through unchanged, and missing values become NaT instead of raising.
DATE_FORMAT = '%d/%m/%y %H:%M'

cycle_df['start_date'] = pd.to_datetime(cycle_df['start_date'], format=DATE_FORMAT)
cycle_df['end_date'] = pd.to_datetime(cycle_df['end_date'], format=DATE_FORMAT)
settlement_df['created_at'] = pd.to_datetime(settlement_df['created_at'], format=DATE_FORMAT)

In [None]:
def get_cycle_daterange(value, cycles=None):
    """Attach the matching billing cycle to one settlement row.

    The row's 'created_at' is compared (inclusive on both ends) against the
    'start_date'/'end_date' of every cycle; the first matching cycle, in table
    order, wins.

    Parameters
    ----------
    value : pandas.Series
        A row that has a 'created_at' entry. It is updated in place.
    cycles : pandas.DataFrame, optional
        Table with 'start_date', 'end_date' and 'id' columns. Defaults to the
        notebook-level ``cycle_df``.

    Returns
    -------
    pandas.Series
        The same row with 'start_date', 'end_date' and 'billing_cycle_id'
        set. When no cycle contains 'created_at' the three fields are filled
        with missing values (NaT / NaN) so that a row-wise ``apply`` still
        receives a row for every input instead of ``None``.
    """
    cycle_table = cycle_df if cycles is None else cycles
    created_at = value['created_at']

    # Vectorised inclusive range test against every cycle at once.
    in_cycle = (cycle_table['start_date'] <= created_at) & (created_at <= cycle_table['end_date'])

    if in_cycle.any():
        # First hit in table order, mirroring a top-to-bottom scan.
        match = cycle_table.loc[in_cycle].iloc[0]
        value['start_date'] = match['start_date']
        value['end_date'] = match['end_date']
        value['billing_cycle_id'] = match['id']
    else:
        value['start_date'] = pd.NaT
        value['end_date'] = pd.NaT
        value['billing_cycle_id'] = np.nan
    return value

In [None]:
# The row-wise cycle mapping is left disabled; a saved CSV is loaded instead.
# NOTE(review): presumably 'combined_data.csv' is the saved output of the
# commented-out call below — confirm before relying on it.
# No parse_dates is passed, so any date columns in the CSV are not parsed here.
# combined_df = settlement_df.progress_apply(get_cycle_daterange,axis=1)
combined_df = pd.read_csv('combined_data.csv')

In [None]:
def month_wise_billcreation_count(df):
    """Bar-chart the number of 'billcreated' rows per billing cycle id.

    Only rows whose 'status' equals 'billcreated' are counted; the chart is
    rendered through the cufflinks ``iplot`` accessor and nothing is returned.
    """
    created_mask = df['status'] == 'billcreated'
    counts_per_cycle = df.loc[created_mask, 'billing_cycle_id'].value_counts()
    chart_options = {
        'kind': 'bar',
        'xTitle': 'Billing Cycle ID',
        'yTitle': 'count',
        'title': 'Bill Creation Frequency Distribution',
    }
    counts_per_cycle.iplot(**chart_options)



In [None]:
# Render the bill-creation frequency chart for the combined dataset.
month_wise_billcreation_count(combined_df)

In [None]:
def month_wise_billpaid_count(df):
    """Bar-chart the number of 'billpaid' rows per billing cycle id.

    Only rows whose 'status' equals 'billpaid' are counted; the chart is
    rendered through the cufflinks ``iplot`` accessor and nothing is returned.
    """
    paid_mask = df['status'] == 'billpaid'
    counts_per_cycle = df.loc[paid_mask, 'billing_cycle_id'].value_counts()
    chart_options = {
        'kind': 'bar',
        'xTitle': 'Billing Cycle ID',
        'yTitle': 'count',
        'title': 'Bill Paid Frequency Distribution',
    }
    counts_per_cycle.iplot(**chart_options)
    

In [None]:
# Render the bill-paid frequency chart for the combined dataset.
month_wise_billpaid_count(combined_df)

In [None]:
# Bar chart of the 20 user ids with the most rows in combined_df.
# value_counts() is already sorted by descending count; .head(20) takes the
# first 20 entries by position. A plain [:20] slice is avoided because its
# meaning (label vs. position) depends on the index dtype and pandas version.
top_user_counts = combined_df['user_id'].value_counts().head(20)
top_user_counts.iplot(
    kind='bar',
    title='Top 20 user_id interaction frequency',
    xTitle='userid',
    yTitle='count',
)

In [None]:
def total_bill_status_cyclewise(combined_df):
    """Total the created and paid bill amounts for every billing cycle and plot them.

    Parameters
    ----------
    combined_df : pandas.DataFrame
        Must provide 'billing_cycle_id', 'status' and 'amount' columns. Rows
        whose status is neither 'billcreated' nor 'billpaid' are ignored.

    Returns
    -------
    pandas.DataFrame
        One row per non-null billing cycle id, sorted ascending, with columns
        'total_bill_created', 'total_bill_paid' and 'bill_cycle_id'. A cycle
        with no rows of a given status gets a total of 0.

    Notes
    -----
    The cycle ids are stored as a list: pandas refuses to build a column from
    a ``set`` because it is unordered. Totals come from a single groupby per
    status rather than one full-frame filter per cycle.
    """
    cycle_ids = sorted(combined_df['billing_cycle_id'].dropna().unique())

    def _amount_per_cycle(status):
        # Sum of 'amount' per cycle for one status, aligned to cycle_ids.
        rows = combined_df.loc[combined_df['status'] == status]
        totals = rows.groupby('billing_cycle_id')['amount'].sum()
        return totals.reindex(cycle_ids, fill_value=0).to_numpy()

    summary_df = pd.DataFrame({
        'total_bill_created': _amount_per_cycle('billcreated'),
        'total_bill_paid': _amount_per_cycle('billpaid'),
        'bill_cycle_id': cycle_ids,
    })

    summary_df.iplot(
        x='bill_cycle_id',
        kind='bar',
        y=['total_bill_paid', 'total_bill_created'],
        xTitle='Bill cycle id',
        yTitle='Amount',
        title='Billing Transaction Details for Each Cycle',
    )
    return summary_df


In [None]:
# Plot created vs. paid totals per cycle; the returned summary frame is the
# last expression of the cell, so it is also shown as the cell output.
total_bill_status_cyclewise(combined_df)

In [None]:
def create_empty_user_information_df(df=None):
    """Build a per-user ledger with one row per distinct user id and a zero balance.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Frame with a 'user_id' column. Defaults to the notebook-level
        ``combined_df``.

    Returns
    -------
    pandas.DataFrame
        Columns 'user_id' (distinct ids in order of first appearance) and
        'netAmount' (initialised to 0).
    """
    source_df = combined_df if df is None else df
    # unique() keeps first-appearance order and collapses repeated missing
    # values into one entry, unlike set() on the raw values.
    return pd.DataFrame({
        'user_id': source_df['user_id'].unique(),
        'netAmount': 0,
    })

In [None]:
def userid_wise_billingcycle_update_fn(df, user_information_df):
    """Apply one batch of bill rows to the per-user ledger and summarise the batch.

    Each 'billpaid' row adds its amount to that user's 'netAmount'; each
    'billcreated' row subtracts it. Rows with any other status are ignored.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to process; needs 'user_id', 'status' and 'amount' columns.
    user_information_df : pandas.DataFrame
        Ledger with 'user_id' and 'netAmount' columns. Its 'netAmount' column
        is updated in place (it becomes float after the update).

    Returns
    -------
    tuple
        ``(paid_count, paid_amount, created_count, created_amount)`` for the
        rows in ``df``.

    Notes
    -----
    The per-user change is computed with groupby sums and applied in one
    column operation, instead of a boolean-mask ledger update for every row.
    Groupby sums skip missing amounts.
    """
    paid_rows = df[df['status'] == 'billpaid']
    created_rows = df[df['status'] == 'billcreated']

    # Net change per user for this batch: paid minus created.
    paid_by_user = paid_rows.groupby('user_id')['amount'].sum()
    created_by_user = created_rows.groupby('user_id')['amount'].sum()
    net_by_user = paid_by_user.sub(created_by_user, fill_value=0)

    # Users absent from this batch get a change of 0.
    net_change = user_information_df['user_id'].map(net_by_user).fillna(0)
    user_information_df['netAmount'] = user_information_df['netAmount'] + net_change

    paid_count = len(paid_rows)
    created_count = len(created_rows)
    paid_amount = paid_rows['amount'].sum()
    created_amount = created_rows['amount'].sum()
    return paid_count, paid_amount, created_count, created_amount

In [None]:
# Aggregate paid/created counts and amounts for every billing cycle while
# accumulating each user's net amount in user_information_df.
set_of_billing_cycle_ids = set(combined_df['billing_cycle_id'].values)

# Process cycles in ascending id order and skip missing ids, so the rows of
# result_df (and any line chart drawn from them) follow the cycle sequence
# rather than arbitrary set order.
ordered_cycle_ids = sorted(
    cycle_id for cycle_id in set_of_billing_cycle_ids if pd.notna(cycle_id)
)

user_information_df = create_empty_user_information_df()
paid_count_list = []
created_count_list = []

paid_amount_list = []
created_amount_list = []

for cycle_id in tqdm(ordered_cycle_ids):
    cycle_rows = combined_df[combined_df.billing_cycle_id == cycle_id]
    # Updates user_information_df in place and returns the cycle's totals.
    paid_count, paid_amount, created_count, created_amount = userid_wise_billingcycle_update_fn(
        cycle_rows, user_information_df
    )

    paid_count_list.append(paid_count)
    paid_amount_list.append(paid_amount)
    created_count_list.append(created_count)
    created_amount_list.append(created_amount)

    print('Completed Cycle ID:{}'.format(cycle_id))
    print('Paid Count:{}  created Count:{}'.format(paid_count,created_count))
    print('paid Amount:{} created Amount:{}'.format(paid_amount,created_amount))
    print('\n')


# One row per processed cycle, in the same order as the loop above.
result_df = pd.DataFrame({
    'cycle_id': ordered_cycle_ids,
    'paid_count': paid_count_list,
    'created_count': created_count_list,
    'paid_amount': paid_amount_list,
    'created_amount': created_amount_list,
})

In [None]:
# Paid vs. created amount for each billing cycle.
amount_chart_options = {
    'x': 'cycle_id',
    'y': ['paid_amount', 'created_amount'],
    'xTitle': 'Billing Cycle id',
    'yTitle': 'Amount',
    'title': 'Cycle wise Amount',
}
result_df.iplot(**amount_chart_options)

In [None]:
# Paid vs. created bill counts for each billing cycle.
# The y axis shows counts, so it is labelled 'Count' rather than 'Amount'.
result_df.iplot(
    x='cycle_id',
    y=['paid_count', 'created_count'],
    xTitle='Billing Cycle id',
    yTitle='Count',
    title='Cycle wise user_id Count',
)

In [None]:
def user_id_performance_over_entire_billcycle(userid, data=None):
    """Summarise and plot one user's paid/created bills across every billing cycle.

    Parameters
    ----------
    userid
        Value matched against the 'user_id' column.
    data : pandas.DataFrame, optional
        Frame with 'user_id', 'billing_cycle_id', 'status' and 'amount'
        columns. Defaults to the notebook-level ``combined_df``.

    Returns
    -------
    pandas.DataFrame
        One row per non-null billing cycle id found in the whole frame (not
        only the user's rows), sorted ascending, with columns 'cycle_id',
        'paid_count', 'created_count', 'paid_amount' and 'created_amount'.
        Cycles in which the user has no matching rows contain zeros.

    Notes
    -----
    Two bar charts are drawn: amounts per cycle and counts per cycle. The
    count chart's y axis is labelled 'Count'. Groupby sums skip missing
    amounts.
    """
    source_df = combined_df if data is None else data
    cycle_ids = sorted(source_df['billing_cycle_id'].dropna().unique())
    user_rows = source_df[source_df['user_id'] == userid]

    def _count_and_amount(status):
        # Per-cycle row count and amount total for one status, aligned to cycle_ids.
        grouped = user_rows.loc[user_rows['status'] == status].groupby('billing_cycle_id')['amount']
        counts = grouped.size().reindex(cycle_ids, fill_value=0)
        amounts = grouped.sum().reindex(cycle_ids, fill_value=0)
        return counts.to_numpy(), amounts.to_numpy()

    paid_counts, paid_amounts = _count_and_amount('billpaid')
    created_counts, created_amounts = _count_and_amount('billcreated')

    result_df = pd.DataFrame({
        'cycle_id': cycle_ids,
        'paid_count': paid_counts,
        'created_count': created_counts,
        'paid_amount': paid_amounts,
        'created_amount': created_amounts,
    })

    result_df.iplot(
        x='cycle_id',
        kind='bar',
        y=['paid_amount', 'created_amount'],
        xTitle='Billing Cycle id',
        yTitle='Amount',
        title='Cycle wise Amount',
    )
    result_df.iplot(
        x='cycle_id',
        kind='bar',
        y=['paid_count', 'created_count'],
        xTitle='Billing Cycle id',
        yTitle='Count',
        title='Cycle wise user_id Count',
    )

    return result_df

In [None]:
# Per-cycle summary and charts for a single user.
# NOTE(review): 2378 is a hard-coded user_id, presumably an example — confirm.
result = user_id_performance_over_entire_billcycle(2378)