In [None]:
import pandas as pd
import numpy as np
import json
import os
from tqdm.notebook import tqdm
from datetime import datetime,timedelta
import matplotlib.pyplot as plt
%matplotlib inline
from collections import Counter, defaultdict
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import string

### Summary Table

In [None]:
summary_table = []

In [None]:
# Load the user and commitment indexes plus the counters file.
# Each file is opened in a `with` block so its handle is closed right away
# (the bare `json.load(open(...))` form never closed them).
with open("../data/user_index.json", 'r') as fp:
    user_index = json.load(fp)
with open("../data/commitment_index.json", 'r') as fp:
    commitment_index = json.load(fp)
with open("../data/counter.json", 'r') as fp:
    counters = json.load(fp)

# The two text files hold one integer user id per line. Blank lines (e.g. a
# trailing empty line) are skipped, otherwise int('') raises ValueError.
with open("../data/users_not_found.txt", 'r') as fp:
    user_not_found = [int(line.strip()) for line in fp if line.strip()]

with open("../data/private_users.txt", 'r') as fp:
    private_users = [int(line.strip()) for line in fp if line.strip()]

In [None]:
max_user_id = max([int(i) for i in user_index.keys()])

In [None]:
# Convert every user's 'date' from its '%d %b %Y' string form to a datetime.
# Only string values are parsed, so re-running this cell is a harmless no-op
# instead of failing with TypeError on values that are already datetimes
# (or None).
for k, v in tqdm(user_index.items()):
    if isinstance(v['date'], str):
        v['date'] = datetime.strptime(v['date'], '%d %b %Y')

In [None]:
# Users whose id is below this threshold but whose date falls in 2023 get
# their date cleared (set to None).
# NOTE(review): presumably ids below the threshold were issued before
# 1 Jan 2023, so a 2023 date on them is considered unreliable — confirm.
id_for_1_jan_2023 = 722621
for uid, record in tqdm(user_index.items()):
    if int(uid) < id_for_1_jan_2023 and record['date'].year == 2023:
        record['date'] = None

In [None]:
# Forward-fill missing dates in ascending id order: a user whose date is None
# inherits the most recent non-None date seen on a lower id.
# Starting from None (rather than user_index['1']) gives the same result when
# user 1 exists and avoids a KeyError when it does not.
# Users that precede the first dated user keep a None date.
last_date = None
for i in range(1, max_user_id + 1):
    record = user_index.get(str(i))
    if record is None:
        # id not present in the public index
        continue
    if record['date'] is None:
        record['date'] = last_date
    else:
        last_date = record['date']

In [None]:
# Convert each commitment's 'start_date' and 'end_date' from '%B %d, %Y'
# strings to datetimes. Values that are no longer strings are left alone so
# the cell can be re-run without raising TypeError.
for k, v in tqdm(commitment_index.items()):
    for date_field in ('start_date', 'end_date'):
        if isinstance(v[date_field], str):
            v[date_field] = datetime.strptime(v[date_field], '%B %d, %Y')

In [None]:
# Commitments whose start year is before 2007 are re-anchored to the owning
# user's date while keeping their original duration.
# NOTE(review): presumably pre-2007 start dates are invalid placeholders — confirm.
for record in tqdm(commitment_index.values()):
    if record['start_date'].year >= 2007:
        continue
    duration = record['end_date'] - record['start_date']
    record['start_date'] = user_index[str(record['user_id'])]['date']
    record['end_date'] = record['start_date'] + duration

In [None]:
summary_table.append(("Total # of Users", len(user_index)+len(user_not_found)+len(private_users)))
summary_table.append(("# of Private Users", len(private_users)))
summary_table.append(("# of Deleted Users", len(user_not_found)))
summary_table.append(("# of Public Users", len(user_index)))
summary_table.append(("# of total commitments", len(commitment_index)))

In [None]:
summary_table.append(("# of users with Commitments",len([k for k,v in tqdm(user_index.items()) if v['number_of_commitments']>0])))

In [None]:
summary_table.append(("Total $ at stake till date",f"${sum([v['total_stake'] for k,v in tqdm(commitment_index.items())]):.2f}"))

In [None]:
foo = [v['date'] for k,v in user_index.items()]
summary_table.append(("Users date range", f"{min(foo).strftime('%d %b %Y')} - {max(foo).strftime('%d %b %Y')}"))

In [None]:
pd.DataFrame(summary_table,columns=['Head','Count'])

# CDF of users joining Date

In [None]:
joining_dates = list(set([v['date'] for k,v in user_index.items()]))

In [None]:
min(joining_dates),max(joining_dates)

In [None]:
joining_dates.sort()

In [None]:
len(joining_dates)

In [None]:
total_public_users = len(user_index)
total_public_users

In [None]:
# Cumulative number of public users who joined on or before each date in the
# sorted `joining_dates`. Counting per date once and taking a running sum
# replaces a list.index() scan plus a slice increment for every user.
# dtype=float keeps the array compatible with the in-place `/=` normalisation
# applied afterwards.
public_join_counts = Counter(v['date'] for v in user_index.values())
joining_date_public_user_CDF = np.cumsum(
    [public_join_counts[d] for d in joining_dates], dtype=float
)

In [None]:
def get_closest_id(k, index=None):
    """Return the key of the id nearest to ``k`` (other than ``k``) in ``index``.

    The search widens symmetrically around ``k``; at equal distance the lower
    id wins. ``k`` itself is never returned.

    Parameters
    ----------
    k : int
        Id to search around.
    index : mapping with string ids as keys, optional
        Defaults to the notebook-level ``user_index``.

    Returns
    -------
    str
        The matching key, as stored in ``index``.

    Raises
    ------
    ValueError
        If ``index`` is empty or contains no id other than ``k``. The original
        loop never terminated in these cases.
    """
    if index is None:
        index = user_index
    if not index:
        raise ValueError("no ids to search: the index is empty")

    farthest = None  # largest distance from k to any key; computed lazily
    delta = 1
    while True:
        if str(k - delta) in index:
            return str(k - delta)

        if str(k + delta) in index:
            return str(k + delta)

        # The O(len(index)) bound is only computed once the search has gone
        # below zero, so the common case pays nothing for it.
        if farthest is None and k - delta < 0:
            farthest = max(abs(int(key) - k) for key in index)
        if farthest is not None and delta >= farthest:
            raise ValueError(f"no id other than {k} exists in the index")

        delta += 1

In [None]:
# Private users have no record of their own, so each one borrows the date of
# the nearest id present in user_index. The per-date tallies are then turned
# into a running total over the sorted `joining_dates` (one pass, instead of a
# list.index() scan and a slice increment per user). dtype=float keeps the
# later in-place `/=` valid.
private_join_counts = Counter(
    user_index[get_closest_id(p_id)]['date'] for p_id in tqdm(private_users)
)
joining_date_private_users_CDF = np.cumsum(
    [private_join_counts[d] for d in joining_dates], dtype=float
)

In [None]:
# Ids listed in user_not_found borrow the date of the nearest id present in
# user_index. Tallies per date are accumulated over the sorted
# `joining_dates` in one pass (instead of a list.index() scan and a slice
# increment per user). dtype=float keeps the later in-place `/=` valid.
deleted_join_counts = Counter(
    user_index[get_closest_id(p_id)]['date'] for p_id in tqdm(user_not_found)
)
joining_date_deleted_users_CDF = np.cumsum(
    [deleted_join_counts[d] for d in joining_dates], dtype=float
)

In [None]:
joining_date_public_user_CDF/=total_public_users

In [None]:
joining_date_private_users_CDF/=len(private_users)

In [None]:
joining_date_deleted_users_CDF/=len(user_not_found)

In [None]:
plt.plot(joining_dates,joining_date_public_user_CDF,label ='Public Users')
plt.plot(joining_dates,joining_date_private_users_CDF,label ='Private Users')
plt.plot(joining_dates,joining_date_deleted_users_CDF,label ='Deleted Users')
plt.legend()

# CDF of the number of commitments per user

### ALL

In [None]:
unique_commitments_per_user = [v['number_of_commitments'] for k,v in user_index.items()]

In [None]:
sorted(Counter(unique_commitments_per_user).items(), key=lambda x:x[1],reverse=True)

In [None]:
unique_commitments_per_user = list(set(unique_commitments_per_user))

In [None]:
unique_commitments_per_user.sort()

In [None]:
# Keep only positive commitment counts. Filtering by value (not by position)
# stays correct when no user has 0 commitments and does not discard another
# entry each time the cell is re-run.
unique_commitments_per_user = [n for n in unique_commitments_per_user if n > 0]

In [None]:
len(unique_commitments_per_user)

In [None]:
total_users_with_commitments = 0
for k,v in user_index.items():
    if v['number_of_commitments']>0:
        total_users_with_commitments+=1

In [None]:
# Cumulative number of users (with at least one commitment) whose commitment
# count is <= each value of the sorted `unique_commitments_per_user`.
# One tally plus a running sum replaces a list.index() scan and a slice
# increment per user; dtype=float keeps the later in-place `/=` valid.
commitments_per_user_tally = Counter(
    v['number_of_commitments']
    for v in user_index.values()
    if v['number_of_commitments'] > 0
)
no_of_commitments_CDF = np.cumsum(
    [commitments_per_user_tally[n] for n in unique_commitments_per_user], dtype=float
)

In [None]:
no_of_commitments_CDF/=total_users_with_commitments

In [None]:
plt.plot(unique_commitments_per_user,no_of_commitments_CDF,label ='Commitments per user')
plt.legend()

### ≤ 10

In [None]:
unique_commitments_per_user = [i for i in range(1,11)]

In [None]:
len(unique_commitments_per_user)

In [None]:
# Same cumulative count, restricted to users with 1..10 commitments.
# Tally once and accumulate instead of scanning the list for every user.
commitments_le_10_tally = Counter(
    v['number_of_commitments']
    for v in user_index.values()
    if v['number_of_commitments'] <= 10 and v['number_of_commitments'] > 0
)
no_of_commitments_CDF = np.cumsum(
    [commitments_le_10_tally[n] for n in unique_commitments_per_user], dtype=float
)

In [None]:
no_of_commitments_CDF/=total_users_with_commitments

In [None]:
plt.plot(unique_commitments_per_user,no_of_commitments_CDF,label ='Commitments per user')
plt.legend()

### ≤ 100

In [None]:
unique_commitments_per_user = [i for i in range(1,101)]

In [None]:
len(unique_commitments_per_user)

In [None]:
# Same cumulative count, restricted to users with 1..100 commitments.
# Tally once and accumulate instead of scanning the list for every user.
commitments_le_100_tally = Counter(
    v['number_of_commitments']
    for v in user_index.values()
    if v['number_of_commitments'] <= 100 and v['number_of_commitments'] > 0
)
no_of_commitments_CDF = np.cumsum(
    [commitments_le_100_tally[n] for n in unique_commitments_per_user], dtype=float
)

In [None]:
no_of_commitments_CDF/=total_users_with_commitments

In [None]:
plt.plot(unique_commitments_per_user,no_of_commitments_CDF,label ='Commitments per user')
plt.legend()

# Default Display Image

In [None]:
display_image_default = [v['default_image'] for k,v in user_index.items()]

In [None]:
Counter(display_image_default)

# Number of commitments made every day

In [None]:
commitment_start_dates = list(set([v['start_date'] for k,v in commitment_index.items()]))

In [None]:
min(commitment_start_dates),max(commitment_start_dates)

In [None]:
len(commitment_start_dates)

In [None]:
commitment_start_dates.sort()

In [None]:
# Number of commitments starting on each date of the sorted
# `commitment_start_dates`. A single Counter pass replaces a list.index()
# scan per commitment; the result stays a float array like the np.zeros
# original.
start_date_tally = Counter(v['start_date'] for v in commitment_index.values())
commitment_count_date = np.array(
    [start_date_tally[d] for d in commitment_start_dates], dtype=float
)

In [None]:
commitment_count_date

In [None]:
plt.plot(commitment_start_dates,commitment_count_date)

### month level

In [None]:
# First day of every month on the plotting axis. The axis always covers
# 2007-2023 (the original fixed range) and is widened to the years actually
# present in the data, so a start date outside that range no longer breaks
# the month lookup in the counting cell.
start_years = [v['start_date'].year for v in commitment_index.values()]
first_year = min([2007] + start_years)
last_year = max([2023] + start_years)
month_values = [
    datetime(y, m, 1)
    for y in range(first_year, last_year + 1)
    for m in range(1, 13)
]

In [None]:
# Number of commitments starting in each month of `month_values`.
# Commitments are tallied by the first day of their start month in one pass
# (instead of a list.index() scan per commitment).
month_start_tally = Counter(
    datetime(v['start_date'].year, v['start_date'].month, 1)
    for v in tqdm(commitment_index.values())
)
# Fail loudly, with a readable message, if some month is missing from the axis
# (the original raised a bare ValueError from list.index in that case).
months_off_axis = sorted(set(month_start_tally) - set(month_values))
if months_off_axis:
    raise ValueError(f"start months missing from month_values: {months_off_axis[:5]}")
commitment_count_date = np.array(
    [month_start_tally[m] for m in month_values], dtype=float
)

In [None]:
plt.plot(month_values,commitment_count_date)

## Length of commitment

In [None]:
commitment_lengths = [v['length_in_days'] for k,v in commitment_index.items()]

In [None]:
min(commitment_lengths),max(commitment_lengths)

In [None]:
sorted(Counter(commitment_lengths).items(), key=lambda x:x[1],reverse=True)

In [None]:
plt.hist(commitment_lengths)
plt.yscale('log')

In [None]:
plt.hist([i for i in commitment_lengths if i<366], bins = [i for i in range(0,366,7)])

### ALL

In [None]:
unique_commitment_lengths = list(set(commitment_lengths))

In [None]:
unique_commitment_lengths.sort()

In [None]:
len(unique_commitment_lengths)

In [None]:
# Cumulative number of commitments whose length (days) is <= each value of the
# sorted `unique_commitment_lengths`. Tally once, then take a running sum,
# instead of a list.index() scan and slice increment per commitment.
# dtype=float keeps the later in-place `/=` valid.
length_days_tally = Counter(v['length_in_days'] for v in commitment_index.values())
commitment_length_CDF = np.cumsum(
    [length_days_tally[d] for d in unique_commitment_lengths], dtype=float
)

In [None]:
commitment_length_CDF/=len(commitment_index)

In [None]:
plt.plot(unique_commitment_lengths,commitment_length_CDF,label ='Commitments length')
plt.legend()

### 365 Days

In [None]:
unique_commitment_lengths = [i for i in range(0,366)]

In [None]:
unique_commitment_lengths.sort()

In [None]:
len(unique_commitment_lengths)

In [None]:
# Cumulative count of commitments by length in days, restricted to lengths of
# at most 365 days. Tally once and accumulate over `unique_commitment_lengths`
# instead of scanning the list for every commitment.
length_le_365_tally = Counter(
    v['length_in_days']
    for v in commitment_index.values()
    if v['length_in_days'] <= 365
)
commitment_length_CDF = np.cumsum(
    [length_le_365_tally[d] for d in unique_commitment_lengths], dtype=float
)

In [None]:
commitment_length_CDF/=len(commitment_index)

In [None]:
plt.plot(unique_commitment_lengths,commitment_length_CDF,label ='Commitments length')
plt.legend()

### Converted into weeks

In [None]:
# Commitment lengths in whole weeks (truncated). Derived from the source
# records rather than from the previous value of `commitment_lengths`, so
# re-running the cell does not divide by 7 a second time.
commitment_lengths = [int(v['length_in_days'] / 7.0) for v in commitment_index.values()]

In [None]:
unique_commitment_lengths = list(set(commitment_lengths))

In [None]:
unique_commitment_lengths.sort()

In [None]:
len(unique_commitment_lengths)

In [None]:
# Cumulative number of commitments whose length in whole weeks is <= each
# value of the sorted `unique_commitment_lengths`. Tally once and accumulate
# instead of a list.index() scan per commitment.
length_weeks_tally = Counter(
    int(v['length_in_days'] / 7.0) for v in commitment_index.values()
)
commitment_length_CDF = np.cumsum(
    [length_weeks_tally[w] for w in unique_commitment_lengths], dtype=float
)

In [None]:
commitment_length_CDF/=len(commitment_index)

In [None]:
plt.plot(unique_commitment_lengths,commitment_length_CDF,label ='Commitments length')
plt.legend()

### 52 weeks

In [None]:
unique_commitment_lengths = [i for i in range(0,53)]

In [None]:
len(unique_commitment_lengths)

In [None]:
# Cumulative count of commitments by length in whole weeks, restricted to at
# most 52 weeks. Tally once and accumulate over `unique_commitment_lengths`.
length_le_52_weeks_tally = Counter(
    int(v['length_in_days'] / 7.0)
    for v in commitment_index.values()
    if int(v['length_in_days'] / 7.0) <= 52
)
commitment_length_CDF = np.cumsum(
    [length_le_52_weeks_tally[w] for w in unique_commitment_lengths], dtype=float
)

In [None]:
commitment_length_CDF/=len(commitment_index)

In [None]:
plt.plot(unique_commitment_lengths,commitment_length_CDF,label ='Commitments length')
plt.legend()

# Date of start

In [None]:
day_of_start = [v['start_date'].day for k,v in commitment_index.items()]

In [None]:
counts = list(Counter(day_of_start).items())

In [None]:
counts.sort()

In [None]:
day = []
count = []
for i,j in counts:
    day.append(i)
    count.append(j)

In [None]:
plt.bar(day,count)

In [None]:
month_of_start = [v['start_date'].month for k,v in commitment_index.items()]

In [None]:
counts = list(Counter(month_of_start).items())

In [None]:
counts.sort()

In [None]:
month = []
count = []
for i,j in counts:
    month.append(i)
    count.append(j)

In [None]:
plt.bar(month,count)

In [None]:
day_of_year = []
count = []
for i in range(1,13):
    for j in range(1,32):
        day_of_year.append(f"{j}_{i}")
        count.append(0)

In [None]:
# Number of commitments starting on each "day_month" label of `day_of_year`.
# `count` is rebuilt from scratch, so re-running the cell does not add the
# same commitments on top of the previous totals, and the per-commitment
# list.index() scan is gone.
start_day_tally = Counter(
    f"{v['start_date'].day}_{v['start_date'].month}" for v in commitment_index.values()
)
count = [start_day_tally[label] for label in day_of_year]

In [None]:
plt.bar(day_of_year,count)

In [None]:
weekday_of_start = [v['start_date'].weekday() for k,v in commitment_index.items()]

In [None]:
counts = list(Counter(weekday_of_start).items())

In [None]:
counts.sort()

In [None]:
counts

In [None]:
day = []
count = []
for i,j in counts:
    day.append(i)
    count.append(j)

In [None]:
plt.bar(day,count)

# reporting Interval 

In [None]:
reporting_intervals = [int(v['days_per_reporting']) for k,v in commitment_index.items() if v['no_of_reports']>1]

In [None]:
len(reporting_intervals)

In [None]:
len(set(reporting_intervals))

In [None]:
counts = list(Counter(reporting_intervals).items())

In [None]:
counts.sort()

In [None]:
counts

In [None]:
day = []
count = []
for i,j in counts:
    day.append(i)
    count.append(j)

In [None]:
plt.bar(day,count)

In [None]:
plt.bar(day,count)
plt.yscale('log')

# Stake Type 

In [None]:
stake_type = [v['type_of_stake'] for k,v in commitment_index.items()]

In [None]:
len(stake_type),len(set(stake_type))

In [None]:
stake_table = []
for k,v in Counter(stake_type).items():
    stake_table.append((k,v,100*v/len(commitment_index)))

In [None]:
pd.DataFrame(stake_table,columns=['Type','Count','%']).sort_values(by = '%')

## Amount of Stake

### Total stake

In [None]:
total_stake = [v['total_stake'] for k,v in commitment_index.items() if (v['type_of_stake']!='No stakes') and ('reverted' not in v['type_of_stake'])]

In [None]:
sorted(Counter(total_stake).items(), key=lambda x:x[0],reverse=False)

In [None]:
# Histogram of total stake with progressively wider bins:
# 0/5/10, then steps of 10 up to 100, steps of 100 up to 1000, then steps of
# 1000. The original edges skipped 1000 (giving a single 900-2000 bin) and
# stopped at the last multiple of 1000 below the maximum, so the largest
# stakes fell outside the final edge and were silently left out of the plot.
stake_bin_edges = [0.0, 5.0, 10.0] + list(range(20, 101, 10)) + list(range(200, 1001, 100))
# Round the top edge up to the next multiple of 1000 so the maximum is included.
top_stake_edge = int(np.ceil(max(total_stake) / 1000.0)) * 1000
stake_bin_edges += list(range(2000, top_stake_edge + 1, 1000))
plt.hist(total_stake, bins=stake_bin_edges)

In [None]:
plt.hist(total_stake)
plt.yscale('log')

In [None]:
unique_total_stake = list(set(total_stake))

In [None]:
unique_total_stake.sort()

In [None]:
len(unique_total_stake)

In [None]:
total_users_with_stake = 0
for k,v in commitment_index.items():
    if (v['type_of_stake']!='No stakes') and ('reverted' not in v['type_of_stake']):
        total_users_with_stake+=1
total_users_with_stake

In [None]:
# Cumulative number of staked commitments (type is not 'No stakes' and does
# not contain 'reverted') whose total stake is <= each value of the sorted
# `unique_total_stake`. Tally once and accumulate instead of a list.index()
# scan per commitment; dtype=float keeps the later in-place `/=` valid.
total_stake_tally = Counter(
    v['total_stake']
    for v in commitment_index.values()
    if (v['type_of_stake'] != 'No stakes') and ('reverted' not in v['type_of_stake'])
)
total_stake_CDF = np.cumsum(
    [total_stake_tally[s] for s in unique_total_stake], dtype=float
)

In [None]:
total_stake_CDF/=total_users_with_stake

In [None]:
plt.plot(unique_total_stake,total_stake_CDF,label ='Total Stakes')
plt.legend()

### ≤ 1000

In [None]:
unique_total_stake = list(set(total_stake))

In [None]:
unique_total_stake.sort()

In [None]:
len(unique_total_stake)

In [None]:
unique_total_stake = [i for i in unique_total_stake if i<=1000.0]

In [None]:
unique_total_stake.sort()

In [None]:
len(unique_total_stake)

In [None]:
# Cumulative count of staked commitments by total stake, restricted to stakes
# of at most 1000. Tally once and accumulate over `unique_total_stake`.
total_stake_le_1000_tally = Counter(
    v['total_stake']
    for v in commitment_index.values()
    if (v['type_of_stake'] != 'No stakes')
    and ('reverted' not in v['type_of_stake'])
    and (v['total_stake'] <= 1000.0)
)
total_stake_CDF = np.cumsum(
    [total_stake_le_1000_tally[s] for s in unique_total_stake], dtype=float
)

In [None]:
total_stake_CDF/=total_users_with_stake

In [None]:
plt.plot(unique_total_stake,total_stake_CDF,label ='Total Stakes')
plt.legend()

#### Amount VS Stake

In [None]:
unique_total_stake = list(set(total_stake))

In [None]:
unique_total_stake.sort()

In [None]:
len(unique_total_stake)

In [None]:
# Number of commitments per stake type (a Counter yields 0 for a type that
# never occurs, matching the zero-initialised counters).
stake_type_totals = Counter(v['type_of_stake'] for v in commitment_index.values())
stickK_users_with_stake = stake_type_totals['stickK']
Money_to_a_friend_users_with_stake = stake_type_totals['Money to a friend']
Charity_users_with_stake = stake_type_totals['Charity']
Anti_charity_users_with_stake = stake_type_totals['Anti-charity']

stickK_users_with_stake,Money_to_a_friend_users_with_stake,Charity_users_with_stake,Anti_charity_users_with_stake

In [None]:
# One cumulative-count array per stake type, aligned with `unique_total_stake`.
stickK_stake_CDF = np.zeros(len(unique_total_stake))
Money_to_a_friend_stake_CDF = np.zeros(len(unique_total_stake))
Charity_stake_CDF = np.zeros(len(unique_total_stake))
Anti_charity_stake_CDF = np.zeros(len(unique_total_stake))

# Route each commitment to the array for its stake type; other types are skipped.
cdf_by_stake_type = {
    'stickK': stickK_stake_CDF,
    'Money to a friend': Money_to_a_friend_stake_CDF,
    'Charity': Charity_stake_CDF,
    'Anti-charity': Anti_charity_stake_CDF,
}
for record in tqdm(commitment_index.values()):
    target_cdf = cdf_by_stake_type.get(record['type_of_stake'])
    if target_cdf is None:
        continue
    first_slot = unique_total_stake.index(record['total_stake'])
    target_cdf[first_slot:] += 1

In [None]:
stickK_stake_CDF/=stickK_users_with_stake
Money_to_a_friend_stake_CDF/=Money_to_a_friend_users_with_stake
Charity_stake_CDF/=Charity_users_with_stake
Anti_charity_stake_CDF/=Anti_charity_users_with_stake

In [None]:
plt.plot(unique_total_stake,stickK_stake_CDF,label ='Stickk')
plt.plot(unique_total_stake,Money_to_a_friend_stake_CDF,label ='M to F')
plt.plot(unique_total_stake,Charity_stake_CDF,label ='Charity')
plt.plot(unique_total_stake,Anti_charity_stake_CDF,label ='Anti Charity')
plt.legend()

##### ≤ 1000

In [None]:
unique_total_stake = list(set(total_stake))

In [None]:
unique_total_stake.sort()

In [None]:
len(unique_total_stake)

In [None]:
unique_total_stake = [i for i in unique_total_stake if i<=1000.0]

In [None]:
unique_total_stake.sort()

In [None]:
len(unique_total_stake)

In [None]:
# Per-stake-type cumulative counts over `unique_total_stake`, considering only
# commitments whose total stake is at most 1000.
stickK_stake_CDF = np.zeros(len(unique_total_stake))
Money_to_a_friend_stake_CDF = np.zeros(len(unique_total_stake))
Charity_stake_CDF = np.zeros(len(unique_total_stake))
Anti_charity_stake_CDF = np.zeros(len(unique_total_stake))

cdf_by_stake_type = {
    'stickK': stickK_stake_CDF,
    'Money to a friend': Money_to_a_friend_stake_CDF,
    'Charity': Charity_stake_CDF,
    'Anti-charity': Anti_charity_stake_CDF,
}
for record in tqdm(commitment_index.values()):
    if not record['total_stake'] <= 1000.0:
        continue
    target_cdf = cdf_by_stake_type.get(record['type_of_stake'])
    if target_cdf is None:
        continue
    first_slot = unique_total_stake.index(record['total_stake'])
    target_cdf[first_slot:] += 1

In [None]:
stickK_stake_CDF/=stickK_users_with_stake
Money_to_a_friend_stake_CDF/=Money_to_a_friend_users_with_stake
Charity_stake_CDF/=Charity_users_with_stake
Anti_charity_stake_CDF/=Anti_charity_users_with_stake

In [None]:
plt.plot(unique_total_stake,stickK_stake_CDF,label ='Stickk')
plt.plot(unique_total_stake,Money_to_a_friend_stake_CDF,label ='M to F')
plt.plot(unique_total_stake,Charity_stake_CDF,label ='Charity')
plt.plot(unique_total_stake,Anti_charity_stake_CDF,label ='Anti Charity')
plt.legend()

### Stake per period

In [None]:
total_stake = [v['stake_per_period'] for k,v in commitment_index.items() if (v['type_of_stake']!='No stakes') and ('reverted' not in v['type_of_stake'])]

In [None]:
sorted(Counter(total_stake).items(), key=lambda x:x[0],reverse=False)

In [None]:
plt.hist(total_stake)

In [None]:
plt.hist(total_stake, bins = [0.0,5.0,10.0] + [i for i in range(20,101,10)] + [i for i in range(200,1000,100)])

In [None]:
plt.hist(total_stake, bins = [i for i in range(0,101,5)])

In [None]:
unique_total_stake = list(set(total_stake))

In [None]:
unique_total_stake.sort()

In [None]:
len(unique_total_stake)

In [None]:
total_users_with_stake = 0
for k,v in commitment_index.items():
    if (v['type_of_stake']!='No stakes') and ('reverted' not in v['type_of_stake']):
        total_users_with_stake+=1
total_users_with_stake

In [None]:
# Cumulative number of staked commitments (type is not 'No stakes' and does
# not contain 'reverted') whose stake per period is <= each value of the
# sorted `unique_total_stake`. Tally once and accumulate instead of a
# list.index() scan per commitment; dtype=float keeps the later `/=` valid.
stake_per_period_tally = Counter(
    v['stake_per_period']
    for v in commitment_index.values()
    if (v['type_of_stake'] != 'No stakes') and ('reverted' not in v['type_of_stake'])
)
total_stake_CDF = np.cumsum(
    [stake_per_period_tally[s] for s in unique_total_stake], dtype=float
)

In [None]:
total_stake_CDF/=total_users_with_stake

In [None]:
plt.plot(unique_total_stake,total_stake_CDF,label ='Stakes per period')
plt.legend()

### ≤ 100

In [None]:
unique_total_stake = list(set(total_stake))

In [None]:
unique_total_stake.sort()

In [None]:
len(unique_total_stake)

In [None]:
unique_total_stake = [i for i in unique_total_stake if i<=100.0]

In [None]:
unique_total_stake.sort()

In [None]:
len(unique_total_stake)

In [None]:
# Cumulative count of staked commitments by stake per period, restricted to
# stakes of at most 100. Tally once and accumulate over `unique_total_stake`.
stake_per_period_le_100_tally = Counter(
    v['stake_per_period']
    for v in commitment_index.values()
    if (v['type_of_stake'] != 'No stakes')
    and ('reverted' not in v['type_of_stake'])
    and (v['stake_per_period'] <= 100.0)
)
total_stake_CDF = np.cumsum(
    [stake_per_period_le_100_tally[s] for s in unique_total_stake], dtype=float
)

In [None]:
total_stake_CDF/=total_users_with_stake

In [None]:
# This curve is built from 'stake_per_period', so it is labelled accordingly
# (the previous 'Total Stakes' label was carried over from the total-stake plots).
plt.plot(unique_total_stake,total_stake_CDF,label ='Stakes per period')
plt.legend()

#### Amount VS Stake

In [None]:
unique_total_stake = list(set(total_stake))

In [None]:
unique_total_stake.sort()

In [None]:
len(unique_total_stake)

In [None]:
# Number of commitments per stake type (a Counter yields 0 for a type that
# never occurs, matching the zero-initialised counters).
stake_type_totals = Counter(v['type_of_stake'] for v in commitment_index.values())
stickK_users_with_stake = stake_type_totals['stickK']
Money_to_a_friend_users_with_stake = stake_type_totals['Money to a friend']
Charity_users_with_stake = stake_type_totals['Charity']
Anti_charity_users_with_stake = stake_type_totals['Anti-charity']

stickK_users_with_stake,Money_to_a_friend_users_with_stake,Charity_users_with_stake,Anti_charity_users_with_stake

In [None]:
# One cumulative-count array per stake type, aligned with `unique_total_stake`
# and indexed by each commitment's stake per period.
stickK_stake_CDF = np.zeros(len(unique_total_stake))
Money_to_a_friend_stake_CDF = np.zeros(len(unique_total_stake))
Charity_stake_CDF = np.zeros(len(unique_total_stake))
Anti_charity_stake_CDF = np.zeros(len(unique_total_stake))

# Route each commitment to the array for its stake type; other types are skipped.
cdf_by_stake_type = {
    'stickK': stickK_stake_CDF,
    'Money to a friend': Money_to_a_friend_stake_CDF,
    'Charity': Charity_stake_CDF,
    'Anti-charity': Anti_charity_stake_CDF,
}
for record in tqdm(commitment_index.values()):
    target_cdf = cdf_by_stake_type.get(record['type_of_stake'])
    if target_cdf is None:
        continue
    first_slot = unique_total_stake.index(record['stake_per_period'])
    target_cdf[first_slot:] += 1

In [None]:
stickK_stake_CDF/=stickK_users_with_stake
Money_to_a_friend_stake_CDF/=Money_to_a_friend_users_with_stake
Charity_stake_CDF/=Charity_users_with_stake
Anti_charity_stake_CDF/=Anti_charity_users_with_stake

In [None]:
plt.plot(unique_total_stake,stickK_stake_CDF,label ='Stickk')
plt.plot(unique_total_stake,Money_to_a_friend_stake_CDF,label ='M to F')
plt.plot(unique_total_stake,Charity_stake_CDF,label ='Charity')
plt.plot(unique_total_stake,Anti_charity_stake_CDF,label ='Anti Charity')
plt.legend()

### ≤ 100

In [None]:
unique_total_stake = list(set(total_stake))

In [None]:
unique_total_stake.sort()

In [None]:
len(unique_total_stake)

In [None]:
unique_total_stake = [i for i in unique_total_stake if i<=100.0]

In [None]:
unique_total_stake.sort()

In [None]:
len(unique_total_stake)

In [None]:
# Per-stake-type cumulative counts over `unique_total_stake`, considering only
# commitments whose stake per period is at most 100.
stickK_stake_CDF = np.zeros(len(unique_total_stake))
Money_to_a_friend_stake_CDF = np.zeros(len(unique_total_stake))
Charity_stake_CDF = np.zeros(len(unique_total_stake))
Anti_charity_stake_CDF = np.zeros(len(unique_total_stake))

cdf_by_stake_type = {
    'stickK': stickK_stake_CDF,
    'Money to a friend': Money_to_a_friend_stake_CDF,
    'Charity': Charity_stake_CDF,
    'Anti-charity': Anti_charity_stake_CDF,
}
for record in tqdm(commitment_index.values()):
    if not record['stake_per_period'] <= 100.0:
        continue
    target_cdf = cdf_by_stake_type.get(record['type_of_stake'])
    if target_cdf is None:
        continue
    first_slot = unique_total_stake.index(record['stake_per_period'])
    target_cdf[first_slot:] += 1

In [None]:
stickK_stake_CDF/=stickK_users_with_stake
Money_to_a_friend_stake_CDF/=Money_to_a_friend_users_with_stake
Charity_stake_CDF/=Charity_users_with_stake
Anti_charity_stake_CDF/=Anti_charity_users_with_stake

In [None]:
plt.plot(unique_total_stake,stickK_stake_CDF,label ='Stickk')
plt.plot(unique_total_stake,Money_to_a_friend_stake_CDF,label ='M to F')
plt.plot(unique_total_stake,Charity_stake_CDF,label ='Charity')
plt.plot(unique_total_stake,Anti_charity_stake_CDF,label ='Anti Charity')
plt.legend()

## Referee

In [None]:
refree = [v['self_refree'] for k,v in commitment_index.items()]

In [None]:
for k,v in Counter(refree).items():
    print(k,v,v*100/len(commitment_index))

### Support

In [None]:
supporters = [v['no_of_supporters'] for k,v in commitment_index.items()]

In [None]:
min(supporters),max(supporters)

In [None]:
Counter(supporters)

## Reporting outcomes: successful, not successful, not reported

In [None]:
# Per-commitment share (in percent) of reports that were successful,
# unsuccessful, or not reported.
# NOTE(review): a commitment with no_of_reports == 0 raises ZeroDivisionError
# here — confirm every commitment has at least one report.
sucessful, not_sucessful, not_reported = [], [], []
for record in commitment_index.values():
    sucessful.append(record['sucessful']*100/record['no_of_reports'])
    not_sucessful.append(record['not_sucessful']*100/record['no_of_reports'])
    not_reported.append(record['not_reported']*100/record['no_of_reports'])

In [None]:
plt.hist(sucessful,bins = [i for i in range(0,101,5)])

In [None]:
plt.hist(not_sucessful,bins = [i for i in range(0,101,5)])

In [None]:
plt.hist(not_reported,bins = [i for i in range(0,101,5)])

In [None]:
unique_sucessful = list(set(sucessful))
unique_not_sucessful = list(set(not_sucessful))
unique_not_reported = list(set(not_reported))

In [None]:
unique_sucessful.sort()
unique_not_sucessful.sort()
unique_not_reported.sort()

In [None]:
len(unique_sucessful),len(unique_not_sucessful),len(unique_not_reported)

In [None]:
def _outcome_cdf(field, grid):
    """Cumulative number of commitments whose `field` percentage is <= each value of the sorted `grid`.

    The percentage is `field * 100 / no_of_reports`, the same expression used
    to build the grids, so values match exactly. One tally plus a running sum
    replaces a list.index() scan and slice increment per commitment.
    Returns a float array so it can be normalised in place with `/=`.
    """
    pct_tally = Counter(
        v[field]*100/v['no_of_reports'] for v in commitment_index.values()
    )
    return np.cumsum([pct_tally[p] for p in grid], dtype=float)


sucessful_CDF = _outcome_cdf('sucessful', unique_sucessful)
not_sucessful_CDF = _outcome_cdf('not_sucessful', unique_not_sucessful)
not_reported_CDF = _outcome_cdf('not_reported', unique_not_reported)

In [None]:
sucessful_CDF/=len(commitment_index)
not_sucessful_CDF/=len(commitment_index)
not_reported_CDF/=len(commitment_index)

In [None]:
plt.plot(unique_sucessful,sucessful_CDF,label ='Success')
plt.plot(unique_not_sucessful,not_sucessful_CDF,label ='not Success')
plt.plot(unique_not_reported,not_reported_CDF,label ='not reported')
plt.legend()

## Length vs Success

In [None]:
duration = []
success_percentage = []

for k,v in commitment_index.items():
    duration.append(v['length_in_days'])
    success_percentage.append(v['sucessful']*100/v['no_of_reports'])

In [None]:
plt.scatter(duration,success_percentage)

In [None]:
plt.scatter(duration,success_percentage)
plt.xscale('log')

In [None]:
plt.scatter(duration,success_percentage)
plt.xlim((0,365))

In [None]:
plt.scatter(duration,success_percentage)
plt.xlim((0,100))

## Length vs Not reported

In [None]:
duration = []
not_reported_percentage = []

for k,v in commitment_index.items():
    duration.append(v['length_in_days'])
    not_reported_percentage.append(v['not_reported']*100/v['no_of_reports'])

In [None]:
plt.scatter(duration,not_reported_percentage)

In [None]:
plt.scatter(duration,not_reported_percentage)
plt.xscale('log')

In [None]:
plt.scatter(duration,not_reported_percentage)
plt.xlim((0,365))

In [None]:
plt.scatter(duration,not_reported_percentage)
plt.xlim((0,100))

## Stake vs Success

In [None]:
stake = []
success_percentage = []

for k,v in commitment_index.items():
    stake.append(v['stake_per_period'])
    success_percentage.append(v['sucessful']*100/v['no_of_reports'])

In [None]:
plt.scatter(stake,success_percentage)

In [None]:
plt.scatter(stake,success_percentage)
plt.xscale('log')

In [None]:
plt.scatter(stake,success_percentage)
plt.xlim((0,365))

In [None]:
plt.scatter(stake,success_percentage)
plt.xlim((0,100))

# Type of Stake vs success

In [None]:
# Group the three outcome percentages of every commitment by its stake type.
stake_data = defaultdict(lambda: {"sucessful":[],"not_sucessful":[],"not_reported":[]})
for record in commitment_index.values():
    per_type = stake_data[record['type_of_stake']]
    for outcome in ('sucessful', 'not_sucessful', 'not_reported'):
        per_type[outcome].append(record[outcome]*100/record['no_of_reports'])

In [None]:
# One table row per stake type: the type followed by "mean ± std" of each
# outcome percentage. Both numbers use two decimals; previously the standard
# deviation was printed at full float precision.
raw_data = []
for stake_kind, outcomes in stake_data.items():
    row = [stake_kind]
    for outcome in ['sucessful','not_sucessful','not_reported']:
        row.append(f'{np.mean(outcomes[outcome]):.2f} ± {np.std(outcomes[outcome]):.2f}')
    raw_data.append(row)

In [None]:
# The 'Success' cells are strings of the form "mean ± std". Sorting them as
# text is lexicographic ("9.50 ..." would rank above "85.00 ..."), so the sort
# key extracts the mean and compares it numerically.
pd.DataFrame(raw_data,columns=['Type',"Success", "Not Success" , "Not reported"]).sort_values(
    by='Success',
    ascending=False,
    key=lambda col: col.str.split(' ± ').str[0].astype(float),
)

# Title Word cloud

In [None]:
titles = [v['title'] for k,v in commitment_index.items()]

In [None]:
titles = [i.lower() for i in titles]

In [None]:
titles = [i for i in titles if i!=""]

In [None]:
titles = [i.translate(str.maketrans('', '', string.punctuation)) for i in titles]

In [None]:
# Flatten all titles into a single list of space-separated tokens.
tokens = [word for title in titles for word in title.split(" ")]

In [None]:
wc_text = " ".join(tokens)

In [None]:
len(wc_text)

In [None]:
wordcloud = WordCloud(background_color="white").generate(wc_text)

In [None]:
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()