In [2]:
import os
from datetime import datetime, timedelta
from collections import defaultdict, Counter
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from collections import Counter

In [3]:
# Importing Data
# Raw string: the Windows path contains backslashes that would otherwise be
# parsed as (invalid) escape sequences such as "\M" or "\S".
# NOTE(review): this is an absolute, machine-specific path; switch to "./data"
# (or another relative location) when running elsewhere.
DATADIR = r"D:\My Computer\Local H\Skoltech Docs\Courses\HackLab\hackathon_data" # "./data"
transactions_path = f"{DATADIR}/avk_hackathon_data_transactions.csv"
# Read from the path defined above instead of rebuilding the same string.
df = pd.read_csv(transactions_path)

In [4]:
# Display the loaded transactions table (pandas abbreviates long frames in the rendered output).
df

Unnamed: 0,party_rk,account_rk,financial_account_type_cd,transaction_dttm,transaction_type_desc,transaction_amt_rur,merchant_rk,merchant_type,merchant_group_rk,category
0,20337,19666,1,2019-01-01,Покупка,84.00,88676.0,348.0,,Сувениры
1,63404,72991,1,2019-01-01,Покупка,410.00,887248.0,330.0,725.0,Фаст Фуд
2,24789,23517,2,2019-01-01,Покупка,701.44,830014.0,291.0,,Супермаркеты
3,57970,64838,2,2019-01-01,Покупка,6203.70,363834.0,278.0,454.0,Дом/Ремонт
4,12232,11591,2,2019-01-01,Покупка,734.53,85919.0,286.0,878.0,Супермаркеты
...,...,...,...,...,...,...,...,...,...,...
11987612,44990,111641,1,2019-12-31,Покупка,121.00,1940970.0,330.0,675.0,Фаст Фуд
11987613,53023,57593,2,2019-12-31,Покупка,102.29,1198564.0,286.0,,Супермаркеты
11987614,78716,99133,1,2019-12-31,Покупка,175.00,2688766.0,229.0,901.0,Транспорт
11987615,73104,91039,2,2019-12-31,Покупка,185.99,1965313.0,286.0,878.0,Супермаркеты


# Personalized Client Experience: A Rule Based Approach for Advice System
In this section, we look at the **categories** into which most of each client's transactions fall. This information reveals the client's **spending priorities**, which we can use to enhance the client's experience through **promotions** and **advice on saving**.

First, we determine the top 5 spending priorities across all clients in the dataset. Then we determine the top 5 spending priorities of each individual client, in order to **personalize** the client experience, which in turn fosters **client loyalty**.

In [53]:
def Personalized_Experience(transactions_data, n=5):
    """Derive global and per-client top-``n`` spending categories.

    Categories are ranked by number of transactions (rows), not by amount.

    Parameters
    ----------
    transactions_data : pandas.DataFrame
        Transactions table; must contain the columns ``'party_rk'`` (client
        identifier) and ``'category'``.
    n : int, default 5
        How many top categories to keep, both globally and per client.

    Returns
    -------
    global_priority : pandas.Index
        The ``n`` most frequent categories over the whole table, most
        frequent first.
    Personalized_Advice : pandas.DataFrame
        One row per client, in order of first appearance in the input, with
        columns:

        * ``'party_rk'``  - the client identifier;
        * ``'Promotion'`` - the client's top-``n`` categories that are NOT in
          ``global_priority`` (client-specific priorities);
        * ``'Saving'``    - the client's top-``n`` categories that ARE in
          ``global_priority``.

        Both lists are ordered by the client's own ranking (most frequent
        first).

    Notes
    -----
    Rows whose ``'party_rk'`` is missing are ignored by the grouping, and
    missing categories are not counted.
    """
    advice_columns = ['party_rk', 'Promotion', 'Saving']

    # Top-n categories by transaction count over the whole dataset.
    global_priority = transactions_data['category'].value_counts().nlargest(n).keys()
    global_set = set(global_priority)  # O(1) membership checks below

    # A single grouping pass replaces re-filtering the full frame once per
    # client; sort=False keeps clients in order of first appearance.
    categories_by_client = transactions_data.groupby('party_rk', sort=False)['category']

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and growing a frame row by row is quadratic.
    records = []
    for party_id, client_categories in categories_by_client:
        local_priority = client_categories.value_counts().nlargest(n).keys().tolist()
        records.append({
            'party_rk': party_id,
            'Promotion': [cat for cat in local_priority if cat not in global_set],
            'Saving': [cat for cat in local_priority if cat in global_set],
        })

    Personalized_Advice = pd.DataFrame(records, columns=advice_columns)
    return global_priority, Personalized_Advice

In [54]:
# Compute the global top-5 categories and the per-client advice table.
global_priority, Personalized_Advice = Personalized_Experience(transactions_data=df, n=5)

# Personalized Advice for Better Client Experience
The function `Personalized_Experience(df)` takes the full transactions table as input and returns 2 outputs:
1. **Global Spending Priority**: the top 5 **spending priorities** across all clients.
2. **Personalized Advice**: a dataframe that helps **Tinkoff** understand the **distinct priorities** of each client, which in turn is useful for **offering promotions** to that client. In addition to promotions, **Tinkoff** can also advise the client on **savings**: some of a client's top 5 priorities may coincide with the **global/mainstream** priorities, and these are not necessarily **true priorities** — they may instead result from client-to-client relationships or general social culture.

In [57]:
# The 5 categories with the most transactions across all clients
list(global_priority)

['Супермаркеты', 'Фаст Фуд', 'Транспорт', 'Финансовые услуги', 'Разные товары']

In [55]:
# Per-client advice table: one row per party_rk with its 'Promotion' and 'Saving' category lists
Personalized_Advice

Unnamed: 0,party_rk,Promotion,Saving
0,20337,"[Топливо, Аптеки]","[Финансовые услуги, Фаст Фуд, Супермаркеты]"
1,63404,[Одежда/Обувь],"[Финансовые услуги, Разные товары, Фаст Фуд, С..."
2,24789,[Дом/Ремонт],"[Разные товары, Финансовые услуги, Транспорт, ..."
3,57970,[Дом/Ремонт],"[Финансовые услуги, Фаст Фуд, Транспорт, Супер..."
4,12232,"[Рестораны, Аптеки, Одежда/Обувь]","[Фаст Фуд, Супермаркеты]"
...,...,...,...
49995,92577,"[Рестораны, Связь/Телеком]","[Финансовые услуги, Фаст Фуд, Супермаркеты]"
49996,89751,"[Дом/Ремонт, Топливо]","[Финансовые услуги, Фаст Фуд, Супермаркеты]"
49997,30676,"[Дом/Ремонт, Аптеки, Одежда/Обувь]","[Разные товары, Супермаркеты]"
49998,4637,"[Дом/Ремонт, Аптеки]","[Разные товары, Фаст Фуд, Супермаркеты]"


In [65]:
# Categories that appear most often in clients' 'Promotion' lists
# (explode() flattens the per-client lists; nlargest() keeps the top 5 by default)
Personalized_Advice['Promotion'].explode().value_counts().nlargest()

Топливо       16341
Аптеки        12535
Наличные      11239
Рестораны     11194
Дом/Ремонт    11162
Name: Promotion, dtype: int64