In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

In [2]:
# Read product groups and train file
train = pd.read_csv("../data/train.csv")
product_groups = pd.read_csv("../data/product_groups.csv")
product_groups = product_groups.astype(object)

train

Unnamed: 0,individualnumber,category_number,hakkedis_amt,odul_amt,response
0,94230288,9000,21.0,2.0,0
1,4684087,9000,17.0,1.0,0
2,92472145,9058,24.0,3.0,0
3,88026681,9030,22.0,2.0,0
4,98127795,9001,38.0,3.0,0
...,...,...,...,...,...
13110,97214433,9059,27.0,2.0,0
13111,96841665,9004,65.0,6.0,0
13112,98445787,9044,33.0,3.0,0
13113,13781030,9004,66.0,6.0,0


In [3]:
# Build a mapping = category_level1, level2, level3, level4 -> category_number

product_groups["hash"] = product_groups["category_level_1"].astype(str) + (product_groups["category_level_2"].astype(str)) + (product_groups["category_level_3"].astype(str)) + (product_groups["category_level_4"].astype(str))
hash_to_category_number = {}
category_number_to_hash = {}


# Keep hash as the {[first category level][second_category_level[0]]}
for index, row in product_groups[["category_number", "hash"]].iterrows():
    hash = row[1]
    if hash not in hash_to_category_number:
        hash_to_category_number[hash] = []

    if row["category_number"] not in hash_to_category_number[hash]:
        hash_to_category_number[hash].append(row["category_number"])

sum = 0
for hash in hash_to_category_number:
    if len(hash_to_category_number[hash]) > 1:
        sum += 1

for row in product_groups[["category_number", "hash"]].itertuples():
    hash = row[2]
    category_number = row[1]
    if category_number not in category_number_to_hash:
        category_number_to_hash[category_number] = []
    
    if hash not in category_number_to_hash[category_number]:
        category_number_to_hash[category_number].append(hash)
    
#for category_number in category_number_to_hash:
#    print(f"{category_number}: {category_number_to_hash[category_number]}")

for hash in hash_to_category_number:
    print(f"{hash}: {hash_to_category_number[hash]}")

341201010: [9046]
125251010: [9009]
410652010: [9049]
300101911: [9019, 9035]
305311010: [9035]
335751010: [9044]
313111010: [9056]
425451010: [9057]
616301210: [9052]
305211010: [9035]
616401510: [9055]
636411510: [9038]
660401027: [9054]
220451010: [9032]
110101010: [9001]
613351010: [9011]
628251015: [9030]
67040355: [9053]
603251010: [9000]
410507010: [9049]
335611010: [9041, 9044]
410251514: [9049]
650551035: [9044]
310401010: [9038]
220602010: [9060]
425101010: [9049]
410351010: [9049]
305101010: [9035]
646151515: [9042]
313161515: [9056]
311601010: [9037]
131301010: [9006]
300201115: [9035]
609501025: [9001]
335551035: [9044]
644301010: [9037]
617551015: [9058]
603301010: [9000]
420751510: [9052]
657601015: [9049]
340401010: [9046]
617651010: [9012]
657351111: [9049]
103251010: [9000]
200151015: [9022]
335601010: [9044]
300231010: [9035]
657151010: [9049]
125502510: [9012]
410401140: [9049]
665451011: [9057]
312101520: [9040]
425501010: [9057, 9049]
330151510: [9043]
650601010: 

RESULTS:

There are 151 hashes pointing to at least 2 different categories.

There are 010 hashes pointing to at least 3 different categories.

There are 002 hashes pointing to at least 4 different categories.


In [4]:
# Get card number - individual number match:

shopping_by_card_number = pd.read_csv("../data/transaction_header.csv")
shopping_by_card_number = shopping_by_card_number.astype(object)
print(shopping_by_card_number.describe()) # Each basket id is unique 
print()

cardnumber_to_individualnumber = pd.read_csv("../data/customeraccount.csv")
#cardnumber_to_individualnumber.index = cardnumber_to_individualnumber["cardnumber"]

cardnumber_to_individualnumber = cardnumber_to_individualnumber.astype(object)
print(cardnumber_to_individualnumber.describe()) # Card number is unique,
# But some individuals have more than one cards: 28593 individuals, 35159 cards.
#cardnumber_to_individualnumber.drop(columns="cardnumber", inplace=True)
cardnumber_to_individualnumber

       date_of_transaction        cardnumber           basketid  is_sanal
count              1124673           1124673            1124673   1124673
unique                 366             30319            1124673         2
top             2020-12-31  9887854616568815  20120163990041163         0
freq                  5032              1239                  1   1024395

        individualnumber        cardnumber
count              35159             35159
unique             28593             35159
top             96035559  7287134635560315
freq                  49                 1


Unnamed: 0,individualnumber,cardnumber
0,16481068,7287134635560315
1,17634947,6587004647560415
2,19323290,8187034648564315
3,19323290,3005502487247749
4,37820213,6287114610560316
...,...,...
35154,100998656,2687784602565315
35155,101074070,6387894671560235
35156,101270229,9287054697566815
35157,101328888,2587064608564915


In [5]:
# Get: 
# basket id - individual number match
# individual_number - sanal and non-sanal shopping counts match
# individual_number - days_since_last_shopping match

# Basket id - individual number match:
shopping_by_individual_number = pd.merge(shopping_by_card_number,cardnumber_to_individualnumber,how='inner',on='cardnumber')
shopping_by_individual_number = shopping_by_individual_number[["individualnumber", "basketid", "is_sanal", "date_of_transaction"]]

basket_id_to_individual_number = shopping_by_individual_number[["basketid", "individualnumber"]].set_index("basketid")
basket_id_to_individual_number = basket_id_to_individual_number["individualnumber"].to_dict()
basket_id_to_individual_number

# Individual_number - sanal, non-sanal shopping counts:
individual_shopping_details = pd.DataFrame()

by_individual = shopping_by_individual_number.groupby("individualnumber")
individual_sanal_mean = by_individual["is_sanal"].mean().to_dict()

individual_shopping_details["individualnumber"] = individual_sanal_mean.keys()
individual_shopping_details["sanal_percent"] = individual_sanal_mean.values()
individual_shopping_details["shop_count"] = by_individual["basketid"].count().values

# Individual_number - months_since_last_shopping (from 2022 january):

last_shop_dates = by_individual["date_of_transaction"].max()
individual_to_months_since_last_shopping = {}
for individual, last_date in last_shop_dates.items():
    last_date_str = str(last_date)
    if last_date_str == "2021-12-01":
        last_date_str = "2021-11-30" 
    
    year = int(last_date_str[:4])
    month = int(last_date_str[5:7])
    months_since_last_shopping = 12 * (2021 - year) + (12 - month)
    individual_to_months_since_last_shopping[individual] = months_since_last_shopping

individual_shopping_details["months_since_last_shopping"] = individual_to_months_since_last_shopping.values()

# Resulting df:
individual_shopping_details

Unnamed: 0,individualnumber,sanal_percent,shop_count,months_since_last_shopping
0,30690,0.000000,42,1
1,294867,0.160000,50,1
2,322878,0.138889,72,1
3,356411,0.265823,79,1
4,374368,0.166667,90,1
...,...,...,...,...
28571,102278919,0.000000,1,1
28572,102279267,1.000000,1,1
28573,102280492,0.000000,1,1
28574,102280832,0.000000,2,1


In [6]:
# Read transaction sale file:
transaction_sale = pd.read_csv("../data/transaction_sale/transaction_sale.csv")

# Some discount_type_1 entries are empty: fill them with 0
transaction_sale["discount_type_1"].fillna(0, inplace=True)
transaction_sale

Unnamed: 0,basketid,category_level_1,category_level_2,category_level_3,category_level_4,amount,quantity,discount_type_1,discount_type_2,discount_type_3
0,20120706070177471,613,50,50,15,19.90,2.0,0.0,0.0,0.00
1,20120706070177471,425,30,12,10,3.50,1.0,0.0,0.0,0.00
2,20120706070177471,425,30,12,11,7.90,1.0,0.0,0.0,0.00
3,20122703940031323,103,25,15,10,7.90,2.0,0.0,0.0,0.00
4,20122703940031323,101,15,15,15,16.00,1.0,0.0,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...
6537876,20120839080026509,103,35,10,10,2.95,1.0,0.0,0.0,0.00
6537877,20120839080026509,130,85,11,10,3.40,1.0,0.0,0.0,0.00
6537878,20120839080026509,131,10,10,20,4.45,1.0,0.0,0.0,0.00
6537879,21101831040099839,103,35,10,10,0.01,1.0,0.0,0.0,1.24


* Implemented three methods for storing the shopping history of a customer.
* Store Type 3 is used in current implementation

### Store Type 1: _Store each buy seperately_

In this format data is stored in the following way:

    {
        "individual_number_1": [
            {"category_1": 130, "category_2": 45, "category_3": 10, "category_4": 20, "amount": 19.90, "quantity": 1.000}
            {"category_1": 150, "category_2": 15, "category_3": 15, "category_4": 25, "amount": 3.50, "quantity": 1.430}
        ]
        "individual_number_2": [
            {"category_1": 617, "category_2": 25, "category_3": 15, "category_4": 10, "amount": 8.75, "quantity": 2.000}
        ]
    }

### Store Type 2: _Store by total volume of the buy, classify them according to category level 1 and category level 2_

In this format data is stored in the following way:

    {
        "individual_number_1": {

            613: {
                50: [3.0, 49.75], 
                40: [11.0, 248.65], 
                15: [2.0, 8.9]
            }, 

            425: {
                30: [2.0, 11.4]
            }, 
            
            130: {
                85: [10.0, 165.06]
            }
        }
    }

* First key represents the category level 1.
* Second key represents the category level 2.
* Inner list stores the quantity and total money spent on that combination of category_1, category_2:
* [quantity, total_amount]

### Store Type 3: _Store by total volume of the buy, classify them according to category number used in campaigns_

In this format data is stored in the following way:

    {
        "individual_number_1": {

            9049: [47.89, 3054.63],
            9013: [xx.xx, xxxx.xx],
            ...
            "unknown": [xxx.xx, xxxxx.xx]
        }
        
        "individual_number_2": {

            9049: [16.23, 2045.82],
            9013: [xx.xx, xxxx.xx],
            ...
            "unknown": [xxx.xx, xxxxx.xx]
        }        
    }

* Category level's are converted to category number used in campaigns by using category_level_1 and category_level_2.
* This method may further change.

In [7]:
# To be fixed:
    # Round quantity and total price data

# Store type 1: ~90s to execute.
# Store type 2: ~25s to execute.
# Store type 3: ~30s to execute.

# Select store type: 1 or 2 or 3: (0 for to see individual_shopping_volume and individual_to_total_discount only)
store_type = 3

individual_shopping_information = {}
individual_shopping_volume = {} # Simple dictionary: {"individual_number": total_money_spent}
individual_to_total_discount = {} # Simple dictionary: {"individual_number": total_discount}

# Read the transaction_sale file and save each customer's shopping history in the "individual_shopping_information" dictionary.
for row in transaction_sale.itertuples(): 
    # Get necessary features:

    basket_id = row[1]
    category_1 = row[2]
    category_2 = row[3]
    category_3 = row[4]
    category_4 = row[5]
    amount = row[6]
    quantity = row[7]
    discount_1 = row[8]
    discount_2 = row[9]
    discount_3 = row[10]
    
    # Get total discount in terms of tl:
    total_discount_value = amount * quantity * (1 - (1 - (discount_1 / 100)) * (1 - (discount_2 / 100)) * (1 - (discount_3 / 100)))
    # Get individual number:
    individual_number = basket_id_to_individual_number[basket_id]
    
    if individual_number not in individual_shopping_volume:
        individual_shopping_volume[individual_number] = 0
    individual_shopping_volume[individual_number] += quantity * amount

    if store_type == 1:
        # Storing method: type1 -> Store the products seperately.
        # Initialize customer shopping information if customer is not in information_shopping_information:
        if individual_number not in individual_shopping_information:
            individual_shopping_information[individual_number] = []
            
        product_information = {}
        product_information["category_level_1"] = category_1
        product_information["category_level_2"] = category_2
        product_information["category_level_3"] = category_3
        product_information["category_level_4"] = category_4
        product_information["amount"] = amount
        product_information["quantity"] = quantity

        individual_shopping_information[individual_number].append(product_information)
    
    elif store_type == 2:
        # Storing method: type2 -> Store the products by keys as category_1 and category_2, values = volume of the buy history.
        # Initialize customer shopping information if customer is not in individual_shopping_information:        
        if individual_number not in individual_shopping_information:
            individual_shopping_information[individual_number] = {}
        
        # Initialize category_1 in customer's shopping history if category_1 is not in customer's shopping history:
        if category_1 not in individual_shopping_information[individual_number]:
            individual_shopping_information[individual_number][category_1] = {}

        # Initialize category_1 in customer's category_1 shopping history if category_2 is not in customer's category_1 shopping history:
        if category_2 not in individual_shopping_information[individual_number][category_1]:
             # 0th index will be quantity, 1st index will be total money spent on that category combination.
            individual_shopping_information[individual_number][category_1][category_2] = [0, 0]
        
        # Increment the total volume in customer's shopping history branch: category_1, branch: category_2:
        individual_shopping_information[individual_number][category_1][category_2][0] += quantity
        individual_shopping_information[individual_number][category_1][category_2][1] += quantity * amount

    elif store_type == 3:
        # Storing method: type3 -> Store the products' volume by category_number used in campaign.
        # To match a product's category level to category type used in campaign, use the hash= {category_level_1 + category_level_2 + category_level_3 + category_level_2}
        # Initialize customer shopping information if customer is not in individual_shopping_information:
        if individual_number not in individual_shopping_information:
            individual_shopping_information[individual_number] = {}
            individual_to_total_discount[individual_number] = 0
        
        category_hash = str(category_1) + str(category_2) + str(category_3) + str(category_4)
        try:
            # If category_hash is found in hash_to_category_number:
            category_number_list = hash_to_category_number[category_hash]
        except:
            # Else category_number is unknown
            category_number_list = ["unknown"]

        # Initialize category_number in customer's shopping history if category_number is not in customer's shopping history:
        for category_number in category_number_list:
            if category_number not in individual_shopping_information[individual_number]:
                # 0th index will be quantity, 1st index will be total money spent on that category number.
                individual_shopping_information[individual_number][category_number] = [0, 0]
            
            individual_shopping_information[individual_number][category_number][0] += quantity
            individual_shopping_information[individual_number][category_number][1] += quantity * amount

        # Add total discount earned by customer:
        individual_to_total_discount[individual_number] += total_discount_value

In [8]:
individual_to_total_discount

{90065509: 3274.7931359070712,
 56076820: 368.5615558013625,
 37752547: 749.0056087119443,
 98384331: 156.81986585068393,
 94255308: 110.13502929323064,
 93937111: 3401.73919138503,
 94688545: 97.06568871123869,
 91747483: 3073.1180135719073,
 94778614: 5237.759673644285,
 16893075: 178.90372790320328,
 94616834: 1236.2196031971614,
 94697868: 19.89641185390461,
 74174356: 29.599544407201932,
 96249709: 2808.797908318407,
 73870110: 462.5249998909595,
 16472172: 1903.1842861732866,
 88801604: 10513.535668392464,
 93449464: 1514.3134671574367,
 92876940: 14079.69040122394,
 89216880: 429.31949906660697,
 93350998: 858.573211572,
 96787839: 832.8973876418496,
 95993648: 303.18105248021925,
 25235949: 371.4136164711623,
 93862335: 31.923026943575,
 57167031: 19.994308965,
 93318609: 839.6616735017753,
 98243967: 15.27853863687,
 74945395: 584.0883996449508,
 68952827: 13.912196999999999,
 93045592: 2035.4002561721813,
 97109866: 308.2178427302997,
 93522155: 250.2897986380822,
 97690886: 

In [9]:
# Prepare individual shopping volume and individual total discount data:
individual_shopping_volume_df = pd.DataFrame()
individual_shopping_volume_df["individualnumber"] = individual_shopping_volume.keys()
individual_shopping_volume_df["total_money_spent"] = individual_shopping_volume.values()

individual_total_discount_df = pd.DataFrame()
individual_total_discount_df["individualnumber"] = individual_to_total_discount.keys()
individual_total_discount_df["total_discount"] = individual_to_total_discount.values()

individual_total_discount_df


Unnamed: 0,individualnumber,total_discount
0,90065509,3274.793136
1,56076820,368.561556
2,37752547,749.005609
3,98384331,156.819866
4,94255308,110.135029
...,...,...
28571,95633120,0.043725
28572,101593888,23.101875
28573,100444534,0.000000
28574,22325553,0.720000


In [10]:
# Collect and organize individual personal information data:

customer_personal = pd.read_csv("../data/customer.csv")
customer_personal["age"] = 2022 - customer_personal["dateofbirth"]
customer_personal.drop(columns="dateofbirth", inplace=True)

customer_personal

Unnamed: 0,individualnumber,gender,city_code,age
0,94212124,K,,76.0
1,96387515,E,34.0,48.0
2,95040383,E,80.0,42.0
3,94694434,E,,52.0
4,47648671,E,35.0,59.0
...,...,...,...,...
28588,94368469,K,34.0,74.0
28589,90691477,E,7.0,32.0
28590,95834384,E,22.0,52.0
28591,92199848,E,34.0,37.0


In [11]:
# Prepare the data frame to explore the correlation between shopping history and other features of an individual and their response.

# To initialize the data, merge train data with individual shopping volume (total money spent):
response_by_shopping_history = pd.merge(train, individual_shopping_volume_df, how="inner", on="individualnumber")
# Merge the data with individual total discount:
response_by_shopping_history = pd.merge(response_by_shopping_history, individual_total_discount_df, how="inner", on="individualnumber")
# Merge the data with individual shopping details (sanal percentage, months since last shopping etc.)
response_by_shopping_history = pd.merge(response_by_shopping_history, individual_shopping_details, how="inner", on="individualnumber")
# Merge the data with customer personal information (age, gender etc.): 
response_by_shopping_history = pd.merge(response_by_shopping_history, customer_personal, how="inner", on="individualnumber")

# Prepeare relevant category, relevant quantity features:
# relevant_category = The category number of the campaing that has been proposed to individual.

# Includes total money spent by a customer in relevant_category:
relevant_category_volume_column = []
# Includes percentage of relevant_category expenses of a customer,
# (relevant_category expense / total expense):
relevant_category_percent_without_unknown_column = []
# Includes percentage of relevant_category expenses of a customer excluding unknown category expenses,
# (relevant_category expense / (total expense - unknown expense)):
relevant_category_percent_column = []
# Includes total quantity of items in relevant_category that is bought by customer:
relevant_category_quantity_column = []

# Iterate over individuals to add more features into the data frame.
for row in response_by_shopping_history.itertuples():
    individual_number = row[1]
    category_number = row[2]
    total_money_spent = row[6]

    # Get relevant category expense from the data
    try:
        relevant_money_spent = individual_shopping_information[individual_number][category_number][1]
        relevant_quantity = individual_shopping_information[individual_number][category_number][0]
    except:
        relevant_money_spent = 0
        relevant_quantity = 0
    
    # Get unknown expense from the data
    try:
        unknown = individual_shopping_information[individual_number]["unknown"][1]
    except:
        unknown = 0
    
    # Try to find (relevant_category expense / (total expense - unknown expense)), if total expense = unknown expense, set percentage = 0:
    try:
        relevant_category_percent_without_unknown_column.append(round(relevant_money_spent / (total_money_spent - unknown), 3))
    except:
        relevant_category_percent_without_unknown_column.append(0) 
   
    # Append columns by relevant information:
    relevant_category_volume_column.append(round(relevant_money_spent, 3))
    relevant_category_percent_column.append(round(relevant_money_spent / total_money_spent, 3))
    relevant_category_quantity_column.append(round(relevant_quantity, 3))

# Finalize columns in the original data frame:
response_by_shopping_history["relevant_category_volume"] = relevant_category_volume_column
response_by_shopping_history["relevant_category_quantity"] = relevant_category_quantity_column
# response_by_shopping_history["relevant_category_percent"] = relevant_category_percent_column
# response_by_shopping_history["relevant_category_percent_without_unknown"] = relevant_category_percent_without_unknown_column

response_by_shopping_history

Unnamed: 0,individualnumber,category_number,hakkedis_amt,odul_amt,response,total_money_spent,total_discount,sanal_percent,shop_count,months_since_last_shopping,gender,city_code,age,relevant_category_volume,relevant_category_quantity
0,94230288,9000,21.0,2.0,0,7118.94741,177.489978,0.015625,64,1,E,7.0,39.0,241.110,36.000
1,4684087,9000,17.0,1.0,0,10192.64563,31907.260487,0.000000,30,1,E,19.0,71.0,105.900,12.000
2,92472145,9058,24.0,3.0,0,35113.80415,10060.118391,0.013514,518,1,K,35.0,40.0,120.850,11.000
3,88026681,9030,22.0,2.0,0,11559.66676,231.976316,0.950000,40,1,K,,61.0,138.650,8.000
4,98127795,9001,38.0,3.0,0,461.77380,1.738800,0.000000,4,3,E,9.0,38.0,38.000,2.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13106,97214433,9059,27.0,2.0,0,16893.83688,1598.015176,0.009881,506,1,K,,25.0,58.960,5.000
13107,96841665,9004,65.0,6.0,0,13089.84113,500.344604,0.000000,40,1,E,7.0,51.0,250.750,5.000
13108,98445787,9044,33.0,3.0,0,2432.60788,244.758629,0.000000,39,1,K,33.0,47.0,27.656,0.832
13109,13781030,9004,66.0,6.0,0,1242.70635,6.080452,0.090909,11,1,E,54.0,41.0,68.500,1.000


In [15]:
test_df = pd.read_csv("../data/test.csv")

# Prepare the data frame to explore the correlation between shopping history and other features of an individual and their response.

# To initialize the data, merge train data with individual shopping volume (total money spent):
response_by_shopping_history_test = pd.merge(test_df, individual_shopping_volume_df, how="inner", on="individualnumber")
# Merge the data with individual total discount:
response_by_shopping_history_test = pd.merge(response_by_shopping_history_test, individual_total_discount_df, how="inner", on="individualnumber")
# Merge the data with individual shopping details (sanal percentage, months since last shopping etc.)
response_by_shopping_history_test = pd.merge(response_by_shopping_history_test, individual_shopping_details, how="inner", on="individualnumber")
# Merge the data with customer personal information (age, gender etc.): 
response_by_shopping_history_test = pd.merge(response_by_shopping_history_test, customer_personal, how="inner", on="individualnumber")

# Prepeare relevant category, relevant quantity features:
# relevant_category = The category number of the campaing that has been proposed to individual.

# Includes total money spent by a customer in relevant_category:
relevant_category_volume_column = []
# Includes percentage of relevant_category expenses of a customer,
# (relevant_category expense / total expense):
relevant_category_percent_without_unknown_column = []
# Includes percentage of relevant_category expenses of a customer excluding unknown category expenses,
# (relevant_category expense / (total expense - unknown expense)):
relevant_category_percent_column = []
# Includes total quantity of items in relevant_category that is bought by customer:
relevant_category_quantity_column = []

# Iterate over individuals to add more features into the data frame.
for row in response_by_shopping_history_test.itertuples():
    individual_number = row[1]
    category_number = row[2]
    total_money_spent = row[6]

    # Get relevant category expense from the data
    try:
        relevant_money_spent = individual_shopping_information[individual_number][category_number][1]
        relevant_quantity = individual_shopping_information[individual_number][category_number][0]
    except:
        relevant_money_spent = 0
        relevant_quantity = 0
    
    # Get unknown expense from the data
    try:
        unknown = individual_shopping_information[individual_number]["unknown"][1]
    except:
        unknown = 0
   
    # Append columns by relevant information:
    relevant_category_volume_column.append(round(relevant_money_spent, 3))
    #relevant_category_percent_column.append(round(relevant_money_spent / total_money_spent, 3))
    relevant_category_quantity_column.append(round(relevant_quantity, 3))

# Finalize columns in the original data frame:
response_by_shopping_history_test["relevant_category_volume"] = relevant_category_volume_column
response_by_shopping_history_test["relevant_category_quantity"] = relevant_category_quantity_column
# response_by_shopping_history_test["relevant_category_percent"] = relevant_category_percent_column
# response_by_shopping_history_test["relevant_category_percent_without_unknown"] = relevant_category_percent_without_unknown_column

response_by_shopping_history_test.describe()

Unnamed: 0,individualnumber,category_number,hakkedis_amt,odul_amt,total_money_spent,total_discount,sanal_percent,shop_count,months_since_last_shopping,city_code,age,relevant_category_volume,relevant_category_quantity
count,13225.0,13225.0,13225.0,13225.0,13225.0,13218.0,13225.0,13225.0,13225.0,10321.0,13217.0,13225.0,13225.0
mean,90128600.0,9029.709943,40.259584,3.701928,5968.267,1747.167,0.073658,29.964839,2.360756,31.484934,41.952561,219.2105,6.882305
std,21057360.0,17.854188,25.612783,2.642906,55830.0,105454.2,0.215655,52.394111,2.380982,18.007309,14.102635,9432.027,26.998513
min,356411.0,9000.0,7.0,1.0,0.01,-45.18666,0.0,1.0,1.0,1.0,-27.0,0.0,0.0
25%,93466060.0,9017.0,21.0,2.0,387.958,6.422546,0.0,4.0,1.0,16.0,31.0,0.0,0.0
50%,96342830.0,9035.0,32.0,3.0,1425.325,40.71957,0.0,13.0,1.0,34.0,40.0,22.56,2.0
75%,99393600.0,9046.0,55.0,5.0,4774.184,183.9468,0.0,35.0,3.0,39.0,51.0,84.95,5.142
max,102281900.0,9061.0,180.0,20.0,5409509.0,11469800.0,1.0,1029.0,12.0,81.0,102.0,1075747.0,2112.0
