In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

In [3]:
from catboost import CatBoostRegressor
from catboost import Pool
from sklearn.model_selection import  train_test_split
from sklearn.metrics import (
    f1_score,
    accuracy_score,
    precision_score,
    recall_score,
    matthews_corrcoef,
    roc_auc_score,
    confusion_matrix,
    classification_report,
    r2_score,
    mean_squared_error,
    mean_absolute_error,
    mean_absolute_percentage_error,
)
import shap
from sklearn.model_selection import RepeatedKFold, RepeatedStratifiedKFold

In [4]:
def score_regression_simple(target, predictions,) -> dict:
    '''
    Compute a fixed set of regression quality metrics.

    Args:
        target: true target values (array-like).
        predictions: predicted values (array-like); converted to a numpy array.

    Returns:
        dict mapping the metric names "R2", "MAE", "MSE", "RMSE" and "MAPE"
        to their values rounded to 3 decimal places.
    '''
    predictions = np.array(predictions)

    # MSE is evaluated once and reused for RMSE.
    mse = mean_squared_error(target, predictions)

    metrics = {
        "R2": round(r2_score(target, predictions), 3),
        "MAE": round(mean_absolute_error(target, predictions), 3),
        "MSE": round(mse, 3),
        "RMSE": round(np.sqrt(mse), 3),
        "MAPE": round(mean_absolute_percentage_error(target, predictions), 3),
    }
    return metrics

# SETUP

In [5]:
# Input export: the CSV is read from f'{BD_FOLDER}/{BD_NAME}'.
BD_NAME = 'export_20220916104856.csv'
BD_FOLDER = './data'
# Version tag embedded in the model file names loaded later in the notebook.
VERSION = 'v2.2.220916'
# Rows dated before START_DATE are dropped before prediction.
START_DATE = '2022-09-03'
# Boundary between the train window (date <= TEST_DATE) and the test window (date > TEST_DATE).
TEST_DATE = '2022-09-12'
# Misspelled legacy name, kept as an alias because other cells still reference it.
TETS_DATE = TEST_DATE

In [6]:
# Expected values used by the data-quality checks, keyed by item type.
#   'is_in'   -> feature name: list of allowed categorical values
#   'min_max' -> feature name: [lower bound, upper bound]
data_quality_expected_range_dict = {
    'sneaker': {
        # 'is_type': {'genesis': bool},
        'is_in': {
            'rarity': ['common', 'uncommon', 'rare'],
            'sneaker_type': ['ranger', 'sprinter', 'hiker', 'coacher'],
        },
        'min_max': {
            # sneaker attributes
            'level': [0, 30],
            'base_performance': [0, 40],
            'base_fortune': [0, 40],
            'base_joy': [0, 40],
            'base_durability': [0, 40],
            'performance': [0, 500],
            'fortune': [0, 300],
            'joy': [0, 300],
            'durability': [0, 300],
            'mint': [0, 7],
            # activity counters
            'buy_count_12H': [0, 500],
            'buy_count_24H': [0, 1000],
            'sell_count_24H': [0, 2000],
            'cancel_count_24H': [0, 1000],
            'token_all_activity_3H': [0, 30],
            'token_sell_activity_6H': [0, 30],
        },
    },
}

# DATA

In [7]:
# Read the exported CSV found at BD_FOLDER/BD_NAME into a DataFrame and display it.
data = pd.read_csv(f'{BD_FOLDER}/{BD_NAME}')
data

Unnamed: 0,timestamp,mrk_id,token_id,event,wallet_from,wallet_to,price,item_type,rarity,sneaker_type,...,time_ownership,wallet_box_mint,wallet_sneaker_mint,time_level_up,time_level_up_for_mint,base_mint_price_amt,base_mint_price_azy,base_mint_price_bnb,predict_base,predict
0,1663325312,31117,45628,buy,0x2ae3cd922642febcb8c96be9b51453a8f792c31f,0x502eb9c57f091f959bb9b7a91d36a137f2cf791a,0.80,sneakers,common,sprinter,...,582818.0,6.0,12.0,900.0,1800.0,300.0,200.0,0.662309,,
1,1663325274,30861,20242,buy,0xcf9ecfba4610c2669627680cc285824a9cd9c038,0x3da9c7cda9df9451f80b1489dffea047a996b0b7,0.85,sneakers,common,hiker,...,3851881.0,5.0,5.0,900.0,1800.0,300.0,200.0,0.662309,,
2,1663325236,31181,39363,buy,0x138a01be8fdb1ef0d054f375380c16ee8f829e3a,0x502eb9c57f091f959bb9b7a91d36a137f2cf791a,0.74,sneakers,common,hiker,...,825626.0,3.0,3.0,900.0,1800.0,300.0,200.0,0.662309,,
3,1663325168,31181,39363,sell,0x138a01be8fdb1ef0d054f375380c16ee8f829e3a,,0.74,sneakers,common,hiker,...,825558.0,3.0,3.0,900.0,1800.0,300.0,200.0,0.662309,0.729,0.793
4,1663325129,31149,18346,buy,0x980f18d18293290117d3b5f1f326ecb754f3b1fe,0x132b03bdef02c593aa211d76ceedcf414a8a8430,1.39,sneakers,common,coacher,...,5617.0,10.0,10.0,9180.0,1800.0,300.0,200.0,0.662309,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34010,1661029606,13798,24335,sell,0x0d4eafe1ed97f590fde02c21dcf2b0ae6cb106d7,,0.90,sneakers,common,hiker,...,1312556.0,7.0,9.0,900.0,1800.0,300.0,200.0,,,
34011,1661029594,13797,27211,sell,0xb83b40609684191abba9184f627a112df5ac0a44,,0.92,sneakers,common,hiker,...,86772.0,5.0,5.0,900.0,1800.0,300.0,200.0,,,
34012,1661029585,13596,32513,buy,0x1041f11e1738c5ea8d3ff3c5a1a314a617313ee1,0x2eeedd73169a17e4f79a6d6bea238f11b05238e4,0.92,box,common,,...,27387.0,7.0,9.0,,1800.0,300.0,200.0,,,
34013,1661029462,13796,32779,sell,0x668ff3191878ed0f7112031305eccb1d79a865e4,,1.00,sneakers,common,sprinter,...,216.0,7.0,8.0,0.0,1800.0,300.0,200.0,,,


In [8]:
# List the column names of the loaded frame.
data.columns

Index(['timestamp', 'mrk_id', 'token_id', 'event', 'wallet_from', 'wallet_to',
       'price', 'item_type', 'rarity', 'sneaker_type', 'level',
       'base_performance', 'base_fortune', 'base_joy', 'base_durability',
       'performance', 'fortune', 'joy', 'durability', 'mint', 'parent1',
       'parent2', 'genesis', 'parent1_sneaker_type', 'parent2_sneaker_type',
       'price_bnb', 'price_azy', 'price_amt', 'wallet_from_buy_count',
       'wallet_from_all_count', 'token_all_activity_3H',
       'token_sell_activity_6H', 'buy_count_12H', 'buy_count_24H',
       'buy_count_48H', 'sell_count_12H', 'sell_count_24H', 'sell_count_48H',
       'cancel_count_12H', 'cancel_count_24H', 'cancel_count_48H',
       'min_price_all_24H', 'min_price_by_rarity_genesis_type_level_mint_24H',
       'min_price_by_rarity_genesis_type_level_mint_48H',
       'min_price_by_rarity_genesis_type_level_mint_72H',
       'min_price_by_rarity_genesis_type_24H',
       'min_price_by_rarity_genesis_type_48H',
    

In [9]:
# Non-null counts and dtypes for the wallet, timing and base-mint-price columns.
data[['wallet_first_sneaker_time',
       'time_ownership', 'wallet_box_mint', 'wallet_sneaker_mint',
       'time_level_up', 'time_level_up_for_mint', 'base_mint_price_amt',
       'base_mint_price_azy', 'base_mint_price_bnb']].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34015 entries, 0 to 34014
Data columns (total 9 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   wallet_first_sneaker_time  24812 non-null  float64
 1   time_ownership             24812 non-null  float64
 2   wallet_box_mint            24812 non-null  float64
 3   wallet_sneaker_mint        24812 non-null  float64
 4   time_level_up              22475 non-null  float64
 5   time_level_up_for_mint     24812 non-null  float64
 6   base_mint_price_amt        24812 non-null  float64
 7   base_mint_price_azy        24812 non-null  float64
 8   base_mint_price_bnb        14789 non-null  float64
dtypes: float64(9)
memory usage: 2.3 MB


In [10]:
# Inspect the 'genesis' column.
data.genesis

0        False
1         True
2        False
3        False
4         True
         ...  
34010     True
34011    False
34012    False
34013    False
34014     True
Name: genesis, Length: 34015, dtype: bool

In [11]:
# Keep only sneaker listings: rows whose item_type is 'sneakers' and whose event is 'sell'.
# .copy() detaches the result so that later column assignments act on an independent frame.
data = data.loc[
    data['item_type'].eq('sneakers') & data['event'].eq('sell')
].copy()
data

Unnamed: 0,timestamp,mrk_id,token_id,event,wallet_from,wallet_to,price,item_type,rarity,sneaker_type,...,time_ownership,wallet_box_mint,wallet_sneaker_mint,time_level_up,time_level_up_for_mint,base_mint_price_amt,base_mint_price_azy,base_mint_price_bnb,predict_base,predict
3,1663325168,31181,39363,sell,0x138a01be8fdb1ef0d054f375380c16ee8f829e3a,,0.74,sneakers,common,hiker,...,825558.0,3.0,3.0,900.0,1800.0,300.0,200.0,0.662309,0.729,0.793
8,1663324958,31180,47952,sell,0x8054bf1f1a6fbecb52f90eacaab28cfe57c4ee5c,,1.10,sneakers,common,hiker,...,36.0,9.0,20.0,900.0,1800.0,300.0,200.0,0.660589,1.001,0.963
13,1663324383,31179,42001,sell,0x2753dd17d0cbc2becc4eb793288b717d4504a6e8,,0.79,sneakers,common,hiker,...,941315.0,12.0,14.0,900.0,1800.0,300.0,200.0,0.661303,0.758,0.842
14,1663324317,31178,49951,sell,0x00a76804cab86f32ee98e2a17ec633a9090b5b5c,,2.89,sneakers,common,coacher,...,49423.0,6.0,5.0,0.0,1800.0,300.0,200.0,0.661344,1.216,1.343
15,1663324301,31177,30791,sell,0xe394f14e45f97b666b2875254f9664741ed5814c,,2.10,sneakers,common,hiker,...,1187889.0,0.0,0.0,12600.0,1800.0,300.0,200.0,0.661344,1.610,1.548
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34009,1661029666,13799,32567,sell,0x8aa37a4036f03ee2fc7f142f2ca7072411913af5,,1.50,sneakers,common,coacher,...,32472.0,1.0,4.0,0.0,1800.0,300.0,200.0,,,
34010,1661029606,13798,24335,sell,0x0d4eafe1ed97f590fde02c21dcf2b0ae6cb106d7,,0.90,sneakers,common,hiker,...,1312556.0,7.0,9.0,900.0,1800.0,300.0,200.0,,,
34011,1661029594,13797,27211,sell,0xb83b40609684191abba9184f627a112df5ac0a44,,0.92,sneakers,common,hiker,...,86772.0,5.0,5.0,900.0,1800.0,300.0,200.0,,,
34013,1661029462,13796,32779,sell,0x668ff3191878ed0f7112031305eccb1d79a865e4,,1.00,sneakers,common,sprinter,...,216.0,7.0,8.0,0.0,1800.0,300.0,200.0,,,


# data_quality

In [12]:
# Categorical features: report missing values and any value outside the allowed list.
dq = data_quality_expected_range_dict['sneaker']['is_in']
for feature in dq.keys():
    if data[feature].isna().sum() > 0:
        print(f'Nans in {feature}')

    for element in data[feature].unique():
        if element not in dq[feature]:
            print(f'{feature} not in data_quality_expected_range_dict. get {element}')

# Numeric features: report missing values and values outside [lower, upper].
dq = data_quality_expected_range_dict['sneaker']['min_max']
for feature in dq.keys():
    if data[feature].isna().sum() > 0:
        print(f'Nans in {feature}')

    # The lower-bound message reports the observed minimum and the lower bound
    # (it previously printed the maximum and the upper bound).
    if data[feature].min() < dq[feature][0]:
        print(f'{feature} less then expected. get: {data[feature].min()} | expected: {dq[feature][0]}')

    if data[feature].max() > dq[feature][1]:
        print(f'{feature} more then expected. get: {data[feature].max()} | expected: {dq[feature][1]}')

In [13]:
# Count missing values in the 'mint' column.
data['mint'].isna().sum()

0

In [14]:
# Frequency of each 'mint' value among rows with level == 5.
data[data['level'] == 5]['mint'].value_counts()

2.0    5186
0.0    1636
1.0    1068
3.0     521
4.0       3
Name: mint, dtype: int64

In [15]:
# Dtypes and non-null counts for every column of the current frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 15819 entries, 3 to 34014
Data columns (total 59 columns):
 #   Column                                           Non-Null Count  Dtype  
---  ------                                           --------------  -----  
 0   timestamp                                        15819 non-null  int64  
 1   mrk_id                                           15819 non-null  int64  
 2   token_id                                         15819 non-null  int64  
 3   event                                            15819 non-null  object 
 4   wallet_from                                      15819 non-null  object 
 5   wallet_to                                        0 non-null      object 
 6   price                                            15819 non-null  float64
 7   item_type                                        15819 non-null  object 
 8   rarity                                           15819 non-null  object 
 9   sneaker_type                

In [16]:
# Non-null counts and dtypes for the wallet, timing and base-mint-price columns.
data[['wallet_first_sneaker_time',
       'time_ownership', 'wallet_box_mint', 'wallet_sneaker_mint',
       'time_level_up', 'time_level_up_for_mint', 'base_mint_price_amt',
       'base_mint_price_azy', 'base_mint_price_bnb']].info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 15819 entries, 3 to 34014
Data columns (total 9 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   wallet_first_sneaker_time  15819 non-null  float64
 1   time_ownership             15819 non-null  float64
 2   wallet_box_mint            15819 non-null  float64
 3   wallet_sneaker_mint        15819 non-null  float64
 4   time_level_up              15819 non-null  float64
 5   time_level_up_for_mint     15819 non-null  float64
 6   base_mint_price_amt        15819 non-null  float64
 7   base_mint_price_azy        15819 non-null  float64
 8   base_mint_price_bnb        9224 non-null   float64
dtypes: float64(9)
memory usage: 1.2 MB


In [17]:
# Display the values of the wallet, timing and base-mint-price columns.
data[['wallet_first_sneaker_time',
       'time_ownership', 'wallet_box_mint', 'wallet_sneaker_mint',
       'time_level_up', 'time_level_up_for_mint', 'base_mint_price_amt',
       'base_mint_price_azy', 'base_mint_price_bnb']]

Unnamed: 0,wallet_first_sneaker_time,time_ownership,wallet_box_mint,wallet_sneaker_mint,time_level_up,time_level_up_for_mint,base_mint_price_amt,base_mint_price_azy,base_mint_price_bnb
3,1.662500e+09,825558.0,3.0,3.0,900.0,1800.0,300.0,200.0,0.662309
8,1.657805e+09,36.0,9.0,20.0,900.0,1800.0,300.0,200.0,0.660589
13,1.658319e+09,941315.0,12.0,14.0,900.0,1800.0,300.0,200.0,0.661303
14,1.657800e+09,49423.0,6.0,5.0,0.0,1800.0,300.0,200.0,0.661344
15,1.662136e+09,1187889.0,0.0,0.0,12600.0,1800.0,300.0,200.0,0.661344
...,...,...,...,...,...,...,...,...,...
34009,1.658787e+09,32472.0,1.0,4.0,0.0,1800.0,300.0,200.0,
34010,1.657800e+09,1312556.0,7.0,9.0,900.0,1800.0,300.0,200.0,
34011,1.657800e+09,86772.0,5.0,5.0,900.0,1800.0,300.0,200.0,
34013,1.657800e+09,216.0,7.0,8.0,0.0,1800.0,300.0,200.0,


In [18]:
# List the column names of the current frame.
data.columns

Index(['timestamp', 'mrk_id', 'token_id', 'event', 'wallet_from', 'wallet_to',
       'price', 'item_type', 'rarity', 'sneaker_type', 'level',
       'base_performance', 'base_fortune', 'base_joy', 'base_durability',
       'performance', 'fortune', 'joy', 'durability', 'mint', 'parent1',
       'parent2', 'genesis', 'parent1_sneaker_type', 'parent2_sneaker_type',
       'price_bnb', 'price_azy', 'price_amt', 'wallet_from_buy_count',
       'wallet_from_all_count', 'token_all_activity_3H',
       'token_sell_activity_6H', 'buy_count_12H', 'buy_count_24H',
       'buy_count_48H', 'sell_count_12H', 'sell_count_24H', 'sell_count_48H',
       'cancel_count_12H', 'cancel_count_24H', 'cancel_count_48H',
       'min_price_all_24H', 'min_price_by_rarity_genesis_type_level_mint_24H',
       'min_price_by_rarity_genesis_type_level_mint_48H',
       'min_price_by_rarity_genesis_type_level_mint_72H',
       'min_price_by_rarity_genesis_type_24H',
       'min_price_by_rarity_genesis_type_48H',
    

In [19]:
#int_columns = ['timestamp', 'mrk_id', 'token_id', 'level', 'mint']
#float_columns = ['performance', 'fortune', 'joy', 'durability', 'price']
#cat_columns = ['item_type', 'rarity', 'sneaker_type',]

In [20]:
#for columns in int_columns:
#    data[columns] = data[columns].astype('int')

# for columns in float_columns:
#     data[columns] = data[columns].astype('float')

#for columns in cat_columns:
#    data[columns] = data[columns].astype('category')

In [21]:
#data['min_price_by_rarity_genesis_type_level_mint_24H'] = data['min_price_by_rarity_genesis_type_level_mint_24H'].fillna(0)
#data['min_price_by_rarity_genesis_type_24H'] = data['min_price_by_rarity_genesis_type_24H'].fillna(0)

In [22]:
# Dtypes and non-null counts for every column of the current frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 15819 entries, 3 to 34014
Data columns (total 59 columns):
 #   Column                                           Non-Null Count  Dtype  
---  ------                                           --------------  -----  
 0   timestamp                                        15819 non-null  int64  
 1   mrk_id                                           15819 non-null  int64  
 2   token_id                                         15819 non-null  int64  
 3   event                                            15819 non-null  object 
 4   wallet_from                                      15819 non-null  object 
 5   wallet_to                                        0 non-null      object 
 6   price                                            15819 non-null  float64
 7   item_type                                        15819 non-null  object 
 8   rarity                                           15819 non-null  object 
 9   sneaker_type                

In [23]:
# Display the current frame.
data

Unnamed: 0,timestamp,mrk_id,token_id,event,wallet_from,wallet_to,price,item_type,rarity,sneaker_type,...,time_ownership,wallet_box_mint,wallet_sneaker_mint,time_level_up,time_level_up_for_mint,base_mint_price_amt,base_mint_price_azy,base_mint_price_bnb,predict_base,predict
3,1663325168,31181,39363,sell,0x138a01be8fdb1ef0d054f375380c16ee8f829e3a,,0.74,sneakers,common,hiker,...,825558.0,3.0,3.0,900.0,1800.0,300.0,200.0,0.662309,0.729,0.793
8,1663324958,31180,47952,sell,0x8054bf1f1a6fbecb52f90eacaab28cfe57c4ee5c,,1.10,sneakers,common,hiker,...,36.0,9.0,20.0,900.0,1800.0,300.0,200.0,0.660589,1.001,0.963
13,1663324383,31179,42001,sell,0x2753dd17d0cbc2becc4eb793288b717d4504a6e8,,0.79,sneakers,common,hiker,...,941315.0,12.0,14.0,900.0,1800.0,300.0,200.0,0.661303,0.758,0.842
14,1663324317,31178,49951,sell,0x00a76804cab86f32ee98e2a17ec633a9090b5b5c,,2.89,sneakers,common,coacher,...,49423.0,6.0,5.0,0.0,1800.0,300.0,200.0,0.661344,1.216,1.343
15,1663324301,31177,30791,sell,0xe394f14e45f97b666b2875254f9664741ed5814c,,2.10,sneakers,common,hiker,...,1187889.0,0.0,0.0,12600.0,1800.0,300.0,200.0,0.661344,1.610,1.548
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34009,1661029666,13799,32567,sell,0x8aa37a4036f03ee2fc7f142f2ca7072411913af5,,1.50,sneakers,common,coacher,...,32472.0,1.0,4.0,0.0,1800.0,300.0,200.0,,,
34010,1661029606,13798,24335,sell,0x0d4eafe1ed97f590fde02c21dcf2b0ae6cb106d7,,0.90,sneakers,common,hiker,...,1312556.0,7.0,9.0,900.0,1800.0,300.0,200.0,,,
34011,1661029594,13797,27211,sell,0xb83b40609684191abba9184f627a112df5ac0a44,,0.92,sneakers,common,hiker,...,86772.0,5.0,5.0,900.0,1800.0,300.0,200.0,,,
34013,1661029462,13796,32779,sell,0x668ff3191878ed0f7112031305eccb1d79a865e4,,1.00,sneakers,common,sprinter,...,216.0,7.0,8.0,0.0,1800.0,300.0,200.0,,,


In [24]:
# Derive a datetime 'date' column from the epoch-seconds 'timestamp' column,
# then order the rows chronologically by it.
data = (
    data
    .assign(date=pd.to_datetime(data['timestamp'], unit='s'))
    .sort_values('date')
)

In [25]:
#len(data[(data.date >= START_DATE) & (data.date <= TETS_DATE)])

In [26]:
#train_data = data[(data.date >= START_DATE) & (data.date <= TETS_DATE)].copy()

# ML

## FE

In [27]:
# Combined activity: buy + sell + cancel counts taken from the *_24H columns.
data['sum_activity_24H'] = (
    data['buy_count_24H']
    .add(data['sell_count_24H'])
    .add(data['cancel_count_24H'])
)
# Sell-to-buy ratio from the *_24H columns; a zero buy count produces inf (NaN for 0/0).
data['sells_activity_24H'] = data['sell_count_24H'].div(data['buy_count_24H'])

In [28]:
# Largest observed value of 'token_sell_activity_6H'.
data['token_sell_activity_6H'].max()

12

In [29]:
# NOTE(review): replacing missing values with np.nan looks like a no-op for numeric
# columns (missing values stay missing) — confirm before removing these two lines.
data['min_price_by_rarity_genesis_type_level_mint_24H'] = data['min_price_by_rarity_genesis_type_level_mint_24H'].fillna(np.nan)
data['min_price_by_rarity_genesis_type_24H'] = data['min_price_by_rarity_genesis_type_24H'].fillna(np.nan)

In [30]:
# Number of rows whose date lies between START_DATE and TETS_DATE (both comparisons inclusive).
len(data[(data.date >= START_DATE) & (data.date <= TETS_DATE)])

5414

In [31]:
# Drop everything dated before START_DATE, then cut the remainder at TETS_DATE:
# rows up to and including the boundary go to train, rows after it go to test.
data = data.loc[data['date'] >= START_DATE].copy()
train_data = data.loc[data['date'].between(START_DATE, TETS_DATE)].copy()
test_data = data.loc[data['date'] > TETS_DATE].copy()

In [32]:
# List the column names of the current frame.
data.columns

Index(['timestamp', 'mrk_id', 'token_id', 'event', 'wallet_from', 'wallet_to',
       'price', 'item_type', 'rarity', 'sneaker_type', 'level',
       'base_performance', 'base_fortune', 'base_joy', 'base_durability',
       'performance', 'fortune', 'joy', 'durability', 'mint', 'parent1',
       'parent2', 'genesis', 'parent1_sneaker_type', 'parent2_sneaker_type',
       'price_bnb', 'price_azy', 'price_amt', 'wallet_from_buy_count',
       'wallet_from_all_count', 'token_all_activity_3H',
       'token_sell_activity_6H', 'buy_count_12H', 'buy_count_24H',
       'buy_count_48H', 'sell_count_12H', 'sell_count_24H', 'sell_count_48H',
       'cancel_count_12H', 'cancel_count_24H', 'cancel_count_48H',
       'min_price_all_24H', 'min_price_by_rarity_genesis_type_level_mint_24H',
       'min_price_by_rarity_genesis_type_level_mint_48H',
       'min_price_by_rarity_genesis_type_level_mint_72H',
       'min_price_by_rarity_genesis_type_24H',
       'min_price_by_rarity_genesis_type_48H',
    

In [33]:
# Location of the saved price model for this VERSION.
MODEL_DIR = './models/'
MODEL_NAME = f'sneaker_model_{VERSION}.model'

# A default-constructed regressor is sufficient; the saved model is loaded into it.
model = CatBoostRegressor()
model.load_model(f'{MODEL_DIR}{MODEL_NAME}')

<catboost.core.CatBoostRegressor at 0x7f24acbf31c0>

In [34]:
# Predict using exactly the feature columns recorded in the loaded model,
# rounded to two decimals, and display the result.
predict_price = np.round(model.predict(data[model.feature_names_]), 2)
predict_price

array([0.75, 0.73, 0.9 , ..., 0.83, 1.05, 0.78])

In [35]:
# Store the price model's predictions in the 'predict' column.
data['predict'] = predict_price

In [36]:
# Location of the saved base model for this VERSION.
MODEL_DIR = './models/'
MODEL_NAME = f'sneaker_base_model_{VERSION}.model'

# A default-constructed regressor is sufficient; the saved model is loaded into it.
model = CatBoostRegressor()
model.load_model(f'{MODEL_DIR}{MODEL_NAME}')

# Predict on the feature columns recorded in the base model, rounded to two decimals.
predict_price = np.round(model.predict(data[model.feature_names_]), 2)

In [37]:
# Store the base model's predictions in the 'predict_base' column.
data['predict_base'] = predict_price

In [38]:
# Percentage by which each prediction exceeds (positive) or undercuts (negative) the listed price.
# astype(int) truncates toward zero and fails on non-finite values (e.g. a zero price).
data['predict_diff_pct'] = (
    data['predict'].div(data['price']).mul(100).sub(100).astype(int)
)
data['predict_base_diff_pct'] = (
    data['predict_base'].div(data['price']).mul(100).sub(100).astype(int)
)

In [39]:
# Location of the saved profit model for this VERSION.
MODEL_DIR = './models/'
MODEL_NAME = f'sneaker_profit_model_{VERSION}.model'

# A default-constructed regressor is sufficient; the saved model is loaded into it.
model = CatBoostRegressor()
model.load_model(f'{MODEL_DIR}{MODEL_NAME}')

# The variable keeps the name predict_price, but here it holds the profit model's output.
predict_price = np.round(model.predict(data[model.feature_names_]), 2)
predict_price

array([0.11, 0.09, 0.15, ..., 0.13, 0.13, 0.05])

In [40]:
# Store the profit model's predictions in the 'predict_profit' column.
data['predict_profit'] = predict_price

In [41]:
# Columns exported to the labeling dataset, in output order.
features = [
    # identification
    'date', 'token_id',
    # sneaker description
    'rarity', 'sneaker_type', 'genesis', 'level',
    # base attributes
    'base_performance', 'base_fortune', 'base_joy', 'base_durability',
    # current attributes
    'performance', 'fortune', 'joy', 'durability',
    'mint',
    # listed price and model outputs
    'price',
    'predict', 'predict_base',
    'predict_diff_pct', 'predict_base_diff_pct',
    'predict_profit',
]

In [42]:
# Take an independent copy restricted to the export columns listed in `features`.
df_tmp = data[features].copy()

In [43]:
# Keep rows where both models predict a price at least 10% above the listed price.
df_tmp = df_tmp.loc[
    df_tmp['predict_diff_pct'].ge(10) & df_tmp['predict_base_diff_pct'].ge(10)
]
df_tmp

Unnamed: 0,date,token_id,rarity,sneaker_type,genesis,level,base_performance,base_fortune,base_joy,base_durability,...,fortune,joy,durability,mint,price,predict,predict_base,predict_diff_pct,predict_base_diff_pct,predict_profit
18014,2022-09-03 00:33:01,36449,common,sprinter,False,5,5.7,1.1,3.1,2.7,...,1.6,4.6,4.2,2.0,0.60,0.71,0.75,18,25,0.09
17970,2022-09-03 02:24:55,39560,common,ranger,False,0,4.2,7.9,8.0,1.5,...,7.9,8.0,1.5,0.0,0.69,0.85,0.88,23,27,0.12
17898,2022-09-03 05:28:13,38346,common,ranger,False,3,3.0,5.3,8.7,7.7,...,6.8,11.4,10.1,0.0,0.75,0.84,0.92,11,22,0.10
17792,2022-09-03 07:28:49,14857,common,sprinter,True,5,9.1,1.2,8.5,6.4,...,1.7,12.9,12.4,2.0,0.75,0.83,0.93,10,24,0.11
17480,2022-09-03 12:39:43,38929,common,hiker,False,0,5.6,6.0,3.9,1.9,...,6.0,3.9,1.9,0.0,0.79,0.89,0.89,12,12,0.15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86,2022-09-16 08:47:39,46132,uncommon,ranger,False,5,14.5,21.5,17.4,13.8,...,57.3,25.9,20.8,0.0,3.18,3.63,3.81,14,19,0.32
65,2022-09-16 09:24:58,45433,uncommon,sprinter,False,5,15.2,22.0,21.2,17.5,...,58.0,31.7,26.5,2.0,2.38,2.69,2.62,13,10,0.16
57,2022-09-16 09:35:25,17911,common,hiker,True,10,3.7,1.0,2.1,1.0,...,2.0,4.1,7.0,2.0,0.84,0.93,0.96,10,14,0.19
40,2022-09-16 10:06:16,16889,common,ranger,True,19,8.3,1.7,2.5,2.8,...,4.8,7.5,19.8,2.0,1.56,1.82,1.73,16,10,0.24


In [44]:
# Order the selection from the most recent date to the oldest.
df_tmp = df_tmp.sort_values('date', ascending=False)

In [45]:
# Write the selection to a CSV file in the working directory, without the row index.
df_tmp.to_csv('labeling_profit_dataset.csv', index=False)

In [48]:
# Write the same frame to an Excel file. Unlike the CSV export, index=False is not passed,
# so the row index is written as well — NOTE(review): confirm this difference is intended.
# Writing .xlsx needs the openpyxl package, which this notebook installs in a separate cell.
df_tmp.to_excel("labeling_profit_dataset.xlsx")

In [47]:
pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.0.10-py2.py3-none-any.whl (242 kB)
[K     |████████████████████████████████| 242 kB 51.7 MB/s eta 0:00:01
[?25hCollecting et-xmlfile
  Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.0.10
Note: you may need to restart the kernel to use updated packages.
