In [1]:
import pandas as pd

from functools import reduce
from collections import Counter

## Load Data

In [2]:
# Path to the collection's metadata export, laid out as
# <data root>/<blockchain>/<contract address>/metadata.json.
metadata_fpath = (
    './nft_data/ETHEREUM/'
    '0xbc4ca0eda7647a8ab7c2061c2e118a18a936f13d/'
    'metadata.json'
)

In [3]:
# Read the JSON export at `metadata_fpath` into a DataFrame.
pd_metadata = pd.read_json(path_or_buf=metadata_fpath)

In [4]:
# Preview the first three rows to check which columns were loaded.
pd_metadata.head(n=3)

Unnamed: 0,id,blockchain,collection,contract,tokenId,creators,lazySupply,pending,mintedAt,lastUpdatedAt,...,meta,deleted,originOrders,ammOrders,auctions,totalStock,sellers,lastSale,bestSellOrder,bestBidOrder
0,ETHEREUM:0xbc4ca0eda7647a8ab7c2061c2e118a18a93...,ETHEREUM,ETHEREUM:0xbc4ca0eda7647a8ab7c2061c2e118a18a93...,ETHEREUM:0xbc4ca0eda7647a8ab7c2061c2e118a18a93...,449,[{'account': 'ETHEREUM:0x29ebde3010cb23bac83b3...,0,[],2021-04-30T21:08:58Z,2022-07-15T09:29:58Z,...,"{'name': 'BoredApeYachtClub #449', 'tags': [],...",False,[],{'ids': []},[],0,0,"{'date': '2022-11-01T02:30:59Z', 'seller': 'ET...",,
1,ETHEREUM:0xbc4ca0eda7647a8ab7c2061c2e118a18a93...,ETHEREUM,ETHEREUM:0xbc4ca0eda7647a8ab7c2061c2e118a18a93...,ETHEREUM:0xbc4ca0eda7647a8ab7c2061c2e118a18a93...,7665,[{'account': 'ETHEREUM:0x3f7dd0b106b10d06bb481...,0,[],2021-05-01T08:19:15Z,2022-07-15T08:35:55Z,...,"{'name': 'BoredApeYachtClub #7665', 'tags': []...",False,[],{'ids': []},[],0,0,"{'date': '2022-07-15T08:35:55Z', 'seller': 'ET...",,
2,ETHEREUM:0xbc4ca0eda7647a8ab7c2061c2e118a18a93...,ETHEREUM,ETHEREUM:0xbc4ca0eda7647a8ab7c2061c2e118a18a93...,ETHEREUM:0xbc4ca0eda7647a8ab7c2061c2e118a18a93...,4175,[{'account': 'ETHEREUM:0x090c847ae5623cb7c538b...,0,[],2021-05-01T07:32:44Z,2022-07-15T08:34:29Z,...,"{'name': 'BoredApeYachtClub #4175', 'tags': []...",False,[],{'ids': []},[],0,0,,,


## Preprocess

In [5]:
def _count_attributes(meta):
    """Return how many entries are listed under ``meta['attributes']``."""
    return len(meta['attributes'])


def _attributes_as_dict(meta):
    """Turn the ``meta['attributes']`` entries into a ``{key: value}`` mapping."""
    return {entry['key']: entry['value'] for entry in meta['attributes']}


# Derive two per-token columns from the nested `meta` payload.
token_meta = pd_metadata['meta']
pd_metadata['num_attributes'] = token_meta.map(_count_attributes)
pd_metadata['attributes_dict'] = token_meta.map(_attributes_as_dict)

In [6]:
# Flatten the attribute keys of every token into one list, in row order.
# A comprehension is used instead of `.values.sum()`: summing an object array
# of lists concatenates them one by one (quadratic), and on an empty frame it
# yields the integer 0, which `Counter` cannot consume.
merged_attributes_list = [
    attribute_key
    for token_attributes in pd_metadata['attributes_dict']
    for attribute_key in token_attributes
]

# Number of tokens carrying each attribute key, e.g. {'Hat': 7744, ...}.
attributes_dict = dict(Counter(merged_attributes_list))

In [7]:
# Display how many tokens carry each attribute key.
attributes_dict

{'Eyes': 10000,
 'Mouth': 10000,
 'Hat': 7744,
 'Fur': 10000,
 'Background': 10000,
 'Clothes': 8114,
 'Earring': 2977}

In [8]:
# Attribute keys, in the order `attributes_dict` holds them.
attribute_key_name_list = list(attributes_dict)

# Column holding each attribute's raw value: 'attribute_<key>'.
attribute_key_column_name_list = [f'attribute_{key}' for key in attribute_key_name_list]

# Derived columns share that base name with a '_prob' / '_score' suffix.
attribute_key_prob_column_name_list = [
    f'{column_name}_prob' for column_name in attribute_key_column_name_list
]
attribute_key_score_column_name_list = [
    f'{column_name}_score' for column_name in attribute_key_column_name_list
]

In [9]:
# Spread the per-token attribute mapping into one column per attribute key.
# Tokens that lack a key get None in that key's column.
token_attributes = pd_metadata['attributes_dict']
for key, column_name in zip(attribute_key_name_list, attribute_key_column_name_list):
    # Bind `key` as a default argument so the lambda does not depend on the loop variable.
    pd_metadata[column_name] = token_attributes.apply(lambda attrs, key=key: attrs.get(key))

## Calculate Score

In [10]:
def calc_attribute_rarity_prob_and_score(attribute_key_column_name, df=None):
    """Add per-row probability and rarity-score columns for one attribute column.

    For every distinct non-null value in ``attribute_key_column_name`` this computes
    ``prob = count / number_of_rows`` and ``score = 1 / prob``. Two columns are then
    written to the frame in place: ``<attribute_key_column_name>_score`` and
    ``<attribute_key_column_name>_prob``. Rows whose value has no entry in the counts
    (e.g. a missing attribute stored as None) receive 0 in both columns.

    Args:
        attribute_key_column_name: Name of the column holding the attribute values.
        df: DataFrame to read from and write to. When omitted, the module-level
            ``pd_metadata`` frame is used, matching the original behaviour.

    Returns:
        None. The frame is modified in place.
    """
    frame = pd_metadata if df is None else df
    attribute_values = frame[attribute_key_column_name]
    total_rows = len(frame)

    # value_counts() ignores missing values, so they never get an entry here.
    attr_count_dict = attribute_values.value_counts().to_dict()
    attr_prob_dict = {value: count / total_rows for value, count in attr_count_dict.items()}
    attr_score_dict = {value: 1 / prob for value, prob in attr_prob_dict.items()}

    frame[f'{attribute_key_column_name}_score'] = attribute_values.apply(
        lambda value: attr_score_dict.get(value, 0)
    )
    frame[f'{attribute_key_column_name}_prob'] = attribute_values.apply(
        lambda value: attr_prob_dict.get(value, 0)
    )

In [11]:
# Compute the probability and score columns for every attribute column.
# Only the base column name is needed; the callee derives the output column names itself.
for attribute_key_cname in attribute_key_column_name_list:
    calc_attribute_rarity_prob_and_score(attribute_key_cname)

In [12]:
# Per-token aggregates over the attribute probability columns.
attribute_probs = pd_metadata[attribute_key_prob_column_name_list]
pd_metadata['sum_attribute_prob'] = attribute_probs.sum(axis=1)
pd_metadata['mean_attribute_prob'] = attribute_probs.mean(axis=1)
# Zeros are swapped for 1 so they act as a neutral factor instead of forcing the
# whole product to 0. The sum and mean above still include those zeros.
pd_metadata['mul_attribute_prob'] = attribute_probs.replace(to_replace=0, value=1.).prod(axis=1)

In [13]:
# Per-token aggregates over the attribute rarity-score columns.
attribute_scores = pd_metadata[attribute_key_score_column_name_list]
pd_metadata['sum_attribute_score'] = attribute_scores.sum(axis=1)
pd_metadata['mean_attribute_score'] = attribute_scores.mean(axis=1)
# Zeros are swapped for 1 so they act as a neutral factor instead of forcing the
# whole product to 0. The sum and mean above still include those zeros.
pd_metadata['mul_attribute_score'] = attribute_scores.replace(to_replace=0, value=1.).prod(axis=1)

In [17]:
# Dense-rank each probability aggregate; with ascending=True the smallest value gets rank 1.
prob_rank_sources = {
    'sum_attribute_prob_rank': 'sum_attribute_prob',
    'mean_attribute_prob_rank': 'mean_attribute_prob',
    'mul_attribute_prob_rank': 'mul_attribute_prob',
}
for rank_column, source_column in prob_rank_sources.items():
    pd_metadata[rank_column] = pd_metadata[source_column].rank(method='dense', ascending=True)

In [15]:
# Dense-rank each score aggregate; with ascending=False the largest value gets rank 1.
score_rank_sources = {
    'sum_attribute_score_rank': 'sum_attribute_score',
    'mean_attribute_score_rank': 'mean_attribute_score',
    'mul_attribute_score_rank': 'mul_attribute_score',
}
for rank_column, source_column in score_rank_sources.items():
    pd_metadata[rank_column] = pd_metadata[source_column].rank(method='dense', ascending=False)

In [18]:
# Show the row(s) holding rank 1 on the summed attribute score.
is_top_sum_score = pd_metadata['sum_attribute_score_rank'] == 1
pd_metadata[is_top_sum_score]

Unnamed: 0,id,blockchain,collection,contract,tokenId,creators,lazySupply,pending,mintedAt,lastUpdatedAt,...,mul_attribute_prob,sum_attribute_score,mean_attribute_score,mul_attribute_score,sum_attribute_prob_rank,mean_attribute_prob_rank,mul_attribute_prob_rank,sum_attribute_score_rank,mean_attribute_score_rank,mul_attribute_score_rank
4581,ETHEREUM:0xbc4ca0eda7647a8ab7c2061c2e118a18a93...,ETHEREUM,ETHEREUM:0xbc4ca0eda7647a8ab7c2061c2e118a18a93...,ETHEREUM:0xbc4ca0eda7647a8ab7c2061c2e118a18a93...,8135,[{'account': 'ETHEREUM:0x536835937de4340f73d98...,0,[],2022-02-21T04:59:11Z,2022-02-21T04:59:11Z,...,6.443575e-12,662.900974,94.700139,155193400000.0,1915.0,1908.0,6.0,1.0,1.0,6.0
