O objetivo dessa análise é verificar se as características da rede de vendedores de NFTs em 2020 podem ajudar a prever os preços médios desses NFTs em 2021. A hipótese central é que o preço de um NFT não depende apenas de quem o vende, mas também da rede social em torno desse vendedor.

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Bibliotecas

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
# Aqui é testando o tqdm
from tqdm import tqdm
import time

## Carregando o grafo do ano 2020

In [None]:
# dados20 = pd.read_csv('/content/drive/MyDrive/blockchain_network/dataset/data2020.csv')
# dados21 = pd.read_csv('/content/drive/MyDrive/blockchain_network/dataset/data2021.csv')

dados20 = pd.read_csv('data2020.csv')
dados21 = pd.read_csv('data2021.csv')

In [3]:
dados20.head()

Unnamed: 0,ID_token,Seller_address,Buyer_address,Price_USD,Datetime_updated,Collection_cleaned,Category
0,111864485.0,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0xa0a0eaa4760d99f9173003185d0efa5bed675e78,0.36014,2020-01-31,Godsunchained,Games
1,124368257.0,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0xa0a0eaa4760d99f9173003185d0efa5bed675e78,0.450175,2020-01-31,Godsunchained,Games
2,8389284.0,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0xa0a0eaa4760d99f9173003185d0efa5bed675e78,0.135052,2020-01-31,Godsunchained,Games
3,99946150.0,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0xa0a0eaa4760d99f9173003185d0efa5bed675e78,0.144056,2020-01-31,Godsunchained,Games
4,17163902.0,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0xa0a0eaa4760d99f9173003185d0efa5bed675e78,0.090035,2020-01-31,Godsunchained,Games


###  Selecionar NFTs de 2020 que foram negociadas em 2021

In [4]:
id_tokens = set(dados21['ID_token']) # --> aqui tô pegando os nfts de 2020 que também foram compratilhados em 2021

filter = dados20[dados20['ID_token'].isin(id_tokens)]
filter # --> nesse dataset aqui estão as transações que aparecem os NFTs que foram comercializados em 2020 e 2021

Unnamed: 0,ID_token,Seller_address,Buyer_address,Price_USD,Datetime_updated,Collection_cleaned,Category
713,7418509.0,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0x0008d343091ef8bd3efa730f6aae5a26a285c7a2,71.825850,2020-01-29,Godsunchained,Games
990,40581338.0,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0x2e044e95be6207474b381fb3721b782809cf6af8,10.493581,2020-01-29,Godsunchained,Games
1010,124329580.0,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0x901c38bad8fdcfd42c10230d2fb151d4afa5017b,15.655179,2020-01-28,Godsunchained,Games
3486,104652409.0,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0x16cac4f493db73c12ff9febbf794be4a31dc865d,479.970000,2020-01-25,Godsunchained,Games
3702,111789380.0,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0x16cac4f493db73c12ff9febbf794be4a31dc865d,77.404800,2020-01-24,Godsunchained,Games
...,...,...,...,...,...,...,...
1245941,8701,0x171ab540b9cb730626db91f648e2b09eb5363484,0x916d82e095ff6ba820d1daef029420c4007b0c08,456.961400,2020-12-01,Pgfk,Utility
1245942,85605,0x83b62309d64ed613fa8f6395cd7ed84c23b79149,0xc29969b398ff2a42e0280e783b83be3557c38b1e,15.031625,2020-12-01,Rari,Art
1245943,40,0xb0b0a3e99a89ddb6c03e2254d337062d7bcef9cf,0x17465f2a70b6c6b39e333615c27cecba3f937a8b,91.807548,2020-12-01,Duckdaohunter,Collectible
1245951,9004,0xfd68082c61f208183cf55b309de214206b8f0082,0x01e4625e44820daf8526730b79f0ed4c023c37a0,73.895468,2020-12-01,Superrare,Art


In [5]:
filter

Unnamed: 0,ID_token,Seller_address,Buyer_address,Price_USD,Datetime_updated,Collection_cleaned,Category
713,7418509.0,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0x0008d343091ef8bd3efa730f6aae5a26a285c7a2,71.825850,2020-01-29,Godsunchained,Games
990,40581338.0,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0x2e044e95be6207474b381fb3721b782809cf6af8,10.493581,2020-01-29,Godsunchained,Games
1010,124329580.0,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0x901c38bad8fdcfd42c10230d2fb151d4afa5017b,15.655179,2020-01-28,Godsunchained,Games
3486,104652409.0,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0x16cac4f493db73c12ff9febbf794be4a31dc865d,479.970000,2020-01-25,Godsunchained,Games
3702,111789380.0,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0x16cac4f493db73c12ff9febbf794be4a31dc865d,77.404800,2020-01-24,Godsunchained,Games
...,...,...,...,...,...,...,...
1245941,8701,0x171ab540b9cb730626db91f648e2b09eb5363484,0x916d82e095ff6ba820d1daef029420c4007b0c08,456.961400,2020-12-01,Pgfk,Utility
1245942,85605,0x83b62309d64ed613fa8f6395cd7ed84c23b79149,0xc29969b398ff2a42e0280e783b83be3557c38b1e,15.031625,2020-12-01,Rari,Art
1245943,40,0xb0b0a3e99a89ddb6c03e2254d337062d7bcef9cf,0x17465f2a70b6c6b39e333615c27cecba3f937a8b,91.807548,2020-12-01,Duckdaohunter,Collectible
1245951,9004,0xfd68082c61f208183cf55b309de214206b8f0082,0x01e4625e44820daf8526730b79f0ed4c023c37a0,73.895468,2020-12-01,Superrare,Art


In [None]:
filter.to_csv('/content/drive/MyDrive/blockchain_network/dataset/nfts-20-21.csv')

### Carregar o grafo de 2020

In [6]:
graph20 = pd.read_csv('graph_2020')
graph20.head()

Unnamed: 0.1,Unnamed: 0,origin,destiny,value,transactions
0,0,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0xa0a0eaa4760d99f9173003185d0efa5bed675e78,25943.372174,897
1,1,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0x98c0fe160b049d3436d2c3c26bf454fcfad2a8c1,1088.202781,192
2,2,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0x4644d112db93f1c2f012a66a582f286846db8687,458.860506,37
3,3,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0xa0fec1a6b8453873041c7529906de5c1acc1b26a,436.942208,75
4,4,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0x9254f7f72bc6294ad6569d1ab78139121db880f6,715.399528,185


In [7]:
graph20.shape

(478110, 5)

- Para cada nó, calcular a média dos NFTs comprados e a média dos NFTS vendidos

In [8]:
dados20.head()

Unnamed: 0,ID_token,Seller_address,Buyer_address,Price_USD,Datetime_updated,Collection_cleaned,Category
0,111864485.0,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0xa0a0eaa4760d99f9173003185d0efa5bed675e78,0.36014,2020-01-31,Godsunchained,Games
1,124368257.0,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0xa0a0eaa4760d99f9173003185d0efa5bed675e78,0.450175,2020-01-31,Godsunchained,Games
2,8389284.0,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0xa0a0eaa4760d99f9173003185d0efa5bed675e78,0.135052,2020-01-31,Godsunchained,Games
3,99946150.0,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0xa0a0eaa4760d99f9173003185d0efa5bed675e78,0.144056,2020-01-31,Godsunchained,Games
4,17163902.0,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0xa0a0eaa4760d99f9173003185d0efa5bed675e78,0.090035,2020-01-31,Godsunchained,Games


### Aqui criei um novo grafo apenas com as transações que correspondem aos nfts transacionados nos dois anos

In [10]:
def create_graph(graph20): # --> essa função aqui é só pra criar o grafo (questões de organização msm)

  graph = nx.DiGraph()

  for index, row in graph20.iterrows():
    seller = row['Seller_address']
    buyer = row['Buyer_address']
    value = row['Price_USD']

    if graph.has_edge(seller, buyer):
      graph[seller][buyer]['value'] += value
      graph[seller][buyer]['transactions'] += 1
    else:
     graph.add_edge(seller, buyer, value=value, transactions=1)

  edge_labels = {}
  for u, v, dados in graph.edges(data=True):
      edge_labels[(u, v)] = f"Valor: {dados['value']}\nTransações: {dados['transactions']}"

  edges_data = []

  for origin, destiny, dados in tqdm(graph.edges(data=True), desc = "Criando grafo"):
      edges_data.append({
          'origin': origin,
          'destiny': destiny,
          'value': dados['value'],
          'transactions': dados['transactions']
      })

  df_edges = pd.DataFrame(edges_data)
  return graph, df_edges

In [11]:
test_graph, df_graph = create_graph(filter)

Criando grafo: 100%|████████████████| 125648/125648 [00:00<00:00, 608456.30it/s]


In [12]:
df_graph.head()

Unnamed: 0,origin,destiny,value,transactions
0,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0x0008d343091ef8bd3efa730f6aae5a26a285c7a2,71.82585,1
1,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0x2e044e95be6207474b381fb3721b782809cf6af8,19.529882,2
2,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0x901c38bad8fdcfd42c10230d2fb151d4afa5017b,15.655179,1
3,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0x16cac4f493db73c12ff9febbf794be4a31dc865d,557.3748,2
4,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,0xbce3bd3b206946abbe094903ae2b4244b52fb4e9,20.1575,1


### Calculando as métrcas de rede

- Calculo das métricas de rede definidas e utilizadas para a realização das análises e predição

In [None]:
def seller_buyer_nfts(dados20, graph20): # essa função aqui calcula a média (R$) de NFTS comprados e vendidos por cada nó

    graph20['origin'] = graph20['origin'].astype(str)
    graph20['destiny'] = graph20['destiny'].astype(str)
    dados20['Seller_address'] = dados20['Seller_address'].astype(str)
    dados20['Buyer_address'] = dados20['Buyer_address'].astype(str)

    dados20.loc[:, 'origin'] = dados20['Seller_address']
    dados20.loc[:, 'destiny'] = dados20['Buyer_address']

    # --> aqui tá unindo os dados (esta parte do código não é usada para as métricas do grafo, mas é mantida)
    merged_df = graph20.merge(dados20, on=['origin', 'destiny'], how='left')

    # --> pego os nós unicos concatenando origem + destino
    nodes = pd.concat( [graph20['origin'], graph20['destiny']]).unique()
    node_data = pd.DataFrame(nodes, columns=['node']) # --> nesse novo dataset aqui vou armazenar as novas métricas


    origin_NFT = dados20.groupby('Seller_address')['Price_USD'].mean()
    destiny_NFT = dados20.groupby('Buyer_address')['Price_USD'].mean()

    node_data['NFT_mean_origin'] = node_data['node'].map(origin_NFT)
    node_data['NFT_mean_destiny'] = node_data['node'].map(destiny_NFT)

    # Aqui é importante preencher NaN, pois nem todo nó pode ter comprado E vendido
    node_data['NFT_mean_origin'] = node_data['NFT_mean_origin'].fillna(0)
    node_data['NFT_mean_destiny'] = node_data['NFT_mean_destiny'].fillna(0)

    graph = nx.DiGraph()
    # As arestas do seu grafo já possuem o atributo 'value', que será usado como 'weight'.
    for _, row in tqdm(graph20.iterrows(), total=len(graph20), desc="Adicionando arestas ao grafo"):
        graph.add_edge(row['origin'], row['destiny'], weight=row['value']) # 'weight' é o 'value' do seu dataframe

    # 5 - Para cada nó calcular o grau (x3)
    in_degree = dict(graph.in_degree())
    out_degree = dict(graph.out_degree())

    node_data['in_degree'] = node_data['node'].map(in_degree).fillna(0)
    node_data['out_degree'] = node_data['node'].map(out_degree).fillna(0)

    # Adicionando atributos de média de venda/compra para acesso posterior nos vizinhos
    for index, row in tqdm(node_data.iterrows(), total = len(node_data), desc="Adicionando atributos aos nós do grafo"):
        graph.nodes[row['node']]['mean_sale'] = row['NFT_mean_origin']
        graph.nodes[row['node']]['mean_buy'] = row['NFT_mean_destiny']


    # Para cada nó calcular a média das médias dos nfts comprados pelos seus vizinhos diretos (x4)
    # Para cada nó calcular a média das médias dos nfts vendidos pelos seus vizinhos diretos (x5)
    for node in tqdm(graph.nodes(), total = len(graph.nodes()), desc = "Calculando médias dos vizinhos diretos"):
      neighbors = list(graph.neighbors(node)) # vizinhos de saída
      if not neighbors:
        node_data.loc[node_data['node'] == node, 'mean_seller_neighbors'] = 0
        node_data.loc[node_data['node'] == node, 'mean_buyer_neighbors'] = 0
        continue

      medias_vendas_vizinhos = [graph.nodes[neigh]['mean_sale'] for neigh in neighbors]
      medias_compras_vizinhos = [graph.nodes[neigh]['mean_buy'] for neigh in neighbors]

      node_data.loc[node_data['node'] == node, 'mean_seller_neighbors'] = sum(medias_vendas_vizinhos) / len(medias_vendas_vizinhos)
      node_data.loc[node_data['node'] == node, 'mean_buyer_neighbors'] = sum(medias_compras_vizinhos) / len(medias_compras_vizinhos)

    print('O cálculo do coeficiente de clusterização começou agora')
    clustering_coeficient = nx.clustering(graph) # Para grafos direcionados, pode usar nx.clustering ou nx.transitivity
    node_data['clustering_coeficient'] = node_data['node'].map(clustering_coeficient).fillna(0) # Preencher NaN para nós isolados

    # --- NOVAS MÉTRICAS SOLICITADAS ---

    # 1. Grau Ponderado (Weighted In-Degree e Weighted Out-Degree)
    print("Calculando Grau Ponderado (Weighted Degree)")
    # O 'weight' aqui se refere ao 'value' da aresta que você adicionou.
    in_weighted_degree = dict(graph.in_degree(weight='weight'))
    out_weighted_degree = dict(graph.out_degree(weight='weight'))

    node_data['in_weighted_degree'] = node_data['node'].map(in_weighted_degree).fillna(0)
    node_data['out_weighted_degree'] = node_data['node'].map(out_weighted_degree).fillna(0)

    # # 2. Centralidade de Intermediação (Betweenness Centrality)
    # print("Calculando Centralidade de Intermediação (Betweenness Centrality)")
    
    # betweenness_centrality = nx.betweenness_centrality(graph, weight='weight')
    # node_data['betweenness_centrality'] = node_data['node'].map(betweenness_centrality).fillna(0)

    # # 3. Centralidade de Autovetor (Eigenvector Centrality) ou PageRank
    # # PageRank é geralmente mais robusto para grafos direcionados e mais fácil de convergir.
    # print("Calculando PageRank (como medida de Centralidade de Autovetor)")
    # try:
    #     # max_iter e tol podem ser ajustados para ajudar na convergência para grafos grandes.
    #     pagerank = nx.pagerank(graph, weight='weight', max_iter=1000, tol=1e-06)
    #     node_data['pagerank'] = node_data['node'].map(pagerank).fillna(0)
    # except nx.PowerIterationFailedConvergence:
    #     print("Aviso: PageRank não convergiu após max_iter. Tente aumentar max_iter ou relaxar tol.")
    #     # Se não convergir, pode preencher com 0 ou NaN, dependendo da sua estratégia de tratamento de erros.
    #     node_data['pagerank'] = 0 # Ou np.nan

    return node_data

In [14]:
d = seller_buyer_nfts(filter, df_graph)
d.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dados20['Seller_address'] = dados20['Seller_address'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dados20['Buyer_address'] = dados20['Buyer_address'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dados20.loc[:, 'origin'] = dados20['Seller_address']
A value is trying to b

O cálculo do coeficiente de clusterização começou agora
Calculando Grau Ponderado (Weighted Degree)
Calculando Centralidade de Intermediação (Betweenness Centrality)
Calculando PageRank (como medida de Centralidade de Autovetor)


Unnamed: 0,node,NFT_mean_origin,NFT_mean_destiny,in_degree,out_degree,mean_seller_neighbors,mean_buyer_neighbors,clustering_coeficient,in_weighted_degree,out_weighted_degree,betweenness_centrality,pagerank
0,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,32.067865,0.0,0,46,40.174862,55.122011,0.017874,0.0,2405.089855,0.0,1.3e-05
1,0x0008d343091ef8bd3efa730f6aae5a26a285c7a2,17.206069,42.68173,141,5,102.036117,32.2775,0.005857,8450.982481,86.030344,0.001013205,0.000852
2,0x2e044e95be6207474b381fb3721b782809cf6af8,24.299095,11.326764,25,16,57.806879,126.519027,0.009146,385.109977,510.280987,0.001751702,8.1e-05
3,0x901c38bad8fdcfd42c10230d2fb151d4afa5017b,7.76479,14.503422,2,2,44.158705,14.838145,0.083333,29.006843,23.294369,8.210009e-09,1.3e-05
4,0xbce3bd3b206946abbe094903ae2b4244b52fb4e9,23.874219,6.960332,30,107,220.908546,107.939004,0.022649,341.056257,3867.623524,0.004229695,7.1e-05


In [15]:
d.to_csv('node_data_v2.csv', index = False)

In [17]:
pd.read_csv('node_data_v2.csv').head()

Unnamed: 0,node,NFT_mean_origin,NFT_mean_destiny,in_degree,out_degree,mean_seller_neighbors,mean_buyer_neighbors,clustering_coeficient,in_weighted_degree,out_weighted_degree,betweenness_centrality,pagerank
0,0x4ef40d1bf0983899892946830abf99eca2dbc5ce,32.067865,0.0,0,46,40.174862,55.122011,0.017874,0.0,2405.089855,0.0,1.3e-05
1,0x0008d343091ef8bd3efa730f6aae5a26a285c7a2,17.206069,42.68173,141,5,102.036117,32.2775,0.005857,8450.982481,86.030344,0.001013205,0.000852
2,0x2e044e95be6207474b381fb3721b782809cf6af8,24.299095,11.326764,25,16,57.806879,126.519027,0.009146,385.109977,510.280987,0.001751702,8.1e-05
3,0x901c38bad8fdcfd42c10230d2fb151d4afa5017b,7.76479,14.503422,2,2,44.158705,14.838145,0.083333,29.006843,23.294369,8.210009e-09,1.3e-05
4,0xbce3bd3b206946abbe094903ae2b4244b52fb4e9,23.874219,6.960332,30,107,220.908546,107.939004,0.022649,341.056257,3867.623524,0.004229695,7.1e-05
