***Can only retrieve up to 1000 data points from Flipside API***

In [1]:
import pandas as pd
import numpy as np 
import requests
import json
import time
from flipside import Flipside
import os
import traceback
from dotenv import load_dotenv
import datetime as dt
import plotly.express as px
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
#from prophet import Prophet
from dash import Dash, html, dcc, Input, Output, State, callback
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from dash import dash_table



from sql.sql_scripts import mints_query, sales_query, eth_price_query

In [2]:
load_dotenv()

True

In [3]:
pd.options.display.float_format = '{:,.2f}'.format


In [4]:
opensea_api_key = os.getenv('opensea_api_key')

***Listing Data***

In [5]:
def fetch_listings(api_key, delay_between_requests=1):
    base_url = "https://api.opensea.io/api/v2/listings/collection/3dns-powered-domains/all"
    headers = {
        "accept": "application/json",
        "x-api-key": api_key
    }
    params = {"limit": 100} 

    listings = []
    page_count = 0

    while True:
        response = requests.get(base_url, headers=headers, params=params)
        if response.status_code == 200:
            data = response.json()
            fetched_listings = data.get("listings", [])
            listings.extend(fetched_listings)
            page_count += 1
            
            # Extract and print the cursor
            next_cursor = data.get("next")
            print(f"Page {page_count}, Cursor: {next_cursor}, Listings Fetched: {len(fetched_listings)}")
            
            if next_cursor:
                params['next'] = next_cursor  # Update the 'next' parameter for the next request
            else:
                break  # No more pages to fetch
                
            # Implementing delay
            time.sleep(delay_between_requests)
            
        else:
            print(f"Failed to fetch data: {response.status_code}")
            break

    print(f"Total pages fetched: {page_count}")
    print(f"Total listings fetched: {len(listings)}")
    
    df = pd.DataFrame(listings)
    return df

***Descriptions***

In [6]:
def save_last_identifier(identifier):
    with open("last_identifier.txt", "w") as file:
        file.write(identifier)

def load_last_identifier():
    try:
        with open("last_identifier.txt", "r") as file:
            return file.read().strip()
    except FileNotFoundError:
        return None

In [7]:
def fetch_all_descriptions(api_key, delay_between_requests=1):
    base_url = "https://api.opensea.io/api/v2/collection/3dns-powered-domains/nfts"
    headers = {
        "accept": "application/json",
        "x-api-key": api_key
    }
    params = {"limit": 100}

    all_descriptions = []

    page_count = 0
    last_identifier = load_last_identifier()

    while True:
        if last_identifier:
            params['last_identifier'] = last_identifier

        response = requests.get(base_url, headers=headers, params=params)
        if response.status_code == 200:
            data = response.json()
            fetched_descriptions = data.get("nfts", [])
            
            if not fetched_descriptions:
                break

            # Process only name and identifier for each description
            for description in fetched_descriptions:
                processed_description = {
                    "name": description.get('name'),
                    "identifier": description.get('identifier')
                }
                all_descriptions.append(processed_description)
            
            # Update the last_identifier to the latest one fetched
            last_identifier = fetched_descriptions[-1].get('identifier')
            save_last_identifier(last_identifier)
            
            page_count += 1
            next_cursor = data.get("next")
            print(f"Page {page_count}, Cursor: {next_cursor} Descriptions Fetched: {len(fetched_descriptions)}, total fetched: {len(all_descriptions)}")
            
            if next_cursor:
                params['next'] = next_cursor
            else:
                break  # No more pages to fetch

            time.sleep(delay_between_requests)
        else:
            print(f"Failed to fetch data: {response.status_code}")
            break

    print(f"Total pages fetched: {page_count}, Total descriptions fetched: {len(all_descriptions)}")
    
    # Save the processed descriptions to a file
    df = pd.DataFrame(all_descriptions)
    return df

***Events***

In [8]:
import json
import os

def save_last_timestamp(event_type, timestamp):
    data = {}
    if os.path.exists("last_timestamps.json"):
        with open("last_timestamps.json", "r") as file:
            data = json.load(file)
    data[event_type] = timestamp
    with open("last_timestamps.json", "w") as file:
        json.dump(data, file)

def load_last_timestamp(event_type):
    if os.path.exists("last_timestamps.json"):
        with open("last_timestamps.json", "r") as file:
            data = json.load(file)
        return data.get(event_type, None)
    return None

In [9]:
def fetch_event_type(api_key, event_type, all_events, params, headers):
    base_url = f"https://api.opensea.io/api/v2/events/collection/3dns-powered-domains"
    params['event_type'] = event_type
    
    # Load the last timestamp/identifier
    last_timestamp = load_last_timestamp(event_type)
    if last_timestamp:
        params['occurred_after'] = last_timestamp
    
    page_count = 0
    while True:
        response = requests.get(base_url, headers=headers, params=params)
        if response.status_code == 200:
            data = response.json()
            fetched_events = data.get("asset_events", [])
            all_events.extend(fetched_events)
            
            if fetched_events:
                # Update the last timestamp/identifier to the latest one fetched
                last_event_time = fetched_events[-1].get("created_date")
                save_last_timestamp(event_type, last_event_time)
            
            page_count += 1
            next_cursor = data.get("next")
            print(f"Fetching {event_type}: Page {page_count}, Events Fetched: {len(fetched_events)}, Total Events: {len(all_events)}, next cursor: {next_cursor}")
            
            if next_cursor:
                params['next'] = next_cursor
            else:
                break  # No more pages to fetch

            time.sleep(1)  # Delay between requests
        else:
            print(f"Failed to fetch {event_type} data: HTTP {response.status_code}, Response: {response.text}")
            break

def fetch_all_events(api_key):
    headers = {
        "accept": "application/json",
        "x-api-key": api_key
    }
    params = {
        "limit": 50  # Adjust the limit as needed
    }

    all_events = []

    # Fetch listings
    fetch_event_type(api_key, "listing", all_events, params.copy(), headers)

    # Fetch sales
    fetch_event_type(api_key, "sale", all_events, params.copy(), headers)

    # Save the fetched events to a DataFrame
    print(f"Total events fetched: {len(all_events)}")
    df = pd.DataFrame(all_events)
    return df 

***Flipside Data***

In [10]:
flipside_api_key = os.getenv("FLIPSIDE_API_KEY")
flipside = Flipside(flipside_api_key, "https://api-v2.flipsidecrypto.xyz")

In [11]:
def flipside_api_results(query):
  query_result_set = flipside.query(query)
  # what page are we starting on?
  current_page_number = 1

  # How many records do we want to return in the page?
  page_size = 1000

  # set total pages to 1 higher than the `current_page_number` until
  # we receive the total pages from `get_query_results` given the 
  # provided `page_size` (total_pages is dynamically determined by the API 
  # based on the `page_size` you provide)

  total_pages = 2


  # we'll store all the page results in `all_rows`
  all_rows = []

  while current_page_number <= total_pages:
    results = flipside.get_query_results(
      query_result_set.query_id,
      page_number=current_page_number,
      page_size=page_size
    )

    total_pages = results.page.totalPages
    if results.records:
        all_rows = all_rows + results.records
    
    current_page_number += 1

  return pd.DataFrame(all_rows)

***Data Retrieval/Processing***

In [12]:
mint_df = flipside_api_results(mints_query)
mint_df.to_csv('data/mint_data.csv')

mint_df = pd.read_csv('data/mint_data.csv')
mint_df

In [13]:
sales_df = flipside_api_results(sales_query)
sales_df.to_csv('data/sales_data.csv')


sales_df = pd.read_csv('data/sales_data.csv')
sales_df

In [14]:
eth_usd_df = flipside_api_results(eth_price_query)
eth_usd_df.to_csv('data/eth_usd.csv')


eth_usd_df = pd.read_csv('data/eth_usd.csv')
eth_usd_df

events_df = fetch_all_events(api_key= opensea_api_key)
events_df.to_json('data/events_data.json', orient='records', date_format='iso')


In [15]:
events_df = pd.read_json('data/events_data.json', orient='records')
events_df

Unnamed: 0,event_type,order_hash,order_type,chain,protocol_address,start_date,expiration_date,asset,quantity,maker,taker,payment,criteria,event_timestamp,is_private_listing,closing_date,nft,seller,buyer,transaction
0,order,0x46f734ffea0b7f6054b72927c9df96e6692045e8fe39...,listing,optimism,0x0000000000000068f116a894984e2db1123eb395,1720648237.00,1723326637.00,{'identifier': '105524492317869377796516137615...,1,0x1ec410d35b81369d5c0d8fcadb340fdecce5c250,,"{'quantity': '50000000000000000000', 'token_ad...",{},1720648284,0.00,,,,,
1,order,0xc2c81d45617f524e0b38a8828484e8fc14129d65ecff...,listing,optimism,0x0000000000000068f116a894984e2db1123eb395,1720647918.00,1723326318.00,{'identifier': '932502001321155872797741020477...,1,0x1ec410d35b81369d5c0d8fcadb340fdecce5c250,,"{'quantity': '50000000000000000000', 'token_ad...",{},1720648171,0.00,,,,,
2,order,0x20707ea7356cbee5f71943f71c95e15c23ac05b7bf58...,listing,optimism,0x0000000000000068f116a894984e2db1123eb395,1720647759.00,1721252536.00,{'identifier': '493513286051206672797082654485...,1,0x0d3f5a7a1ee78e743e25c18e66942fcbcd84ccad,,"{'quantity': '770000000000000000', 'token_addr...",{},1720648122,0.00,,,,,
3,order,0x49729029191017c544a3f67f03968dd12bf16c69e1dd...,listing,optimism,0x0000000000000068f116a894984e2db1123eb395,1720562975.00,1736460564.00,{'identifier': '106968891900401063741132344938...,1,0x386ae4d6db89e0bbd41ef8cba13460edcf867420,,"{'quantity': '33000000000000000000', 'token_ad...",{},1720563049,0.00,,,,,
4,order,0xaedefdd5ee2c58433e4135132b5b8454653725d40859...,listing,optimism,0x0000000000000068f116a894984e2db1123eb395,1720561057.00,1721165912.00,{'identifier': '283604669650136775214415072033...,1,0x9a93ae4f408a1e7d178650ca9f7ab2135c512eda,,"{'quantity': '1000000000000000000', 'token_add...",{},1720561062,0.00,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2996,sale,0x8bde44f2d46537cf8d025cc498eaf7bdeadce3631f15...,,optimism,0x00000000000000adc04c56bf30ac9d3c0aaf14dc,,,,1,,,"{'quantity': '1000000000000000', 'token_addres...",,1706970257,,1706970257.00,{'identifier': '260923785748676206958654735781...,0xcc0e0b9ebd4ce352f5a50b3ae77ea01202c284ee,0xc48a8ac51df7b71e500a5580b89e47ab26ca1472,0x60e952a8e0fa8d052791b2e4a22fd52eb3c759511e62...
2997,sale,0xc0547b5b8d575e3ba16b529ee2530a35c437bc75fa93...,,optimism,0x00000000000000adc04c56bf30ac9d3c0aaf14dc,,,,1,,,"{'quantity': '110000000000000000', 'token_addr...",,1706653465,,1706653465.00,{'identifier': '566479911085778512201556021651...,0x240ad467a71210629d71d4de22ebde27951c83fc,0x82eb45562f991329ed2867f43fc60f0ba52c3dab,0x6672189deb7829838aabba20ddeb66a3cb5fee5b55a2...
2998,sale,0xff4b9bdd3f140f8211864ef34e0a07ea0a92eaf98f7d...,,optimism,0x00000000000000adc04c56bf30ac9d3c0aaf14dc,,,,1,,,"{'quantity': '10000000000000000', 'token_addre...",,1706203283,,1706203283.00,{'identifier': '364706086468987072255605061503...,0x7b363822d744143d51c69b0757165c987a6e4ef4,0xe68e8cc7ff772b026c062b9cc28246676d044947,0xd8e71f4d52c43fa770b9aa7ad7d5cfb073f04c7495e0...
2999,sale,0x03188e15aaf9596307117e30355f6b2ac79665fbd1c2...,,optimism,0x00000000000000adc04c56bf30ac9d3c0aaf14dc,,,,1,,,"{'quantity': '150000000000000000', 'token_addr...",,1704080589,,1704080589.00,{'identifier': '992340878683635895851755157396...,0x31b32020fb1bdf0228c58b80590f07b235f2b0ce,0x64233eaa064ef0d54ff1a963933d0d2d46ab5829,0x186d1ceacf620bab710d11cf928b79e5dd46790ff1d5...


descriptions_df = fetch_all_descriptions(api_key= opensea_api_key)
descriptions_df.to_json('data/descriptions_data.json', orient='records', date_format='iso')

In [16]:
descriptions_df = pd.read_json('data/descriptions_data.json', orient='records')
descriptions_df['name'].tail(20)

16399     tokentroopers.xyz
16400            mudbox.xyz
16401     beallclothing.com
16402          brandley.xyz
16403        tameimpala.xyz
16404          efferium.org
16405           wizkika.com
16406    onchaindomains.org
16407              3dns.box
16408                   box
16409               finance
16410                   inc
16411                  link
16412                   wtf
16413                   xyz
16414                  tech
16415                    io
16416                   org
16417                   net
16418                   com
Name: name, dtype: object

listings_df = fetch_listings(api_key= opensea_api_key, delay_between_requests=1)
listings_df.to_json('data/listings_data.json', orient='records', date_format='iso')

In [17]:
listings_df = pd.read_json('data/listings_data.json', orient='records')
listings_df

Unnamed: 0,order_hash,chain,type,price,protocol_data,protocol_address
0,0x902ff72f57b579d6a2fac92a0714ca5274172ccdeb51...,optimism,basic,"{'current': {'currency': 'ETH', 'decimals': 18...",{'parameters': {'offerer': '0x64233eaa064ef0d5...,0x00000000000000adc04c56bf30ac9d3c0aaf14dc
1,0x472882ed6ef6df3e646a968fe399a794cb2d80783c0d...,optimism,basic,"{'current': {'currency': 'ETH', 'decimals': 18...",{'parameters': {'offerer': '0x47baba9b83c7cd48...,0x00000000000000adc04c56bf30ac9d3c0aaf14dc
2,0x3040ad3f37193e684cb2baa0811d21cd345108df5a2a...,optimism,basic,"{'current': {'currency': 'ETH', 'decimals': 18...",{'parameters': {'offerer': '0x97f6f6c5503b9fe3...,0x00000000000000adc04c56bf30ac9d3c0aaf14dc
3,0xb62518c46df32c07046a53f3df61112cc4460bc648ac...,optimism,basic,"{'current': {'currency': 'ETH', 'decimals': 18...",{'parameters': {'offerer': '0x97f6f6c5503b9fe3...,0x00000000000000adc04c56bf30ac9d3c0aaf14dc
4,0xe4c2e069a35f4b943e449d8e510f952ff3cb2fc133d3...,optimism,basic,"{'current': {'currency': 'ETH', 'decimals': 18...",{'parameters': {'offerer': '0x99d104c7a9d51953...,0x00000000000000adc04c56bf30ac9d3c0aaf14dc
...,...,...,...,...,...,...
707,0xaedefdd5ee2c58433e4135132b5b8454653725d40859...,optimism,basic,"{'current': {'currency': 'ETH', 'decimals': 18...",{'parameters': {'offerer': '0x9a93ae4f408a1e7d...,0x0000000000000068f116a894984e2db1123eb395
708,0x49729029191017c544a3f67f03968dd12bf16c69e1dd...,optimism,basic,"{'current': {'currency': 'ETH', 'decimals': 18...",{'parameters': {'offerer': '0x386ae4d6db89e0bb...,0x0000000000000068f116a894984e2db1123eb395
709,0x20707ea7356cbee5f71943f71c95e15c23ac05b7bf58...,optimism,basic,"{'current': {'currency': 'ETH', 'decimals': 18...",{'parameters': {'offerer': '0x0d3f5a7a1ee78e74...,0x0000000000000068f116a894984e2db1123eb395
710,0xc2c81d45617f524e0b38a8828484e8fc14129d65ecff...,optimism,basic,"{'current': {'currency': 'ETH', 'decimals': 18...",{'parameters': {'offerer': '0x1ec410d35b81369d...,0x0000000000000068f116a894984e2db1123eb395


In [18]:
def unpack_protocol_data(row):
    protocol_data = row['protocol_data']
    parameters = protocol_data.get('parameters', {})
    consideration = parameters.get('consideration', [{}])
    offer = parameters.get('offer', [{}])
    price = row['price']['current']
    
    chain = row['chain']
    order_hash = row['order_hash']
    currency = price.get('currency')
    price_string = price.get('value')
    price_in_eth = float(price_string) / (10 ** price.get('decimals', 18))
    primary_recipient = consideration[0].get('recipient') if consideration else None
    identifier_or_criteria = offer[0].get('identifierOrCriteria') if offer else None
    start_time = parameters.get('startTime')
    end_time = parameters.get('endTime')
    
    return pd.Series([
        chain, order_hash, currency, price_string, price_in_eth, 
        primary_recipient, identifier_or_criteria, start_time, end_time
    ])

In [19]:
unpacked_columns = listings_df.apply(unpack_protocol_data, axis=1)
unpacked_columns.columns = [
    'chain', 'order_hash', 'currency', 'price_string', 'price_in_eth', 
    'primary_recipient', 'identifier_or_criteria', 'start_time', 'end_time'
]
listings_df = pd.concat([listings_df, unpacked_columns], axis=1)
listings_df = listings_df.drop(columns=['protocol_data'])


In [20]:
listings_df['identifier_or_criteria'] = listings_df['identifier_or_criteria'].astype(float)
listings_df.rename(columns={'identifier_or_criteria':'tokenid'}, inplace=True)

In [21]:
listings_df.drop(columns=['price_string','currency','primary_recipient','chain','price','protocol_address','type'], inplace=True)

In [22]:
descriptions_df.rename(columns={'identifier':'tokenid'}, inplace=True)

In [23]:
descriptions_df['tokenid'] = descriptions_df['tokenid'].astype(float)

In [24]:
listings_with_names = listings_df.merge(descriptions_df, how='left', on='tokenid')
listings_with_names

Unnamed: 0,order_hash,order_hash.1,price_in_eth,tokenid,start_time,end_time,name
0,0x902ff72f57b579d6a2fac92a0714ca5274172ccdeb51...,0x902ff72f57b579d6a2fac92a0714ca5274172ccdeb51...,5.00,"24,976,820,562,208,875,018,004,955,422,466,815,...",1705460459,1721181652,eth.attorney
1,0x472882ed6ef6df3e646a968fe399a794cb2d80783c0d...,0x472882ed6ef6df3e646a968fe399a794cb2d80783c0d...,8.00,"103,348,240,472,784,936,049,086,084,648,101,639...",1705973310,1721694506,open.box
2,0x3040ad3f37193e684cb2baa0811d21cd345108df5a2a...,0x3040ad3f37193e684cb2baa0811d21cd345108df5a2a...,3.00,"102,295,845,026,135,496,883,892,522,779,159,999...",1706042993,1721767789,jewelry.box
3,0xb62518c46df32c07046a53f3df61112cc4460bc648ac...,0xb62518c46df32c07046a53f3df61112cc4460bc648ac...,3.00,"75,270,179,101,124,303,647,088,469,843,731,665,...",1706043120,1721767916,gay.box
4,0xe4c2e069a35f4b943e449d8e510f952ff3cb2fc133d3...,0xe4c2e069a35f4b943e449d8e510f952ff3cb2fc133d3...,8.00,"15,351,911,394,148,160,117,736,899,619,150,406,...",1706069852,1721791047,power.box
...,...,...,...,...,...,...,...
707,0xaedefdd5ee2c58433e4135132b5b8454653725d40859...,0xaedefdd5ee2c58433e4135132b5b8454653725d40859...,1.00,"28,360,466,965,013,677,414,172,494,359,945,146,...",1720561057,1721165912,cypherpunk.box
708,0x49729029191017c544a3f67f03968dd12bf16c69e1dd...,0x49729029191017c544a3f67f03968dd12bf16c69e1dd...,33.00,"106,968,891,900,401,057,912,681,673,593,283,028...",1720562975,1736460564,blum.box
709,0x20707ea7356cbee5f71943f71c95e15c23ac05b7bf58...,0x20707ea7356cbee5f71943f71c95e15c23ac05b7bf58...,0.77,"49,351,328,605,120,670,449,409,649,496,819,358,...",1720647759,1721252536,onchainbot.ai
710,0xc2c81d45617f524e0b38a8828484e8fc14129d65ecff...,0xc2c81d45617f524e0b38a8828484e8fc14129d65ecff...,50.00,"93,250,200,132,115,581,735,009,190,879,184,742,...",1720647918,1723326318,pharmacy.box


In [25]:
box_listings = listings_with_names[listings_with_names['name'].str.endswith('.box')]
box_listings

Unnamed: 0,order_hash,order_hash.1,price_in_eth,tokenid,start_time,end_time,name
1,0x472882ed6ef6df3e646a968fe399a794cb2d80783c0d...,0x472882ed6ef6df3e646a968fe399a794cb2d80783c0d...,8.00,"103,348,240,472,784,936,049,086,084,648,101,639...",1705973310,1721694506,open.box
2,0x3040ad3f37193e684cb2baa0811d21cd345108df5a2a...,0x3040ad3f37193e684cb2baa0811d21cd345108df5a2a...,3.00,"102,295,845,026,135,496,883,892,522,779,159,999...",1706042993,1721767789,jewelry.box
3,0xb62518c46df32c07046a53f3df61112cc4460bc648ac...,0xb62518c46df32c07046a53f3df61112cc4460bc648ac...,3.00,"75,270,179,101,124,303,647,088,469,843,731,665,...",1706043120,1721767916,gay.box
4,0xe4c2e069a35f4b943e449d8e510f952ff3cb2fc133d3...,0xe4c2e069a35f4b943e449d8e510f952ff3cb2fc133d3...,8.00,"15,351,911,394,148,160,117,736,899,619,150,406,...",1706069852,1721791047,power.box
5,0x8bc67ab0f6355d8c78a317dee43dd93aad665ffa66d0...,0x8bc67ab0f6355d8c78a317dee43dd93aad665ffa66d0...,69.00,"92,045,738,473,223,622,079,151,157,139,670,821,...",1706083009,1721804202,dan.box
...,...,...,...,...,...,...,...
706,0x3682a15b341d33dd93f859a3029596e346769720a6ea...,0x3682a15b341d33dd93f859a3029596e346769720a6ea...,1.69,"27,770,965,087,809,130,203,304,471,643,289,691,...",1720527777,1720786974,ia.box
707,0xaedefdd5ee2c58433e4135132b5b8454653725d40859...,0xaedefdd5ee2c58433e4135132b5b8454653725d40859...,1.00,"28,360,466,965,013,677,414,172,494,359,945,146,...",1720561057,1721165912,cypherpunk.box
708,0x49729029191017c544a3f67f03968dd12bf16c69e1dd...,0x49729029191017c544a3f67f03968dd12bf16c69e1dd...,33.00,"106,968,891,900,401,057,912,681,673,593,283,028...",1720562975,1736460564,blum.box
710,0xc2c81d45617f524e0b38a8828484e8fc14129d65ecff...,0xc2c81d45617f524e0b38a8828484e8fc14129d65ecff...,50.00,"93,250,200,132,115,581,735,009,190,879,184,742,...",1720647918,1723326318,pharmacy.box


In [26]:
events_df_copy = events_df.copy()
events_df_copy

Unnamed: 0,event_type,order_hash,order_type,chain,protocol_address,start_date,expiration_date,asset,quantity,maker,taker,payment,criteria,event_timestamp,is_private_listing,closing_date,nft,seller,buyer,transaction
0,order,0x46f734ffea0b7f6054b72927c9df96e6692045e8fe39...,listing,optimism,0x0000000000000068f116a894984e2db1123eb395,1720648237.00,1723326637.00,{'identifier': '105524492317869377796516137615...,1,0x1ec410d35b81369d5c0d8fcadb340fdecce5c250,,"{'quantity': '50000000000000000000', 'token_ad...",{},1720648284,0.00,,,,,
1,order,0xc2c81d45617f524e0b38a8828484e8fc14129d65ecff...,listing,optimism,0x0000000000000068f116a894984e2db1123eb395,1720647918.00,1723326318.00,{'identifier': '932502001321155872797741020477...,1,0x1ec410d35b81369d5c0d8fcadb340fdecce5c250,,"{'quantity': '50000000000000000000', 'token_ad...",{},1720648171,0.00,,,,,
2,order,0x20707ea7356cbee5f71943f71c95e15c23ac05b7bf58...,listing,optimism,0x0000000000000068f116a894984e2db1123eb395,1720647759.00,1721252536.00,{'identifier': '493513286051206672797082654485...,1,0x0d3f5a7a1ee78e743e25c18e66942fcbcd84ccad,,"{'quantity': '770000000000000000', 'token_addr...",{},1720648122,0.00,,,,,
3,order,0x49729029191017c544a3f67f03968dd12bf16c69e1dd...,listing,optimism,0x0000000000000068f116a894984e2db1123eb395,1720562975.00,1736460564.00,{'identifier': '106968891900401063741132344938...,1,0x386ae4d6db89e0bbd41ef8cba13460edcf867420,,"{'quantity': '33000000000000000000', 'token_ad...",{},1720563049,0.00,,,,,
4,order,0xaedefdd5ee2c58433e4135132b5b8454653725d40859...,listing,optimism,0x0000000000000068f116a894984e2db1123eb395,1720561057.00,1721165912.00,{'identifier': '283604669650136775214415072033...,1,0x9a93ae4f408a1e7d178650ca9f7ab2135c512eda,,"{'quantity': '1000000000000000000', 'token_add...",{},1720561062,0.00,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2996,sale,0x8bde44f2d46537cf8d025cc498eaf7bdeadce3631f15...,,optimism,0x00000000000000adc04c56bf30ac9d3c0aaf14dc,,,,1,,,"{'quantity': '1000000000000000', 'token_addres...",,1706970257,,1706970257.00,{'identifier': '260923785748676206958654735781...,0xcc0e0b9ebd4ce352f5a50b3ae77ea01202c284ee,0xc48a8ac51df7b71e500a5580b89e47ab26ca1472,0x60e952a8e0fa8d052791b2e4a22fd52eb3c759511e62...
2997,sale,0xc0547b5b8d575e3ba16b529ee2530a35c437bc75fa93...,,optimism,0x00000000000000adc04c56bf30ac9d3c0aaf14dc,,,,1,,,"{'quantity': '110000000000000000', 'token_addr...",,1706653465,,1706653465.00,{'identifier': '566479911085778512201556021651...,0x240ad467a71210629d71d4de22ebde27951c83fc,0x82eb45562f991329ed2867f43fc60f0ba52c3dab,0x6672189deb7829838aabba20ddeb66a3cb5fee5b55a2...
2998,sale,0xff4b9bdd3f140f8211864ef34e0a07ea0a92eaf98f7d...,,optimism,0x00000000000000adc04c56bf30ac9d3c0aaf14dc,,,,1,,,"{'quantity': '10000000000000000', 'token_addre...",,1706203283,,1706203283.00,{'identifier': '364706086468987072255605061503...,0x7b363822d744143d51c69b0757165c987a6e4ef4,0xe68e8cc7ff772b026c062b9cc28246676d044947,0xd8e71f4d52c43fa770b9aa7ad7d5cfb073f04c7495e0...
2999,sale,0x03188e15aaf9596307117e30355f6b2ac79665fbd1c2...,,optimism,0x00000000000000adc04c56bf30ac9d3c0aaf14dc,,,,1,,,"{'quantity': '150000000000000000', 'token_addr...",,1704080589,,1704080589.00,{'identifier': '992340878683635895851755157396...,0x31b32020fb1bdf0228c58b80590f07b235f2b0ce,0x64233eaa064ef0d54ff1a963933d0d2d46ab5829,0x186d1ceacf620bab710d11cf928b79e5dd46790ff1d5...


In [27]:
bids = events_df_copy[events_df_copy['event_type'] == 'order']
sales = events_df_copy[events_df_copy['event_type'] == 'sale']

bids

Unnamed: 0,event_type,order_hash,order_type,chain,protocol_address,start_date,expiration_date,asset,quantity,maker,taker,payment,criteria,event_timestamp,is_private_listing,closing_date,nft,seller,buyer,transaction
0,order,0x46f734ffea0b7f6054b72927c9df96e6692045e8fe39...,listing,optimism,0x0000000000000068f116a894984e2db1123eb395,1720648237.00,1723326637.00,{'identifier': '105524492317869377796516137615...,1,0x1ec410d35b81369d5c0d8fcadb340fdecce5c250,,"{'quantity': '50000000000000000000', 'token_ad...",{},1720648284,0.00,,,,,
1,order,0xc2c81d45617f524e0b38a8828484e8fc14129d65ecff...,listing,optimism,0x0000000000000068f116a894984e2db1123eb395,1720647918.00,1723326318.00,{'identifier': '932502001321155872797741020477...,1,0x1ec410d35b81369d5c0d8fcadb340fdecce5c250,,"{'quantity': '50000000000000000000', 'token_ad...",{},1720648171,0.00,,,,,
2,order,0x20707ea7356cbee5f71943f71c95e15c23ac05b7bf58...,listing,optimism,0x0000000000000068f116a894984e2db1123eb395,1720647759.00,1721252536.00,{'identifier': '493513286051206672797082654485...,1,0x0d3f5a7a1ee78e743e25c18e66942fcbcd84ccad,,"{'quantity': '770000000000000000', 'token_addr...",{},1720648122,0.00,,,,,
3,order,0x49729029191017c544a3f67f03968dd12bf16c69e1dd...,listing,optimism,0x0000000000000068f116a894984e2db1123eb395,1720562975.00,1736460564.00,{'identifier': '106968891900401063741132344938...,1,0x386ae4d6db89e0bbd41ef8cba13460edcf867420,,"{'quantity': '33000000000000000000', 'token_ad...",{},1720563049,0.00,,,,,
4,order,0xaedefdd5ee2c58433e4135132b5b8454653725d40859...,listing,optimism,0x0000000000000068f116a894984e2db1123eb395,1720561057.00,1721165912.00,{'identifier': '283604669650136775214415072033...,1,0x9a93ae4f408a1e7d178650ca9f7ab2135c512eda,,"{'quantity': '1000000000000000000', 'token_add...",{},1720561062,0.00,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2902,order,0x3bbb21c5a1766b47ea91b2dafd73e7cb28198b41b3c6...,listing,optimism,0x00000000000000adc04c56bf30ac9d3c0aaf14dc,1703615460.00,1706293860.00,{'identifier': '596691372334868032834900148779...,1,0xec00481d13a4028e0b76b031cf5c863604cb2d4b,,"{'quantity': '3000000000000000000', 'token_add...",{},1703615466,0.00,,,,,
2903,order,0xa2931b385c3bd5c237a44cc39a452e9f12499ba6cafd...,listing,optimism,0x00000000000000adc04c56bf30ac9d3c0aaf14dc,1703615375.00,1706293775.00,{'identifier': '178657174778441322562715723560...,1,0xec00481d13a4028e0b76b031cf5c863604cb2d4b,,"{'quantity': '3000000000000000000', 'token_add...",{},1703615382,0.00,,,,,
2904,order,0x0fff804500518519b60164f5a10ef76e63da9ee77775...,listing,optimism,0x00000000000000adc04c56bf30ac9d3c0aaf14dc,1703615237.00,1706293637.00,{'identifier': '887409359338823895470224549460...,1,0x64233eaa064ef0d54ff1a963933d0d2d46ab5829,,"{'quantity': '3000000000000000000', 'token_add...",{},1703615249,0.00,,,,,
2905,order,0x3e6ab1b4b98dd45a1cd97fc141659077293095161497...,listing,optimism,0x00000000000000adc04c56bf30ac9d3c0aaf14dc,1703607296.00,1704212091.00,{'identifier': '117315147600270829665702926801...,1,0x7af4ca674c86ac74d15ae668ae7ffbe4f7ddafbf,,"{'quantity': '1000000000000000000', 'token_add...",{},1703607304,0.00,,,,,


In [28]:
bids['identifier'] = bids['asset'].apply(lambda x: x.get('identifier') if isinstance(x, dict) else None)
sales['identifier'] = sales['nft'].apply(lambda x: x.get('identifier') if isinstance(x, dict) else None)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bids['identifier'] = bids['asset'].apply(lambda x: x.get('identifier') if isinstance(x, dict) else None)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sales['identifier'] = sales['nft'].apply(lambda x: x.get('identifier') if isinstance(x, dict) else None)


In [29]:
bids['identifier'] = bids['identifier'].astype(float)
bids.rename(columns={'identifier':'tokenid'}, inplace=True) 


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bids['identifier'] = bids['identifier'].astype(float)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bids.rename(columns={'identifier':'tokenid'}, inplace=True)


In [30]:
bids = bids.merge(descriptions_df, how='left', on='tokenid')

In [31]:
bids.columns

Index(['event_type', 'order_hash', 'order_type', 'chain', 'protocol_address',
       'start_date', 'expiration_date', 'asset', 'quantity', 'maker', 'taker',
       'payment', 'criteria', 'event_timestamp', 'is_private_listing',
       'closing_date', 'nft', 'seller', 'buyer', 'transaction', 'tokenid',
       'name'],
      dtype='object')

In [32]:
bids.drop(columns=['protocol_address','chain','maker','criteria','is_private_listing','closing_date','nft','seller','buyer'], inplace=True)

In [33]:
sales['identifier'] = sales['identifier'].astype(float)
sales.rename(columns={'identifier':'tokenid'}, inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sales['identifier'] = sales['identifier'].astype(float)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sales.rename(columns={'identifier':'tokenid'}, inplace=True)


In [34]:
sales = sales.merge(descriptions_df, how='left', on='tokenid')
sales.drop(columns=['order_type', 'chain','start_date','expiration_date','asset','maker','is_private_listing','protocol_address','criteria'], inplace=True)

In [35]:
box_sales_os = sales[sales['name'].str.endswith('.box')]
box_bids_os = bids[bids['name'].str.endswith('.box')] 

In [36]:
box_bids_os.columns = [f'bid_{col}' if col != 'name' else col for col in box_bids_os.columns]

In [37]:
box_sales_os.columns = [f'sale_{col}' if col != 'name' else col for col in box_sales_os.columns]

In [38]:
box_listings_and_sales = pd.merge(box_bids_os, box_sales_os, how='inner', on='name')
box_listings_and_sales['bid_event_timestamp'] = pd.to_datetime(box_listings_and_sales['bid_event_timestamp'], unit='s')
box_listings_and_sales['sale_event_timestamp'] = pd.to_datetime(box_listings_and_sales['sale_event_timestamp'], unit='s')


In [39]:
filtered_box_listings_and_sales = box_listings_and_sales[box_listings_and_sales['sale_event_timestamp'] > box_listings_and_sales['bid_event_timestamp']]
filtered_box_listings_and_sales['time_diff'] = filtered_box_listings_and_sales['sale_event_timestamp'] - filtered_box_listings_and_sales['bid_event_timestamp']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_box_listings_and_sales['time_diff'] = filtered_box_listings_and_sales['sale_event_timestamp'] - filtered_box_listings_and_sales['bid_event_timestamp']


In [40]:
avg_time_to_sell = filtered_box_listings_and_sales['time_diff'].mean()
print(avg_time_to_sell)

19 days 11:19:25.506097561


In [41]:
closest_listings = filtered_box_listings_and_sales.loc[filtered_box_listings_and_sales.groupby(['name', 'sale_event_timestamp'])['time_diff'].idxmin()]


In [42]:
closest_listings['sale_quantity'] = closest_listings['sale_payment'].apply(lambda x: int(x['quantity']) / 10**18)
closest_listings['listing_quantity'] = closest_listings['bid_payment'].apply(lambda x: int(x['quantity']) / 10**18)


In [43]:
closest_listings[['bid_event_timestamp','listing_quantity']].head()

Unnamed: 0,bid_event_timestamp,listing_quantity
33,2024-04-23 10:22:52,0.03
68,2024-02-12 18:32:17,0.04
51,2024-02-24 06:41:03,0.04
64,2024-02-19 23:52:15,0.04
56,2024-02-23 04:00:29,0.1


In [44]:
closest_listings['bid_event_timestamp'] = closest_listings['bid_event_timestamp'].dt.strftime('%Y-%m-%d %H:00:00')


In [45]:
closest_listings['sale_event_timestamp'] = closest_listings['sale_event_timestamp'].dt.strftime('%Y-%m-%d %H:00:00')

In [46]:
mint_df['tokenid'] = mint_df['tokenid'].astype(float)

In [47]:
mints_with_names = pd.merge(mint_df, descriptions_df, how='left', on='tokenid')

In [48]:
mints_with_names_null = mints_with_names[mints_with_names.isnull().any(axis=1)]
print(list(mints_with_names_null['tx_hash']))

['0xf3fe57b0418d3f6b3f95dc94585f7c1d3af0a8c1e94574df32d48ac2e12bf151', '0xf3fe57b0418d3f6b3f95dc94585f7c1d3af0a8c1e94574df32d48ac2e12bf151', '0x254894247bb1b37bd563e29b61e60c183bfdfc33b13251a88b5e3fd976f7a525', '0x254894247bb1b37bd563e29b61e60c183bfdfc33b13251a88b5e3fd976f7a525', '0xc834fc6ce827bec875439feec0fcc9bc698c2c74ba0e02a47ada863181eea465', '0xc834fc6ce827bec875439feec0fcc9bc698c2c74ba0e02a47ada863181eea465', '0xee71ed978fa78ebfe902f8800d344fe2a8ee91ce840b786932ef4b917712b592', '0xee71ed978fa78ebfe902f8800d344fe2a8ee91ce840b786932ef4b917712b592', '0xeff0647a1457a41fd483c5a807985d5bfb3d4d74fa214b9bad20377e0a5017e8', '0xeff0647a1457a41fd483c5a807985d5bfb3d4d74fa214b9bad20377e0a5017e8', '0x797479e07d791a0f79d0433a514dbb4fcd196e57a047c701e768d61f4d6fc7c8', '0x797479e07d791a0f79d0433a514dbb4fcd196e57a047c701e768d61f4d6fc7c8', '0x4b19b2871109c40c80ae709a599edea386639794904ec26638a365efaf245965', '0x4b19b2871109c40c80ae709a599edea386639794904ec26638a365efaf245965', '0xb40404c76df837b0

In [49]:
mints_with_names.drop_duplicates('tokenid', inplace=True)

In [50]:
mints_with_names.drop(columns=['__row_index','tx_hash','tokenid'], inplace=True)

In [51]:
mints_with_names.set_index('day', inplace=True)

In [52]:
mints_with_names.index = pd.to_datetime(mints_with_names.index)
mints_with_names.dropna(inplace=True)

In [53]:
box_domains_mints = mints_with_names[mints_with_names['name'].str.endswith('.box')]


In [54]:
daily_box_mints = box_domains_mints.resample('D').count()

In [55]:
daily_box_mints.rename(columns={'name':'mints'}, inplace=True)
daily_box_mints_fig = px.bar(daily_box_mints, x=daily_box_mints.index, y='mints', title='Daily Mints')
# daily_box_mints_fig.show()

In [56]:
total_box_mints = box_domains_mints.count().iloc[0]
total_box_mints

np.int64(3614)

In [57]:
sales_with_names = pd.merge(sales_df, descriptions_df, how='left', on='tokenid')

In [58]:
sales_with_names.drop_duplicates('tokenid', inplace=True)
sales_with_names.drop(columns=['__row_index','tx_hash','tokenid'], inplace=True)
sales_with_names.set_index('day', inplace=True)
sales_with_names.index = pd.to_datetime(sales_with_names.index)

In [59]:
sales_with_names

Unnamed: 0_level_0,price,name
day,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-03-27 14:00:00+00:00,0.0,justgivemethestuffplease.bio
2024-04-03 01:00:00+00:00,0.0,champathedevourergroup.xyz
2024-04-17 18:00:00+00:00,0.01,mewtwo.box
2024-04-24 01:00:00+00:00,0.0,cryptoapp.one
2024-04-24 04:00:00+00:00,0.03,0000.box
2024-04-24 01:00:00+00:00,0.0,stazidoghairartists.pro
2024-04-25 21:00:00+00:00,0.1,express.box
2024-04-27 16:00:00+00:00,10.0,vm.box
2024-04-27 00:00:00+00:00,0.37,jane.box
2024-04-28 07:00:00+00:00,0.1,good.box


sales_with_names.dropna(inplace=True)

In [60]:
box_domains_sales = sales_with_names[sales_with_names['name'].str.endswith('.box')]
box_domains_sales

Unnamed: 0_level_0,price,name
day,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-04-17 18:00:00+00:00,0.01,mewtwo.box
2024-04-24 04:00:00+00:00,0.03,0000.box
2024-04-25 21:00:00+00:00,0.1,express.box
2024-04-27 16:00:00+00:00,10.0,vm.box
2024-04-27 00:00:00+00:00,0.37,jane.box
2024-04-28 07:00:00+00:00,0.1,good.box
2024-04-28 01:00:00+00:00,0.07,man.box
2024-05-07 22:00:00+00:00,0.06,onyx.box
2024-02-03 22:00:00+00:00,0.01,696.box
2024-02-05 04:00:00+00:00,0.02,song.box


In [61]:
eth_usd_df.set_index('day', inplace=True)
eth_usd_df.index = pd.to_datetime(eth_usd_df.index)
eth_usd_df.drop(columns=['__row_index'], inplace=True)

In [62]:
eth_usd_df.rename(columns={'price':'eth_usd'}, inplace=True)

In [63]:
box_listings['start_time'] = pd.to_datetime(box_listings['start_time'], unit='s').dt.strftime('%Y-%m-%d %H:00:00')
box_listings['end_time'] = pd.to_datetime(box_listings['end_time'], unit='s').dt.strftime('%Y-%m-%d %H:00:00')


The behavior of 'to_datetime' with 'unit' when parsing strings is deprecated. In a future version, strings will be parsed as datetime strings, matching the behavior without a 'unit'. To retain the old behavior, explicitly cast ints or floats to numeric type before calling to_datetime.



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


The behavior of 'to_datetime' with 'unit' when parsing strings is deprecated. In a future version, strings will be parsed as datetime strings, matching the behavior without a 'unit'. To retain the old behavior, explicitly cast ints or floats to numeric type before calling to_datetime.



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documenta

In [64]:
eth_usd_df_copy = eth_usd_df.reset_index().copy()
eth_usd_df_copy.rename(columns={'day':'start_time'}, inplace=True)

In [65]:
eth_usd_df_copy['start_time'] = pd.to_datetime(eth_usd_df_copy['start_time']).dt.tz_localize(None)

In [66]:
box_listings['start_time'] = pd.to_datetime(box_listings['start_time']) 



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [67]:
eth_usd_df_copy_2 = eth_usd_df_copy.copy()
eth_usd_df_copy_2.rename(columns={'start_time':'bid_event_timestamp', 'eth_usd':'eth_usd_bid'}, inplace=True)

In [68]:
eth_usd_df_copy_3 = eth_usd_df_copy.copy()
eth_usd_df_copy_3.rename(columns={'start_time':'sale_event_timestamp', 'eth_usd':'eth_usd_sale'}, inplace=True)

In [69]:
closest_listings['bid_event_timestamp'] = pd.to_datetime(closest_listings['bid_event_timestamp'])
closest_listings['sale_event_timestamp'] = pd.to_datetime(closest_listings['sale_event_timestamp'])

In [70]:
closest_listings = closest_listings.merge(eth_usd_df_copy_2, how='left', on='bid_event_timestamp')

In [71]:
closest_listings = closest_listings.merge(eth_usd_df_copy_3, how='left', on='sale_event_timestamp')

In [72]:
closest_listings['sale_usd'] = closest_listings['sale_quantity'] * closest_listings['eth_usd_sale']
closest_listings['list_usd'] = closest_listings['listing_quantity'] * closest_listings['eth_usd_bid']

In [73]:
closest_listings['percent_change'] = (closest_listings['sale_usd'] - closest_listings['list_usd']) / closest_listings['list_usd'] * 100

In [74]:
listing_price_to_sale_avg_pct_change = closest_listings['percent_change'].mean()
print(listing_price_to_sale_avg_pct_change)

-14.673687659116787


In [75]:

box_listings = box_listings.merge(eth_usd_df_copy, how='left', on='start_time') 

In [76]:
box_listings.drop(columns=['order_hash'], inplace=True)

In [77]:
box_listings['price_in_usd_start_time'] = box_listings['price_in_eth'] * box_listings['eth_usd']

In [78]:
box_listings.set_index('start_time', inplace=True)
box_listings_max_daily = box_listings['price_in_usd_start_time'].resample('D').max()

In [79]:
box_listings_num_daily = box_listings['name'].resample('D').count()
total_box_listings = box_listings_num_daily.sum()
total_box_listings

np.int64(531)

In [80]:
box_listings_min_daily = box_listings['price_in_usd_start_time'].resample('D').min()
box_listings_avg_daily = box_listings['price_in_usd_start_time'].resample('D').mean()

In [81]:
box_listings_max_daily.fillna(0, inplace=True)
box_listings_min_daily.fillna(0, inplace=True)
box_listings_avg_daily.fillna(0, inplace=True)

In [82]:
box_listing_data = pd.merge(box_listings_num_daily.to_frame('listings'), box_listings_max_daily.to_frame('max_price'), left_index=True,
                            right_index=True, how='inner')

In [83]:
box_listing_data = box_listing_data.merge(box_listings_min_daily.to_frame('min_price'), left_index=True,
                                          right_index=True, how='inner')

box_listing_data = box_listing_data.merge(box_listings_avg_daily.to_frame('avg_price'), left_index=True,
                                          right_index=True, how='inner')

In [84]:
monthly_listings = box_listings['name'].resample('M').count()
monthly_listings


'M' is deprecated and will be removed in a future version, please use 'ME' instead.



start_time
2024-01-31     36
2024-02-29     17
2024-03-31     11
2024-04-30     93
2024-05-31    144
2024-06-30    173
2024-07-31     57
Freq: ME, Name: name, dtype: int64

In [85]:
def monthly_listings_growth_rate(listings):
    previous_month = listings.shift(1)
    growth_rate = ((listings - previous_month) / previous_month) * 100
    return growth_rate

In [86]:
listings_growth_rate = monthly_listings_growth_rate(monthly_listings)


In [87]:
listings_growth_rate.dropna(inplace=True)
listings_growth_rate

start_time
2024-02-29   -52.78
2024-03-31   -35.29
2024-04-30   745.45
2024-05-31    54.84
2024-06-30    20.14
2024-07-31   -67.05
Freq: ME, Name: name, dtype: float64

In [88]:
box_domains_sales = box_domains_sales.merge(eth_usd_df, left_index=True, right_index=True, how='left')

In [89]:
box_domains_sales['price_usd'] = box_domains_sales['price'] * box_domains_sales['eth_usd']
box_domains_sales.rename(columns={'price':'price_eth'}, inplace=True)

In [90]:
box_domains_sales.drop(columns=['eth_usd'], inplace=True)


In [91]:
box_domains_sales.sort_index(inplace=True)
box_domains_mints.sort_index(inplace=True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [92]:
box_domains_sales = box_domains_sales[['name', 'price_usd','price_eth']]


In [93]:
max_eth_sale = box_domains_sales['price_eth'].max()
max_usd_sale = box_domains_sales['price_usd'].max()

# Retrieve the corresponding timestamps
max_eth_sale_row = box_domains_sales.loc[box_domains_sales['price_eth'].idxmax()]
max_usd_sale_row = box_domains_sales.loc[box_domains_sales['price_usd'].idxmax()]

# Display the results
print(f"Maximum sale: \n {max_eth_sale_row}")


Maximum sale: 
 name           vm.box
price_usd   31,481.47
price_eth       10.00
Name: 2024-04-27 16:00:00+00:00, dtype: object


In [94]:
total_box_sales = box_domains_sales['name'].count()
print(f'total .box domain sales as of {dt.datetime.today()} : {total_box_sales}')

total .box domain sales as of 2024-07-10 19:48:47.497738 : 21


In [95]:
daily_box_sales = box_domains_sales['name'].resample('D').count()
daily_box_sales

day
2024-02-03 00:00:00+00:00    1
2024-02-04 00:00:00+00:00    0
2024-02-05 00:00:00+00:00    1
2024-02-06 00:00:00+00:00    0
2024-02-07 00:00:00+00:00    0
                            ..
2024-05-03 00:00:00+00:00    0
2024-05-04 00:00:00+00:00    0
2024-05-05 00:00:00+00:00    0
2024-05-06 00:00:00+00:00    0
2024-05-07 00:00:00+00:00    1
Freq: D, Name: name, Length: 95, dtype: int64

In [96]:
daily_box_vol = box_domains_sales['price_usd'].resample('D').sum()
cumulative_box_vol = daily_box_vol.cumsum()
cumulative_box_vol

day
2024-02-03 00:00:00+00:00       33.09
2024-02-04 00:00:00+00:00       33.09
2024-02-05 00:00:00+00:00       78.69
2024-02-06 00:00:00+00:00       78.69
2024-02-07 00:00:00+00:00       78.69
                               ...   
2024-05-03 00:00:00+00:00   35,720.31
2024-05-04 00:00:00+00:00   35,720.31
2024-05-05 00:00:00+00:00   35,720.31
2024-05-06 00:00:00+00:00   35,720.31
2024-05-07 00:00:00+00:00   35,888.14
Freq: D, Name: price_usd, Length: 95, dtype: float64

In [97]:
daily_box_sales_fig = px.bar(daily_box_sales.to_frame('sales'), x=daily_box_sales.index, y='sales', title='Daily Sales')
# daily_box_sales_fig.show()

In [98]:
latest_box_domains_sales = box_domains_sales.iloc[-10:] 
latest_box_domains_sales

Unnamed: 0_level_0,name,price_usd,price_eth
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-02-25 03:00:00+00:00,015.box,104.68,0.04
2024-02-25 07:00:00+00:00,38.box,301.73,0.1
2024-04-17 18:00:00+00:00,mewtwo.box,29.61,0.01
2024-04-24 04:00:00+00:00,0000.box,93.92,0.03
2024-04-25 21:00:00+00:00,express.box,317.33,0.1
2024-04-27 00:00:00+00:00,jane.box,1151.71,0.37
2024-04-27 16:00:00+00:00,vm.box,31481.47,10.0
2024-04-28 01:00:00+00:00,man.box,224.6,0.07
2024-04-28 07:00:00+00:00,good.box,314.61,0.1
2024-05-07 22:00:00+00:00,onyx.box,167.83,0.06


In [99]:
latest_box_domains_mints = box_domains_mints.iloc[-10:]
# latest_box_domains_mints

In [100]:
cumulative_box_mints = daily_box_mints.cumsum()
cumulative_box_mints.rename(columns={'mints':'cumulative mints'}, inplace=True)

In [101]:
daily_mint_metrics = daily_box_mints.merge(cumulative_box_mints, left_index=True, right_index=True, how='inner')
# daily_mint_metrics

In [102]:
daily_mint_metrics_fig = make_subplots(specs=[[{"secondary_y": True}]])

daily_mint_metrics_fig.add_trace(
    go.Bar(
        x=daily_mint_metrics.index,
        y=daily_mint_metrics['mints'],
        name='Mints'
    ),
    secondary_y=False
)

daily_mint_metrics_fig.add_trace(
    go.Scatter(
        x=daily_mint_metrics.index,
        y=daily_mint_metrics['cumulative mints'],
        name='Cumulative Mints',
        mode='lines'
    ),
    secondary_y=True
)

daily_mint_metrics_fig.update_xaxes(title_text="Date")

# daily_mint_metrics_fig.show()


In [103]:
listings_growth_rate

start_time
2024-02-29   -52.78
2024-03-31   -35.29
2024-04-30   745.45
2024-05-31    54.84
2024-06-30    20.14
2024-07-31   -67.05
Freq: ME, Name: name, dtype: float64

In [104]:
listings_growth_rate_fig = px.bar(listings_growth_rate.to_frame('Monthly Listings Growth Rate'), x=listings_growth_rate.index,
                                   y='Monthly Listings Growth Rate', title='Monthly Listings Growth Rate')

# listings_growth_rate_fig.show()

In [105]:
cumulative_box_sales = daily_box_sales.cumsum()


In [106]:
monthly_max_sold = box_domains_sales['price_usd'].resample('M').max()
monthly_min_sold = box_domains_sales['price_usd'].resample('M').min()
monthly_avg_sold = box_domains_sales['price_usd'].resample('M').mean()
monthly_volume_usd = box_domains_sales['price_usd'].resample('M').sum()
monthly_num_sold = box_domains_sales['name'].resample('M').count()


'M' is deprecated and will be removed in a future version, please use 'ME' instead.


'M' is deprecated and will be removed in a future version, please use 'ME' instead.


'M' is deprecated and will be removed in a future version, please use 'ME' instead.


'M' is deprecated and will be removed in a future version, please use 'ME' instead.


'M' is deprecated and will be removed in a future version, please use 'ME' instead.



In [107]:
monthly_max_sold.fillna(0, inplace=True)
monthly_min_sold.fillna(0, inplace=True)
monthly_avg_sold.fillna(0, inplace=True)
monthly_volume_usd.fillna(0, inplace=True)
monthly_num_sold.fillna(0, inplace=True)

In [108]:
monthly_box_sales_metrics = pd.merge(monthly_max_sold.to_frame('max_price'), monthly_min_sold.to_frame('min_price'), left_index=True, right_index=True, how='inner')

In [109]:
monthly_box_sales_metrics = monthly_box_sales_metrics.merge(monthly_avg_sold.to_frame('avg_price'), left_index=True, right_index=True, how='inner')

In [110]:
monthly_box_sales_metrics = monthly_box_sales_metrics.merge(monthly_volume_usd.to_frame('volume_usd'), left_index=True, right_index=True, how='inner')

In [111]:
monthly_num_sold

day
2024-02-29 00:00:00+00:00    13
2024-03-31 00:00:00+00:00     0
2024-04-30 00:00:00+00:00     7
2024-05-31 00:00:00+00:00     1
Freq: ME, Name: name, dtype: int64

In [112]:
monthly_box_sales_metrics = monthly_box_sales_metrics.merge(monthly_num_sold.to_frame('domains sold'), left_index=True, right_index=True, how='inner')

In [113]:
monthly_box_sales_metrics

Unnamed: 0_level_0,max_price,min_price,avg_price,volume_usd,domains sold
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-02-29 00:00:00+00:00,301.73,33.09,162.08,2107.05,13
2024-03-31 00:00:00+00:00,0.0,0.0,0.0,0.0,0
2024-04-30 00:00:00+00:00,31481.47,29.61,4801.89,33613.26,7
2024-05-31 00:00:00+00:00,167.83,167.83,167.83,167.83,1


In [114]:
daily_sales_metrics = pd.merge(cumulative_box_sales.to_frame('cumulative_sales'), daily_box_sales.to_frame('daily_sales'), 
                               left_index=True, right_index=True, how='left')

In [115]:
daily_sales_metrics = daily_sales_metrics.merge(daily_box_vol.to_frame('vol_usd'), left_index=True, right_index=True, how='inner')
daily_sales_metrics = daily_sales_metrics.merge(cumulative_box_vol.to_frame('cumulative_vol'), left_index=True, right_index=True, how='inner')

In [116]:
daily_sales_metrics

Unnamed: 0_level_0,cumulative_sales,daily_sales,vol_usd,cumulative_vol
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-02-03 00:00:00+00:00,1,1,33.09,33.09
2024-02-04 00:00:00+00:00,1,0,0.00,33.09
2024-02-05 00:00:00+00:00,2,1,45.60,78.69
2024-02-06 00:00:00+00:00,2,0,0.00,78.69
2024-02-07 00:00:00+00:00,2,0,0.00,78.69
...,...,...,...,...
2024-05-03 00:00:00+00:00,20,0,0.00,35720.31
2024-05-04 00:00:00+00:00,20,0,0.00,35720.31
2024-05-05 00:00:00+00:00,20,0,0.00,35720.31
2024-05-06 00:00:00+00:00,20,0,0.00,35720.31


In [117]:
daily_vol_fig = make_subplots(specs=[[{"secondary_y": True}]])

daily_vol_fig.add_trace(
    go.Bar(
        x=daily_sales_metrics.index,
        y=daily_sales_metrics['vol_usd'],
        name='Sales Volume'
    ),
    secondary_y=False
)

daily_vol_fig.add_trace(
    go.Scatter(
        x=daily_sales_metrics.index,
        y=daily_sales_metrics['cumulative_vol'],
        name='Cumulative Sales Volume',
        mode='lines'
    ),
    secondary_y=True
)

daily_vol_fig.update_xaxes(title_text="Date")

# daily_vol_fig.show()


In [118]:
daily_sales_fig = make_subplots(specs=[[{"secondary_y": True}]])

daily_sales_fig.add_trace(
    go.Bar(
        x=daily_sales_metrics.index,
        y=daily_sales_metrics['daily_sales'],
        name='Sales'
    ),
    secondary_y=False
)

daily_sales_fig.add_trace(
    go.Scatter(
        x=daily_sales_metrics.index,
        y=daily_sales_metrics['cumulative_sales'],
        name='Cumulative Sales',
        mode='lines'
    ),
    secondary_y=True
)

daily_sales_fig.update_xaxes(title_text="Date")

# daily_sales_fig.show()


In [119]:
monthly_listings = monthly_listings.to_frame('listings')


In [120]:
monthly_sales = box_domains_sales['name'].resample('M').count()
monthly_sales = monthly_sales.reset_index()
monthly_sales['day'] = pd.to_datetime(monthly_sales['day']).dt.strftime('%Y-%m-%d')
monthly_sales.set_index('day', inplace=True)


'M' is deprecated and will be removed in a future version, please use 'ME' instead.



In [121]:
monthly_sales

Unnamed: 0_level_0,name
day,Unnamed: 1_level_1
2024-02-29,13
2024-03-31,0
2024-04-30,7
2024-05-31,1


In [122]:
monthly_listings.index

DatetimeIndex(['2024-01-31', '2024-02-29', '2024-03-31', '2024-04-30',
               '2024-05-31', '2024-06-30', '2024-07-31'],
              dtype='datetime64[ns]', name='start_time', freq='ME')

In [123]:
monthly_sales.index = pd.to_datetime(monthly_sales.index)

In [124]:
monthly_sales_reindexed = monthly_sales.reindex(monthly_listings.index).fillna(0)
monthly_sales_reindexed


Unnamed: 0_level_0,name
start_time,Unnamed: 1_level_1
2024-01-31,0.0
2024-02-29,13.0
2024-03-31,0.0
2024-04-30,7.0
2024-05-31,1.0
2024-06-30,0.0
2024-07-31,0.0


In [125]:
monthly_listings['sales'] = monthly_sales_reindexed['name']

In [126]:
monthly_listings

Unnamed: 0_level_0,listings,sales
start_time,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-01-31,36,0.0
2024-02-29,17,13.0
2024-03-31,11,0.0
2024-04-30,93,7.0
2024-05-31,144,1.0
2024-06-30,173,0.0
2024-07-31,57,0.0


In [127]:
monthly_listings['listings_to_sales_ratio'] = monthly_listings['listings'] / monthly_listings['sales']
monthly_listings['listings_to_sales_ratio'].replace([float('inf'), -float('inf')], 0, inplace=True)

monthly_listings


A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.





Unnamed: 0_level_0,listings,sales,listings_to_sales_ratio
start_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-01-31,36,0.0,0.0
2024-02-29,17,13.0,1.31
2024-03-31,11,0.0,0.0
2024-04-30,93,7.0,13.29
2024-05-31,144,1.0,144.0
2024-06-30,173,0.0,0.0
2024-07-31,57,0.0,0.0


In [128]:
listing_to_sales_fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add bar chart for sales
listing_to_sales_fig.add_trace(
    go.Bar(
        x=monthly_listings.index,
        y=monthly_listings['sales'],
        name='Sales'
    ),
    secondary_y=False
)

listing_to_sales_fig.add_trace(
    go.Bar(
        x=monthly_listings.index,
        y=monthly_listings['listings'],
        name='Listings'
    ),
    secondary_y=False
)

# Add line chart for cumulative sales
listing_to_sales_fig.add_trace(
    go.Scatter(
        x=monthly_listings.index,
        y=monthly_listings['listings_to_sales_ratio'],
        name='Listings to Sales Ratio',
        mode='lines'
    ),
    secondary_y=True
)

# listing_to_sales_fig.show()


In [129]:
monthly_mints = box_domains_mints.resample('M').count()
monthly_mints.reset_index(inplace=True)
monthly_mints['day'] = pd.to_datetime(monthly_mints['day']).dt.strftime('%Y-%m-%d') 
print(monthly_mints)

          day  name
0  2023-10-31     5
1  2023-11-30     2
2  2023-12-31    23
3  2024-01-31  1745
4  2024-02-29   349
5  2024-03-31   162
6  2024-04-30   636
7  2024-05-31   414
8  2024-06-30   221
9  2024-07-31    57



'M' is deprecated and will be removed in a future version, please use 'ME' instead.



In [130]:
monthly_mints.set_index('day', inplace=True)


In [131]:
monthly_mints.index = pd.to_datetime(monthly_mints.index) 
monthly_mints.index

DatetimeIndex(['2023-10-31', '2023-11-30', '2023-12-31', '2024-01-31',
               '2024-02-29', '2024-03-31', '2024-04-30', '2024-05-31',
               '2024-06-30', '2024-07-31'],
              dtype='datetime64[ns]', name='day', freq=None)

In [132]:
monthly_sales_reindexed.index

DatetimeIndex(['2024-01-31', '2024-02-29', '2024-03-31', '2024-04-30',
               '2024-05-31', '2024-06-30', '2024-07-31'],
              dtype='datetime64[ns]', name='start_time', freq='ME')

In [133]:
monthly_mints['sales'] = monthly_sales_reindexed['name'] 
monthly_mints.fillna(0, inplace=True)
monthly_mints['mint_to_sales_ratio'] = monthly_mints['name'] / monthly_mints['sales'] 
monthly_mints['mint_to_sales_ratio'].replace([float('inf'), -float('inf')], 0, inplace=True)
monthly_mints.rename(columns={'name':'mints'}, inplace=True)



A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.





In [134]:
monthly_mints

Unnamed: 0_level_0,mints,sales,mint_to_sales_ratio
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-10-31,5,0.0,0.0
2023-11-30,2,0.0,0.0
2023-12-31,23,0.0,0.0
2024-01-31,1745,0.0,0.0
2024-02-29,349,13.0,26.85
2024-03-31,162,0.0,0.0
2024-04-30,636,7.0,90.86
2024-05-31,414,1.0,414.0
2024-06-30,221,0.0,0.0
2024-07-31,57,0.0,0.0


In [135]:
mint_to_sales_fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add bar chart for sales
mint_to_sales_fig.add_trace(
    go.Bar(
        x=monthly_mints.index,
        y=monthly_mints['sales'],
        name='Sales'
    ),
    secondary_y=False
)

mint_to_sales_fig.add_trace(
    go.Bar(
        x=monthly_mints.index,
        y=monthly_mints['mints'],
        name='Mints'
    ),
    secondary_y=False
)

# Add line chart for cumulative sales
mint_to_sales_fig.add_trace(
    go.Scatter(
        x=monthly_mints.index,
        y=monthly_mints['mint_to_sales_ratio'],
        name='Mints to Sales Ratio',
        mode='lines'
    ),
    secondary_y=True
)

# mint_to_sales_fig.show()


In [136]:
cumulative_listings_to_sales = total_box_listings / total_box_sales
print(cumulative_listings_to_sales)

25.285714285714285


In [137]:
cumulative_mint_to_sales = total_box_mints / total_box_sales
print(cumulative_mint_to_sales)

172.0952380952381


In [138]:
import os
print(os.getcwd())


e:\Projects\box_app


***Box Domains Valuation Model***

**Data Processing**

domain_path = 'E:/Projects/box_app/data/domain-name-sales.tsv'  
domain_data = pd.read_csv(domain_path, delimiter='\t')

In [139]:
domain_path = 'data/domain-name-sales.tsv'  
domain_data = pd.read_csv(domain_path, delimiter='\t')


In [140]:
domain_data.set_index('date', inplace=True)
domain_data = domain_data.drop(columns=['venue'])
domain_data.sort_index(inplace=True)

In [141]:
domain_data.index = pd.to_datetime(domain_data.index)
domain_data

Unnamed: 0_level_0,domain,price
date,Unnamed: 1_level_1,Unnamed: 2_level_1
1999-04-01,altavista.com,3250000
1999-04-01,bingo.com,1100000
1999-11-01,fly.com,1500000
1999-12-01,autos.com,2200000
1999-12-01,england.com,2000000
...,...,...
2021-01-01,yes.movie,253
2021-01-01,yopal.com,405
2021-01-01,yougraph.com,1161
2021-01-01,zenvie.com,349


In [142]:
domain_data['domain_length'] = domain_data['domain'].apply(len)
domain_data['num_vowels'] = domain_data['domain'].apply(lambda x: sum([1 for char in x if char in 'aeiou']))
domain_data['num_consonants'] = domain_data['domain'].apply(lambda x: sum([1 for char in x if char.isalpha() and char not in 'aeiou']))
domain_data['tld'] = domain_data['domain'].apply(lambda x: x.split('.')[-1])  # Extract TLD


In [143]:
domain_data

Unnamed: 0_level_0,domain,price,domain_length,num_vowels,num_consonants,tld
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1999-04-01,altavista.com,3250000,13,5,7,com
1999-04-01,bingo.com,1100000,9,3,5,com
1999-11-01,fly.com,1500000,7,1,5,com
1999-12-01,autos.com,2200000,9,4,4,com
1999-12-01,england.com,2000000,11,3,7,com
...,...,...,...,...,...,...
2021-01-01,yes.movie,253,9,4,4,movie
2021-01-01,yopal.com,405,9,3,5,com
2021-01-01,yougraph.com,1161,12,4,7,com
2021-01-01,zenvie.com,349,10,4,5,com


In [144]:
box_domains_sales.columns

Index(['name', 'price_usd', 'price_eth'], dtype='object')

In [145]:
filtered_box = box_domains_sales.drop(columns=['price_eth'])
filtered_box.rename(columns={'name':'domain', 'price_usd':'price'}, inplace=True)


In [146]:
filtered_box['domain_length'] = filtered_box['domain'].apply(len)
filtered_box['num_vowels'] = filtered_box['domain'].apply(lambda x: sum([1 for char in x if char in 'aeiou']))
filtered_box['num_consonants'] = filtered_box['domain'].apply(lambda x: sum([1 for char in x if char.isalpha() and char not in 'aeiou']))
filtered_box['tld'] = filtered_box['domain'].apply(lambda x: x.split('.')[-1])  # Extract TLD


In [147]:
filtered_box.index = filtered_box.index.strftime('%Y-%m-%d')

In [148]:
filtered_box

Unnamed: 0_level_0,domain,price,domain_length,num_vowels,num_consonants,tld
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-02-03,696.box,33.09,7,1,2,box
2024-02-05,song.box,45.6,8,2,5,box
2024-02-10,to.box,96.97,6,2,3,box
2024-02-20,uae.box,203.19,7,4,2,box
2024-02-22,08.box,116.74,6,1,2,box
2024-02-24,404.box,203.68,7,1,2,box
2024-02-25,010.box,119.08,7,1,2,box
2024-02-25,70.box,269.17,6,1,2,box
2024-02-25,40.box,209.36,6,1,2,box
2024-02-25,ap.box,134.59,6,2,3,box


In [149]:
features = ['domain_length', 'num_vowels', 'num_consonants', 'tld']
X = domain_data[features]
y = domain_data['price']

In [150]:
# Preprocess categorical data (TLD) and handle missing values
preprocessor = ColumnTransformer(
    transformers=[
        ('num', Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler())
        ]), ['domain_length', 'num_vowels', 'num_consonants']),
        ('cat', Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('onehot', OneHotEncoder(handle_unknown='ignore'))
        ]), ['tld'])
    ]
)

# Create a pipeline with Ridge regression
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', Ridge())
])

# Define the parameter grid for hyperparameter tuning
param_grid = {
    'regressor__alpha': [0.1, 1.0, 10.0, 100.0, 1000.0]
}

In [151]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


**Ridge Regression**

In [152]:
grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='r2')
grid_search.fit(X_train, y_train)

# Best model from grid search
best_model = grid_search.best_estimator_

# Predict and evaluate
y_pred = best_model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Best Alpha: {grid_search.best_params_["regressor__alpha"]}')
print(f'MAE: {mae}')
print(f'MSE: {mse}')
print(f'R²: {r2}')

Best Alpha: 1000.0
MAE: 2705.0255351946857
MSE: 2852191402.3463864
R²: 0.0003925999206353392


**Random Forest Regressor**

pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=200, max_depth=20, min_samples_split=5, random_state=42))
])

# Fit the model
pipeline.fit(X_train, y_train)

# Predict and evaluate
y_pred = pipeline.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'MAE: {mae}')
print(f'MSE: {mse}')
print(f'R²: {r2}')

**XGBoost**

pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', XGBRegressor(n_estimators=200, max_depth=5, learning_rate=0.1, random_state=42))
])

# Fit the model
pipeline.fit(X_train, y_train)

# Predict and evaluate
y_pred = pipeline.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'MAE: {mae}')
print(f'MSE: {mse}')
print(f'R²: {r2}')

**LightGBM**

pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LGBMRegressor(n_estimators=200, max_depth=5, learning_rate=0.1, random_state=42))
])

# Fit the model
pipeline.fit(X_train, y_train)

# Predict and evaluate
y_pred = pipeline.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'MAE: {mae}')
print(f'MSE: {mse}')
print(f'R²: {r2}')

**Cat Boost**

pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', CatBoostRegressor(iterations=200, depth=5, learning_rate=0.1, random_state=42, verbose=0))
])

# Fit the model
pipeline.fit(X_train, y_train)

# Predict and evaluate
y_pred = pipeline.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'MAE: {mae}')
print(f'MSE: {mse}')
print(f'R²: {r2}')

**Prophet**

from sklearn.base import BaseEstimator, TransformerMixin

class ProphetRegressor(BaseEstimator, TransformerMixin):
    def __init__(self):
        self.model = Prophet()
        self.fitted_model = None

    def fit(self, X, y=None):
        df = pd.DataFrame({'ds': X.squeeze(), 'y': y})
        self.fitted_model = self.model.fit(df)
        return self

    def predict(self, X):
        future = pd.DataFrame({'ds': X.squeeze()})
        forecast = self.fitted_model.predict(future)
        return forecast['yhat'].values

pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', ProphetRegressor())
])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



**Best Model**

In [153]:
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', Ridge(alpha=1000.0))  # Set the best alpha value from grid search
])

In [154]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'MAE: {mae}')
print(f'MSE: {mse}')
print(f'R²: {r2}')

MAE: 2705.0255351946857
MSE: 2852191402.3463864
R²: 0.0003925999206353392


In [155]:
filtered_box

Unnamed: 0_level_0,domain,price,domain_length,num_vowels,num_consonants,tld
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-02-03,696.box,33.09,7,1,2,box
2024-02-05,song.box,45.6,8,2,5,box
2024-02-10,to.box,96.97,6,2,3,box
2024-02-20,uae.box,203.19,7,4,2,box
2024-02-22,08.box,116.74,6,1,2,box
2024-02-24,404.box,203.68,7,1,2,box
2024-02-25,010.box,119.08,7,1,2,box
2024-02-25,70.box,269.17,6,1,2,box
2024-02-25,40.box,209.36,6,1,2,box
2024-02-25,ap.box,134.59,6,2,3,box


In [156]:
box_X = filtered_box[features]

# Predict prices for .box domains using the best model
filtered_box['predicted_price'] = pipeline.predict(box_X)

print(filtered_box[['domain', 'predicted_price']])

                 domain  predicted_price
day                                     
2024-02-03      696.box         1,168.93
2024-02-05     song.box         2,173.63
2024-02-10       to.box         3,066.39
2024-02-20      uae.box         3,180.12
2024-02-22       08.box         2,005.65
2024-02-24      404.box         1,168.93
2024-02-25      010.box         1,168.93
2024-02-25       70.box         2,005.65
2024-02-25       40.box         2,005.65
2024-02-25       ap.box         3,066.39
2024-02-25       75.box         2,005.65
2024-02-25      015.box         1,168.93
2024-02-25       38.box         2,005.65
2024-04-17   mewtwo.box         1,560.93
2024-04-24     0000.box           332.21
2024-04-25  express.box         1,114.55
2024-04-27     jane.box         2,453.69
2024-04-27       vm.box         2,786.34
2024-04-28      man.box         2,620.01
2024-04-28     good.box         2,453.69
2024-05-07     onyx.box         2,173.63


In [157]:
r2 = r2_score(filtered_box['price'], filtered_box['predicted_price'])
print(f'r2 {r2}')

r2 0.0411345241547072


**.Box Domain Valuator**

In [158]:
filtered_box_2 = filtered_box.drop(columns=['predicted_price'])
filtered_box_2

Unnamed: 0_level_0,domain,price,domain_length,num_vowels,num_consonants,tld
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-02-03,696.box,33.09,7,1,2,box
2024-02-05,song.box,45.6,8,2,5,box
2024-02-10,to.box,96.97,6,2,3,box
2024-02-20,uae.box,203.19,7,4,2,box
2024-02-22,08.box,116.74,6,1,2,box
2024-02-24,404.box,203.68,7,1,2,box
2024-02-25,010.box,119.08,7,1,2,box
2024-02-25,70.box,269.17,6,1,2,box
2024-02-25,40.box,209.36,6,1,2,box
2024-02-25,ap.box,134.59,6,2,3,box


In [159]:
combined_data = pd.concat([domain_data, filtered_box_2], ignore_index=True)

In [160]:
combined_data

Unnamed: 0,domain,price,domain_length,num_vowels,num_consonants,tld
0,altavista.com,3250000.00,13,5,7,com
1,bingo.com,1100000.00,9,3,5,com
2,fly.com,1500000.00,7,1,5,com
3,autos.com,2200000.00,9,4,4,com
4,england.com,2000000.00,11,3,7,com
...,...,...,...,...,...,...
348252,jane.box,1151.71,8,3,4,box
348253,vm.box,31481.47,6,1,4,box
348254,man.box,224.60,7,2,4,box
348255,good.box,314.61,8,3,4,box


In [161]:
X = combined_data[features]
y = combined_data['price']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

pipeline.fit(X_train, y_train)

y_pred = pipeline.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'MAE: {mae}')
print(f'MSE: {mse}')
print(f'R²: {r2}')


MAE: 2708.9834718161696
MSE: 2845754262.6555114
R²: 0.0004163141819931715


In [162]:
def model_prep(data):
    data['domain_length'] = data['domain'].apply(len)
    data['num_vowels'] = data['domain'].apply(lambda x: sum([1 for char in x if char in 'aeiou']))
    data['num_consonants'] = data['domain'].apply(lambda x: sum([1 for char in x if char.isalpha() and char not in 'aeiou']))
    data['tld'] = data['domain'].apply(lambda x: x.split('.')[-1]) 
    return data

In [163]:
def value_domain(domain):
    domain_x = domain[features]
    value = pipeline.predict(domain_x)
    print(f'predicted value: {value[0]}')
    return value[0] 

In [164]:
test_domain = 'eth.box' ## for model, just have the person input before .box, have it automatically add .box
test_domain_df = pd.DataFrame({'domain': [test_domain]})
test_domain_processed = model_prep(test_domain_df)
test_domain_value = value_domain(test_domain_processed)

predicted value: 2560.052986420118


In [165]:
test_domain_value

np.float64(2560.052986420118)

***Dash App***

Domain valuator would be callback

In [166]:
latest_box_domains_sales.reset_index(inplace=True)

In [167]:
latest_box_domains_sales.sort_values(by='day', ascending=False, inplace=True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [168]:
latest_box_domains_mints.reset_index(inplace=True)

In [169]:
latest_box_domains_mints.sort_values(by='day', ascending=False, inplace=True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [170]:
avg_box_sale = box_domains_sales['price_usd'].mean()

In [171]:
highest_selling_domains = box_domains_sales[['name','price_usd','price_eth']].sort_values(by='price_usd', ascending=False)
highest_selling_domains = highest_selling_domains.head(10)
highest_selling_domains

Unnamed: 0_level_0,name,price_usd,price_eth
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-04-27 16:00:00+00:00,vm.box,31481.47,10.0
2024-04-27 00:00:00+00:00,jane.box,1151.71,0.37
2024-04-25 21:00:00+00:00,express.box,317.33,0.1
2024-04-28 07:00:00+00:00,good.box,314.61,0.1
2024-02-25 07:00:00+00:00,38.box,301.73,0.1
2024-02-25 03:00:00+00:00,75.box,269.17,0.09
2024-02-25 03:00:00+00:00,70.box,269.17,0.09
2024-04-28 01:00:00+00:00,man.box,224.6,0.07
2024-02-25 03:00:00+00:00,40.box,209.36,0.07
2024-02-24 04:00:00+00:00,404.box,203.68,0.07


In [172]:
monthly_box_sales_metrics['cumulative_volume'] = monthly_box_sales_metrics['volume_usd'].cumsum()
monthly_box_sales_metrics.reset_index(inplace=True) 
monthly_box_sales_metrics.sort_values(by='day', ascending=False, inplace=True)

In [173]:
monthly_box_sales_metrics['cumulative domains sold'] = monthly_box_sales_metrics['domains sold'].cumsum()

In [174]:
historical_listing_to_sales = closest_listings[['name','bid_event_timestamp','sale_event_timestamp',
                                                'list_usd','sale_usd','percent_change']]
historical_listing_to_sales.sort_values(by='bid_event_timestamp', ascending=False, inplace=True)
historical_listing_to_sales



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,name,bid_event_timestamp,sale_event_timestamp,list_usd,sale_usd,percent_change
23,r.box,2024-06-30 23:00:00,2024-07-06 05:00:00,3493.13,2003.8,-42.64
21,onyx.box,2024-05-07 20:00:00,2024-05-07 22:00:00,167.68,167.83,0.09
29,watch.box,2024-05-03 13:00:00,2024-05-06 15:00:00,74414.36,73634.98,-1.05
17,good.box,2024-04-28 07:00:00,2024-04-28 07:00:00,314.61,314.61,0.0
18,jane.box,2024-04-26 22:00:00,2024-04-27 00:00:00,1155.72,1151.71,-0.35
0,0000.box,2024-04-23 10:00:00,2024-04-24 04:00:00,91.95,93.92,2.15
19,man.box,2024-04-16 07:00:00,2024-04-28 01:00:00,211.4,224.6,6.25
16,express.box,2024-04-06 22:00:00,2024-04-25 21:00:00,336.25,317.33,-5.63
20,mewtwo.box,2024-03-28 23:00:00,2024-04-17 18:00:00,35.53,29.61,-16.66
2,015.box,2024-02-24 06:00:00,2024-02-25 03:00:00,102.37,104.68,2.26


In [175]:
box_listing_data.columns

Index(['listings', 'max_price', 'min_price', 'avg_price'], dtype='object')

In [176]:
box_listing_data.tail(20)

Unnamed: 0_level_0,listings,max_price,min_price,avg_price
start_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-06-21,1,175.09,175.09,175.09
2024-06-22,66,20952.36,523.49,2267.91
2024-06-23,0,0.0,0.0,0.0
2024-06-24,23,246537.0,22375.39,42584.38
2024-06-25,1,16982.5,16982.5,16982.5
2024-06-26,0,0.0,0.0,0.0
2024-06-27,4,1692400.0,611.53,424348.94
2024-06-28,3,3430.99,341.54,1659.49
2024-06-29,0,0.0,0.0,0.0
2024-06-30,0,0.0,0.0,0.0


In [177]:
highest_selling_domains

Unnamed: 0_level_0,name,price_usd,price_eth
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-04-27 16:00:00+00:00,vm.box,31481.47,10.0
2024-04-27 00:00:00+00:00,jane.box,1151.71,0.37
2024-04-25 21:00:00+00:00,express.box,317.33,0.1
2024-04-28 07:00:00+00:00,good.box,314.61,0.1
2024-02-25 07:00:00+00:00,38.box,301.73,0.1
2024-02-25 03:00:00+00:00,75.box,269.17,0.09
2024-02-25 03:00:00+00:00,70.box,269.17,0.09
2024-04-28 01:00:00+00:00,man.box,224.6,0.07
2024-02-25 03:00:00+00:00,40.box,209.36,0.07
2024-02-24 04:00:00+00:00,404.box,203.68,0.07


In [178]:
highest_selling_domains_fig = px.bar(highest_selling_domains, x=highest_selling_domains['name'], y=highest_selling_domains['price_usd'],
                                     title='10 Highest Selling Domains')
# highest_selling_domains_fig.show()

In [179]:


box_listing_data_fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add bar chart for sales
# box_listing_data_fig.add_trace(
#     go.Bar(
#         x=box_listing_data.index,
#         y=box_listing_data['max_price'],
#         name='Max Price'
#     ),
#     secondary_y=False
# )

# box_listing_data_fig.add_trace(
#     go.Bar(
#         x=box_listing_data.index,
#         y=box_listing_data['min_price'],
#         name='Min Price'
#     ),
#     secondary_y=False
# )

# Add line chart for cumulative sales
box_listing_data_fig.add_trace(
    go.Bar(
        x=box_listing_data.index,
        y=box_listing_data['avg_price'],
        name='Avg Price',
    ),
    secondary_y=False
)

box_listing_data_fig.add_trace(
    go.Scatter(
        x=box_listing_data.index,
        y=box_listing_data['listings'],
        name='Listings',
        mode='lines'
    ),
    secondary_y=True
)

# box_listing_data_fig.show()


In [180]:
box_listings.drop(columns=['tokenid','eth_usd'], inplace=True)


In [181]:
latest_box_listings = box_listings.sort_index(ascending=False)
latest_box_listings = latest_box_listings.head(10)
latest_box_listings.reset_index(inplace=True)

In [182]:
mint_to_sales_fig.update_layout(
    plot_bgcolor='#fafafa',
    paper_bgcolor='#fafafa',
    title="Mints to Sales Metrics"
)

listing_to_sales_fig.update_layout(
    plot_bgcolor='#fafafa',
    paper_bgcolor='#fafafa',
    title="Listings to Sales Metrics"
)

daily_box_mints_fig.update_layout(
    plot_bgcolor='#fafafa',
    paper_bgcolor='#fafafa',
    title="Daily Mints"
)

daily_box_sales_fig.update_layout(
    plot_bgcolor='#fafafa',
    paper_bgcolor='#fafafa',
    title="Daily Sales"
)

daily_vol_fig.update_layout(
    plot_bgcolor='#fafafa',
    paper_bgcolor='#fafafa',
    title="Daily Volume"
)

daily_sales_fig.update_layout(
    plot_bgcolor='#fafafa',
    paper_bgcolor='#fafafa',
    title="Daily Sales Metrics"
)

daily_mint_metrics_fig.update_layout(
    plot_bgcolor='#fafafa',
    paper_bgcolor='#fafafa',
    title="Daily Mints Metrics"
)

listings_growth_rate_fig.update_layout(
    plot_bgcolor='#fafafa',
    paper_bgcolor='#fafafa',
    title="Listings Growth Rate"
)

highest_selling_domains_fig.update_layout(
    plot_bgcolor='#fafafa',
    paper_bgcolor='#fafafa',
)



Average Time to Sell a .box Domain: 19 days 11:19:25.506097561

Average Listing Price to Sale Price Change -14.673687659116787

Cumulative Listings to Sales Ratio: 25:1

Cumulative Mints to Sales Ratio: 171:1

Monthly Listings Growth Rate: -70.2247191011236

In [183]:
avg_time_to_sell

Timedelta('19 days 11:19:25.506097561')

In [184]:
key_metrics = [
    {"label": "Average Listing Price to Sale Price Change", "value": f"{round(int(listing_price_to_sale_avg_pct_change),0)}", "unit": "%"},
    {"label": "Average Days on Market", "value": str(avg_time_to_sell.days), "unit": " days"},
    {"label": "Cumulative Listings to Sales Ratio", "value": f"{round(int(cumulative_listings_to_sales), 0)}", "unit": ":1"},
    {"label": "Cumulative Mints to Sales Ratio", "value": f"{round(int(cumulative_mint_to_sales),0)}", "unit": ":1"},
    {"label": "Monthly Listings Growth Rate", "value": f"{listings_growth_rate.iloc[-1]:.2f}", "unit": "%"}
]


In [185]:
max_eth_sale_row

name           vm.box
price_usd   31,481.47
price_eth       10.00
Name: 2024-04-27 16:00:00+00:00, dtype: object

In [186]:
max_eth_sale_details = {
    "name": max_eth_sale_row["name"],
    "price_usd": max_eth_sale_row["price_usd"],
    "price_eth": max_eth_sale_row["price_eth"],
    "date": max_eth_sale_row.name  # This is the index (timestamp)
}

highest_sold_domain_str = f"""
Name: {max_eth_sale_details['name']}
Price (USD): ${max_eth_sale_details['price_usd']:.2f}
Price (ETH): {max_eth_sale_details['price_eth']}
Date: {max_eth_sale_details['date'].strftime('%Y-%m-%d')}
"""

In [187]:
highest_sold_domain_str

'\nName: vm.box\nPrice (USD): $31481.47\nPrice (ETH): 10.0\nDate: 2024-04-27\n'

In [188]:
sales_metrics = [
    {"label": "Total Sales Volume ", "value":f"${cumulative_box_vol.iloc[-1]:,.2f}", "unit": ""},
    {"label": "Highest Sold Domain", "value": highest_sold_domain_str, "unit": ""},
    {"label": "Average Sales Price ", "value":f"${avg_box_sale:,.2f}", "unit": ""}
    
]

In [189]:
listings_metrics = [

    {"label":"Total .box Listings on Opensea ", "value": str(total_box_listings), "unit": ""}
]

In [190]:
mints_metrics = [
    {"label":"Total .box Mints ", "value":str(total_box_mints), "unit": ""}
]

In [191]:
def generate_table(dataframe, max_rows=11):
    return html.Table([
        html.Thead(
            html.Tr([html.Th(col) for col in dataframe.columns])
        ),
        html.Tbody([
            html.Tr([
                html.Td(dataframe.iloc[i][col]) for col in dataframe.columns
            ]) for i in range(min(len(dataframe), max_rows))
        ])
    ])

In [192]:
latest_box_listings

Unnamed: 0,start_time,price_in_eth,end_time,name,price_in_usd_start_time
0,2024-07-10 21:00:00,50.0,2024-08-10 21:00:00,university.box,155186.0
1,2024-07-10 21:00:00,50.0,2024-08-10 21:00:00,pharmacy.box,155186.0
2,2024-07-09 22:00:00,33.0,2025-01-09 22:00:00,blum.box,101282.94
3,2024-07-09 21:00:00,1.0,2024-07-16 21:00:00,cypherpunk.box,3061.28
4,2024-07-09 12:00:00,1.69,2024-07-12 12:00:00,ia.box,5210.41
5,2024-07-09 12:00:00,0.96,2024-08-09 12:00:00,girl.box,2959.76
6,2024-07-09 12:00:00,1.0,2024-08-09 12:00:00,demon.box,3083.08
7,2024-07-09 12:00:00,0.69,2024-08-09 12:00:00,ensvision.box,2127.33
8,2024-07-09 12:00:00,1.8,2024-08-09 12:00:00,ia.box,5549.54
9,2024-07-09 06:00:00,18.0,2024-10-30 05:00:00,coca-cola.box,55198.44


external_stylesheets = [
    'https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.1/normalize.min.css', 
    '/assets/styles.css'
]

app = Dash(__name__, external_stylesheets=external_stylesheets)

tts_markdown = f"""
Average Time to Sell a .box Domain: {avg_time_to_sell}
"""

listing_to_sale_mk = f"""Average Listing Price to Sale Price Change {listing_price_to_sale_avg_pct_change} """

cumulative_lts_mk = f"""Cumulative Listings to Sales Ratio: {round(int(cumulative_listings_to_sales), 0)}:1"""
cumulative_mts_mk = f"""Cumulative Mints to Sales Ratio: {round(int(cumulative_mint_to_sales),0)}:1"""
cum_sale_latest_mk = f"""Total Sales Volume: {cumulative_box_vol.iloc[-1]} """
max_sale_mk = f"""Highest Sold Domain: {max_eth_sale_row} """
total_mints_mk = f"""Total .box Mints: {total_box_mints} """
total_listings_mk = f"""Total .box Listings on Opensea {total_box_listings} """
latest_listings_growth_mk = f"""Monthly Listings Growth Rate: {listings_growth_rate.iloc[-1]} """
avg_sold_mk = f"""Average Sales Price: {avg_box_sale} """





# Define the layout of the app
app.layout = html.Div(style={'backgroundColor': 'var(--color-background)'}, children=[
    html.H1(children='.box Domains Dashboard', style={'textAlign': 'center', 'color': 'var(--wcm-color-fg-1)'}),
    html.Br(),
    html.H2('.box Domain Valuator', style={'color': 'var(--wcm-color-fg-2)'}),
    html.Div([
        "Input Domain Name: ",
        dcc.Input(id='valuator-input', value='example.box', type='text'),
        html.Button('Submit', id='submit-button', n_clicks=0)
    ], style={'color': 'var(--wcm-color-fg-1)'}),
    html.Br(),
    html.Div(id='valuator-output', style={'color': 'var(--wcm-color-fg-1)'}),
    html.Br(),
    html.H2('Key Metrics'),
    html.Div(className='metrics-container', style={
        'display': 'grid',
        'gridTemplateColumns': 'repeat(3, 1fr)',  # Adjust based on the number of metrics
        'gap': '20px',
        'padding': '20px',
        'backgroundColor': 'var(--wcm-color-bg-1)',
        'borderRadius': 'var(--wcm-container-border-radius)',
        'boxShadow': '0 4px 8px var(--wcm-color-overlay)'
    }, children=[
        html.Div(className='metric', style={
            'padding': '15px',
            'backgroundColor': 'var(--wcm-color-bg-2)',
            'borderRadius': 'var(--wcm-button-border-radius)',
            'textAlign': 'center',
            'color': 'var(--wcm-color-fg-1)'
        }, children=[
            html.Span(metric["label"], style={'color': 'var(--wcm-color-fg-2)'}),
            html.Span(f"{metric['value']}{metric['unit']}", style={'color': 'var(--wcm-accent-color)'})
        ]) for metric in key_metrics
    ]),
    dcc.Graph(id='mint to sales', figure=mint_to_sales_fig),
    dcc.Graph(id='listings to sales', figure=listing_to_sales_fig),
    
    html.H2('Sales'),
    html.Div(className='metrics-container', style={
        'display': 'grid',
        'gridTemplateColumns': 'repeat(3, 1fr)',  # Adjust based on the number of metrics
        'gap': '20px',
        'padding': '20px',
        'backgroundColor': 'var(--wcm-color-bg-1)',
        'borderRadius': 'var(--wcm-container-border-radius)',
        'boxShadow': '0 4px 8px var(--wcm-color-overlay)'
    }, children=[
        html.Div(className='metric', style={
            'padding': '15px',
            'backgroundColor': 'var(--wcm-color-bg-2)',
            'borderRadius': 'var(--wcm-button-border-radius)',
            'textAlign': 'center',
            'color': 'var(--wcm-color-fg-1)'
        }, children=[
            html.Span(metric["label"], style={'color': 'var(--wcm-color-fg-2)'}),
            html.Span(f"{metric['value']}{metric['unit']}", style={'color': 'var(--wcm-accent-color)'})
        ]) for metric in sales_metrics
    ]),
    dcc.Graph(id='daily_sales_count', figure=daily_sales_fig),
    dcc.Graph(id='daily_sales_vol', figure=daily_vol_fig),
    dcc.Graph(id='highest selling', figure=highest_selling_domains_fig),
    html.Br(),
    html.H3('Monthly Sales Metrics'),
    # generate_table(monthly_box_sales_metrics),
    dash_table.DataTable(
        id='monthly_sales_metrics',
        columns=[{"name": i, "id": i} for i in monthly_box_sales_metrics.columns],
        data=monthly_box_sales_metrics.to_dict('records'),
        style_table={'overflowX': 'auto'},
        style_cell={
            'height': 'auto',
            'minWidth': '150px', 'width': '150px', 'maxWidth': '150px',
            'whiteSpace': 'normal',
            'font-family': 'var(--font-primary, "Inter")',
            'backgroundColor': 'var(--color-background)',
            'color': 'var(--wcm-color-fg-2)'
        }
    ),

    html.H3('10 Latest Sales'),
    dash_table.DataTable(
        id='latest_sales',
        columns=[{"name": i, "id": i} for i in latest_box_domains_sales.columns],
        data=latest_box_domains_sales.to_dict('records'),
        style_table={'overflowX': 'auto'},
        style_cell={
            'height': 'auto',
            'minWidth': '150px', 'width': '150px', 'maxWidth': '150px',
            'whiteSpace': 'normal',
            'font-family': 'var(--font-primary, "Inter")',
            'backgroundColor': 'var(--color-background)',
            'color': 'var(--wcm-color-fg-2)'
        }
    ),
    html.Br(),
    html.H3('10 Highest Selling Domains'),
    dash_table.DataTable(
        id='highest_sales',
        columns=[{"name": i, "id": i} for i in highest_selling_domains.columns],
        data=highest_selling_domains.to_dict('records'),
        style_table={'overflowX': 'auto'},
        style_cell={
            'height': 'auto',
            'minWidth': '150px', 'width': '150px', 'maxWidth': '150px',
            'whiteSpace': 'normal',
            'font-family': 'var(--font-primary, "Inter")',
            'backgroundColor': 'var(--color-background)',
            'color': 'var(--wcm-color-fg-2)'
        }
    ),
    
    html.Br(),
    html.H2('Listings'),
    html.Div(className='metrics-container', style={
        'display': 'grid',
        'gridTemplateColumns': 'repeat(1, 1fr)',  # Adjust based on the number of metrics
        'gap': '20px',
        'padding': '20px',
        'backgroundColor': 'var(--wcm-color-bg-1)',
        'borderRadius': 'var(--wcm-container-border-radius)',
        'boxShadow': '0 4px 8px var(--wcm-color-overlay)'
    }, children=[
        html.Div(className='metric', style={
            'padding': '15px',
            'backgroundColor': 'var(--wcm-color-bg-2)',
            'borderRadius': 'var(--wcm-button-border-radius)',
            'textAlign': 'center',
            'color': 'var(--wcm-color-fg-1)'
        }, children=[
            html.Span(metric["label"], style={'color': 'var(--wcm-color-fg-2)'}),
            html.Span(f"{metric['value']}{metric['unit']}", style={'color': 'var(--wcm-accent-color)'})
        ]) for metric in listings_metrics
    ]),
    dcc.Graph(id='monthly listings growth', figure=listings_growth_rate_fig),
    html.H3('Historical Listings to Sales'),
    dash_table.DataTable(
        id='listings_to_sales',
        columns=[{"name": i, "id": i} for i in historical_listing_to_sales.columns],
        data=historical_listing_to_sales.to_dict('records'),
        style_table={'overflowX': 'auto'},
        style_cell={
            'height': 'auto',
            'minWidth': '150px', 'width': '150px', 'maxWidth': '150px',
            'whiteSpace': 'normal',
            'font-family': 'var(--font-primary, "Inter")',
            'backgroundColor': 'var(--color-background)',
            'color': 'var(--wcm-color-fg-2)'
        }
    ),
    html.H3('10 Latest Listings'),
    dash_table.DataTable(
        id='latest_listings',
        columns=[{"name": i, "id": i} for i in latest_box_listings.columns],
        data=latest_box_listings.to_dict('records'),
        style_table={'overflowX': 'auto'},
        style_cell={
            'height': 'auto',
            'minWidth': '150px', 'width': '150px', 'maxWidth': '150px',
            'whiteSpace': 'normal',
            'font-family': 'var(--font-primary, "Inter")',
            'backgroundColor': 'var(--color-background)',
            'color': 'var(--wcm-color-fg-2)'
        }
    ),



    html.H2('Mints'),
    html.Div(className='metrics-container', style={
        'display': 'grid',
        'gridTemplateColumns': 'repeat(1, 1fr)',  # Adjust based on the number of metrics
        'gap': '20px',
        'padding': '20px',
        'backgroundColor': 'var(--wcm-color-bg-1)',
        'borderRadius': 'var(--wcm-container-border-radius)',
        'boxShadow': '0 4px 8px var(--wcm-color-overlay)'
    }, children=[
        html.Div(className='metric', style={
            'padding': '15px',
            'backgroundColor': 'var(--wcm-color-bg-2)',
            'borderRadius': 'var(--wcm-button-border-radius)',
            'textAlign': 'center',
            'color': 'var(--wcm-color-fg-1)'
        }, children=[
            html.Span(metric["label"], style={'color': 'var(--wcm-color-fg-2)'}),
            html.Span(f"{metric['value']}{metric['unit']}", style={'color': 'var(--wcm-accent-color)'})
        ]) for metric in mints_metrics
    ]),
    dcc.Graph(id='daily_mints', figure=daily_mint_metrics_fig),
    html.Br(),
    html.H3('10 Latest Mints'),
    dash_table.DataTable(
        id='latest_mints',
        columns=[{"name": i, "id": i} for i in latest_box_domains_mints.columns],
        data=latest_box_domains_mints.to_dict('records'),
        style_table={'overflowX': 'auto'},
        style_cell={
            'height': 'auto',
            'minWidth': '150px', 'width': '150px', 'maxWidth': '150px',
            'whiteSpace': 'normal',
            'font-family': 'var(--font-primary, "Inter")',
            'backgroundColor': 'var(--color-background)',
            'color': 'var(--wcm-color-fg-2)'
        }
    ),
    
    
])

# Define the callback
@callback(
    Output(component_id='valuator-output', component_property='children'),
    Input(component_id='submit-button', component_property='n_clicks'),
    State(component_id='valuator-input', component_property='value')
)
def update_output_div(n_clicks, domain):
    if n_clicks == 0:
        return "Please enter a domain name and click Submit."
    
    if not domain:
        return "Please enter a domain name."
    
    domain_df = pd.DataFrame({'domain': [domain]})
    domain_processed = model_prep(domain_df)
    domain_value = value_domain(domain_processed)
    return f'Estimated Value: ${round(domain_value,2):,.2f}'

# Run the app
if __name__ == '__main__':
    app.run_server(debug=True)

In [193]:
external_stylesheets = [
    'https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.1/normalize.min.css', 
    '/assets/styles.css'
]

app = Dash(__name__, external_stylesheets=external_stylesheets)

app.layout = html.Div(style={'backgroundColor': 'var(--color-background)'}, children=[
    html.H1(
        children='.box Domains Dashboard',
        style={
            'textAlign': 'center',
            'color': 'var(--wcm-color-fg-1)',
            'fontSize': '36px',
            'fontWeight': 'bold',
            'marginBottom': '20px'
        }
    ),
    html.Br(),
    html.H2('.box Domain Valuator', style={
        'color': 'var(--wcm-color-fg-2)', 
        'textAlign': 'center', 
        'marginBottom': '20px'
    }),
    html.Div([
        html.Label("Input Domain Name:", style={
            'color': 'var(--wcm-color-fg-1)', 
            'marginRight': '10px',
            'fontWeight': 'bold'
        }),
        dcc.Input(
            id='valuator-input',
            value='example',
            type='text',
            style={
                'padding': '10px',
                'borderRadius': 'var(--wcm-input-border-radius)',
                'border': '1px solid var(--color-border)',
                'marginRight': '10px'
            },
            pattern='[^.]*'  # Regex pattern to disallow '.' character
        ),
        html.Button('Submit', id='submit-button', n_clicks=0, style={
            'padding': '10px 20px', 
            'borderRadius': 'var(--wcm-button-border-radius)', 
            'backgroundColor': 'var(--wcm-accent-color)', 
            'color': 'var(--wcm-accent-fill-color)',
            'border': 'none',
            'cursor': 'pointer'
        })
    ], style={
        'display': 'flex', 
        'alignItems': 'center', 
        'justifyContent': 'center', 
        'marginBottom': '20px'
    }),
    html.Br(),
    html.Div(id='valuator-output', style={
        'color': 'var(--wcm-color-fg-1)', 
        'textAlign': 'center', 
        'marginTop': '20px'
    }),
    html.Br(),
    html.H2('Key Metrics', style={'color': 'var(--wcm-color-fg-1)'}),
    html.Div(className='metrics-container', children=[
        html.Div(className='metric', children=[
            html.Span(metric["label"], className='label'),
            html.Span(f"{metric['value']}{metric['unit']}", className='value')
        ]) for metric in key_metrics
    ]),
    html.Br(),
    dcc.Graph(id='mint to sales', figure=mint_to_sales_fig),
    dcc.Graph(id='listings to sales', figure=listing_to_sales_fig),
    
    html.H2('Sales', style={'color': 'var(--wcm-color-fg-1)'}),
    html.Div(className='metrics-container', children=[
        html.Div(className='metric', children=[
            html.Span(metric["label"], className='label'),
            html.Span(f"{metric['value']}{metric['unit']}", className='value')
        ]) for metric in sales_metrics
    ]),
    html.Br(),
    dcc.Graph(id='daily_sales_count', figure=daily_sales_fig),
    dcc.Graph(id='daily_sales_vol', figure=daily_vol_fig),
    dcc.Graph(id='highest selling', figure=highest_selling_domains_fig),
    html.Br(),
    html.H3('Monthly Sales Metrics', style={'color': 'var(--wcm-color-fg-1)'}),
    html.Div(style={'display': 'flex', 'justify-content': 'center', 'padding': '10px'}, children=[
        html.Div(style={'width': '80%', 'max-width': '1000px'}, children=[
            dash_table.DataTable(
                id='monthly_sales_metrics',
                columns=[{"name": i, "id": i} for i in monthly_box_sales_metrics.columns],
                data=monthly_box_sales_metrics.to_dict('records'),
                style_table={'overflowX': 'auto'},
                style_as_list_view=True,
                style_header={
                    'backgroundColor': 'var(--wcm-color-bg-2)',
                    'fontWeight': 'bold',
                    'color': 'var(--wcm-color-fg-1)'
                },
                style_cell={
                    'height': 'auto',
                    'minWidth': '150px', 'width': '150px', 'maxWidth': '150px',
                    'whiteSpace': 'normal',
                    'font-family': 'var(--font-primary, "Inter")',
                    'backgroundColor': 'var(--color-background)',
                    'color': 'var(--wcm-color-fg-2)',
                    'padding': '10px',
                    'border': '1px solid var(--color-border)'
                },
                style_data={
                    'border': '1px solid var(--color-border)',
                    'padding': '10px',
                }
            )
        ])
    ]),
    html.Br(),
    html.H3('10 Latest Sales', style={'color': 'var(--wcm-color-fg-1)'}),
    html.Div(style={'display': 'flex', 'justify-content': 'center', 'padding': '10px'}, children=[
        html.Div(style={'width': '80%', 'max-width': '1000px'}, children=[
            dash_table.DataTable(
                id='latest_sales',
                columns=[{"name": i, "id": i} for i in latest_box_domains_sales.columns],
                data=latest_box_domains_sales.to_dict('records'),
                style_table={'overflowX': 'auto'},
                style_as_list_view=True,
                style_header={
                    'backgroundColor': 'var(--wcm-color-bg-2)',
                    'fontWeight': 'bold',
                    'color': 'var(--wcm-color-fg-1)'
                },
                style_cell={
                    'height': 'auto',
                    'minWidth': '150px', 'width': '150px', 'maxWidth': '150px',
                    'whiteSpace': 'normal',
                    'font-family': 'var(--font-primary, "Inter")',
                    'backgroundColor': 'var(--color-background)',
                    'color': 'var(--wcm-color-fg-2)',
                    'padding': '10px',
                    'border': '1px solid var(--color-border)'
                },
                style_data={
                    'border': '1px solid var(--color-border)',
                    'padding': '10px',
                }
            )
        ])
    ]),
    html.Br(),
    html.H3('10 Highest Selling Domains', style={'color': 'var(--wcm-color-fg-1)'}),
    html.Div(style={'display': 'flex', 'justify-content': 'center', 'padding': '10px'}, children=[
        html.Div(style={'width': '80%', 'max-width': '1000px'}, children=[
            dash_table.DataTable(
                id='highest_sales',
                columns=[{"name": i, "id": i} for i in highest_selling_domains.columns],
                data=highest_selling_domains.to_dict('records'),
                style_table={'overflowX': 'auto'},
                style_as_list_view=True,
                style_header={
                    'backgroundColor': 'var(--wcm-color-bg-2)',
                    'fontWeight': 'bold',
                    'color': 'var(--wcm-color-fg-1)'
                },
                style_cell={
                    'height': 'auto',
                    'minWidth': '150px', 'width': '150px', 'maxWidth': '150px',
                    'whiteSpace': 'normal',
                    'font-family': 'var(--font-primary, "Inter")',
                    'backgroundColor': 'var(--color-background)',
                    'color': 'var(--wcm-color-fg-2)',
                    'padding': '10px',
                    'border': '1px solid var(--color-border)'
                },
                style_data={
                    'border': '1px solid var(--color-border)',
                    'padding': '10px',
                }
            )
        ])
    ]),
    html.Br(),
    html.H2('Listings', style={'color': 'var(--wcm-color-fg-1)'}),
    html.Div(className='metrics-container', children=[
        html.Div(className='metric', children=[
            html.Span(metric["label"], className='label'),
            html.Span(f"{metric['value']}{metric['unit']}", className='value')
        ]) for metric in listings_metrics
    ]),
    html.Br(),
    dcc.Graph(id='monthly listings growth', figure=listings_growth_rate_fig),
    html.H3('Historical Listings to Sales', style={'color': 'var(--wcm-color-fg-1)'}),
    html.Div(style={'display': 'flex', 'justify-content': 'center', 'padding': '10px'}, children=[
        html.Div(style={'width': '80%', 'max-width': '1000px'}, children=[
            dash_table.DataTable(
                id='listings_to_sales',
                columns=[{"name": i, "id": i} for i in historical_listing_to_sales.columns],
                data=historical_listing_to_sales.to_dict('records'),
                style_table={'overflowX': 'auto'},
                style_as_list_view=True,
                style_header={
                    'backgroundColor': 'var(--wcm-color-bg-2)',
                    'fontWeight': 'bold',
                    'color': 'var(--wcm-color-fg-1)'
                },
                style_cell={
                    'height': 'auto',
                    'minWidth': '150px', 'width': '150px', 'maxWidth': '150px',
                    'whiteSpace': 'normal',
                    'font-family': 'var(--font-primary, "Inter")',
                    'backgroundColor': 'var(--color-background)',
                    'color': 'var(--wcm-color-fg-2)',
                    'padding': '10px',
                    'border': '1px solid var(--color-border)'
                },
                style_data={
                    'border': '1px solid var(--color-border)',
                    'padding': '10px',
                }
            )
        ])
    ]),
    html.H3('10 Latest Listings', style={'color': 'var(--wcm-color-fg-1)'}),
    html.Div(style={'display': 'flex', 'justify-content': 'center', 'padding': '10px'}, children=[
        html.Div(style={'width': '80%', 'max-width': '1000px'}, children=[
            dash_table.DataTable(
                id='latest_listings',
                columns=[{"name": i, "id": i} for i in latest_box_listings.columns],
                data=latest_box_listings.to_dict('records'),
                style_table={'overflowX': 'auto'},
                style_as_list_view=True,
                style_header={
                    'backgroundColor': 'var(--wcm-color-bg-2)',
                    'fontWeight': 'bold',
                    'color': 'var(--wcm-color-fg-1)'
                },
                style_cell={
                    'height': 'auto',
                    'minWidth': '150px', 'width': '150px', 'maxWidth': '150px',
                    'whiteSpace': 'normal',
                    'font-family': 'var(--font-primary, "Inter")',
                    'backgroundColor': 'var(--color-background)',
                    'color': 'var(--wcm-color-fg-2)',
                    'padding': '10px',
                    'border': '1px solid var(--color-border)'
                },
                style_data={
                    'border': '1px solid var(--color-border)',
                    'padding': '10px',
                }
            )
        ])
    ]),

    html.H2('Mints', style={'color': 'var(--wcm-color-fg-1)'}),
    html.Div(className='metrics-container', children=[
        html.Div(className='metric', children=[
            html.Span(metric["label"], className='label'),
            html.Span(f"{metric['value']}{metric['unit']}", className='value')
        ]) for metric in mints_metrics
    ]),
    html.Br(),
    dcc.Graph(id='daily_mints', figure=daily_mint_metrics_fig),
    html.Br(),
    html.H3('10 Latest Mints', style={'color': 'var(--wcm-color-fg-1)'}),
    html.Div(style={'display': 'flex', 'justify-content': 'center', 'padding': '10px'}, children=[
        html.Div(style={'width': '80%', 'max-width': '1000px'}, children=[
            dash_table.DataTable(
                id='latest_mints',
                columns=[{"name": i, "id": i} for i in latest_box_domains_mints.columns],
                data=latest_box_domains_mints.to_dict('records'),
                style_table={'overflowX': 'auto'},
                style_as_list_view=True,
                style_header={
                    'backgroundColor': 'var(--wcm-color-bg-2)',
                    'fontWeight': 'bold',
                    'color': 'var(--wcm-color-fg-1)'
                },
                style_cell={
                    'height': 'auto',
                    'minWidth': '150px', 'width': '150px', 'maxWidth': '150px',
                    'whiteSpace': 'normal',
                    'font-family': 'var(--font-primary, "Inter")',
                    'backgroundColor': 'var(--color-background)',
                    'color': 'var(--wcm-color-fg-2)',
                    'padding': '10px',
                    'border': '1px solid var(--color-border)'
                },
                style_data={
                    'border': '1px solid var(--color-border)',
                    'padding': '10px',
                }
            )
        ])
    ]),
])

# Define the callback
@callback(
    Output(component_id='valuator-output', component_property='children'),
    Input(component_id='submit-button', component_property='n_clicks'),
    State(component_id='valuator-input', component_property='value')
)
def update_output_div(n_clicks, domain_prefix):
    if n_clicks == 0:
        return "Please enter a domain prefix and click Submit."
    
    if not domain_prefix:
        return "Please enter a domain prefix."
    
    if '.' in domain_prefix:
        return "Invalid input. Please enter a valid domain prefix without a '.' character."
    
    domain = f"{domain_prefix}.box"
    domain_df = pd.DataFrame({'domain': [domain]})
    domain_processed = model_prep(domain_df)
    domain_value = value_domain(domain_processed)
    return html.Div([
        html.Div(f'Domain: {domain}', style={'font-weight': 'bold'}),
        html.Div(f'Estimated Value: ${round(domain_value, 2):,.2f}')
    ])

# Run the app
if __name__ == '__main__':
    app.run_server(debug=True)


predicted value: 1337.918250592203
predicted value: 1337.918250592203
predicted value: 2560.052986420118
predicted value: 2560.052986420118
predicted value: 2560.052986420118
predicted value: 3005.042974408911
predicted value: 2560.052986420118
predicted value: 2560.052986420118
predicted value: 2838.9655944837464
predicted value: 1948.9856185061603
predicted value: 2726.1303663452823
predicted value: 1337.918250592203
predicted value: 2560.052986420118
predicted value: 2560.052986420118
predicted value: 2726.1303663452823
predicted value: 2281.1403783564892
predicted value: 1948.9856185061603
predicted value: 1782.9082385809966
predicted value: 1337.918250592203
predicted value: 2560.052986420118
predicted value: 1337.918250592203
predicted value: 1337.918250592203
predicted value: 1337.918250592203
