In [1]:
from scipy.spatial.distance import euclidean
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import requests
import json

## Data Loading

In [None]:
file_url = "https://raw.githubusercontent.com/TawerV10/dropshub/main/data/alphadrops.json?token=GHSAT0AAAAAACL3E3A5D7A3OZ2Q3LT3KYVAZOBFFYA"
response = requests.get(file_url)
if response.status_code == 200:
    data = json.loads(response.text)
    print(f'Count: {len(data)}')
else:
  print(response.status_code)

Count: 138


In [None]:
df = pd.DataFrame(data)
df.shape

(138, 10)

In [None]:
df.head()

Unnamed: 0,title,tags,invest,network,status,description,strategy,website,discord,logo
0,Renzo,Restaking,3.2M,Ethereum,Mainnet,Renzo is a Liquid Restaking Token (LRT) and St...,✅ Stake ETH to obtain ezETH and earn Eigenlaye...,https://app.renzoprotocol.com/?ref=0x4bb12cc38...,,https://api.typedream.com/v0/document/public/1...
1,Butter,Dex,,Mantle,Mainnet,Butter Swap is the premier decentralized liqui...,✅ Make swaps to earn points & fishing attempts...,https://butter.xyz/s/0T1WCK,https://discord.com/invite/butterxyz,https://api.typedream.com/v0/document/public/1...
2,Parcl,Derivatives,11.6M,Solana,Mainnet,Parcl v3 is a perpetuals exchange designed for...,✅ Trade (1 point per $)\n✅ Provide liquidity (...,https://app.parcl.co/referrals,https://twitter.com/Parcl,https://api.typedream.com/v0/document/public/1...
3,Derivio,"Derivatives, Dex",,zkSync,Mainnet,Derivio is an ecosystem of derivative protocol...,✅ Trade perps\n✅ Trade binary options (predict...,https://derivio.xyz/,https://discord.gg/RYfV4ahPeQ,https://api.typedream.com/v0/document/public/1...
4,Ambient,"Dex, Defi",6.5M,"Ethereum, Scroll",Mainnet,Ambient (formerly CrocSwap) is a decentralized...,✅ Trade\n✅ Provide Liquidity,https://ambient.finance,https://discord.com/invite/ambient-finance,https://api.typedream.com/v0/document/public/1...


## Preprocessing

In [None]:
new_df = df[['invest']]
new_df['invest'] = new_df['invest'].apply(lambda x: float(x.replace('M', '')) if x is not None else 0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['invest'] = new_df['invest'].apply(lambda x: float(x.replace('M', '')) if x is not None else 0)


In [None]:
def not_one_hot_encoding(column):
  new_df[column] = df[column].apply(lambda row: row.split(', ') if row is not None else [])

  unique_values = set(value for values_list in new_df[column] for value in values_list)

  for value in unique_values:
    new_df[value] = new_df[column].apply(lambda x: 1 if value in x else 0)

  new_df.drop(column, axis=1, inplace=True)

In [None]:
columns = ['tags', 'network', 'status']
for column in columns:
  not_one_hot_encoding(column)

new_df.shape

In [None]:
new_df.head()

Unnamed: 0,invest,Money flow,Operating,Tools,Did,Quest,Derivatives,Infrastructure,Launchpad,Dashboard,...,Terra,Frame,Cronos,KAVA,Starknet,Ancient8,Mainnet,Confirmed,Testnet,TBA
0,3.2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
1,0.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
2,11.6,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,1,0,0,0
3,0.0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,6.5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0


## Model Building

In [None]:
reference_point = np.array(new_df.iloc[23])

In [None]:
new_df['distance'] = new_df.apply(lambda row: euclidean(row.values, reference_point), axis=1)
max_distance = new_df['distance'].max()

In [None]:
new_df['score'] = 1 + ((max_distance - new_df['distance']) / max_distance) * 99
new_df['score'] = new_df['score'].apply(lambda x: int(x))

In [None]:
new_df['score'][new_df['score'] > 50]

23    100
38     60
Name: score, dtype: int64

## Testing

In [None]:
new_df = df[['invest']]
new_df['invest'] = new_df['invest'].apply(lambda x: float(x.replace('M', '')) if x is not None else 0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['invest'] = new_df['invest'].apply(lambda x: float(x.replace('M', '')) if x is not None else 0)


In [None]:
def not_one_hot_encoding(column):
  new_df[column] = df[column].apply(lambda row: row.split(', ') if row is not None else [])

In [None]:
columns = ['tags', 'network', 'status']
for column in columns:
  not_one_hot_encoding(column)

new_df.shape

In [None]:
new_df.head()

Unnamed: 0,invest,tags,network,status
0,3.2,[Restaking],[Ethereum],[Mainnet]
1,0.0,[Dex],[Mantle],[Mainnet]
2,11.6,[Derivatives],[Solana],[Mainnet]
3,0.0,"[Derivatives, Dex]",[zkSync],[Mainnet]
4,6.5,"[Dex, Defi]","[Ethereum, Scroll]",[Mainnet]


In [None]:
tags = list(set(value for values_list in new_df['tags'] for value in values_list))
network = list(set(value for values_list in new_df['network'] for value in values_list))
status = list(set(value for values_list in new_df['status'] for value in values_list))

In [None]:
len(tags), len(network), len(status)

(26, 76, 4)

In [None]:
tags

['Money flow',
 'Operating',
 'Tools',
 'Did',
 'Quest',
 'Derivatives',
 'Infrastructure',
 'Launchpad',
 'Dashboard',
 'Network',
 'Bridge',
 'Privacy',
 'Oracle',
 'Restaking',
 'Dex',
 'Lending',
 'Gamefi',
 'Money market',
 'Defi',
 'Social',
 'Omnichain',
 'Wallet',
 'Nft',
 'Dao',
 'Stablecoin',
 'Liquid staking']

In [None]:
network

['OP Stack',
 'BNBChain',
 'Multichain',
 'ETH L2',
 'Cosmos',
 'Opside',
 'Evmos',
 'Polkadot',
 'Nervos',
 'Sui',
 'Linea',
 'Immutable X',
 'Celestia',
 'Ethereum',
 'Arbitrum',
 'Osmosis',
 'Injective',
 'Arbitrum Nova',
 'Fuel',
 'zkSync',
 'Loopring',
 'Base',
 'Fuse',
 'Kujira',
 'Aztec',
 'Mint',
 'Scroll',
 'Manta',
 'Neutron',
 'Moonbeam',
 'Near',
 'Omni Network',
 'Polygon',
 'Harmony',
 'Taiko',
 'Boba',
 'opBNB',
 'Zetachain',
 'OKC',
 'Mode',
 'Secret',
 'zkSync Lite',
 'zkSpace',
 'Specular',
 'Solana',
 'Zora',
 'Ten',
 'Kusama',
 'Fantom',
 'Aptos',
 'Shardeum',
 'Metis',
 'IoTeX',
 'Optimism',
 'CELO',
 'Juno',
 'Gnosis',
 'Axelar',
 'Canto',
 'Mooniver',
 'Everscale',
 'zkLink',
 'Omnichain',
 'Avalanche',
 'Heco',
 'Aurora',
 'Tron',
 'Side',
 'Polygon zkEVM',
 'Mantle',
 'Terra',
 'Frame',
 'Cronos',
 'KAVA',
 'Starknet',
 'Ancient8']

In [None]:
status

['Mainnet', 'Confirmed', 'Testnet', 'TBA']

## New df

In [13]:
projects = ['zksync', 'starknet', 'layerzero', 'zora', 'polyhedra']
invest = [458, 282, 263, 60, 25]
funds = ['dragonfly a16z blockchain', 'paradigm sequoia pantera', 'a16z sequoia circle', 'haun coinbase kindred', 'polychain binance hashkey']
xscore = [2213, 1065, 1962, 2291, 687]

In [14]:
funds_tier_1 = ['a16z', 'blockchain', 'paradigm', 'sequoia', 'pantera', 'polychain', 'binance']
funds_tier_2 = ['dragonfly', 'coinbase', 'circle', 'hashkey']

In [35]:
new_df = pd.DataFrame({'Project': projects, 'Invest': invest, 'Funds': funds, 'XScore': xscore})
new_df.head()

Unnamed: 0,Project,Invest,Funds,XScore
0,zksync,458,dragonfly a16z blockchain,2213
1,starknet,282,paradigm sequoia pantera,1065
2,layerzero,263,a16z sequoia circle,1962
3,zora,60,haun coinbase kindred,2291
4,polyhedra,25,polychain binance hashkey,687


In [36]:
def calc_funds_score(text):
  funds = text.split(' ')

  score = 0
  for fund in funds:
    if fund in funds_tier_1:
      score += 15
    elif fund in funds_tier_2:
      score += 10
    else:
      score += 5

  return score

def normalization(column):
  min_val = 0.5
  max_val = 1.0
  return min_val + (max_val - min_val) * ((new_df[column] - new_df[column].min()) / (new_df[column].max() - new_df[column].min()))

In [37]:
new_df['Funds'].apply(calc_funds_score)

0    40
1    45
2    40
3    20
4    40
Name: Funds, dtype: int64

In [38]:
new_df['FundsScore'] = new_df['Funds'].apply(calc_funds_score)
new_df['Invest'] = normalization('Invest')
new_df['XScore'] = normalization('XScore')
new_df['FundsScore'] = normalization('FundsScore')
new_df.drop(['Project', 'Funds'], axis=1, inplace=True)
new_df.head()

Unnamed: 0,Invest,XScore,FundsScore
0,1.0,0.975686,0.9
1,0.796767,0.61783,1.0
2,0.774827,0.897444,0.9
3,0.540416,1.0,0.5
4,0.5,0.5,0.9


In [39]:
reference_point = np.array(new_df.iloc[0])
reference_point

array([1.        , 0.97568579, 0.9       ])

In [40]:
new_df['distance'] = new_df.apply(lambda row: euclidean(row.values, reference_point), axis=1)
max_distance = new_df['distance'].max()

In [58]:
new_df['score'] = 500 + ((max_distance - new_df['distance']) / max_distance) * 500
new_df['score'] = new_df['score'].apply(lambda x: int(x))

In [59]:
result = pd.DataFrame({'Projects': projects, 'Score': new_df['score'].tolist()})
result.head()

Unnamed: 0,Projects,Score
0,zksync,1000
1,starknet,693
2,layerzero,827
3,zora,558
4,polyhedra,500
