## Gitcoin Grants #14 Trust & DeSoc Scores (🤖,💚)

# Part 5: Labeled Datasets for Community

This notebook produces labeled datasets for sharing with community members.

### Dependencies

In [1]:
import pandas as pd
from settings import PATHS
from utils.classifiers import CLASSIFIERS

# 1/ Ingest GR 14 data

Load the "clean" dataset, which contains a number of indicators derived from onchain metrics, and apply each wallet classifier to it to add one label column per classifier.

In [2]:
# Load the cleaned GR14 wallet table from the configured pickle path.
# NOTE(review): unpickling can execute arbitrary code -- only load files
# produced by this project's own pipeline.
df = pd.read_pickle(PATHS['outdata'])

# Evaluate every classifier row by row; each result is named after its
# classifier so it becomes a labelled column once concatenated.
classifier_columns = []
for classifier in CLASSIFIERS:
    labels = df.apply(classifier.func, axis=1)
    classifier_columns.append(labels.rename(classifier.name))
classified_wallets = pd.concat(classifier_columns, axis=1)

# Attach the classifier columns to the wallet table (aligned on the index).
df = df.join(classified_wallets)
df.head(1)

Unnamed: 0_level_0,isSquelched,trustscore,qfScore,numDonations,numGrants,sumUSD,numTokens,setTokens,setGrants,binTrustscore,...,poap_art_poaps,dao_voter,snapshot_user,ens_voter,gitcoin_voter,optimism_voter,arbitrum_voter,lens_active,lens_followers,proof_of_humanity
address,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0x00000000000cd56832ce5dfbcbff02e7ec639bc9,0.0,1.15,84.602763,27,26,278.75,1,{'DAI'},"{258, 3591, 5511, 12, 4493, 5007, 1297, 5013, ...","(1.0, 1.25]",...,False,True,True,False,False,False,False,True,False,False


# 2/ Prepare outfiles to share with community

In [3]:
# Donation-activity columns exported for community members.
grant_features = [
    'numDonations',
    'sumUSD',
    'setGrants',
]

# Columns joined next to the grant features when comparing against the
# community screening list.
sybil_indicators = [
    'isSquelched',
    'burner',
    'trustscore',
]

In [4]:
# Export only the grant-activity columns (the index is written as the first
# CSV column).
outpath = "../private_data/community/gr14_user_transactions.csv"
df.loc[:, grant_features].to_csv(path_or_buf=outpath)

# 3/ Compare results with community models

In [5]:
# Community-provided screening list, read as one address per line.
infile = '../private_data/community/220905_community_screening.csv'

# Normalise every line to a bare lowercase address. strip() removes the
# newline as well as any stray surrounding whitespace, which would otherwise
# prevent the address from matching the index of `df` in the join below.
with open(infile) as f:
    lines = [line.strip().lower() for line in f]

# lines[0] is skipped as the header row. Blank lines (e.g. an extra newline at
# the end of the file) are dropped so they do not turn into empty-address rows
# that would distort the summary statistics.
addresses = [address for address in lines[1:] if address]

# join() is a left join on the index: every listed address is kept, and
# addresses that are absent from `df` get NaN in all joined columns.
community_df = (pd
                .DataFrame(addresses, columns=['address'])
                .set_index('address')
                .join(df[grant_features + sybil_indicators]))

# NOTE(review): 0.5 looks like the lowest trustscore value -- confirm.
# Addresses with no match in `df` have a NaN trustscore and therefore compare
# as False here.
community_df['low_trustscore'] = community_df['trustscore'] == 0.5
community_df

Unnamed: 0_level_0,numDonations,sumUSD,setGrants,isSquelched,burner,trustscore,low_trustscore
address,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0x1502a19729df969ced987961830a356f59a7f1c0,16.0,15.750000,"{5857, 6115, 37, 3591, 5288, 5688, 5995, 12, 5...",1.0,False,0.5,True
0xed57d041d6ce7601af2c635976ce3f30a65eadff,16.0,15.750000,"{5857, 6115, 37, 3591, 5288, 5688, 5995, 12, 5...",1.0,True,0.5,True
0x930df074acf694238bdf6e1c947e1c5442d5f019,15.0,14.916477,"{6115, 37, 3591, 5288, 5688, 5995, 12, 5007, 4...",0.0,False,0.5,True
0x1e6c3c2054ec6027b653715be963fed04b836501,15.0,14.835828,"{6115, 37, 3591, 5288, 5688, 5995, 12, 5007, 4...",0.0,False,0.5,True
0xb4d0157329ffec6d327cd145688f280a4aa1c974,15.0,14.835828,"{6115, 37, 3591, 5288, 5688, 5995, 12, 5007, 4...",0.0,False,0.5,True
...,...,...,...,...,...,...,...
0xfefe763f27a3ebb9d9ceafe94a6d9ad2358a2b99,1.0,1.001132,{3591},1.0,True,0.5,True
0xff6f05d27afcc044b08a0c70d596f749d91d6652,1.0,1.014594,{3591},1.0,True,0.5,True
0xffc26c83b50f62ebd4e5b0c44915d4bf36cd0584,5.0,63.000000,"{37, 3591, 12, 2198, 5688}",0.0,False,0.5,True
0xffd3f0393f7bb8daa930668a51b885d48476abec,1.0,1.030818,{3591},1.0,False,0.5,True


In [6]:
# Column means over the community-listed addresses. Columns are selected by
# name rather than by position so the summary does not silently change when
# columns are added to or reordered in community_df.
community_df[['burner', 'trustscore', 'low_trustscore']].mean()

burner            0.320552
trustscore        0.614010
low_trustscore    0.810107
dtype: float64

In [7]:
# Save the joined community screening table; the address index is written as
# the first CSV column.
outfile = '../private_data/community/220905_community_screening_results.csv'
community_df.to_csv(path_or_buf=outfile)