### Onboarding Waitlist 

In [10]:
import pandas as pd
import numpy as np 

In [14]:
# Load the two inputs: the onboarding CSV and the first sheet of the
# network-register workbook. Paths are relative to the notebook's working directory.
onboarding_data = pd.read_csv('onboarding_waitlist.csv', low_memory=False)
registration_data = pd.read_excel('network_register.xlsx', sheet_name='Sheet1')

# Show each frame's column names. These are two separate statements on purpose:
# joining the prints with a comma builds a tuple of their return values, which
# the notebook then echoes as a stray `(None, None)`.
print(registration_data.columns)
print(onboarding_data.columns)

Index(['Time', 'Participant', 'What is your HOPR safe address?',
       'What is your Node address',
       'Do you already have the Network Registry NFT?',
       'How would you like to be informed once you're able to join the network?',
       'What is your Telegram handle?', 'What is your e-mail?'],
      dtype='object')
Index(['deployment_date', 'safe_address', 'deployment_tx_hash',
       'wxHOPR_balance', 'nr_nft', 'nft_id'],
      dtype='object')


(None, None)

### Prepare Data and Merge 

In [17]:
# Normalise the join key in both frames: trim leading/trailing whitespace and
# lower-case it, so the same address typed with different casing compares equal.
# The vectorised `.str` accessor propagates missing values as NaN, whereas calling
# `x.lower()` element by element raises AttributeError on a blank cell.
onboarding_data["safe_address"] = (
    onboarding_data["safe_address"].str.strip().str.lower()
)
registration_data["What is your HOPR safe address?"] = (
    registration_data["What is your HOPR safe address?"].str.strip().str.lower()
)

# Flag repeated addresses. With keep='last' every occurrence except the final
# one is marked True, so the last row per address is the one that survives.
dataDup_onboarding = onboarding_data.duplicated(subset=['safe_address'], keep='last')
dataDup_registration = registration_data.duplicated(
    subset=['What is your HOPR safe address?'], keep='last'
)

# Store the flag next to the data so it can be inspected on the full frames.
onboarding_data['Duplicate'] = dataDup_onboarding
registration_data['Duplicate'] = dataDup_registration

# Keep only the unflagged rows. `.copy()` makes these independent frames, so
# later assignments on them do not act on a slice of the originals
# (which may trigger SettingWithCopyWarning).
onboarding_data_01 = onboarding_data.loc[~onboarding_data['Duplicate']].copy()
registration_data_01 = registration_data.loc[~registration_data['Duplicate']].copy()

# Sanity check: only `False` should remain in each frame.
print(onboarding_data_01['Duplicate'].value_counts())
print(registration_data_01['Duplicate'].value_counts())

Duplicate
False    184
Name: count, dtype: int64
Duplicate
False    266
Name: count, dtype: int64


In [26]:
# Left-join the de-duplicated registrations onto the de-duplicated onboarding
# rows by safe address, keep only the columns needed downstream, and give the
# two form-derived columns short snake_case names.
waitlist = (
    pd.merge(
        registration_data_01,
        onboarding_data_01,
        how='left',
        left_on='What is your HOPR safe address?',
        right_on='safe_address',
    )
    .loc[:, [
        'Time',
        'deployment_date',
        'What is your Node address',
        'safe_address',
        'deployment_tx_hash',
        'wxHOPR_balance',
        'nr_nft',
    ]]
    .rename(columns={
        "Time": "registration_time",
        "What is your Node address": "node_address",
    })
)
# Row count after the left join (one row per kept registration).
print(len(waitlist))

# Exclude non-eligible nodes: rows where the join left `safe_address` empty.
waitlist_01 = waitlist[waitlist['safe_address'].notna()]
print(len(waitlist_01))

print(waitlist_01.head())

266
76
           registration_time              deployment_date  \
15  2023-09-09T12:56:14.783Z  2023-09-09 12:40:10.000 UTC   
24  2023-09-09T13:27:09.282Z  2023-09-09 12:23:55.000 UTC   
39  2023-09-09T14:13:15.202Z  2023-09-04 14:55:25.000 UTC   
44  2023-09-09T14:31:55.044Z  2023-09-09 14:22:30.000 UTC   
50  2023-09-09T14:54:40.369Z  2023-09-09 14:42:15.000 UTC   

                                  node_address  \
15  0x8de95cdc3291152381f24226422ed9c08beec146   
24  0x7ca010ee624b8186e98f41ec2fa6fa329700c104   
39  0x26323877aab52f90c5da19b53320c99627ab2bfc   
44  0x775175769897c0a46781b423c599f1b6a2b4cde8   
50  0x68cff38fa5d50b13734b1e8d7f26df7ef3bb3f69   

                                  safe_address  \
15  0x4d219619fa660c02236c89e5de46843bdeb41233   
24  0xc9f55edb61a23567302e8553f1e5e37574c71e30   
39  0x8629dbfd74e0b6016215c064093e3c8259a3b260   
44  0x244454ab29bafb5bbb95f8f6de28fcc45e81b620   
50  0x4ab0895c38380dc61b4553963a84033837cd7b6a   

                        

### Delete non-eligible nodes