# Generate Data for Coin2Gether

In [1]:
from faker import Faker
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

In [2]:
faker = Faker()

## Settings

In [3]:
AMOUNT_USER = 100
AMOUNT_POSTS = 250
AMOUNT_TRADES = 250
CURRENCIES = ['Bitcoin', 'Ethereum', 'Euro', 'US-Dollar', 'Canadian-Dollar', 'Bitcoin Cash', 'Litecoin', 'Dogecoin', 'Yen', 'Yuán']
PLATFORMS = ['Twitter', 'TikTok', 'LinkedIn', 'Facebook', 'Instagram']

## Utils

In [4]:
def get_value_for_currency(high: float):
    has_value = np.random.choice([True, False], p=[0.2, 0.8]) # each user has 20% of currencies in their portfolio
    if has_value:
        return round(np.random.uniform(0, high), 2)
    else:
        return 0

In [5]:
def generate_title():
    currency = faker.cryptocurrency_name()
    prognosis = np.random.choice(['will go up', 'will go down'], p=[0.7, 0.3])
    if (np.random.choice([True, False])):
        date = f'in {faker.month_name()}'
    else:
        date = f'on {faker.day_of_week()}'
    return f'{currency} {prognosis} {date}'

In [6]:
def get_random_percentage():
    return round(abs(np.random.randn()), 4)

In [7]:
def get_random_text():
    return faker.text().replace('\n', ' ').replace(',', ' ')

## Generate Users and Portfolios

In [8]:
df = pd.DataFrame()
# database starts to count from 1
for i in range(1, AMOUNT_USER+1):
    name = faker.name()
    name = name.split(' ')
    first_name, last_name = name[0], name[-1] # remove middle names
    high = np.random.uniform(0, 100000)
    user_data = {
        'id': i,
        'email': f'{first_name.lower()}.{last_name.lower()}@example.org',
        'firstName': first_name,
        'lastName': last_name,
        'username': faker.user_name(),
        'birthdate': faker.date_between(start_date='-60y', end_date='-18y'),
        'country': faker.country(),
        'password': 'Passwort123!'
    }
    for currency in CURRENCIES:
        user_data[currency] = get_value_for_currency(high=high)
    
    df = pd.concat([df, pd.DataFrame(user_data, index=[i])], ignore_index=True)
df.to_csv('persons.csv', index=False)
df.head()

Unnamed: 0,id,email,firstName,lastName,username,birthdate,country,password,Bitcoin,Ethereum,Euro,US-Dollar,Canadian-Dollar,Bitcoin Cash,Litecoin,Dogecoin,Yen,Yuán
0,1,robert.nguyen@example.org,Robert,Nguyen,vanessacampbell,1989-10-21,Afghanistan,Passwort123!,0.0,0.0,24903.61,0.0,0.0,0.0,18432.33,0.0,0.0,0.0
1,2,robert.schmidt@example.org,Robert,Schmidt,dana83,1969-04-21,Uruguay,Passwort123!,0.0,0.0,0.0,0.0,23506.97,0.0,0.0,0.0,0.0,0.0
2,3,michael.bennett@example.org,Michael,Bennett,joseph63,1990-05-17,Peru,Passwort123!,0.0,0.0,22125.14,52995.93,0.0,76963.0,0.0,0.0,63638.46,0.0
3,4,thomas.ray@example.org,Thomas,Ray,cmartin,1967-06-14,Faroe Islands,Passwort123!,0.0,0.0,0.0,0.0,0.0,0.0,380.19,0.0,0.0,14.19
4,5,william.underwood@example.org,William,Underwood,norman25,1963-09-19,Slovenia,Passwort123!,0.0,0.0,10640.89,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Generate followers

In [9]:
G = nx.gaussian_random_partition_graph(n=AMOUNT_USER, s=50, v=20, p_in=0.4, p_out=0.2, directed=True)
df_follower = nx.to_pandas_edgelist(G)
df_follower.to_csv('follower.csv', index=False)
df_follower.head()

Unnamed: 0,source,target
0,0,3
1,0,4
2,0,7
3,0,9
4,0,13


## Generate Posts

In [10]:
df_posts = pd.DataFrame()
for i in range(AMOUNT_POSTS):
    df = pd.read_csv('persons.csv')
    post = {
        'title': generate_title(),
        'description': get_random_text(),
        'user': np.random.choice(df['id']),
        'date': faker.date_between(start_date='-1y', end_date='today')
    }
    df_posts = pd.concat([df_posts, pd.DataFrame(post, index=[1])], ignore_index=True)
df_posts.to_csv('posts.csv', index=False)
print(len(df_posts))
df_posts.head()

250


Unnamed: 0,title,description,user,date
0,Nxt will go up on Wednesday,If act cup character hundred. North miss stati...,52,2021-06-10
1,Bitcoin Cash will go up on Saturday,Medical business degree decade teach. Threat s...,17,2021-12-18
2,Dogecoin will go up in July,Become style beautiful general allow themselve...,81,2021-09-24
3,XEM will go up on Tuesday,Back work despite must hair finish cause. Make...,77,2022-03-15
4,Dash will go up in September,Need region act personal assume. Develop repor...,30,2021-07-24


## Generate Trades

In [11]:
df_trades = pd.DataFrame()
for i in range(AMOUNT_TRADES):
    df = pd.read_csv('persons.csv')
    publish_date = faker.date_between(start_date='-1y', end_date='today')
    start_date = faker.date_between(start_date=publish_date, end_date='+1y')
    end_date = faker.date_between(start_date=start_date, end_date='+1y')
    selectable_currencies = []
    while (len(selectable_currencies) < 2):
        user = df[df['id'] == np.random.choice(df['id'])].iloc[0]
        selectable_currencies = []
        for currency in CURRENCIES:
            if user[currency] > 0:
                selectable_currencies.append(currency)
    debit_currency = np.random.choice(selectable_currencies)
    selectable_currencies.remove(debit_currency)
    credit_currency = np.random.choice(selectable_currencies)
    debit_amount = user[debit_currency] * get_random_percentage()
    credit_amount = user[credit_currency] * get_random_percentage()
    trade = {
        'publish_date': publish_date,
        'start_date': start_date,
        'end_date': end_date,
        'motivation': generate_title(),
        'description': get_random_text(),
        'expected_change': np.random.randint(250),
        'percentage_trade': np.random.randint(100),
        'author': user.id,
        'debit_currency': debit_currency,
        'debit_amount': debit_amount,
        'credit_currency': credit_currency,
        'credit_amount': credit_amount,
    }
    df.at[user.name, debit_currency] += debit_amount
    df.at[user.name, credit_currency] -= credit_amount
    df_trades = pd.concat([df_trades, pd.DataFrame(trade, index=[1])], ignore_index=True)
df_trades.to_csv('trades.csv', index=False)
print(len(df_trades))
df_trades.head()

250


Unnamed: 0,publish_date,start_date,end_date,motivation,description,expected_change,percentage_trade,author,debit_currency,debit_amount,credit_currency,credit_amount
0,2021-07-02,2022-11-18,2022-12-30,Dash will go down in January,Modern to real claim appear organization value...,41,9,62,Euro,1984.81752,Litecoin,2783.179152
1,2022-03-14,2023-01-02,2023-01-18,Ubiq will go up on Thursday,Sense look mission official surface white send...,85,12,30,Yuán,33088.634568,Bitcoin Cash,13772.642155
2,2021-08-01,2022-01-01,2022-07-30,Ethereum Classic will go down on Thursday,Fall sea rate ever college. War trade year dif...,97,71,74,Euro,8283.616344,Bitcoin Cash,6405.636464
3,2022-03-20,2023-05-17,2023-05-18,Primecoin will go up on Friday,Behavior impact action maintain. Painting real...,100,57,53,Bitcoin,8062.138448,Canadian-Dollar,15264.743865
4,2022-05-18,2023-04-29,2023-05-05,Dogecoin will go up in September,Other remain to responsibility. Share note car...,145,72,16,Canadian-Dollar,1137.795904,Yen,2401.362372
