In [47]:
# Ensure that any code changes are immediately reflected: (re)load IPython's
# autoreload extension and switch it to mode 2, so edited modules are reloaded
# before each cell runs instead of requiring a kernel restart.
%reload_ext autoreload
%autoreload 2

## Initialize Three Users

In [37]:
import os

from mostlyai import MostlyAI

# API keys are read from the environment instead of being hardcoded, so that
# credentials never end up in the notebook file or in version control.
# Export MOSTLY_API_KEY_T1 / MOSTLY_API_KEY_T2 / MOSTLY_API_KEY_T3 before
# starting the kernel; a missing variable raises a KeyError that names it.
# NOTE: any key that was ever committed in this notebook must be treated as
# leaked and rotated.

# Test User 1
mostly_t1 = MostlyAI(api_key=os.environ['MOSTLY_API_KEY_T1'])
# Test User 2
mostly_t2 = MostlyAI(api_key=os.environ['MOSTLY_API_KEY_T2'])
# Test User 3
mostly_t3 = MostlyAI(api_key=os.environ['MOSTLY_API_KEY_T3'])

## Basic Usage

### User 1 trains a Generator

In [38]:
# User 1 trains a generator straight from the public Titanic demo CSV.
titanic_csv_url = 'https://raw.githubusercontent.com/mostly-ai/public-demo-data/dev/titanic/titanic.csv'
g = mostly_t1.train(titanic_csv_url)

Output()

In [48]:
# Bare last expression: renders the trained generator object as the cell output.
g

In [52]:
# Open the created generator in a web browser.
g.open()

### User 1 shares the Generator with User 2 and User 3

In [26]:
# User 1 grants access to the generator to each of the other two test users.
for recipient_email in ('test2@mostly.ai', 'test3@mostly.ai'):
    mostly_t1.share(g, recipient_email)

### User 2 generates a Synthetic Dataset

In [27]:
# User 2 generates from the generator shared by User 1, referenced by its id.
# size=1000 presumably requests 1,000 synthetic rows — TODO confirm against the SDK docs.
sd = mostly_t2.generate(g.id, size=1000)

Output()

### User 2 consumes the Synthetic Dataset

In [28]:
# Fetch the synthetic data and render it as the cell output (a table of the
# Titanic columns; presumably a pandas DataFrame — verify).
sd.data()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked
0,0,3,male,12,0,0,8,Q
1,1,3,male,,0,0,8,S
2,0,3,male,42,0,0,7,S
3,1,2,female,54,0,0,12,S
4,1,3,male,56,1,2,26,S
...,...,...,...,...,...,...,...,...
995,0,3,male,24,0,0,73,S
996,0,1,male,28,2,0,44,S
997,0,3,male,46,0,2,7,C
998,0,3,male,66,0,0,9,S


In [29]:
# Bare last expression: renders the synthetic dataset object as the cell output.
sd

### User 3 generates a Seeded Synthetic Dataset

In [30]:
# prepare a seed DataFrame
import pandas as pd
import numpy as np

# A fixed seed keeps the seed DataFrame reproducible under
# Restart Kernel -> Run All; an unseeded draw differs on every run.
RANDOM_SEED = 42
N_SEED_ROWS = 10_000
rng = np.random.default_rng(RANDOM_SEED)

seed_df = pd.DataFrame({
    # every seeded row is a survivor
    'survived': [1] * N_SEED_ROWS,
    # integer ages drawn uniformly from 5..17 (the upper bound 18 is exclusive)
    'age': rng.integers(5, 18, N_SEED_ROWS),
})
seed_df

Unnamed: 0,survived,age
0,1,13
1,1,14
2,1,6
3,1,17
4,1,14
...,...,...
9995,1,10
9996,1,10
9997,1,6
9998,1,14


In [31]:
# User 3 generates from the shared generator, passing the prepared DataFrame
# as the seed. In the recorded output the 'survived' and 'age' columns match
# the seed rows, with the remaining columns filled in.
sd3 = mostly_t3.generate(g.id, seed=seed_df)

Output()

In [32]:
# Fetch the seeded synthetic data and render it as the cell output.
sd3.data()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked
0,1,1,male,13,1,0,5,S
1,1,1,female,14,0,1,26,S
2,1,3,male,6,1,1,8,S
3,1,1,female,17,1,0,76,C
4,1,1,female,14,0,0,79,C
...,...,...,...,...,...,...,...,...
9995,1,3,female,10,1,0,16,S
9996,1,3,female,10,0,0,38,S
9997,1,2,female,6,1,1,11,C
9998,1,3,female,14,0,0,13,S


## Multi-Table

### User 1 trains a 3-table setup from pandas

In [None]:
import pandas as pd

# Source locations of the three gzipped baseball demo tables.
players_csv_url = 'https://github.com/mostly-ai/public-demo-data/raw/dev/baseball/players.csv.gz'
fielding_csv_url = 'https://github.com/mostly-ai/public-demo-data/raw/dev/baseball/fielding.csv.gz'
batting_csv_url = 'https://github.com/mostly-ai/public-demo-data/raw/dev/baseball/batting.csv.gz'

# Load each table into its own DataFrame, in the same order as before.
df_players = pd.read_csv(players_csv_url)
df_fielding = pd.read_csv(fielding_csv_url)
df_batting = pd.read_csv(batting_csv_url)

# Show the players table as the cell output.
df_players

In [None]:
# Parent table: players, keyed by "id".
# maxTrainingTime is capped at 1 for every table (unit presumably minutes — TODO confirm).
players_table = {
    "name": "players",
    "data": df_players,
    "primaryKey": "id",
    "modelConfiguration": {"maxTrainingTime": 1},
}

# Child tables: both reference players through "players_id" as a context key.
fielding_table = {
    "name": "fielding",
    "data": df_fielding,
    "foreignKeys": [{"column": "players_id", "referencedTable": "players", "isContext": True}],
    "modelConfiguration": {"maxTrainingTime": 1},
}
batting_table = {
    "name": "batting",
    "data": df_batting,
    "foreignKeys": [{"column": "players_id", "referencedTable": "players", "isContext": True}],
    "modelConfiguration": {"maxTrainingTime": 1},
}

# User 1 trains a single generator over the three linked tables.
baseball_config = {
    "name": "baseball",
    "tables": [players_table, fielding_table, batting_table],
}
g = mostly_t1.train(config=baseball_config)

## Database Connectivity

### User 1 creates a Connector to a Postgres Database

In [None]:
import getpass
import os

# The database password is never hardcoded: it is taken from the
# POSTGRES_PASSWORD environment variable, falling back to an interactive
# prompt (input is not echoed) when the variable is unset or empty.
pwd = os.environ.get("POSTGRES_PASSWORD") or getpass.getpass("Postgres password: ")

# User 1 registers a source connector to the Postgres database.
c = mostly_t1.connect({
    "accessType": "SOURCE",
    "type": "POSTGRES",
    "config": {
        "host": "postgres-test.cppq7davohkj.eu-central-1.rds.amazonaws.com",
        "username": "postgres",
        "database": "berka_original",
    },
    "secrets": {
        "password": pwd
    }
})

### User 1 shares the Connector with User 2

In [None]:
# User 1 shares the connector, referenced by its id, with User 2's account.
mostly_t1.share(c.id, 'test2@mostly.ai')

### User 2 uses the Connector to train a Generator

In [None]:
# User 2 looks up the shared connector through their own client, by id,
# and displays the returned object as the cell output.
c2 = mostly_t2.connectors.get(c.id)
c2

In [None]:
# Presumably lists the top-level locations reachable through the connector — TODO confirm.
c2.locations()

In [None]:
# Presumably lists the locations under "berka"; the training config that
# follows references "berka.account" — TODO confirm the prefix semantics.
c2.locations("berka")

In [None]:
# Single table read through the shared connector rather than from a DataFrame.
accounts_table = {
    "name": "accounts",
    "sourceConnectorId": str(c2.id),
    "location": "berka.account",
    "primaryKey": "account_id",
}

# User 2 trains a generator named "BERKA" on that one table.
berka_config = {
    "name": "BERKA",
    "tables": [accounts_table],
}
g = mostly_t2.train(config=berka_config)

In [None]:
# User 2 generates a synthetic dataset from the generator just trained; the
# generator object itself is passed here (earlier cells passed g.id).
sd = mostly_t2.generate(g)

In [None]:
# Fetch the generated synthetic data and render it as the cell output.
sd.data()