### Imports & setup

In [1]:
import syft as sy

In [2]:
import sklearn
import sklearn.linear_model

In [3]:
# Load syft's support for each third-party library used in this notebook.
for lib_name in ("sklearn", "pandas"):
    sy.load_lib(lib_name)

# Root client of a newly created VirtualMachine, plus shortcuts to the
# sklearn and pandas modules it exposes.
remote = sy.VirtualMachine().get_root_client()
remote_sklearn, remote_pd = remote.sklearn, remote.pandas

### Load example data and inspect

In [4]:
# Path to the PaySim transaction log; change it to wherever your copy lives.
# Later cells select columns such as `amount` and `isFraud`, so the file
# must provide those columns.
CSV_DATASET = '/cache/datasets/paysim/PS_20174392719_1491204439457_log.csv'

In [5]:
import pandas as pd
# Only the first five rows are used below, so stop parsing after five rows
# instead of loading the entire CSV and then discarding the rest.
df = pd.read_csv(CSV_DATASET, nrows=5)

In [6]:
# Display the five-row sample that the following cells work with.
df

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,PAYMENT,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,0,0
1,1,PAYMENT,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,0,0
2,1,TRANSFER,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,CASH_OUT,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
4,1,PAYMENT,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,0,0


### Serialise a dataframe

In [7]:
# Serialise the sample frame; the cell's value is the serialised message
# (recorded output shows an id, an obj_type and a typed data payload).
sy.serialize(obj=df)

id {
  value: "\273\275,\336\377sBl\217\'\255\3450\325*2"
}
obj_type: "syft.lib.pandas.frame.PandasDataFrameWrapper"
data {
  type_url: "type.googleapis.com/syft.lib.pandas.PandasDataFrame"
  value: "\022\347\247\001\n\255\001\n$syft.lib.python.string.StringWrapper\022\204\001\n\022\n\020\211\331\222\353 \361B\023\201\273\241V\266\245x\351\022$syft.lib.python.string.StringWrapper\"H\n*type.googleapis.com/syft.lib.python.String\022\032\n\004step\022\022\n\020\211\331\222\353 \361B\023\201\273\241V\266\245x\351\n\255\001\n$syft.lib.python.string.StringWrapper\022\204\001\n\022\n\020N\216l\326P\000M`\264\242f\255\261c\365\334\022$syft.lib.python.string.StringWrapper\"H\n*type.googleapis.com/syft.lib.python.String\022\032\n\004type\022\022\n\020N\216l\326P\000M`\264\242f\255\261c\365\334\n\257\001\n$syft.lib.python.string.StringWrapper\022\206\001\n\022\n\020\265k4\327\022\203I\037\245\351N\342\241\006\245\360\022$syft.lib.python.string.StringWrapper\"J\n*type.googleapis.com/syft.lib.pytho

### Serialise and deserialise a dataframe

Deserialising triggers a `UserWarning` in `storeable_object` when it tries to set attributes on the DataFrame (pandas intercepts attribute assignment because attribute names double as column names).

In [8]:
# Round trip: serialise the sample frame, then immediately deserialise it.
# NOTE(review): in the recorded output the floats come back slightly altered
# (e.g. 9839.64 -> 9839.639648), which looks like a float32 conversion
# somewhere in the round trip -- confirm whether that precision loss is intended.
sy.deserialize(blob=sy.serialize(obj=df))

  data.tags = tags
  result.read_permissions = {}
  result.search_permissions = {}


Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,PAYMENT,9839.639648,C1231006815,170136.0,160296.359375,M1979787155,0.0,0.0,0,0
1,1,PAYMENT,1864.280029,C1666544295,21249.0,19384.720703,M2044282225,0.0,0.0,0,0
2,1,TRANSFER,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,CASH_OUT,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
4,1,PAYMENT,11668.139648,C2048537720,41554.0,29885.859375,M1230701703,0.0,0.0,0,0


### Serialise and deserialise a simple dataframe

In [9]:
# Smallest possible case: a single-column frame built from a plain list.
# The list gets its own descriptive name because `a` is later assigned the
# result of `remote_classifier.predict(X)`; reusing one name for two
# unrelated kinds of object makes the notebook state harder to follow.
values = [1, 2, 3]
b = pd.DataFrame(values)
sy.serialize(obj=b)

id {
  value: "\221\226x.B}N\263\2729\360Q\366r\001~"
}
obj_type: "syft.lib.pandas.frame.PandasDataFrameWrapper"
data {
  type_url: "type.googleapis.com/syft.lib.pandas.PandasDataFrame"
  value: "\022\354\t\n\227\001\n\036syft.lib.python.int.IntWrapper\022u\n\022\n\020\263\273\013\277(dDh\260*\366\215\001\303\221\206\022\036syft.lib.python.int.IntWrapper\"?\n\'type.googleapis.com/syft.lib.python.Int\022\024\022\022\n\020\263\273\013\277(dDh\260*\366\215\001\303\221\206\022\273\010\n\033syft.lib.python.DictWrapper\022\233\010\n\022\n\020\20368\350\013pCq\26566\342\273\036\001C\022\033syft.lib.python.DictWrapper\"\347\007\n(type.googleapis.com/syft.lib.python.Dict\022\272\007\n\227\001\n\036syft.lib.python.int.IntWrapper\022u\n\022\n\020[\036\227\313,\232H-\261\220\374\215\350\3423\252\022\036syft.lib.python.int.IntWrapper\"?\n\'type.googleapis.com/syft.lib.python.Int\022\024\022\022\n\020[\036\227\313,\232H-\261\220\374\215\350\3423\252\n\231\001\n\036syft.lib.python.int.IntWrapper\022w

In [10]:
# Round-trip the simple frame `b`; the recorded output shows the values 1, 2, 3.
sy.deserialize(blob=sy.serialize(obj=b))

  data.tags = tags
  result.read_permissions = {}
  result.search_permissions = {}


Unnamed: 0,0
0,1
1,2
2,3


### Grab data for a classifier

_Convert the target var to a dataframe so we don't have to serialise `Series` yet_

In [11]:
# Columns used as model inputs.
feature_columns = [
    'amount',
    'oldbalanceOrg',
    'newbalanceOrig',
    'oldbalanceDest',
    'newbalanceDest',
]
X = df[feature_columns]

# Keep the target as a one-column DataFrame rather than a Series.
Y = df['isFraud'].to_frame()

### Local classifier

_Train and predict on X_

In [12]:
# Local scikit-learn estimator with default arguments; the remote section
# below constructs the same class through the remote client.
classifier = sklearn.linear_model.LogisticRegression()

In [13]:
# scikit-learn expects a 1-D target of shape (n_samples,). Passing the
# one-column DataFrame `Y` directly appears to be what triggers the warning
# in the recorded output, so flatten it for the local fit.
classifier.fit(X, Y.values.ravel())

  return f(*args, **kwargs)


LogisticRegression()

In [14]:
# Predict on the same rows used for fitting; the recorded output is a plain
# numpy array with one label per row.
classifier.predict(X)

array([0, 0, 1, 1, 0])

### Remote classifier

_Train and predict on X_

In [15]:
# Same constructor as in the local section, but reached through the remote
# client's sklearn module.
remote_classifier = remote_sklearn.linear_model.LogisticRegression()

In [16]:
# Same call shape as the local fit, with the local X and Y passed to the
# remote estimator. The recorded output is a LogisticRegressionPointer.
remote_classifier.fit(X, Y)

  data.tags = tags
  result.read_permissions = {}
  result.search_permissions = {}
  return f(*args, **kwargs)


<syft.proxy.sklearn.linear_model.LogisticRegressionPointer at 0x7fb325b8edf0>

In [17]:
# Keep the result of the remote prediction in `a`; the next code cell calls
# `a.get()` on it.
a = remote_classifier.predict(X)

  data.tags = tags
  result.read_permissions = {}
  result.search_permissions = {}


#### `predict()` returns a `numpy.ndarray`, which needs wrapping before it can be serialised:

In [18]:
# Fetching the prediction is expected to fail for now: the recorded output
# shows an AttributeError because numpy.ndarray has no `serialize` attribute.
# Catch and display that error so the notebook still runs to completion;
# if the call starts working, the fetched value is displayed instead.
try:
    prediction = a.get()
except AttributeError as err:
    prediction = None
    print(f"AttributeError: {err}")
prediction

AttributeError: 'numpy.ndarray' object has no attribute 'serialize'