# Chapter 10 - Hacking Optimus

## Using a local installation of Optimus

In [1]:
# Enable IPython's autoreload extension in mode 2 so imported modules are
# reloaded before each cell executes; edits to the local Optimus sources are
# then picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys

# Make the local Optimus checkout importable.
# sys.path is searched in order, so the path is placed at the FRONT: with
# append() any copy of Optimus already installed in site-packages would be
# found first and the local sources would be silently ignored.
local_optimus_path = "../../../optimus"
# Drop any earlier entry (e.g. from a previous run of this cell) so re-running
# the cell neither duplicates the path nor leaves it at a low-priority position.
while local_optimus_path in sys.path:
    sys.path.remove(local_optimus_path)
sys.path.insert(0, local_optimus_path)

Use Vaex as the engine for this Optimus session.

In [3]:
from optimus import Optimus
# Instantiate Optimus with "vaex" selected; `op` is used by the cells below
# to load data.
op = Optimus("vaex")

In [None]:
# Display the `client` attribute of the Optimus instance created above.
op.client

In [5]:
# A custom loader could be added to Optimus and then called like this:
# df = op.load.my_csv("foo.csv") # <-- you can create this function

# Load "foo.csv" with the built-in CSV loader; `df` is used by all later cells.
df = op.load.csv("foo.csv") # <-- already implemented on Optimus core

In [6]:
# List the column names of the loaded dataframe.
df.cols.names()

['name', 'function', ' phone_number', 'num']

## Basic class functions

In [7]:
# Upper-case the "name" column. The result is only displayed here; `df` is not
# reassigned, so later cells still see the original values.
df.cols.upper("name") # <-- already implemented on Optimus core, compatible with Vaex

name  1 (string),function  2 (string),phone_number  3 (string),num  4 (float64)
OPTIMUS,⋅leader,123-456-7890,2.2
BUMBLEBEE,⋅espionage,123-456-7890,3.0
EJECT,⋅ELECTRONIC⋅SURVEILLANCE,optimus@cybertron.com,4.0


In [8]:
# Keep only the "name" column for display.
df.cols.select("name")

name  1 (string)
Optimus
Bumblebee
eject


In [9]:
# Take a substring of every value in "name": characters from index 1 up to
# (but not including) index 3, e.g. "Optimus" -> "pt" in the output below.
df.cols.slice("name", start=1, stop=3) # <-- already implemented on Optimus core

name  1 (string),function  2 (string),phone_number  3 (string),num  4 (float64)
pt,⋅leader,123-456-7890,2.2
um,⋅espionage,123-456-7890,3.0
je,⋅ELECTRONIC⋅SURVEILLANCE,optimus@cybertron.com,4.0


## Numeric transformations

In [10]:
# Minimum value of the "num" column.
df.cols.min("num")

array(2.2)

In [11]:
# Element-wise sine of the "num" column; the trailing ["num"] restricts the
# displayed result to that column.
df.cols.sin("num")["num"]

num  1 (float64)
0.8084964038195901
0.1411200080598672
-0.7568024953079282


In [12]:
# example

import numpy as np
import vaex as vx

def sin_plus(expression, value):
    """Return the sine of ``expression`` shifted by ``value``.

    Note: this could also be implemented as a method in the 'VaexFunctions'
    class instead of a free function.

    Args:
        expression: Input passed to ``np.sin`` (a number or array; presumably
            also a Vaex expression when called through ``df.cols.apply``).
        value: Offset added to the sine result.

    Returns:
        ``np.sin(expression) + value``.
    """
    sine = np.sin(expression)
    return sine + value

# Apply sin_plus to the "num" column; args=(1,) supplies the `value` argument,
# so each result in the output below is sin(num) + 1.
df.cols.apply("num", func=sin_plus, args=(1,))


name  1 (string),function  2 (string),phone_number  3 (string),num  4 (float64)
Optimus,⋅leader,123-456-7890,1.80849640381959
Bumblebee,⋅espionage,123-456-7890,1.1411200080598671
eject,⋅ELECTRONIC⋅SURVEILLANCE,optimus@cybertron.com,0.2431975046920718


## Applying functions

In [13]:
def func(value):
    """Return ``value`` increased by 10 (called once per element in map mode)."""
    increment = 10
    return value + increment

# mode="map": `func` receives one cell value at a time.
df.cols.apply("num", func, mode="map")["num"]  # <-- Applied to every element

num  1 (float64)
12.2
13.0
14.0


In [14]:
def func(expression_or_series):
    """Add 10 to a whole column object in a single operation.

    Args:
        expression_or_series: The column as one object (a Vaex expression in
            this notebook) that supports ``+`` with a number.

    Returns:
        The result of ``expression_or_series + 10``.
    """
    shifted = expression_or_series + 10
    return shifted

# mode="vectorized": `func` is called once with the entire column rather than
# once per element.
df.cols.apply("num", func, mode="vectorized")["num"] # <-- Applied to the whole Vaex expression

num  1 (float64)
12.2
13.0
14.0
