# Arkouda
## Comparison to NumPy/Pandas

Most of the Arkouda API works very similarly to that of NumPy or Pandas. However, under the hood, Arkouda sends messages to the Chapel server for processing. This demo highlights the key similarities between Arkouda and NumPy/Pandas. Additionally, we will explore some of the Chapel functionality that makes this possible.

## Importing
This may seem like a trivial step to highlight, but Arkouda requires the user to establish a connection to the server.

In [None]:
import arkouda as ak
import pandas as pd
import numpy as np

# Connect this client to the Arkouda server; the Arkouda calls in the
# following cells are sent to that server for processing.
ak.connect()

## Creating Arrays
### Python List

In [None]:
# Build an array from the same Python list with each library and display
# both results for comparison.
a = [0, 1, 2, 3, 4]
ak_array = ak.array(a)
display(ak_array)

np_array = np.array(a)
display(np_array)

### Arange

In [None]:
# arange is called the same way in both libraries; display each result
# so the two can be compared.
ak_array = ak.arange(10)
display(ak_array)

np_array = np.arange(10)
display(np_array)

### NumPy to Arkouda

In [None]:
# Start from an existing NumPy array.
np_arr = np.arange(10)
display(np_arr)

# ak.array is given the NumPy array directly to create the Arkouda version.
from_np = ak.array(np_arr)
display(from_np)

### Arkouda to NumPy

In [None]:
# Start from an Arkouda array.
ak_arr = ak.arange(10)
display(ak_arr)

# to_ndarray() is the conversion back to NumPy; note that np_arr is rebound here.
np_arr = ak_arr.to_ndarray()
display(np_arr)

## Array Functionality
### Set Operations

In [None]:
# NumPy reference results. np.isin replaces the deprecated np.in1d and gives
# the same result for 1-D inputs; the np_in1d name is kept because the
# display cell below reads it.
np_arr = np.array([4, 2, 5, 6, 4, 7, 2])
np_arr2 = np.array([1, 5, 4, 11, 9, 6])
np_in1d = np.isin(np_arr, np_arr2)
np_int = np.intersect1d(np_arr, np_arr2)

# The same two operations on Arkouda copies of the arrays.
ak_arr = ak.array(np_arr)
ak_arr2 = ak.array(np_arr2)
ak_in1d = ak.in1d(ak_arr, ak_arr2)
ak_int = ak.intersect1d(ak_arr, ak_arr2)

# Arkouda can perform this operation on multiple arrays at once:
# each argument is a list of equal-length arrays.
m1 = [
    ak.array([0, 1, 3, 4, 8, 5, 0]),
    ak.array([0, 9, 5, 1, 8, 5, 0]),
]
m2 = [
    ak.array([0, 1, 3, 4, 8, 7]),
    ak.array([0, 2, 5, 9, 8, 5]),
]
ak_in1dmult = ak.in1d(m1, m2)
ak_intmult = ak.intersect1d(m1, m2)


In [None]:
# this block is for display purposes only
# Import the widget classes by name rather than with `import *`, so it is
# explicit that HTML, Output, HBox, VBox and Layout come from ipywidgets.
from ipywidgets import HBox, HTML, Layout, Output, VBox

# Capture the NumPy results in their own output area.
np_out = Output()
with np_out:
    display(HTML("<h3 style='margin:0'>in1d</h3>"))
    display(np_in1d)
    display(HTML("<h3 style='margin:0'>intersect1d</h3>"))
    display(np_int)

# Capture the Arkouda results, including the multi-array variants.
ak_out = Output()
with ak_out:
    display(HTML("<h3 style='margin:0'>in1d</h3>"))
    display(ak_in1d)
    display(HTML("<h3 style='margin:0'>intersect1d</h3>"))
    display(ak_int)
    display(HTML("<h3 style='margin:0'>in1d (multi)</h3>"))
    display(ak_in1dmult)
    display(HTML("<h3 style='margin:0'>intersect1d (multi)</h3>"))
    display(ak_intmult)

# Lay the two output areas out side by side, each taking half the width.
container = HBox([
    VBox([
        HTML("<h2>NumPy</h2>"),
        np_out
    ], layout=Layout(width='50%')),
    VBox([
        HTML("<h2>Arkouda</h2>"),
        ak_out
    ], layout=Layout(width='50%'))
])

display(container)

### GroupBy

In [None]:
# Reuses np_arr, ak_arr and m1 from the Set Operations cell.
# NumPy: the unique values of np_arr and how many times each occurs.
np_grp_keys, np_grp_cts = np.unique(np_arr, return_counts=True)

# Arkouda: group ak_arr, then unpack count() into keys and counts.
g = ak.GroupBy(ak_arr)
ak_grp_keys, ak_grp_cts = g.count()

# Same again, but grouping on the list of two arrays in m1.
g2 = ak.GroupBy(m1)
ak_2_keys, ak_2_cts = g2.count()

In [None]:
# Display-only cell: show the NumPy and Arkouda GroupBy results side by side.
# Uses Output, HTML, VBox, HBox and Layout from the earlier ipywidgets import.

# NumPy panel: each heading is displayed immediately before its value.
np_outg = Output()
with np_outg:
    display(HTML("<h3 style='margin:0'>Keys</h3>"), np_grp_keys)
    display(HTML("<h3 style='margin:0'>Counts</h3>"), np_grp_cts)

# Arkouda panel: single-array results first, then the multi-array results.
ak_outg = Output()
with ak_outg:
    display(HTML("<h3 style='margin:0'>Keys</h3>"), ak_grp_keys)
    display(HTML("<h3 style='margin:0'>Counts</h3>"), ak_grp_cts)
    display(HTML("<b>Arkouda is able to process multiple arrays at once, similar to Pandas grouping DataFrames.</b>"))
    display(HTML("<h3 style='margin:0'>Keys (Multi)</h3>"), ak_2_keys)
    display(HTML("<h3 style='margin:0'>Counts (Multi)</h3>"), ak_2_cts)

# One half-width column per library, placed next to each other.
np_grp_column = VBox(
    children=[HTML("<h2>NumPy</h2>"), np_outg],
    layout=Layout(width="50%"),
)
ak_grp_column = VBox(
    children=[HTML("<h2>Arkouda</h2>"), ak_outg],
    layout=Layout(width="50%"),
)
grp_container = HBox(children=[np_grp_column, ak_grp_column])

display(grp_container)

## Creating DataFrames

In [None]:
# Column data shared by both DataFrames; index i across the lists is one row.
fname = ['John', 'Jane', 'John', 'Jake']
lname = ['Doe', 'Doe', 'Smith', 'FromStateFarm']
age = [37, 35, 50, 32]
salary = [75000, 77000, 100000, 35000]

# Arkouda: each column is wrapped with ak.array before building the DataFrame.
ak_df = ak.DataFrame({
    'F_Name': ak.array(fname),
    'L_Name': ak.array(lname),
    'Age': ak.array(age),
    'Salary': ak.array(salary)
})
display(ak_df)

# pandas: the same columns are passed as plain Python lists.
pd_df = pd.DataFrame({
    'F_Name': fname,
    'L_Name': lname,
    'Age': age,
    'Salary': salary
})
display(pd_df)

### Pandas to Arkouda

In [None]:
# Using the pandas DataFrame from the previous cell: passing it to
# ak.DataFrame rebinds ak_df to the converted result.
ak_df = ak.DataFrame(pd_df)
display(ak_df)

### Arkouda to Pandas

In [None]:
# Using the Arkouda DataFrame from the previous cell: to_pandas() rebinds
# pd_df to the converted result.
pd_df = ak_df.to_pandas()
display(pd_df)

### DataFrame GroupBy

In [None]:
# Arkouda: group the DataFrame on the "F_Name" column and display the counts.
ak_g = ak_df.GroupBy("F_Name")
display(ak_g.count())

# pandas equivalent. The `axis` keyword of DataFrame.groupby is deprecated and
# axis=0 was already the default, so it is omitted; the result is unchanged.
pd_g = pd_df.groupby(by="F_Name")
display(pd_g.count())

### DataFrame Sorting

In [None]:
# Arkouda: sorting is two steps here - argsort on "F_Name" gives an index
# (perm), and indexing the DataFrame with it displays the reordered rows.
perm = ak_df.argsort("F_Name")
display(ak_df[perm])

# pandas: sort_values orders the rows by the same column in one call.
display(pd_df.sort_values(by="F_Name"))

## Disconnecting from the Arkouda Server

### Disconnect
`ak.disconnect()`<br>
This disconnects the user's client from the server, but allows the server to continue running. This is more commonly used on distributed systems where multiple users may be leveraging the same Arkouda Server.

### Shutdown
`ak.shutdown()`<br>
This disconnects the user's client from the server and also shuts the server down completely. This is useful when running tests on a personal computer.

In [None]:
# Alternative: uncomment ak.disconnect() (and remove ak.shutdown()) to leave
# the server running after this client disconnects.
# ak.disconnect()
# Disconnect this client and shut the server down completely.
ak.shutdown()