# NNodely Documentation - Load Data

Listed here are all the ways in which you can load data into the nnodely framework.

In [None]:
# Uncomment the command below to install the nnodely package
# (the %pip magic installs into the environment of the running kernel)
#%pip install nnodely

# Star import: provides the names used in the cells below (Input, Output, Fir, Modely, NeuObj)
from nnodely import *

>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>-- nnodely_v1.3.1 --<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<


## Load a Dataset from files

Load a dataset inside the framework using a directory. 

You must specify a name for the dataset, the folder path and also the structure of the data so that the framework will know which column must be used for every input of the network.

In [2]:
# Declare the two signals used by the network.
# The variable is named `in1` rather than `input` so that the Python built-in
# `input()` is not shadowed for the rest of the kernel session.
in1 = Input('in1')
target = Input('target')

# Apply a Fir relation to a 0.05 time window of 'in1' and expose it as the output 'out'.
relation = Fir(in1.tw(0.05))
output = Output('out', relation)

# Build the model without a visualizer and register the minimization named 'out'
# between the network output and the last sample of 'target'.
model = Modely(visualizer=None)
model.addMinimize('out', output, target.last())

# Neuralize the model using 0.01 as the sample time.
model.neuralizeModel(0.01)

In [None]:
# Column layout of the data files: each entry names the network input fed by
# the column at that position.
# NOTE(review): the empty string presumably marks a column to ignore - confirm.
data_struct = ['in1', '', 'target']

# Placeholder path: replace it with the directory that contains the training files.
train_folder = 'path/to/train/folder'

model.loadData(
    name='dataset',
    source=train_folder,
    format=data_struct,
)

You can also specify various parameters, such as the number of lines to skip, the delimiter used between the data, and whether to include the header of the file.

In [None]:
# Load the same folder with the same column layout, this time passing the
# file-parsing options explicitly.
model.loadData(
    name='dataset_2',
    source=train_folder,
    format=data_struct,
    skiplines=4,      # number of lines to skip
    delimiter='\t',   # delimiter used between the data
    header=None,
)

## Load a dataset from a custom dictionary

You can build your own dataset as a dictionary containing all the necessary inputs of the network and pass it to the 'source' argument.

In [4]:
import numpy as np

# Synthetic linear data: target = data_a * in1 + data_b
data_x = np.arange(10)
data_a, data_b = 2, -3
dataset = {
    'in1': data_x,
    'target': data_a * data_x + data_b,
}

# The dictionary keys are the names of the network inputs ('in1' and 'target').
model.loadData(name='dataset_3', source=dataset)
print(model.data['dataset_3'])

[loadData] Dataset named dataset_3 already loaded! overriding the existing one..
{'target': array([[[ 5]],

       [[ 7]],

       [[ 9]],

       [[11]],

       [[13]],

       [[15]]]), 'in1': array([[[0],
        [1],
        [2],
        [3],
        [4]],

       [[1],
        [2],
        [3],
        [4],
        [5]],

       [[2],
        [3],
        [4],
        [5],
        [6]],

       [[3],
        [4],
        [5],
        [6],
        [7]],

       [[4],
        [5],
        [6],
        [7],
        [8]],

       [[5],
        [6],
        [7],
        [8],
        [9]]])}


## Load a dataset from a pandas DataFrame

You can also use a pandas DataFrame as the source for loading a dataset into the nnodely framework.

In [6]:
import pandas as pd

# NOTE(review): presumably releases the names registered by the previous model
# (e.g. 'out') so they can be declared again - confirm.
NeuObj.clearNames()

# Four inputs, created in the order x, y, k, w.
x, y, k, w = (Input(signal_name) for signal_name in ('x', 'y', 'k', 'w'))

# 'out': Fir applied to the sum of the 0.02 time windows of x and y.
xy_window_sum = x.tw(0.02) + y.tw(0.02)
out = Output('out', Fir(xy_window_sum))

# 'out2': Fir of the last sample of k plus Fir of a 0.05 time window of w (offset -0.02).
k_branch = Fir(k.last())
w_branch = Fir(w.tw(0.05, offset=-0.02))
out2 = Output('out2', k_branch + w_branch)

# New model whose minimization 'out' is defined between the two outputs.
model = Modely(visualizer=None)
model.addMinimize('out', out, out2)
model.neuralizeModel(0.01)

# Build a DataFrame with one column per input; every column is the same
# deterministic float32 ramp 1, 2, ..., 100 (the values are not random).
df = pd.DataFrame({
    column: np.linspace(1, 100, 100, dtype=np.float32)
    for column in ('x', 'y', 'k', 'w')
})

model.loadData(name='dataset_4', source=df)

## Resampling a pandas DataFrame

If you have a column representing time (must be a datetime object), you can also use those values to resample the dataset using the sample time of the neuralized network.

In [7]:
# Unevenly spaced time stamps: 0.5 steps with larger gaps (2.0 -> 4.0 and 5.0 -> 7.0).
# NOTE(review): the text above asks for a datetime column, while these values are
# float32 - confirm how nnodely interprets a numeric 'time' column.
time_stamps = np.array([1.0, 1.5, 2.0, 4.0, 4.5, 5.0, 7.0, 7.5, 8.0, 8.5], dtype=np.float32)

# 'time' comes first, followed by one float32 ramp 1, 2, ..., 10 per network input.
df = pd.DataFrame({
    'time': time_stamps,
    **{column: np.linspace(1, 10, 10, dtype=np.float32) for column in ('x', 'y', 'k', 'w')},
})

# resampling=True requests resampling of the data when loading.
model.loadData(
    name='dataset_resampled',
    source=df,
    resampling=True,
)

## Get Samples from the Dataset

Once a dataset is loaded, you can use it to get random samples from the dataset. Set the 'window' argument to choose the number of samples to get from the specific dataset

In [9]:
# Take 5 samples from 'dataset_4' (the 'window' argument sets how many).
sample = model.getSamples(
    dataset='dataset_4',
    window=5,
)

# Run the model on the extracted samples; sampled=True is passed because the
# input comes from getSamples.
result = model(
    sample,
    sampled=True,
)
print(result)

{'out2': [50.548545837402344, 51.15421676635742, 51.759891510009766, 52.36556625366211, 52.97123718261719], 'out': [171.63796997070312, 173.55892944335938, 175.47988891601562, 177.40086364746094, 179.3218231201172]}
