# Introduction to LogMl

**Important**: This notebook assumes that you installed LogMl in the default directory (i.e. `$HOME/logml`), as described in the documentation, and that you are running the notebook from that directory.

### Initialize Jupyter Notebook

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sys
import os

from pathlib import Path

Add the `src` subdirectory of the current directory to the Python path

In [None]:
# 'src' subdirectory of the current working directory, as a string for sys.path
logml_src = str(Path.cwd() / 'src')

# Only add the path once, so that re-running this cell does not keep
# appending duplicate entries to sys.path
if logml_src not in sys.path:
    print(f"Adding Python path '{logml_src}'")
    sys.path.append(logml_src)

# Create Dataset

In [None]:
# Name of the example dataset; the data and config file paths are derived from it
dataset_name = "intro"

In [None]:
def rand_norm(num, mean, std, na_prob):
    """
    Draw normally distributed samples and randomly replace some with NaN.

    Parameters:
        num: number of samples to draw
        mean: mean of the normal distribution
        std: standard deviation of the normal distribution
        na_prob: probability that each individual sample is replaced by NaN.
            Values <= 0 never produce NaN, values >= 1 always do.

    Returns:
        NumPy array of 'num' floats, with NaN at the randomly selected positions
    """
    xi = np.random.normal(mean, std, num)
    # np.random.rand() draws from the half-open interval [0, 1), so a strict
    # '<' selects each element with probability exactly 'na_prob' and
    # guarantees that na_prob=0 never introduces a missing value
    xi_na = np.random.rand(num) < na_prob
    xi[xi_na] = np.nan
    return xi

# Create dataset
def create_dataset(num=1000, prob_na=0.05, seed=None):
    """
    Create a synthetic regression dataset with randomly missing input values.

    The inputs are drawn from unit-variance normal distributions with
    means 0 (x1), 1 (x2) and -1 (x3). The output is a noisy linear
    combination of the inputs:

        y = 2.0 * x1 - 1.0 * x2 + 0.5 * x3 + 0.1 * n

    where 'n' is standard normal noise.

    Parameters:
        num: number of samples (rows) to generate
        prob_na: probability that each input value is replaced by NaN
        seed: optional seed for NumPy's global random generator, to make
            the dataset reproducible. None (default) leaves the generator
            state untouched.

    Returns:
        pandas DataFrame with columns 'x1', 'x2', 'x3' and 'y'. Since 'y'
        is computed from the inputs, it is NaN in every row where at least
        one input is NaN.
    """
    if seed is not None:
        np.random.seed(seed)
    # Inputs: x1, x2, x3
    x1 = rand_norm(num, 0, 1, prob_na)
    x2 = rand_norm(num, 1, 1, prob_na)
    x3 = rand_norm(num, -1, 1, prob_na)
    # Noise: a negative probability means no noise value is ever set to NaN
    n = rand_norm(num, 0, 1, -1)
    # Output
    y = 2.0 * x1 - 1.0 * x2 + 0.5 * x3 + 0.1 * n
    # Create dataFrame
    df = pd.DataFrame({'x1': x1, 'x2': x2, 'x3': x3, 'y': y})
    return df

Make sure the dataset directory exists

In [None]:
# Directory that holds the generated dataset: data/<dataset_name>
data_dir = Path('data')/dataset_name
print(f"Creating dir {data_dir}")
# Pure-Python equivalent of 'mkdir -p': creates missing parent directories and
# does not fail if the directory already exists. Unlike the shell command, it
# works on any platform and with paths containing spaces.
data_dir.mkdir(parents=True, exist_ok=True)

Run the function to create the dataset, then save it to a CSV file

In [None]:
# Generate the synthetic dataset with the default settings
df = create_dataset()

# Write it to data/<dataset_name>/<dataset_name>.csv, without the index column
csv_file = Path('data', dataset_name, f"{dataset_name}.csv")
print(f"Saving to {csv_file}")
df.to_csv(csv_file, index=False)

# Preview the first rows
df.head()

# Run LogMl

Remove old files from previous runs

In [None]:
# Shell escape: delete every file in data/<dataset_name>/ whose name ends in
# 'pkl'. '-v' lists each removed file, '-f' ignores missing files.
!rm -vf data/{dataset_name}/*pkl

In [None]:
# NOTE(review): the wildcard import is kept because other cells may rely on
# names it provides; if only LogMl is needed, prefer 'from logml import LogMl'
from logml import *

# Path of the YAML configuration file: config/<dataset_name>.yaml
config_file = Path('config', f"{dataset_name}.yaml")

# Build the LogMl object from the configuration file, then invoke it
ml = LogMl(config_file)
ml()