In [None]:
# default_exp util

# ml_util

> Machine learning utility functions

## Data Structures

Vectors are objects that can be added together to form new vectors and that can be multiplied by scalars (numbers), also to form new vectors. For us, vectors are points in some finite-dimensional space.

In [None]:
#export
from typing import List, NamedTuple

# Type alias: a vector is represented as a plain list of floats.
Vector = List[float]

In [None]:
#export
class LabeledPoint(NamedTuple):
    """A vector paired with a string label."""
    # The point itself, as a Vector (list of floats).
    point: Vector
    # The label attached to the point.
    label: str

## Math

In [None]:
#export
import math

def dot(v: Vector, w: Vector) -> float:
    """Dot product of two equal-length vectors.

    Multiplies the vectors component by component and adds up the
    products: v_1 * w_1 + ... + v_n * w_n.
    """
    assert len(v) == len(w), "vectors must be same length"

    # Pair up matching components, multiply each pair, then total the products.
    products = [a * b for a, b in zip(v, w)]
    return sum(products)

def sum_of_squares(v: Vector) -> float:
    """Returns v_1 * v_1 + ... + v_n * v_n"""
    # A vector dotted with itself is the sum of its squared components.
    squares_total = dot(v, v)
    return squares_total

def magnitude(v: Vector) -> float:
    """Returns the length of v: the square root of its sum of squares"""
    squared_length = sum_of_squares(v)
    return math.sqrt(squared_length)
                    
def distance(v: Vector, w: Vector) -> float:
    """Computes the Euclidean distance between v and w.

    Args:
        v: First vector.
        w: Second vector; must have the same number of components as v.

    Returns:
        sqrt((v_1 - w_1)**2 + ... + (v_n - w_n)**2), i.e. the magnitude of
        the component-wise difference v - w.

    Raises:
        AssertionError: If v and w have different lengths.
    """
    assert len(v) == len(w), "vectors must be same length"

    # The component-wise difference is built here because this module defines
    # no `subtract` helper; calling one raised NameError on every invocation.
    difference = [v_i - w_i for v_i, w_i in zip(v, w)]
    return math.sqrt(sum(d_i * d_i for d_i in difference))

def euclidean_distance(v: Vector, w: Vector) -> float:
    """Returns the Euclidean (straight-line) distance between v and w.

    Every component of the two vectors contributes to the result; keep
    labels out of the vectors (e.g. in LabeledPoint.label) rather than
    in a trailing element.

    Args:
        v: First vector.
        w: Second vector; must have the same number of components as v.

    Returns:
        sqrt((v_1 - w_1)**2 + ... + (v_n - w_n)**2) as a float.

    Raises:
        AssertionError: If v and w have different lengths.
    """
    assert len(v) == len(w), "vectors must be same length"

    # Accumulate under a name that does not shadow the module-level
    # `distance` function.
    squared_distance = 0.0
    for v_i, w_i in zip(v, w):
        squared_distance += (v_i - w_i) ** 2
    # Only the `math` module is imported, so sqrt must be qualified.
    return math.sqrt(squared_distance)

### Data Preprocessing

In [None]:
#export
import random
from typing import TypeVar, List, Tuple

# Type variable used in split_data's signature so the returned lists carry
# the same element type as the input list.
X = TypeVar('X')    # generic type to represent a data point

In [None]:
#export
def split_data(data: List[X], prob: float) -> Tuple[List[X], List[X]]:
    """Randomly split data into two lists.

    A shuffled copy of data is cut at index int(len(data) * prob); the
    elements before the cut form the first list and the rest form the
    second. The list passed in is not modified.
    """
    shuffled = data[:]    # shallow copy so the caller's list keeps its order
    random.shuffle(shuffled)
    boundary = int(len(shuffled) * prob)
    first_part = shuffled[:boundary]
    second_part = shuffled[boundary:]
    return first_part, second_part

In [None]:
#hide

# test

# A 75/25 split of 1000 items must yield exactly 750 and 250 elements.
data = list(range(1000))
train, test = split_data(data, 0.75)
assert (len(train), len(test)) == (750, 250)

In [None]:
#hide
from nbdev.showdoc import *