In [1]:
import warnings
warnings.filterwarnings('ignore')

# Markdown
* a document formatting approach that makes writing documentation (or anything) pretty easy
* can do markdown outside of jupyter
    * filename `somefile.md`
* you can make code cells

```python
def example_function(x):
    return f'I am {x}'
```



# Machine Learning Issues

![xkcd_curve_fitting.png](images/xkcd_curve_fitting.png)

# Let's consider a scatterplot
* at differing levels of complexity
    * complexity can refer to number of features, width of a matrix, etc
    * complexity here just means order of a polynomial
    
$$
f(x) = ax^4 + bx^3 + cx^2 + dx + C
$$

In [4]:
import numpy as np
import matplotlib.pyplot as plt

def plot_scatter_fit(num_samps=50, fit_complexity=1, degree=1):
    N = num_samps
    
    np.random.seed(seed=23)
    
    # gen rand x vals
    x = np.random.rand(N)*20
    
    
    # define polynomial
    if degree == 1:
        y = 5*x + 3
    elif degree == 2:
        y = 3*x**2 + 5*x + 3
    elif degree == 3:
        y = -0.2*x**3 + 3*x**2 + 5*x + 3
    elif degree == 4:
        y = 0.006*x**4 + -0.2*x**3 + 3*x**2 + 5*x + 3


    # help us draw a fit line
    fit = np.polyfit(x, y, 1)
    fit_fn = np.poly1d(fit)
    
    
    # get scatter range
    y_val_range = np.ptp(y)
    
    
    # add guassian/normal noise
    y = [pt + np.random.normal(loc=0.0, scale=((1./10.)*y_val_range)) 
         for pt in y]

    # line that we draw
    fit_complexity_ = fit_complexity
    regr = np.poly1d(np.polyfit(x, y, fit_complexity_))(np.unique)

plot_scatter_fit(num_samps=50, fit_complexity=1, degree=1)

[54.86104438520689, 106.84884775981874, 83.6592369620516, 35.26480506429657, 24.407061017614808, 71.17787533354355, 29.89272451347685, 41.324682758410034, 60.68980968179408, 38.79512377100393, 2.062277493713645, 93.26260698369234, 96.50293571507763, 34.32728110932617, 58.71530894841513, 84.87961709957827, 93.56368699688853, -0.19881628364814574, 30.896643212933675, 43.453943448548756, 87.69858190029016, 57.661965931295185, 20.92849353558345, 0.4307242200282575, 92.4976832890492, 29.32109807150527, 51.77865487373469, 68.8194723786878, 79.41168618225345, 45.96021338027418, 90.15667657971397, 87.76959382408124, 12.466726921358836, 78.21741590211985, 17.64605289098964, 3.8176287049534636, 84.81968061406893, 49.26882829568527, 18.212662216187955, 52.28179638090061, 60.097796492376965, 90.24844202658008, 77.59094662423539, 77.67432480090602, 13.046884812863123, 17.166569603102296, 74.63789956413586, 46.83200670546408, 55.61953077146252, 90.58026042313593]
