# Code from previous chapters

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

def predict(x, w, b):
    """
    Computes the prediction from the input x, weight w and bias b as y_hat = w*x + b.
    
    parameters:
    -----------
    X: array
        the input ("feature") for the prediction, i.e. the nuber of reservations
        can be a single number or an array of numbers
    w: number 
        the "weight" for this feature in the linear model
    b: number
        the bias of the linear model
    
    returns:
    --------
    y_hat: array 
        the predictions, i.e. the predicted numbers of pizzas that  will be ordered
        the dimensions of y are those of x
    """
    y_hat = w*x + b
    return y_hat


def loss(y, y_hat):
    """
    Computes the loss as the mean squared difference between predicted and true values. 
    
    parameters:
    -----------
    y: array 
        single number or numpy array
        the true label values in the list of examples, i.e. the true numbers of reservations
    y_hat: array
        single number or numpy array
        dimensions must mach those of y
        the predicted values 
    
    returns:
    --------
    loss: number
        the squared error, i.e. the squared difference between prediction and the prediction, i.e. the predicted number of pizzas that  will be ordered. 
    """
    loss = np.mean( (y_hat - y)**2 )
    return loss



def train_naive(x, y, learning_rate=0.001, max_iterations=10000):
    """
    Trains a linear model. The weight-bias space is explored by 
        taking steps in the w-direction and the b-direction separately. 
    If no further improvement is obtained the training stops. 
    If no stop is reached within the maximum number of iterations
        a warning is printed and the current vales are returned.
    
    parameters:
    -----------
    x: array 
        single number or numpy array
        the feature values in the training examples
    y: array
        single number or numpy array
        dimensions must mach those of x
        the label values in the training examples
    learning_rate: number
        the learning rate
    max_iterations: integer
        the maximum number of iterations
    
    returns:
    --------
    [w, b, current_loss, i, ws, bs, losses]
    w: number 
        final weight
    b: number
        final bias
    current_loss: number
        final loss
    i: integer
        number of iterations performed
    ws: list
        weights for all iterations
    bs: list
        biases for all iterations
    losses: list
        loss for all iterations
    """
    w = 0 # initial weight
    b = 0 # initial bias
    
    ws = []
    bs = []
    losses = []
    
    for i in range(max_iterations):
        current_loss = loss(predict(x, w, b), y)
        #print('Iteration %4d: , w=%.3f, b=%.3f, current_loss: %.3f' %(i, w, b, current_loss))
        
        ws.append(w)
        bs.append(b)
        losses.append(current_loss)
        
        if loss(predict(x, w+learning_rate, b), y) < current_loss:
            w += learning_rate
        elif loss(predict(x, w-learning_rate, b), y) < current_loss:
            w -= learning_rate
        elif loss(predict(x, w, b+learning_rate), y) < current_loss:
            b += learning_rate        
        elif loss(predict(x, w, b-learning_rate), y) < current_loss:
            b -= learning_rate        
        else:
            return [w, b, current_loss, i, ws, bs, losses]
    print('Could not converge in ', max_iterations, 'iterations')
    return [w, b, current_loss, i, ws, bs, losses]




# ========================================================
# ===== AUXILIURY FUNCTIONS FOR PLOTTING 
# ========================================================

def plotpath(ws, bs, losses, X=None, Y=None, step=1, xlabel='weight', ylabel='bias', figsize=(12,4)):
    fig = plt.figure(figsize=figsize)
    gs = fig.add_gridspec(3, 3)
    ax1 = fig.add_subplot(gs[:, 0])
    ax2 = fig.add_subplot(gs[0, 1])
    ax3 = fig.add_subplot(gs[1, 1])
    ax4 = fig.add_subplot(gs[2, 1])
    ax5 = fig.add_subplot(gs[:, 2])
    
    inds = np.arange(len(ws))
    
    # path in 2d parameter space
    ax1.plot(ws[::step], bs[::step], '-')
    ax1.set_xlabel(xlabel)
    ax1.set_ylabel(ylabel)
    ax1.annotate('iterations: {} \nfinal weight = {:.3f} \nfinal bias = {:.3f}'.format(len(ws), ws[-1], bs[-1]), (0.5, 0.5), xycoords='axes fraction', va='center')
    ax1.grid()
    
    # weight
    ax2.plot(inds[::step], ws[::step])
    ax2.set_xlabel('iteration')
    ax2.set_ylabel(xlabel)
    ax2.grid()

    # bias
    ax3.plot(inds[::step], bs[::step])
    ax3.set_xlabel('iteration')
    ax3.set_ylabel(ylabel)
    ax3.grid()

    # loss
    ax4.semilogy(inds[::step], losses[::step])
    ax4.set_xlabel('iteration')
    ax4.set_ylabel('loss')
    ax4.annotate('final loss = %.4f'%losses[-1], (0.5, 0.5), xycoords='axes fraction', va='center')
    ax4.grid()

    # data and linear fit
    if X is not None and Y is not None:
        Y_hat = predict(X, ws[-1], bs[-1])
        ax5.plot(X,Y,'bo')
        ax5.plot(X, Y_hat)
        ax5.set_xlabel('feature: number of reservations')
        ax5.set_ylabel('label: number of pizzas')
        ax5.set_xlim([0, 30]) 
        ax5.set_ylim([0, 50]) 
        
    plt.tight_layout()

# Daten mit drei features, vorläufig
Hier werden die Daten mit drei Features geladen und in eine Matrix gepackt. 
* Zeilenindex i: numeriert die Beispiele
* Spaltenindex j: numeriert die features

Diese Matrix ist nur vorläufig, da wir später noch eine Spalte Einsen hinzufügen werden (siehe unten). 

In [5]:
x1, x2, x3, y = np.loadtxt('C:\Users\Lenn Messelken\Documents\Coden\Seminar AI\data\pizza_3_vars.txt', skiprows=1, unpack=True)
X = np.column_stack((x1, x2, x3))
m = len(X)
print('number of examples: m =',m)


SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 2-3: truncated \UXXXXXXXX escape (2624449147.py, line 1)

## Einzelne Einträge einer Matrix ausgeben

X[0,0] gibt den Eintrag in der ersten Zeile und ersten Spalte aus (beachte das zero-based-indexing).

X[3,1] gibt den Eintrag in Zeile 4 (Zeilenindex 3) und Spalte 2 (Spaltenindex 1) aus. 



## Teile einer Matrix ausgeben, slicing

Der Befehl X[:4,:] druckt 
- die Zeilen 0 bis 3 (beachte: das sind die ersten VIER Zeilen)
- alle Spalten (der Doppelpunkt ohne Zahl davor und/oder dahinter bedeutet "alles")

Beachte: dieser Befehl ist gleichbedeutent mit X[:4]. Weiter hinten stehende nicht explizit genannte Indices werden vollständig gesliced. 

Probiere beides aus:

Der Befehl x[2:4, 1:] gibt die Zeilen mit den Indices 2 und 3 aus (also Zeilen 3 und 4), und aus diesen jeweils die Spalten mit den Indices 1 aufwaerts, hier also die zweite und dritte Spalte. 

Probiere es aus: 

### Aufgaben

- Gib alle Daten (die gesamte Matrix X) aus, einmal ohne und einmal mit slicing
- Gib die Daten aus Zeilen 3 und Spalten 1 und 2 aus. 
- Gib die Daten aus Zeilen 3 bis 5 und Spalten 1 bis 3 aus. 

# Daten mit drei features, endgültig
Hier wird, wie im Skript besprochen, eine Spalte Einsen als erste Spalte hinzugefügt. 

Ausserdem wird aus dem Vektor y auch eine Matrix gemacht. Dies ist nötig, damit np.matmul damit umgehen kann. 

In [6]:
x1, x2, x3, y = np.loadtxt('../data/pizza_3_vars.txt', skiprows=1, unpack=True)
x = np.column_stack((np.ones(x1.size), x1, x2, x3))
y = y.reshape(-1, 1) # cast array into a matrix

m = len(x)
print('number of examples: m =',m)


number of examples: m = 30


# Vorhersage in Matrixschreibweise

In [98]:
# ==============================================
# ===   WRITE YOUR OWN CODE 
# ==============================================
#
# no help: write everything yourself, including the docstring
#
# Level-1-help: copy/paste the docstring from moodle
#
# Level-2-help: look up the code from the teachers printout




# Apply your function

In [99]:
# select the colums 1 to 3 (1: ones, 2: reservations, 3: temperature)
x2 = x[:,:3]

In [100]:
# compute the prediction for the following weights:
# bias = optimal value from training with one feature
# w for reservations = optimal value from training with one feature
# w for temperature = Zero
# pack these into a vector called w like this: w = np.array([..., ..., ...])
# 
# then compute the prediction as y_hat_1 = ...


In [101]:
# une nonzero values of the weight for the temperature to compute predictions


In [1]:
# plot the data alongside a prediction. 
# use the number of reservations on the x-axis and the prediction on the y axis
# for nonzero values of the temperature weight you don't get a straight line. Explain. 

#plt.plot(x[:,1], y, 'ob')
#plt.plot(x[:,1], y_hat_1, '+k')



In [2]:
# play with the parameters b, w_reservations and w_temperature. 
# Try to improve the agreement between data and prediction
