# Learning Deep Learning

In [52]:
import numpy as np
import matplotlib.pyplot as plt

from numpy.random import default_rng

from typing import Iterable,List,Dict,Tuple,Any,Optional

In [53]:
rng = default_rng()

## One Hot vectors

In [24]:
choices = ['ram','sri','duck','ramsay','nan']
words1 = [np.random.choice(choices) for _ in range(100)]
words2 = [np.random.choice(choices) for _ in range(100)]

In [17]:
c_dict = {word:i for i,word in enumerate(choices)}
oneHot = np.zeros((len(words),len(choices)))
for i,word in enumerate(words):
    oneHot[i][c_dict[word]] = 1
    
# for w,hot in zip(words[:10],oneHot[:10]): print(w,hot)

In [18]:
def oneHot(payload:List[Any])->np.ndarray:
    choices = list(set(payload))
    c_dict = {word:i for i,word in enumerate(choices)}
    res = np.zeros((len(words),len(choices)))
    for i,word in enumerate(words):
        res[i][c_dict[word]] = 1
    
    return res,choices
oneHot(words)[0][:10]

array([[0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 1., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       [0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0.]])

In [27]:
a,_ = oneHot(words1)
b,_ = oneHot(words2)

## MLE - Entropy

> **Maximum likelyhood estimation** --> Maximizing the likelyhood for events and their probabilities

Eg:

probabilities of A = 0.7,0.1,0.6,0.2 = 0.0084
probabilities of B = 0.8,0.7,0.9,0.6 = 0.3024

we can put that in logs for better calculations, this is called **log likelyhood**
$$ log(A) = log(0.7)+log(0.1)+log(0.6)+log(0.2) = -4.8 $$

> log(x) for anything less 1 is always negative, so use negative sum

> This negative sum of log likelyhood is called **Cross Entropy** or **NLL**
$$ crossEntropy(A) = -log(0.7)-log(0.1)-log(0.6)-log(0.2) = 4.8 $$

### Cross Entropy loss
> Shows how good the mapping between the events and their probabilites are, smaller the NLL better the mapping holds good

## Logistic Regression

Normally, for K classes 
NLL (or) CrossEntropy loss = $ -\sum_{i=0}^{m}\sum_{j}^{k} log(p_{ij}) $

For an one-hot vector style notation

$$ Error\ , J(y,\hat{y}) = -\frac{1}{m}\sum_{i=0}^{m}\sum_{j}^{k} (y_{ij}log(\hat{y_{ij}}) + (1-y_{ij})log(1-\hat{y_{ij}})) $$

In [19]:
def NLL(probs): return -1*np.sum(np.log(probs)).item()

In [20]:
NLL([0.7,0.1,0.6,.2])

4.779523573132869

In [76]:
def crossEntropyLoss(y,yHat):
    assert y.shape == yHat.shape, "Shapes are not matching"
    assert y.shape[1] > 1, "Expected One-hot vectors"
    return (-1*np.sum(np.sum(y*np.nan_to_num(np.log(yHat)) + (1-y)*np.nan_to_num(np.log(1-yHat)),axis=1),axis=0)/y.shape[0]).item()

a,_ = oneHot(words1)
b = np.abs(rng.standard_normal(a.shape))
crossEntropyLoss(a,b)

  return (-1*np.sum(np.sum(y*np.nan_to_num(np.log(yHat)) + (1-y)*np.nan_to_num(np.log(1-yHat)),axis=1),axis=0)/y.shape[0]).item()


2.90285083992997

## Gradient Descent

1. have weights and biases - random
2. get yHat, loss
3. find derivative of loss w.r.t. W,b
4. Update W,b 
5. Repeat 1-4, until desired loss is reached

In [82]:
class LogisticRegression:
    def __init__(self,features:int,classes:int,learning_rate:float=0.01):
        self.N = features
        self.K = classes
        
        rng = np.random.default_rng()
        self.W = rng.standard_normal(self.N,1)
        self.b = rng.standard_normal(self.N,1)
        
        self.loss_fn = crossEntropyLoss
        
    def to_one_hot(self,y):
        # TODO write a onehoter with choices saved
        # Use sklearn- onehot encoder
        ...
    

In [83]:
from sklearn.preprocessing import OneHotEncoder