In [3]:
from functools import lru_cache

# Optimization

## Discrete optimization

**Problem: computing edit distance**

*Input*: two strings, $s$ and $t$

*Output*: minimum number of character insertions, deletions, and substitutions it takes to change $s$ into $t$

**Example**

$s$: a cat!

$t$: the cats!

We are going to start from the end of both strings. If two characters are equal, we'll remove them. So now we have the strings:

$s$: a cat

$t$: the cats

Now we can use three different strategies in order to transform each string:

 - substitution (substitute the character that is different):
 $s$: a cas
 $t$: the cats
 
 - insertion (insert the missing character at the end of $s$; the now-matching last characters are then removed from both strings):
 $s$: a cat
 $t$: the cat
 
 - deletion (delete the character that is different):
 $s$: a ca
 $t$: the cats
 
We want to minimize the number of steps (right?)

In [15]:
def computeEditDistance(s, t):
    """
    Return the minimum number of character insertions, deletions, and
    substitutions it takes to change s into t.

    The table of sub-results is filled bottom-up, one row at a time, instead
    of by memoized recursion. The call depth therefore stays constant, so
    long inputs no longer hit Python's recursion limit (the recursive form
    goes one stack frame deeper per character consumed).
    """
    # prevRow[n] = edit distance between:
    # - first m - 1 letters of s
    # - first n letters of t
    # For the empty prefix of s (Base case) that is n insertions.
    prevRow = list(range(len(t) + 1))
    for m in range(1, len(s) + 1):
        # Base case: first m letters of s vs. empty prefix of t => m deletions.
        currRow = [m]
        for n in range(1, len(t) + 1):
            if s[m - 1] == t[n - 1]:  # Last letter matches
                result = prevRow[n - 1]
            else:
                subCost = 1 + prevRow[n - 1]
                delCost = 1 + prevRow[n]
                insCost = 1 + currRow[n - 1]
                result = min(subCost, delCost, insCost)
            currRow.append(result)
        prevRow = currRow
    # After the last row, prevRow covers all of s; its last entry covers all of t.
    return prevRow[len(t)]

# Single-phrase check, left disabled:
# print(computeEditDistance('a cat!', 'the cats!'))
# Larger run: each phrase repeated 100 times.
print(computeEditDistance(s='a cat!' * 100, t='the cats!' * 100))

400


## Continuous optimization

**Problem: finding the least squares line**

*Input:* set of pairs $\{(x_1,y_1),\dots,(x_n,y_n)\}$

*Output:* $w\in\mathbb{R}$ that minimizes the squared error $F(w) = \sum_{i=1}^n(x_iw-y_i)^2$

In [16]:
# Training pairs (x, y) read by F and dF.
points = [
    (2, 4),
    (4, 2),
]

def F(w):
    """Squared error of slope w: sum of (w*x - y)**2 over the pairs in the global `points`."""
    residuals = (w * x - y for x, y in points)
    return sum(residual ** 2 for residual in residuals)

def dF(w):
    """Derivative of F with respect to w: sum of 2*(w*x - y)*x over the pairs in the global `points`."""
    gradientTerms = [2 * (w * x - y) * x for x, y in points]
    return sum(gradientTerms)

# Gradient descent on F: start at w = 0 and take 1000 steps of fixed size eta.
w = 0
eta = 0.01
for t in range(1000):
    # Objective and derivative at the current w, evaluated before stepping.
    value, gradient = F(w), dF(w)
    # Step against the gradient.
    w -= eta * gradient
    # The w printed here is the updated weight; value is F at the weight
    # before this step's update.
    print(f'it {t}: w = {w}, F(w) = {value}')

it 0: w = 0.32, F(w) = 20
it 1: w = 0.512, F(w) = 11.807999999999998
it 2: w = 0.6272, F(w) = 8.858880000000001
it 3: w = 0.69632, F(w) = 7.7971968
it 4: w = 0.737792, F(w) = 7.4149908479999995
it 5: w = 0.7626752, F(w) = 7.27739670528
it 6: w = 0.77760512, F(w) = 7.227862813900801
it 7: w = 0.786563072, F(w) = 7.210030613004288
it 8: w = 0.7919378432, F(w) = 7.203611020681545
it 9: w = 0.79516270592, F(w) = 7.201299967445356
it 10: w = 0.797097623552, F(w) = 7.200467988280327
it 11: w = 0.7982585741311999, F(w) = 7.200168475780918
it 12: w = 0.79895514447872, F(w) = 7.200060651281129
it 13: w = 0.799373086687232, F(w) = 7.200021834461207
it 14: w = 0.7996238520123392, F(w) = 7.200007860406035
it 15: w = 0.7997743112074035, F(w) = 7.200002829746172
it 16: w = 0.799864586724442, F(w) = 7.200001018708621
it 17: w = 0.7999187520346652, F(w) = 7.200000366735104
it 18: w = 0.7999512512207991, F(w) = 7.2000001320246385
it 19: w = 0.7999707507324795, F(w) = 7.200000047528869
it 20: w = 0.7999