<center>

<h1 style="text-align:center"> Streams, Laziness and Memoization </h1>
<h2 style="text-align:center"> CS3100 Fall 2019 </h2>
</center>


$
\require{color}
\newcommand{\colorred}[1]{\color{red}{\text{#1}}}
$


## Review

### Previously

* Modular Programming
  + Namespacing, Abstraction, Code Reuse
  + Structures, Signatures, Functors
  
### This lecture

* Streams: Programming with infinite data structures
* Laziness: Call-by-need evaluation

## Recursive values

* In OCaml, we can define recursive functions.
  + we can also define **recursive values**

In [38]:
(* Infinite list of ones.
   [let rec] on a value builds a single cons cell whose tail points back to
   itself, so the list is cyclic and occupies constant memory. *)
let rec ones = 1::ones

val ones : int list = [1; <cycle>]


In [39]:
(* Infinite list of alternating 0s and 1s.
   Two cons cells are allocated; the tail of the second points back to the
   first, forming a cycle of length two. *)
let rec zero_ones = 0::1::zero_ones

val zero_ones : int list = [0; 1; <cycle>]


Even though the list is **infinite**, the data structure uses **finite** memory. 

## Infinite data structures

Infinite data structures are not just an intellectual curiosity.

* Infinite sequences such as primes and fibonacci numbers.
* Streams of input read from file or socket.
* Game trees which may be infinite
  + Every possible move leads to a branch in the tree. 
  + Imagine game trees where a piece could chase the other around forever. 

## Limitations of cyclic structures

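Suppose we want to convert every element of the infinite list `zero_ones` to a string. The obvious solution does not work, because `List.map` tries to traverse the whole list and a cyclic list has no end.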

In [40]:
let zero_ones_string = List.map string_of_int zero_ones

error: runtime_error

## List to Streams

We can start with the list type

```ocaml
type 'a list = Nil | Cons of 'a * 'a list
```

and make a **stream** type.

In [41]:
(* Eager stream: a head element and an already-built tail.
   There is no empty constructor, so every value of this type is infinite
   and, in a strict language, can only be built as a cyclic structure. *)
type 'a stream = Cons of 'a * 'a stream

type 'a stream = Cons of 'a * 'a stream


There is no `Nil` since the streams are infinite. 

## Doesn't quite work

In [42]:
(* Cyclic eager stream 0, 1, 0, 1, ...: the inner [Cons] points back to the
   outer one. *)
let rec zero_ones = Cons (0, Cons (1, zero_ones))

val zero_ones : int stream = Cons (0, Cons (1, <cycle>))


In [43]:
(* [to_string s] attempts to convert every element of the eager stream [s]
   to a string. The recursive call on the tail is evaluated before the
   result [Cons] can be built and the type has no base case, so the call
   never produces a value. *)
let rec to_string (Cons(x,xs)) = Cons(string_of_int x, to_string xs)

val to_string : int stream -> string stream = <fun>


In [44]:
to_string zero_ones

error: runtime_error

## Pausing the execution

* We need a way to pause the execution rather than recursively applying to the rest of the list. 
* Use **thunks**: `unit -> 'a` functions.

In [45]:
(* OCaml is strict: the right-hand side is evaluated immediately, so this
   definition raises [Failure "error"] as soon as it is executed. *)
let v = failwith "error"

error: runtime_error

## Pausing the execution

In [46]:
(* A thunk: the body is not evaluated when [f] is defined, only when [f] is
   applied to [()]. Applying it raises [Failure "error"]. *)
let f () = failwith "error"

val f : unit -> 'a = <fun>


In [47]:
f ()

error: runtime_error

## Streams again

In [48]:
(* Stream whose tail is a thunk: the rest of the stream is only computed
   when the [unit -> 'a stream] function is applied to [()]. *)
type 'a stream = Cons of 'a * (unit -> 'a stream)

type 'a stream = Cons of 'a * (unit -> 'a stream)


In [49]:
(* The stream 0, 1, 0, 1, ...
   The recursive reference to [zero_ones] sits inside a thunk, so it is only
   followed when that tail is forced. *)
let rec zero_ones = Cons (0, fun () -> Cons (1, fun () -> zero_ones))

val zero_ones : int stream = Cons (0, <fun>)


In [50]:
(** [hd s] is the first element of the stream [s]. The tail is not forced. *)
let hd = function
  | Cons (first, _) -> first

val hd : 'a stream -> 'a = <fun>


In [51]:
(** [tl s] forces and returns the tail of the stream [s] by applying its
    thunk to [()]. The thunk is re-run on every call. *)
let tl = function
  | Cons (_, rest) -> rest ()

val tl : 'a stream -> 'a stream = <fun>


## More Stream functions

In [52]:
(** [take n s] is the list of the first [n] elements of the stream [s],
    in stream order. Returns [[]] when [n <= 0]; guarding with [<=] rather
    than [=] keeps a negative [n] from recursing without bound. *)
let rec take n s =
  if n <= 0 then []
  else (hd s)::(take (n-1) (tl s))

val take : int -> 'a stream -> 'a list = <fun>


In [53]:
take 10 zero_ones

- : int list = [0; 1; 0; 1; 0; 1; 0; 1; 0; 1]


In [54]:
(** [drop n s] is the stream [s] without its first [n] elements.
    Returns [s] unchanged when [n <= 0]; guarding with [<=] rather than [=]
    keeps a negative [n] from looping forever. *)
let rec drop n s =
  if n <= 0 then s
  else drop (n-1) (tl s)

val drop : int -> 'a stream -> 'a stream = <fun>


In [55]:
drop 1 zero_ones

- : int stream = Cons (1, <fun>)


## Higher order functions on streams

In [56]:
(** [map f s] is the stream obtained by applying [f] to every element of
    [s]. Only the head is transformed eagerly; the rest is mapped when the
    resulting tail is forced. *)
let rec map f (Cons (x, rest)) =
  Cons (f x, fun () -> map f (rest ()))

val map : ('a -> 'b) -> 'a stream -> 'b stream = <fun>


In [57]:
let zero_ones_str = map string_of_int zero_ones

val zero_ones_str : string stream = Cons ("0", <fun>)


In [58]:
take 10 zero_ones_str

- : string list = ["0"; "1"; "0"; "1"; "0"; "1"; "0"; "1"; "0"; "1"]


## Higher order functions on streams

In [59]:
(** [filter p s] is the stream of the elements [x] of [s] for which
    [p x = false]; every element satisfying [p] is removed. Note that this
    is the opposite of [List.filter], which keeps the elements satisfying
    the predicate. The call does not return if every remaining element of
    [s] satisfies [p]. *)
let rec filter p (Cons (x, rest)) =
  if p x then filter p (rest ())
  else Cons (x, fun () -> filter p (rest ()))

val filter : ('a -> bool) -> 'a stream -> 'a stream = <fun>


In [60]:
let s' = filter ((=) 0) zero_ones in
take 10 s'

- : int list = [1; 1; 1; 1; 1; 1; 1; 1; 1; 1]


## Higher order functions on streams

In [61]:
(** [zip f s1 s2] combines the streams [s1] and [s2] element-wise with [f].
    The heads are combined immediately; the tails are forced and zipped only
    when the resulting tail is forced. *)
let rec zip f (Cons (a, rest_a)) (Cons (b, rest_b)) =
  Cons (f a b, fun () -> zip f (rest_a ()) (rest_b ()))

val zip : ('a -> 'b -> 'c) -> 'a stream -> 'b stream -> 'c stream = <fun>


In [62]:
zip (fun x y -> (x,y)) zero_ones zero_ones_str

- : (int * string) stream = Cons ((0, "0"), <fun>)


## Primes

* **Sieve of Eratosthenes**: Neat way to compute primes.
* Start with a stream `s` of `[2;3;4;.....]`.
* At each step, 
  + `p = hd s` is a prime.
  + return a new stream `s'` with every multiple of `p` removed, i.e. $\forall x \in s'.\ x \bmod p \neq 0$
* In the first step,
  + `prime = 2`
  + `new stream = [3;5;7;9;11;13;15;17;....]`
* In the second step,
  + `prime = 3`
  + `new stream = [5;7;11;13;17;19;23;....]`

## Primes

In [63]:
(** [from n] is the stream of consecutive integers n, n+1, n+2, ... *)
let rec from n =
  Cons (n, fun () -> from (succ n));;
from 2

val from : int -> int stream = <fun>


- : int stream = Cons (2, <fun>)


In [64]:
(** The stream of prime numbers, computed with the sieve of Eratosthenes:
    the head [p] of the current stream is emitted as a prime, and the sieve
    continues on the tail with all multiples of [p] removed. *)
let primes_stream =
  let rec sieve (Cons (p, rest)) =
    let is_multiple x = x mod p = 0 in
    (* [filter] drops the elements satisfying its predicate. *)
    Cons (p, fun () -> sieve (filter is_multiple (rest ())))
  in
  sieve (from 2)

val primes_stream : int stream = Cons (2, <fun>)


In [65]:
take 100 @@ primes_stream

- : int list =
[2; 3; 5; 7; 11; 13; 17; 19; 23; 29; 31; 37; 41; 43; 47; 53; 59; 61; 67; 71;
 73; 79; 83; 89; 97; 101; 103; 107; 109; 113; 127; 131; 137; 139; 149; 151;
 157; 163; 167; 173; 179; 181; 191; 193; 197; 199; 211; 223; 227; 229; 233;
 239; 241; 251; 257; 263; 269; 271; 277; 281; 283; 293; 307; 311; 313; 317;
 331; 337; 347; 349; 353; 359; 367; 373; 379; 383; 389; 397; 401; 409; 419;
 421; 431; 433; 439; 443; 449; 457; 461; 463; 467; 479; 487; 491; 499; 503;
 509; 521; 523; 541]


## Fibonacci sequence

* Let's consider the Fibonacci sequence
  + `s1 = [1;1;2;3;5;8;13;...]`
* Let's consider the tail of `s1`
  + `s2 = [1;2;3;5;8;13;....]`
* Let's zip `s1` and `s2` by adding together the elements:
  + `s3 = [2;3;5;8;13;21;...]`
  + `s3` is nothing but the tail of tail of fibonacci sequence. 
* If we were to prepend `[1;1]` to `s3` we will have the fibonacci sequence.



## Fibonacci sequence

In [66]:
(* The Fibonacci stream 1, 1, 2, 3, 5, ...
   The first two elements are given explicitly; every later element comes
   from adding the stream to its own tail with [zip]. The references to
   [fibs] are inside thunks, so they are only followed when a tail is
   forced. Thunks do not cache their result: each [tl] re-runs the
   computation. *)
let rec fibs = 
  Cons (1, fun () -> 
    Cons (1, fun () -> 
      zip (+) fibs (tl fibs)))

val fibs : int stream = Cons (1, <fun>)


In [67]:
take 30 fibs

- : int list =
[1; 1; 2; 3; 5; 8; 13; 21; 34; 55; 89; 144; 233; 377; 610; 987; 1597; 2584;
 4181; 6765; 10946; 17711; 28657; 46368; 75025; 121393; 196418; 317811;
 514229; 832040]


## Fibonacci sequence

* Each time we force the computation of the next element, we compute the fibonacci of previous element twice.
  + Not immediately apparent, but this is equivalent to:

```ocaml
let rec fib n = if n < 2 then 1 else fib (n-1) + fib (n-2)
```

There is an exponential increase in the running time of `fib(n)` for each increase in `n`.

## Lazy Values

* It would be nice to **save** the results of the execution for previously seen values and reuse them.
  + This is the idea behind lazy values in OCaml.
* Lazy values are the opt-in, explicit, **call-by-need** reduction strategy for OCaml
  + The rest of the language is strict, i.e., call-by-value
* Lazy module in OCaml is:

```ocaml
module Lazy : sig
  type 'a t = 'a lazy_t
  val force : 'a t -> 'a
end
```

OCaml has syntactic support for lazy values through the `lazy` keyword.

## Lazy values

In [68]:
let v = lazy (10 + (print_endline "Hello"; 20))

val v : int lazy_t = <lazy>


In [69]:
Lazy.force v

Hello


- : int = 30


In [70]:
Lazy.force v

- : int = 30


## Lazy fib

In [71]:
let fib30lazy = lazy (take 30 fibs |> List.rev |> List.hd)

val fib30lazy : int lazy_t = <lazy>


In [72]:
Lazy.force fib30lazy

- : int = 832040


In [73]:
let fib31lazy = take 31 fibs |> List.rev |> List.hd

val fib31lazy : int = 1346269


## Lazy stream

Let's redefine the stream using lazy values. 

In [74]:
(* Stream whose tail is a lazy value: it is computed the first time it is
   forced with [Lazy.force], and the result is reused on later forces. *)
type 'a stream = Cons of 'a * 'a stream Lazy.t

type 'a stream = Cons of 'a * 'a stream Lazy.t


In [75]:
(** [hd s] is the first element of the lazy stream [s]. *)
let hd (Cons (x, _)) = x

(** [tl s] forces the lazy tail of [s] and returns it. *)
let tl (Cons (_, rest)) = Lazy.force rest

(** [take n s] is the list of the first [n] elements of [s].
    Returns [[]] when [n <= 0], so a negative [n] cannot recurse without
    bound. *)
let rec take n s =
  if n <= 0 then [] else (hd s)::(take (n-1) (tl s))

(** [zip f s1 s2] combines [s1] and [s2] element-wise with [f]; the tails
    are zipped lazily, when the resulting tail is first forced. *)
let rec zip f s1 s2 =
  Cons (f (hd s1) (hd s2), lazy (zip f (tl s1) (tl s2)))

val hd : 'a stream -> 'a = <fun>


val tl : 'a stream -> 'a stream = <fun>


val take : int -> 'a stream -> 'a list = <fun>


val zip : ('a -> 'b -> 'c) -> 'a stream -> 'b stream -> 'c stream = <fun>


## Fibs Lazy Streams

In [76]:
(* The Fibonacci stream built on lazy tails.
   Same construction as the thunk-based version: two explicit 1s followed
   by the stream zipped with its own tail. Each tail is a lazy value, so
   once forced its result is reused instead of being recomputed. *)
let rec fibslazystream = 
  Cons (1, lazy (
    Cons (1, lazy (
      zip (+) fibslazystream (tl fibslazystream)))))

val fibslazystream : int stream = Cons (1, <lazy>)


In [77]:
take 30 fibslazystream

- : int list =
[1; 1; 2; 3; 5; 8; 13; 21; 34; 55; 89; 144; 233; 377; 610; 987; 1597; 2584;
 4181; 6765; 10946; 17711; 28657; 46368; 75025; 121393; 196418; 317811;
 514229; 832040]


You can see that this is fast!

## Memoization

* Lazy values in OCaml are a specific efficient implementation of the general idea of caching called **Memoization**.
  + Add caching to functions to retrieve results fast. 

In [78]:
(** [memo f] is a function that behaves like [f] but remembers its results.
    A hash table private to the returned closure maps each argument to the
    value [f] produced for it; [f] is called only for arguments not seen
    before. If [f] raises, nothing is stored for that argument. *)
let memo f =
  let table = Hashtbl.create 16 in
  fun arg ->
    match Hashtbl.find table arg with
    | cached -> cached
    | exception Not_found ->
      let result = f arg in
      Hashtbl.add table arg result;
      result

val memo : ('a -> 'b) -> 'a -> 'b = <fun>


## Expensive identity

In [79]:
(** [spin n] performs [n] iterations of an empty loop and returns [()].
    Used only to burn time. A non-positive [n] returns immediately; with an
    [n = 0] test a negative argument would never hit the base case. *)
let rec spin n = if n <= 0 then () else spin (n-1)

val spin : int -> unit = <fun>


In [80]:
(** [expensive_id x] returns [x] unchanged, after a long busy-wait
    ([spin 200000000]) that stands in for an expensive computation. *)
let expensive_id x =
  let () = spin 200000000 in
  x

val expensive_id : 'a -> 'a = <fun>


In [81]:
expensive_id 10

- : int = 10


## Memoizing expensive identity

In [82]:
let memoized_expensive_id = memo expensive_id

val memoized_expensive_id : '_weak1 -> '_weak1 = <fun>


In [83]:
memoized_expensive_id 11

- : int = 11


## Memoizing recursive functions

* Memoizing recursive functions is a bit more tricky.
  + We need to tie the **recursive knot**

In [84]:
(** [fib n] is the [n]-th Fibonacci number with [fib 0 = fib 1 = 1].
    Any [n < 2] (including negative values) yields 1. Naive double
    recursion, with no sharing of sub-results. *)
let rec fib n =
  if n >= 2 then fib (n - 2) + fib (n - 1)
  else 1

val fib : int -> int = <fun>


In [85]:
fib 40

- : int = 165580141


## Memoizing recursive functions

Simply doing `let memo_fib = memo fib` will only memoize the outer calls and not the recursive calls.

In [86]:
let memo_fib = memo fib

val memo_fib : int -> int = <fun>


In [101]:
memo_fib 40

- : int = 165580141


## Tying the recursive knot

This function should remind you of the definition we used for the Y combinator.

In [88]:
(** [fib_norec f n] is one step of the Fibonacci recurrence with the
    recursion abstracted out: instead of calling itself it calls [f] on
    [n-1] and [n-2]. Returns 1 for [n < 2] without calling [f]. *)
let fib_norec f n =
  if n >= 2 then f (n - 1) + f (n - 2)
  else 1

val fib_norec : (int -> int) -> int -> int = <fun>


The idea is to provide an `f` which is the memoized version of 

```ocaml
let rec f n = if n < 2 then 1 else f (n-1) + f(n-2)
```

We will use a **reference** to tie the knot.

## Tying the recursive knot

`memo_rec` memoizes recursive functions that take an explicit recursive-function argument, such as `fib_norec`.

In [89]:
(** [memo_rec f_norec] memoizes a recursive function written in
    "open recursion" style, i.e. one that takes the function to use for its
    recursive calls as its first argument. The recursive calls made by
    [f_norec] go through the memoized function, so their results are cached
    too. *)
let memo_rec f_norec =
  (* Placeholder for the function used in recursive calls. The initial
     value is never run: the cell is overwritten below before the memoized
     function can be called. *)
  let knot = ref (fun _ -> assert false) in
  (* [!knot] is inside the lambda, so it is read at call time, after the
     assignment below, and not when [memoized] is built. *)
  let memoized = memo (fun arg -> f_norec !knot arg) in
  (* Tie the knot: recursive calls now reach the memoized function. *)
  knot := memoized;
  memoized

val memo_rec : (('a -> 'b) -> 'a -> 'b) -> 'a -> 'b = <fun>


In [90]:
let fib_memo = memo_rec fib_norec

val fib_memo : int -> int = <fun>


In [102]:
fib_memo 40

- : int = 165580141


## Edit distance

* Memoization is a general solution for **dynamic programming**.
* Let's compute **edit distance** (aka **Levenshtein distance**) between two strings.
* Example: 
   * edit_distance("kitten","sitting") = 3
   * $\colorred{k}$itten -> $\colorred{s}$itten
   * sitt$\colorred{e}$n -> sitt$\colorred{i}$n
   * sittin -> sittin$\colorred{g}$

## Timing the execution

In [92]:
(* [time_it f v] applies [f] to [v] and returns the pair
   [(result, elapsed)], where [elapsed] is the difference between two
   [Unix.gettimeofday] readings taken around the call, in milliseconds. *)
let time_it f v =
  let started = Unix.gettimeofday () in
  let res = f v in
  let elapsed_ms = (Unix.gettimeofday () -. started) *. 1000. in
  (res, elapsed_ms)

val time_it : ('a -> 'b) -> 'a -> 'b * float = <fun>


## Edit distance

In [93]:
(** [edit_distance (s, t)] is the Levenshtein distance between [s] and [t]:
    the minimum number of single-character insertions, deletions and
    substitutions that turn [s] into [t].

    With [~log:true], every call prints its two arguments separated by a
    space. This is the naive recursion; sub-problems are not shared. *)
let rec edit_distance ?log (s, t) =
  (match log with
   | Some true -> print_endline (s ^ " " ^ t)
   | Some false | None -> ());
  let len_s = String.length s in
  let len_t = String.length t in
  if len_s = 0 then len_t
  else if len_t = 0 then len_s
  else begin
    let s' = String.sub s 0 (len_s - 1) in
    let t' = String.sub t 0 (len_t - 1) in
    let last_cost = if s.[len_s - 1] = t.[len_t - 1] then 0 else 1 in
    (* The sub-problems are solved in the order substitute, insert, delete;
       the trace printed under [~log:true] depends on this order. *)
    (* keep or substitute the last character *)
    let substitute = edit_distance ?log (s', t') + last_cost in
    (* insert the last character of [t] at the end of [s] *)
    let insert = edit_distance ?log (s, t') + 1 in
    (* delete the last character of [s] *)
    let delete = edit_distance ?log (s', t) + 1 in
    min delete (min insert substitute)
  end

val edit_distance : ?log:bool -> string * string -> int = <fun>


## Edit distance

In [94]:
time_it (edit_distance ~log:true) ("OCaml", "ocaml")

OCaml ocaml
OCam ocam
OCa oca
OC oc
O o
 
O 
 o
OC o
O 
OC 
O o
 
O 
 o
O oc
 o
O o
 
O 
 o
 oc
OCa oc
OC o
O 
OC 
O o
 
O 
 o
OCa o
OC 
OCa 
OC o
O 
OC 
O o
 
O 
 o
OC oc
O o
 
O 
 o
OC o
O 
OC 
O o
 
O 
 o
O oc
 o
O o
 
O 
 o
 oc
OC oca
O oc
 o
O o
 
O 
 o
 oc
OC oc
O o
 
O 
 o
OC o
O 
OC 
O o
 
O 
 o
O oc
 o
O o
 
O 
 o
 oc
O oca
 oc
O oc
 o
O o
 
O 
 o
 oc
 oca
OCam oca
OCa oc
OC o
O 
OC 
O o
 
O 
 o
OCa o
OC 
OCa 
OC o
O 
OC 
O o
 
O 
 o
OC oc
O o
 
O 
 o
OC o
O 
OC 
O o
 
O 
 o
O oc
 o
O o
 
O 
 o
 oc
OCam oc
OCa o
OC 
OCa 
OC o
O 
OC 
O o
 
O 
 o
OCam o
OCa 
OCam 
OCa o
OC 
OCa 
OC o
O 
OC 
O o
 
O 
 o
OCa oc
OC o
O 
OC 
O o
 
O 
 o
OCa o
OC 
OCa 
OC o
O 
OC 
O o
 
O 
 o
OC oc
O o
 
O 
 o
OC o
O 
OC 
O o
 
O 
 o
O oc
 o
O o
 
O 
 o
 oc
OCa oca
OC oc
O o
 
O 
 o
OC o
O 
OC 
O o
 
O 
 o
O oc
 o
O o
 
O 
 o
 oc
OCa oc
OC o
O 
OC 
O o
 
O 
 o
OCa o
OC 
OCa 
OC o
O 
OC 
O o
 
O 
 o
OC oc
O o
 
O 
 o
OC o
O 
OC 
O o
 
O 
 o
O oc
 o
O o
 
O 
 o
 oc
OC oca
O oc
 o
O o
 
O 
 o
 oc
OC oc


O oc
 o
O o
 
O 
 o
 oc
O oca
 oc
O oc
 o
O o
 
O 
 o
 oc
 oca
O ocam
 oca
O oca
 oc
O oc
 o
O o
 
O 
 o
 oc
 oca
 ocam
OCa ocaml
OC ocam
O oca
 oc
O oc
 o
O o
 
O 
 o
 oc
 oca
OC oca
O oc
 o
O o
 
O 
 o
 oc
OC oc
O o
 
O 
 o
OC o
O 
OC 
O o
 
O 
 o
O oc
 o
O o
 
O 
 o
 oc
O oca
 oc
O oc
 o
O o
 
O 
 o
 oc
 oca
O ocam
 oca
O oca
 oc
O oc
 o
O o
 
O 
 o
 oc
 oca
 ocam
OCa ocam
OC oca
O oc
 o
O o
 
O 
 o
 oc
OC oc
O o
 
O 
 o
OC o
O 
OC 
O o
 
O 
 o
O oc
 o
O o
 
O 
 o
 oc
O oca
 oc
O oc
 o
O o
 
O 
 o
 oc
 oca
OCa oca
OC oc
O o
 
O 
 o
OC o
O 
OC 
O o
 
O 
 o
O oc
 o
O o
 
O 
 o
 oc
OCa oc
OC o
O 
OC 
O o
 
O 
 o
OCa o
OC 
OCa 
OC o
O 
OC 
O o
 
O 
 o
OC oc
O o
 
O 
 o
OC o
O 
OC 
O o
 
O 
 o
O oc
 o
O o
 
O 
 o
 oc
OC oca
O oc
 o
O o
 
O 
 o
 oc
OC oc
O o
 
O 
 o
OC o
O 
OC 
O o
 
O 
 o
O oc
 o
O o
 
O 
 o
 oc
O oca
 oc
O oc
 o
O o
 
O 
 o
 oc
 oca
OC ocam
O oca
 oc
O oc
 o
O o
 
O 
 o
 oc
 oca
OC oca
O oc
 o
O o
 
O 
 o
 oc
OC oc
O o
 
O 
 o
OC o
O 
OC 
O o
 
O 
 o
O oc
 o
O o
 
O 
 o

- : int * float = (2, 10.4248523712158203)


## Edit distance

In [95]:
time_it (edit_distance ~log:false) ("OCaml 4.08", "ocaml 4.08")

- : int * float = (2, 8690.44303894043)


## Memoize edit distance

In [96]:
(** [edit_distance_norec f (s, t)] is one step of the Levenshtein-distance
    recurrence with the recursion abstracted out: the three sub-problems are
    solved by calling [f] instead of the function itself, so that a memoized
    [f] can be supplied (e.g. through a knot-tying combinator).

    The function is deliberately not [rec]: it never calls itself.

    With [~log:true], each call prints its two arguments separated by a
    space before doing any work. *)
let edit_distance_norec ?log f (s, t) =
  (match log with
   | Some true -> print_endline (s ^ " " ^ t)
   | Some false | None -> ());
  let len_s = String.length s in
  let len_t = String.length t in
  if len_s = 0 then len_t
  else if len_t = 0 then len_s
  else begin
    let s' = String.sub s 0 (len_s - 1) in
    let t' = String.sub t 0 (len_t - 1) in
    let last_cost = if s.[len_s - 1] = t.[len_t - 1] then 0 else 1 in
    (* [f] may have effects (logging, cache updates), so the order of the
       three calls is fixed explicitly: substitute, insert, delete. *)
    (* keep or substitute the last character *)
    let substitute = f (s', t') + last_cost in
    (* insert the last character of [t] at the end of [s] *)
    let insert = f (s, t') + 1 in
    (* delete the last character of [s] *)
    let delete = f (s', t) + 1 in
    min delete (min insert substitute)
  end

val edit_distance_norec :
  ?log:bool -> (string * string -> int) -> string * string -> int = <fun>


## Memoize edit distance

In [97]:
let memo_edit_distance = memo_rec (edit_distance_norec ~log:true)

val memo_edit_distance : string * string -> int = <fun>


In [98]:
time_it memo_edit_distance ("OCaml 4.08", "ocaml 4.08")

OCaml 4.08 ocaml 4.08
OCaml 4.0 ocaml 4.0
OCaml 4. ocaml 4.
OCaml 4 ocaml 4
OCaml  ocaml 
OCaml ocaml
OCam ocam
OCa oca
OC oc
O o
 
O 
 o
OC o
OC 
O oc
 oc
OCa oc
OCa o
OCa 
OC oca
O oca
 oca
OCam oca
OCam oc
OCam o
OCam 
OCa ocam
OC ocam
O ocam
 ocam
OCaml ocam
OCaml oca
OCaml oc
OCaml o
OCaml 
OCam ocaml
OCa ocaml
OC ocaml
O ocaml
 ocaml
OCaml  ocaml
OCaml  ocam
OCaml  oca
OCaml  oc
OCaml  o
OCaml  
OCaml ocaml 
OCam ocaml 
OCa ocaml 
OC ocaml 
O ocaml 
 ocaml 
OCaml 4 ocaml 
OCaml 4 ocaml
OCaml 4 ocam
OCaml 4 oca
OCaml 4 oc
OCaml 4 o
OCaml 4 
OCaml  ocaml 4
OCaml ocaml 4
OCam ocaml 4
OCa ocaml 4
OC ocaml 4
O ocaml 4
 ocaml 4
OCaml 4. ocaml 4
OCaml 4. ocaml 
OCaml 4. ocaml
OCaml 4. ocam
OCaml 4. oca
OCaml 4. oc
OCaml 4. o
OCaml 4. 
OCaml 4 ocaml 4.
OCaml  ocaml 4.
OCaml ocaml 4.
OCam ocaml 4.
OCa ocaml 4.
OC ocaml 4.
O ocaml 4.
 ocaml 4.
OCaml 4.0 ocaml 4.
OCaml 4.0 ocaml 4
OCaml 4.0 ocaml 
OCaml 4.0 ocaml
OCaml 4.0 ocam
OCaml 4.0 oca
OCaml 4.0 oc
OCaml 4.0 o
OCaml 4.0 
OCaml 4. ocam

- : int * float = (2, 0.725984573364257812)


<center>

<h1 style="text-align:center"> Fin. </h1>
</center>