#### 2. Estimate the exponent of the Zipfian distribution for unigrams, bigrams, and trigrams in this corpus.

![image](text-sequence_2_1.png)
![image](text-sequence_2_2.png)
![image](text-sequence_2_3.png)

```python
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import numpy as np

def _freq_table(tokens):
    """Build a Vocab over `tokens` and return its (token, count) pairs."""
    table = Vocab(tokens, min_freq=5).token_freqs
    return [(token, count) for (token, count) in table if count >= 0]


words = text.split()

# Each n-gram is flattened into a single '--'-joined token. zip() stops at the
# shortest input, so the shifted slices line up without trimming the tails.
bigram_tokens = ['--'.join(pair) for pair in zip(words, words[1:])]
trigram_tokens = ['--'.join(triple)
                  for triple in zip(words, words[1:], words[2:])]

unigram_vocab: list[tuple[str, int]] = _freq_table(words)
bigram_vocab: list[tuple[str, int]] = _freq_table(bigram_tokens)
trigram_vocab: list[tuple[str, int]] = _freq_table(trigram_tokens)

# (label, frequency table) pairs, in the order they are fitted and plotted.
vocab_list = [
    ('unigram', unigram_vocab),
    ('bigram', bigram_vocab),
    ('trigram', trigram_vocab),
]

# Function to calculate the sum of squared differences, including initial scale
def zipf_cost_with_scale(params, freqs):
    """Return the sum of squared errors of a Zipf fit.

    The model predicts ``initial_scale * rank ** (-alpha)`` for ranks
    ``1..len(freqs)``, with ``freqs[0]`` taken as the rank-1 observation.

    Args:
        params: Pair ``(alpha, initial_scale)``.
        freqs: Observed frequencies, one per rank, as a sequence or array.

    Returns:
        The scalar sum of squared differences between ``freqs`` and the
        model's predictions.
    """
    alpha, initial_scale = params
    observed = np.asarray(freqs, dtype=float)
    # Use float ranks: NumPy refuses to raise an integer array to a negative
    # integer power, so an integer-valued alpha would otherwise raise.
    rank = np.arange(1, len(observed) + 1, dtype=float)
    estimated_freqs = initial_scale * rank ** (-alpha)
    return np.sum((observed - estimated_freqs) ** 2)

# Fit (alpha, initial_scale) independently for each n-gram order.
results = {}
for label, freq_table in vocab_list:
    counts = [count for _, count in freq_table]
    # Start from alpha = 1 and a scale equal to the first listed frequency.
    start = [1.0, counts[0]]
    fit = minimize(zipf_cost_with_scale, start, args=(counts,))
    results[label] = fit.x  # optimized [alpha, initial_scale]

# Function to plot the results
def plot_freqs(name, freqs, alpha, initial_scale, filename=None):
    """Plot observed frequencies and the fitted Zipf curve on a log y-axis.

    Args:
        name: Label used in the legend entries and (capitalized) in the title.
        freqs: Observed frequencies, one per rank, with ``freqs[0]`` as rank 1.
        alpha: Fitted Zipf exponent.
        initial_scale: Fitted scale, i.e. the model's value at rank 1.
        filename: If not ``None``, the figure is saved to this path before
            it is shown.
    """
    # Evaluate the model initial_scale * rank ** (-alpha) at every 1-based
    # rank, so the first point is the true rank-1 estimate and inputs with
    # fewer than two frequencies are handled.
    y_vals = [initial_scale * rank ** (-alpha)
              for rank in range(1, len(freqs) + 1)]
    x_vals = range(len(freqs))
    plt.figure()
    plt.plot(x_vals, freqs, label=f'{name} actual')
    plt.plot(x_vals, y_vals, label=f'{name} estimated')
    plt.yscale('log')
    plt.title(f"{name.capitalize()}, alpha = {alpha:.4f}")
    plt.legend()
    if filename is not None:
        plt.savefig(filename)
    plt.show()

# Plot every fitted vocabulary. Frequencies are looked up through vocab_list
# rather than by building variable names for globals(), so this keeps working
# if the code is moved into a function or a variable is renamed.
freqs_by_name = {
    label: [freq for _, freq in table] for label, table in vocab_list
}
for i, (name, opt_params) in enumerate(results.items()):
    plot_freqs(name, freqs_by_name[name], *opt_params, f'../../Exercises/9_recurrent-neural-networks_2_{i}.png')
```