In [65]:
import numpy as np

# Toy 2-D vectors for a handful of countries and capitals, keyed by name.
data_points: dict[str, np.ndarray] = {
    label: np.array(coords)
    for label, coords in (
        ("USA", (5, 6)),
        ("Washington", (10, 5)),
        ("Turkey", (3, 1)),
        ("Ankara", (9, 1)),
        ("Russia", (5, 5)),
        ("Japan", (4, 3)),
    )
}


def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine of the angle between vectors ``a`` and ``b``.

    The value is ``dot(a, b) / (||a|| * ||b||)``, where ``||.||`` is the norm
    computed by ``np.linalg.norm``.

    Args:
        a: First vector.
        b: Second vector; must be compatible with ``a`` for ``np.dot``.

    Returns:
        The cosine similarity of the two vectors.

    Raises:
        ValueError: If the product of the two norms is zero. The similarity is
            undefined in that case, and dividing anyway would silently produce
            NaN together with a NumPy RuntimeWarning.
    """
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    if norm_product == 0:
        raise ValueError("cosine similarity is undefined for zero-length vectors")
    return np.dot(a, b) / norm_product


# Cosine similarity between "Ankara" and each of the other data points, one value per line
for other_name in ("Washington", "USA", "Turkey", "Russia", "Japan"):
    print(cosine_similarity(data_points["Ankara"], data_points[other_name]))

0.93834311681711
0.7211047102874315
0.9778024140774093
0.7808688094430302
0.861365903383803


In [66]:
# 2x2 matrix with every entry equal to 2, and its element-wise square
A = np.full((2, 2), 2)
A_squared = A**2
A_squared

array([[4, 4],
       [4, 4]])

In [67]:
# Square root of the sum of the squared entries
A_Frobenius = np.sqrt(A_squared.sum())
A_Frobenius

np.float64(4.0)

In [68]:
# Dot product of P with v, followed by the sign of that product
P, v = np.array([2, 2]), np.array([0, 1])
dot_product = P @ v  # both operands are 1-D, so this is the plain inner product
print(dot_product)
sign_of_dot_product = np.sign(dot_product)
sign_of_dot_product

2


np.int64(1)

In [69]:
# One 3-dimensional vector per vocabulary word
word_embedding = {
    token: np.array(vector)
    for token, vector in (
        ("I", (1, 0, 0)),
        ("love", (0, 1, 0)),
        ("deep", (0, 0, 1)),
        ("learning", (1, 1, 1)),
    )
}

words_in_document = "I love deep learning".split()

# The document vector is the element-wise sum of its word vectors,
# starting from an integer zero vector of the same length.
document_embedding = sum(
    (word_embedding[token] for token in words_in_document),
    start=np.zeros(3, dtype=int),
)

document_embedding

array([2, 2, 2])

In [70]:
# Minimum edit distance algorithm between two strings


def min_edit_distance(
    source: str,
    target: str,
    insertion_cost: int = 1,
    deletion_cost: int = 1,
    substitution_cost: int = 2,
) -> int:
    """Compute the minimum edit distance from ``source`` to ``target``.

    A dynamic-programming table of shape ``(len(source) + 1, len(target) + 1)``
    is filled so that cell ``(i, j)`` holds the cheapest cost of turning the
    first ``i`` characters of ``source`` into the first ``j`` characters of
    ``target``.

    Args:
        source: String to transform.
        target: String to transform into.
        insertion_cost: Cost of inserting one character.
        deletion_cost: Cost of deleting one character.
        substitution_cost: Cost of replacing one character with a different
            one; matching characters cost nothing.

    Returns:
        The bottom-right cell of the table, converted with ``int``.

    Side effects:
        Prints the completed table before returning.
    """
    n_rows, n_cols = len(source) + 1, len(target) + 1
    matrix = np.zeros((n_rows, n_cols))

    # Base cases: reaching an empty target takes only deletions (first
    # column); starting from an empty source takes only insertions (first row).
    for row in range(1, n_rows):
        matrix[row, 0] = matrix[row - 1, 0] + deletion_cost
    for col in range(1, n_cols):
        matrix[0, col] = matrix[0, col - 1] + insertion_cost

    # Each remaining cell takes the cheapest of the three possible last steps.
    for row, source_char in enumerate(source, start=1):
        for col, target_char in enumerate(target, start=1):
            replace_cost = 0 if source_char == target_char else substitution_cost
            candidates = (
                matrix[row - 1, col] + deletion_cost,
                matrix[row, col - 1] + insertion_cost,
                matrix[row - 1, col - 1] + replace_cost,
            )
            matrix[row, col] = min(candidates)

    print(matrix)
    return int(matrix[-1, -1])


# Test the function with an example: "Pie" -> "Bye" takes two substitutions (P->B, i->y) at the default cost of 2 each
print(min_edit_distance("Pie", "Bye"))  # 4

[[0. 1. 2. 3.]
 [1. 2. 3. 4.]
 [2. 3. 4. 5.]
 [3. 4. 5. 4.]]
4


In [None]:
# Viterbi algorithm for finding the most likely sequence of hidden states in a Hidden Markov Model (HMM)


def viterbi(
    observations: np.ndarray,
    states: np.ndarray,
    start_probability: np.ndarray,
    transition_probability: np.ndarray,
    emission_probability: np.ndarray,
) -> np.ndarray:
    """Find the most likely sequence of hidden states for an observation sequence.

    Args:
        observations: Sequence of observation symbols. Each entry is used as a
            column index into ``emission_probability``.
        states: The hidden states. Only its length is used; states are referred
            to by their position ``0 .. len(states) - 1``.
        start_probability: ``start_probability[s]`` is the probability of
            starting in state ``s``.
        transition_probability: ``transition_probability[r, s]`` is the
            probability of moving from state ``r`` to state ``s``.
        emission_probability: ``emission_probability[s, o]`` is the probability
            of state ``s`` emitting observation ``o``.

    Returns:
        Integer array of length ``len(observations)`` holding the index of the
        chosen state at every time step, so it can index ``states`` directly.
        It is empty when ``observations`` is empty.
    """
    num_observations = len(observations)
    num_states = len(states)

    # Nothing observed means there is no path; column 0 below would not exist.
    if num_observations == 0:
        return np.zeros(0, dtype=int)

    viterbi_matrix = np.zeros((num_states, num_observations))
    # Backpointers are state indices, so they are stored as integers.
    backpointer = np.zeros((num_states, num_observations), dtype=int)

    # First column: start in state s and emit the first observation.
    for s in range(num_states):
        viterbi_matrix[s, 0] = (
            start_probability[s] * emission_probability[s, observations[0]]
        )

    # Remaining columns: extend the best path ending in each previous state.
    for t in range(1, num_observations):
        for s in range(num_states):
            # Element r is the score of reaching s at time t via state r at t - 1.
            candidates = (
                viterbi_matrix[:, t - 1]
                * transition_probability[:num_states, s]
                * emission_probability[s, observations[t]]
            )
            viterbi_matrix[s, t] = np.max(candidates)
            # np.argmax resolves ties in favour of the lowest state index.
            backpointer[s, t] = np.argmax(candidates)

    # Start from the best final state and follow the backpointers to time 0.
    hidden_states = np.zeros(num_observations, dtype=int)
    hidden_states[-1] = np.argmax(viterbi_matrix[:, -1])
    for t in range(num_observations - 2, -1, -1):
        hidden_states[t] = backpointer[hidden_states[t + 1], t + 1]

    return hidden_states