In [None]:
!pip install mistralai

## Mistral Client Initialization

In [None]:
from mistralai import Mistral
from google.colab import userdata

# Create the Mistral API client; the key is fetched through google.colab's
# `userdata.get` instead of being written into the notebook.
client = Mistral(api_key=userdata.get("MISTRAL_API_KEY"))

# Ask the API which models are available
models = client.models.list()

# Print the id of every returned model, one per line
for model_id in (entry.id for entry in models.data):
    print(model_id)


## Calculation of embedding

In [None]:
# Embed one sentence to inspect what the embeddings endpoint returns
sample_sentence = "A young wizard fights evil."
sample_embed = client.embeddings.create(
    model='mistral-embed',
    inputs=[sample_sentence],
)

# Only one input was sent, so take the vector of the first response entry
clean_sample_embed = sample_embed.data[0].embedding

print(len(clean_sample_embed))   # length of the vector
print(clean_sample_embed[:10])   # its first ten components

## Semantic search with cosine similarity

In [None]:
# Menu used as the search corpus; the trailing comment tags each dish
dish_descriptions = [
    "Grilled steak with garlic butter",       # Meat
    "Seared salmon with lemon dill sauce",    # Fish
    "Roasted vegetable medley with herbs",    # Veggie
    "Spicy tofu stir-fry with vegetables",    # Asian
    "Tacos with spicy chicken and salsa",     # Mexican
    "Spicy chili con carne with beans",       # Spicy
    "Chocolate lava cake with vanilla ice cream",  # Sweet
    "Creamy tomato basil soup",               # Liquid
    "Cheeseburger with fries",                # Fast
    "Seared scallops with truffle oil",       # Fine
]

# Embed all descriptions with a single API call
dish_embeds = client.embeddings.create(
    model='mistral-embed',
    inputs=dish_descriptions,
)

# Keep just the raw vector of every entry in the response
dish_embeds_arr = [entry.embedding for entry in dish_embeds.data]

print(len(dish_embeds_arr))      # number of vectors collected
print(len(dish_embeds_arr[0]))   # length of the first vector

In [None]:
# Pull the raw vectors out of the dish embeddings response
clean_embed = [entry.embedding for entry in dish_embeds.data]

# Peek at the first ten components of the third vector
print(clean_embed[2][:10])


In [None]:
# Ask for a free-text dinner preference and embed it with the same model
# that was used for the dish descriptions.
user_input = input("What do you prefer for dinner? ")
user_embed_response = client.embeddings.create(model='mistral-embed', inputs=[user_input])
user_embed = user_embed_response.data[0].embedding

# Display the first ten components as the cell output
user_embed[:10]

In [None]:
from scipy.spatial.distance import cosine

# scipy's `cosine` is a distance, so similarity is 1 - distance.
# Compare the user's vector with the first three dishes.
for dish_pos in range(3):
    print (1 - cosine(user_embed, dish_embeds_arr[dish_pos]))

In [None]:
from scipy.spatial.distance import cosine, cdist
import numpy as np

# cdist works on 2-D inputs, so the query vector is reshaped into a
# single-row matrix before computing its cosine distance to every dish.
query_row = np.array(user_embed).reshape(1, -1)
distances = cdist(query_row, dish_embeds_arr, "cosine")

# The smallest cosine distance marks the most relevant dish
mostRelevant = np.argmin(distances)

print(f"most relevant index: {mostRelevant}")
print(f"Then you might find tasty: {dish_descriptions[mostRelevant]}")


## Pinecone storage and semantic search

In [None]:
!pip install pinecone

In [None]:
import pinecone

from pinecone import Pinecone, ServerlessSpec

# Pinecone client; the API key is fetched through google.colab's `userdata.get`
pcone = Pinecone(
    api_key=userdata.get("PINECONE_API_KEY"),
)


In [None]:
# Name of the Pinecone index that stores the dish vectors
INDEX_NAME = 'dish-embeddings'
# Take the index dimension from the vectors that will be stored in it, so the
# index cannot drift out of sync with the embedding model's output size.
EMBED_DIM = len(dish_embeds_arr[0])

existing_indexes = pcone.list_indexes()
existing_index_names = [existing.name for existing in existing_indexes]

# Delete the index if it already exists so this cell always starts clean
if INDEX_NAME in existing_index_names:
  pcone.delete_index(INDEX_NAME)

# Create a fresh serverless index that uses the cosine metric
pcone.create_index(
  name=INDEX_NAME, dimension=EMBED_DIM, metric='cosine',
  spec=ServerlessSpec(cloud='aws', region='us-east-1')
)


In [None]:
# Handle used by the following cells to upsert into and query the index
index = pcone.Index("dish-embeddings")

In [None]:
# Send all dish vectors in one upsert request instead of one request per dish.
# Ids follow the pattern "dish_<position>" so that a query hit can be mapped
# back to its entry in `dish_descriptions`.
index.upsert(vectors=[(f'dish_{i}', embed) for i, embed in enumerate(dish_embeds_arr)])

In [None]:
# `vector` expects one flat list of floats, so pass the embedding itself
# rather than wrapping it in another list.
result = index.query(vector=user_embed, top_k=1)

matches = result['matches']
if not matches:
    # Nothing came back (e.g. the upserted vectors may not be searchable yet),
    # so report it instead of failing with an IndexError on matches[0].
    print("No match returned - wait a moment and run this cell again.")
else:
    closest_dish_id = matches[0]['id']
    print(closest_dish_id)
    # Ids look like "dish_<position>"; recover the position in dish_descriptions
    closest_dish_index = int(closest_dish_id.split('_')[1])
    print(closest_dish_index)
    print(dish_descriptions[closest_dish_index])

## Heat map Visualization

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity

# Two short sentences, already split into words
sentence1 = ["I", "enjoy", "spicy", "food"]
sentence2 = ["She", "likes", "hot", "meals"]

# Embed every word of each sentence (one API call per sentence)
embeddings1 = client.embeddings.create(model='mistral-embed', inputs=sentence1)
embeddings2 = client.embeddings.create(model='mistral-embed', inputs=sentence2)

# Extract the raw vectors from both responses
embeddings1_arr = [entry.embedding for entry in embeddings1.data]
embeddings2_arr = [entry.embedding for entry in embeddings2.data]

# Pairwise cosine similarity: rows follow sentence2, columns follow sentence1
similarity_scores = cosine_similarity(embeddings2_arr, embeddings1_arr)

# Draw the matrix as an annotated heatmap, labelled to match that orientation
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    similarity_scores,
    annot=True,
    cmap='Blues',
    xticklabels=sentence1,
    yticklabels=sentence2,
    ax=ax,
)

# Title and axis labels
ax.set_title('Semantic Similarity Heatmap')
ax.set_xlabel('Words in Sentence 1')
ax.set_ylabel('Words in Sentence 2')

# Render the figure
plt.show()

## t-SNE Visualizations

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.manifold import TSNE

# Two short sentences, already split into words
sentence1 = ["I", "enjoy", "spicy", "food"]
sentence2 = ["She", "likes", "hot", "meals"]

# Embed every word of each sentence (one API call per sentence)
embeddings1 = client.embeddings.create(model='mistral-embed', inputs=sentence1)
embeddings2 = client.embeddings.create(model='mistral-embed', inputs=sentence2)

# Extract the raw vectors from both responses
embeddings1_arr = [entry.embedding for entry in embeddings1.data]
embeddings2_arr = [entry.embedding for entry in embeddings2.data]

# Stack both sentences into one matrix; labels are kept in the same order
combined_embeddings = np.array(embeddings1_arr + embeddings2_arr)
labels = sentence1 + sentence2

# Project the vectors down to 2-D with t-SNE (seed fixed via random_state)
tsne = TSNE(n_components=2, perplexity=5, random_state=42)
tsne_results = tsne.fit_transform(combined_embeddings)

# One scatter call per word, with its label drawn slightly offset from the point
fig, ax = plt.subplots(figsize=(10, 7))
for (x, y), label in zip(tsne_results, labels):
    ax.scatter(x, y)
    ax.text(x+0.1, y+0.1, label, fontsize=12)

ax.set_title('t-SNE Visualization of Word Embeddings')
ax.set_xlabel('t-SNE Dimension 1')
ax.set_ylabel('t-SNE Dimension 2')
plt.show()

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE


# Build the plotting inputs as NEW lists: the user's query is placed next to
# the dishes without mutating `dish_embeds_arr` / `dish_descriptions`, so this
# cell can be re-run without adding duplicates and earlier cells that read
# those lists still see only the dishes.
plot_embeds = dish_embeds_arr + [user_embed]
plot_labels = dish_descriptions + [user_input]

dish_embeds_np_arr = np.array(plot_embeds)
# Use t-SNE to reduce dimensions to 2D (seed fixed via random_state)
tsne = TSNE(n_components=2, perplexity=5, random_state=42)
tsne_results = tsne.fit_transform(dish_embeds_np_arr)

# Plot the t-SNE results: one point per dish plus one for the user's query,
# each labelled with its text slightly offset from the point
plt.figure(figsize=(10, 7))
for (x, y), label in zip(tsne_results, plot_labels):
    plt.scatter(x, y)
    plt.text(x + 0.1, y + 0.1, label, fontsize=9)

plt.title('t-SNE Visualization of Dish Embeddings')
plt.xlabel('t-SNE Dimension 1')
plt.ylabel('t-SNE Dimension 2')
plt.show()