In [None]:
# When running in a Colab notebook, uncomment the next line to install the dependencies:
# %pip install -q aleph-alpha-client langchain python-dotenv

In [None]:
from aleph_alpha_client import Client, SemanticEmbeddingRequest, SemanticEmbeddingResponse, SemanticRepresentation, Prompt, TextControl
from scipy import spatial
import numpy as np
import os
from dotenv import load_dotenv

from langchain.llms import AlephAlpha
from langchain.embeddings import AlephAlphaSymmetricSemanticEmbedding, AlephAlphaAsymmetricSemanticEmbedding

import plotly.express as px
import plotly.graph_objects as go

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.decomposition import PCA

In [None]:
# Load variables from a local .env file (if one exists) into the environment.
load_dotenv()

# Read the API token once and fail early with an actionable message.
# Without this check a missing variable would pass `token=None` to the client
# and only surface later as a harder-to-diagnose error.
aa_token = os.getenv("AA_TOKEN")
if not aa_token:
    raise RuntimeError(
        "AA_TOKEN is not set. Add it to your .env file or export it as an "
        "environment variable before running this notebook."
    )

client = Client(token=aa_token)

## Let's use Luminous embeddings for classification

In [None]:
# Example messages for the three classes we want to tell apart.
# Each class is a plain list of five strings; the list index carries no meaning.
#
# class_1: messages addressed to IT support about problems with a software product.
class_1 = ["Hey IT support team, I'm experiencing some issues with the software. It keeps crashing whenever I try to open a certain file. Can you please look into this and help me resolve the problem? Thanks!",
"Hi there, I need some assistance with the software's latest update. Ever since I installed it, some features seem to be missing, and the interface looks a bit different. Could you guide me on how to restore the missing functionalities or revert to the previous version? Your help is much appreciated!",
"Hello IT support, I'm having trouble connecting the software to my printer. It was working fine before, but now I can't seem to print any documents. Could you please walk me through the troubleshooting steps or provide any necessary drivers to fix this issue? Thanks a lot for your help!",
"Dear IT team, I'm a new user of this software, and I'm finding it a bit confusing to navigate through its features. Is there any user guide or online tutorial available that can help me get started and make the most of its capabilities? Your guidance would be invaluable!",
"Hi support, I accidentally deleted some important data within the software, and now I'm worried about recovering it. Is there a built-in recovery option, or do you have any recommendations for data recovery tools that work well with this software? Any help to retrieve the lost data would be fantastic! Thank you!",
]

# class_2: employee questions of the kind an HR department would handle
# (benefits, personal data changes, internal jobs, time off, recognition).
class_2 = [
"Hello, I would like to inquire about the employee benefits and the process to enroll in the company's benefits program. Could you please provide me with more information?",
"Hi there, I recently changed my address and need to update my personal information with the company. Can you guide me on how to do that, or do I need to fill out a form?",
"Dear team, I'm interested in exploring internal job opportunities within the organization. Are there any current openings, and could you direct me to the appropriate department or person to discuss this further?",
"Good morning, I have a question about the paid time off policy. I'd like to understand how much accrued leave I currently have and how to request time off. Thank you!",
"Hey, I wanted to share some positive feedback about a colleague who went above and beyond to assist me on a project. Is there a recognition or appreciation program in place, and if so, how can I nominate this person for their outstanding efforts?",
]

# class_3: purchase-related inquiries about the software (pricing, demos,
# ordering, upgrades, bulk licenses).
class_3 = [
"Hello, I'm interested in purchasing your software product. Could you please provide me with more details about its features, pricing, and licensing options? I'm excited to explore how it can benefit my business!",
"Hi there, I've been researching software solutions for my specific needs, and your product seems like a perfect fit. Can you offer a demo or trial version so I can evaluate its capabilities before making a purchase decision?",
"Dear sales team, I'm impressed with the positive reviews and recommendations I've seen about your software. I'm ready to proceed with the purchase and would like to know the steps for placing an order and making payment. Looking forward to getting started with it!",
"Good day, I'm a long-time user of your free version, and I'm now ready to upgrade to the premium version for more advanced features. Can you please guide me on how to upgrade my account and take advantage of the additional functionalities?",
"Hi, I run a small business, and I believe your software can streamline our operations significantly. I'm interested in purchasing multiple licenses for my team. Can you provide any special discounts or packages for bulk orders? Thank you!"
]

### Next we generate embeddings for each class

In [None]:
# Embed every example sentence of the three classes with the Aleph Alpha client.
# (Reference solution for the former TODO placeholders, which were bare
# `name = # TODO` assignments and therefore a SyntaxError.)

# Model used for all embeddings in this notebook; change it here in one place.
EMBEDDING_MODEL = "luminous-base"


def embed_text(text):
    """Return the semantic embedding of `text` as a sequence of floats.

    The symmetric representation is requested because the texts are compared
    with each other (classification / similarity) rather than used as a
    query against documents.
    """
    request = SemanticEmbeddingRequest(
        prompt=Prompt.from_text(text),
        representation=SemanticRepresentation.Symmetric,
    )
    response = client.semantic_embed(request=request, model=EMBEDDING_MODEL)
    return response.embedding


# One embedding per sentence, kept as plain lists so that later cells can
# concatenate and iterate over them.
embeddings_class_1 = [embed_text(text) for text in class_1]
embeddings_class_2 = [embed_text(text) for text in class_2]
embeddings_class_3 = [embed_text(text) for text in class_3]


# The unseen sentence we want to classify.
new_sentence = "Hey, my stupid Internet isn't working. Can you help me?"
sentence_embedding = embed_text(new_sentence)

In [None]:
# Compare the new sentence with every example of the three classes and report
# the average similarity per class.
# (Reference solution for the former TODO placeholders.)


def cosine_similarity(vector_a, vector_b):
    """Return the cosine similarity of two vectors (1 = same direction)."""
    # scipy returns the cosine *distance*, i.e. 1 - similarity.
    return 1 - spatial.distance.cosine(vector_a, vector_b)


# One similarity value per example sentence of each class.
similarities_class_1 = [cosine_similarity(sentence_embedding, embedding) for embedding in embeddings_class_1]
similarities_class_2 = [cosine_similarity(sentence_embedding, embedding) for embedding in embeddings_class_2]
similarities_class_3 = [cosine_similarity(sentence_embedding, embedding) for embedding in embeddings_class_3]

# Average similarity of the new sentence to each of the three classes;
# the class with the highest average is the most plausible label.
avg_similarity_class_1 = np.mean(similarities_class_1)
avg_similarity_class_2 = np.mean(similarities_class_2)
avg_similarity_class_3 = np.mean(similarities_class_3)

print("Similarity to class 1: ", avg_similarity_class_1)
print("Similarity to class 2: ", avg_similarity_class_2)
print("Similarity to class 3: ", avg_similarity_class_3)

### Let's actually train a classifier on these embeddings

In [None]:
# Let's use PCA to reduce the dimensionality of the embeddings to 2D.
# The projection is fitted on all three classes together so that they share
# one coordinate system. np.concatenate works for lists of vectors as well as
# for arrays (a plain `+` would add arrays element-wise instead of stacking).
pca = PCA(n_components=2)
pca.fit(np.concatenate([embeddings_class_1, embeddings_class_2, embeddings_class_3]))
pca_embeddings_class_1 = pca.transform(embeddings_class_1)
pca_embeddings_class_2 = pca.transform(embeddings_class_2)
pca_embeddings_class_3 = pca.transform(embeddings_class_3)

# Now let's plot the embeddings from all three classes.
# Each trace is paired with ITS OWN sentences and similarity values: passing
# the combined text list to every trace would make the points of classes 2
# and 3 show class 1 sentences on hover.
class_traces = [
    (pca_embeddings_class_1, class_1, similarities_class_1, "red"),
    (pca_embeddings_class_2, class_2, similarities_class_2, "green"),
    (pca_embeddings_class_3, class_3, similarities_class_3, "blue"),
]

fig = go.Figure()

for i, (embeddings, texts, similarities, color) in enumerate(class_traces):
    fig.add_trace(go.Scatter(
        x=embeddings[:, 0],
        y=embeddings[:, 1],
        mode="markers",
        name=f"Class {i+1}",
        marker=dict(
            size=12,
            color=color,
        ),
        text=texts,
        # Per-point similarity to the new sentence, shown in the hover box.
        # (The marker colour is a colour name and cannot be formatted as a number.)
        customdata=similarities,
        hovertemplate=(
            "<b>%{text}</b><br><br>"
            "<i>Similarity to new sentence:</i><br>"
            "%{customdata:.2f}<br>"
            "<extra></extra>"
        ),
    ))

fig.update_layout(
    title="Sentence embeddings projected to 2D with PCA",
    xaxis_title="Principal component 1",
    yaxis_title="Principal component 2",
)

fig.show()

In [None]:
# Define a k-nearest-neighbours classifier that votes among the 3 closest
# training embeddings
# (documentation: https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html)
clf = KNeighborsClassifier(n_neighbors=3)

# Training data: every embedding of all three classes in one flat list ...
# (unpacking works whether the per-class containers are lists or arrays)
X = [*embeddings_class_1, *embeddings_class_2, *embeddings_class_3]
# ... and the matching class label (1, 2 or 3) for each embedding, in the same order.
y = (
    [1] * len(embeddings_class_1)
    + [2] * len(embeddings_class_2)
    + [3] * len(embeddings_class_3)
)

# fit the classifier
clf.fit(X, y)

# predict() expects a 2-D input, hence the single embedding is wrapped in a list.
print("Predicted class: ", clf.predict([sentence_embedding]))

In [None]:
# Let's try a different classifier: a Support Vector Machine
# (documentation: https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html)
# probability=True is required for predict_proba() below; it fits an extra
# calibration step using internal cross-validation, which is randomised, so
# the seed is fixed to keep the output reproducible.
# NOTE: with only a handful of training samples the calibrated probabilities
# are rough estimates and may not agree with predict().
svm = SVC(probability=True, random_state=0)
svm.fit(X, y)

print("Predicted class: ", svm.predict([sentence_embedding]))

# get the probabilities for each class (columns follow svm.classes_)
print("Probabilities: ", svm.predict_proba([sentence_embedding]))

In [None]:
# Collapse each of the three classes into a single representative vector:
# the element-wise mean (axis=0) over all embeddings of that class.
aggregation_class_1, aggregation_class_2, aggregation_class_3 = (
    np.mean(class_embeddings, axis=0)
    for class_embeddings in (embeddings_class_1, embeddings_class_2, embeddings_class_3)
)

In [None]:
# PCA with three components, fitted on all training embeddings.
pca = PCA(n_components=3)

# fit the PCA to the embeddings
pca.fit(X)

# transform the embeddings
X_pca = pca.transform(X)

# transform() expects a 2-D input, so each single vector is wrapped in a list;
# every result therefore has shape (1, 3).
pca_agg_class_1 = pca.transform([aggregation_class_1])
pca_agg_class_2 = pca.transform([aggregation_class_2])
pca_agg_class_3 = pca.transform([aggregation_class_3])


# transform the new sentence

sentence_embedding_pca = pca.transform([sentence_embedding])

# plot the class means and the new sentence
# Each entry: (legend label, hover text, projected point, original vector, colour).
# The new sentence gets its own label instead of being numbered as a fourth class,
# and every point gets its own hover text.
points = [
    ("Class 1 (mean)", "Mean embedding of class 1", pca_agg_class_1, aggregation_class_1, "red"),
    ("Class 2 (mean)", "Mean embedding of class 2", pca_agg_class_2, aggregation_class_2, "green"),
    ("Class 3 (mean)", "Mean embedding of class 3", pca_agg_class_3, aggregation_class_3, "blue"),
    ("New sentence", new_sentence, sentence_embedding_pca, sentence_embedding, "yellow"),
]

fig = go.Figure()

for label, hover_text, embeddings, vector, color in points:
    # Cosine similarity is computed in the ORIGINAL embedding space, not in
    # the 3-D projection (scipy returns the distance, i.e. 1 - similarity).
    similarity = 1 - spatial.distance.cosine(vector, sentence_embedding)

    fig.add_trace(go.Scatter3d(
        x=embeddings[:, 0],
        y=embeddings[:, 1],
        z=embeddings[:, 2],
        mode="markers",
        name=label,
        marker=dict(
            size=12,
            color=color,
        ),
        text=[hover_text],
        # The marker colour is a colour name and cannot be formatted as a
        # number, so the similarity is passed explicitly.
        customdata=[similarity],
        hovertemplate=(
            "<b>%{text}</b><br><br>"
            "<i>Similarity to new sentence:</i><br>"
            "%{customdata:.2f}<br>"
            "<extra></extra>"
        ),
    ))

fig.update_layout(title="Class means and new sentence projected to 3D with PCA")

fig.show()