In [None]:
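# When using a Colab notebook, uncomment the next line to install the dependencies.
# Note: `from dotenv import load_dotenv` is provided by the PyPI package `python-dotenv`.
#%pip install aleph-alpha-client langchain python-dotenv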

In [None]:
from aleph_alpha_client import Client, SemanticEmbeddingRequest, SemanticEmbeddingResponse, SemanticRepresentation, Prompt, TextControl
from scipy import spatial
import numpy as np
import os
from dotenv import load_dotenv

from langchain.llms import AlephAlpha
from langchain.embeddings import AlephAlphaSymmetricSemanticEmbedding, AlephAlphaAsymmetricSemanticEmbedding

from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Read variables from a local .env file (if present) into the process environment.
load_dotenv()

# Fail fast with an actionable message instead of handing `None` to the client
# when the token has not been configured.
aa_token = os.getenv("AA_TOKEN")
if not aa_token:
    raise RuntimeError(
        "AA_TOKEN is not set. Add it to your .env file or export it as an environment variable."
    )

# API client used by every embedding request in this notebook.
client = Client(token=aa_token)

## Let's use luminous embeddings as a classifier

In [None]:
# Example sentences for the categories we want to tell apart.
# Class 1: sentences about digital wellbeing.
class_1 = [
    "Find balance in the digital age with mindful technology use and prioritizing self-care.",
    "Take charge of your digital wellbeing by setting boundaries and practicing digital detoxes.",
    "Promote digital wellbeing through healthy screen time habits and fostering positive online relationships.",
    "Explore digital wellbeing apps and tools that help you manage your digital presence and mental health.",
    "Prioritize your physical and mental health by practicing digital mindfulness and limiting screen time.",
    "Digital wellbeing starts with conscious choices, such as disconnecting from devices and engaging in offline activities.",
]

# Class 2: sentences about digital natives.
class_2 = [
    "Digital natives effortlessly navigate the digital landscape, growing up in a world saturated with technology.",
    "Born in the digital era, digital natives possess innate digital literacy and adapt quickly to new technologies.",
    "Digital natives seamlessly integrate technology into their daily lives, leveraging it for communication, learning, and entertainment.",
    "As digital natives, they are the vanguards of technological advancements, shaping the digital landscape with their digital-first mindset.",
    "Digital natives are fluent in the language of emojis, hashtags, and memes, using them to express themselves in the digital realm.",
    "Growing up surrounded by screens, digital natives are at ease multitasking across multiple devices and platforms.",
]

# Class 3: sentences about digital companies.
class_3 = [
    "Digital companies leverage innovative technologies to disrupt traditional industries and drive digital transformation.",
    "Agile and data-driven, digital companies thrive in the ever-evolving digital ecosystem, constantly adapting to market demands.",
    "Digital companies prioritize user experience, leveraging intuitive interfaces and seamless interactions to engage and retain customers.",
    "From e-commerce giants to fintech startups, digital companies revolutionize the way business is conducted in the digital age.",
    "Digital companies harness the power of big data and analytics to gain actionable insights and drive informed decision-making.",
    "In a borderless digital world, digital companies operate globally, transcending geographical limitations and connecting people worldwide.",
]

### Next we generate embeddings for each class

In [None]:
# Embed every example sentence of the three classes, plus the new sentence we want to classify.
def embed_symmetric(text, model="luminous-base"):
    """Return the symmetric semantic embedding of ``text``.

    Every text in this notebook is embedded through this one helper, so the
    class examples and the new sentence share the same model and representation.

    Args:
        text: The sentence to embed.
        model: Name of the model passed to ``client.semantic_embed``.

    Returns:
        The ``embedding`` attribute of the response returned by ``client.semantic_embed``.
    """
    request = SemanticEmbeddingRequest(
        prompt=Prompt.from_text(text),
        representation=SemanticRepresentation.Symmetric,
    )
    return client.semantic_embed(request, model=model).embedding


# One API request per sentence; each result list is in the same order as its class list.
embeddings_class_1 = [embed_symmetric(text) for text in class_1]
embeddings_class_2 = [embed_symmetric(text) for text in class_2]
embeddings_class_3 = [embed_symmetric(text) for text in class_3]

# The unseen sentence that the following cells assign to one of the classes.
new_sentence = "In order to retain control over you life, you should try to limit your screen time and practice digital mindfulness."
sentence_embedding = embed_symmetric(new_sentence)

In [None]:
# Compare the new sentence with every example sentence, then average per class.
def _cosine_similarities(reference, candidates):
    """Return ``1 - cosine distance`` between ``reference`` and each vector in ``candidates``."""
    return [1 - spatial.distance.cosine(reference, candidate) for candidate in candidates]


# Per-sentence cosine similarities, one list per class.
similarities_class_1 = _cosine_similarities(sentence_embedding, embeddings_class_1)
similarities_class_2 = _cosine_similarities(sentence_embedding, embeddings_class_2)
similarities_class_3 = _cosine_similarities(sentence_embedding, embeddings_class_3)

# Mean similarity of the new sentence to each of the three classes.
avg_similarity_class_1 = np.mean(similarities_class_1)
avg_similarity_class_2 = np.mean(similarities_class_2)
avg_similarity_class_3 = np.mean(similarities_class_3)

# The class with the highest average is the closest match for the new sentence.
print("Similarity to class 1: ", avg_similarity_class_1)
print("Similarity to class 2: ", avg_similarity_class_2)
print("Similarity to class 3: ", avg_similarity_class_3)

### Let's actually train a classifier on these embeddings

In [None]:
# Train a k-nearest-neighbours classifier: embeddings are the features, class ids the labels.
clf = KNeighborsClassifier(n_neighbors=3)

# Features: one NumPy vector per example sentence, ordered class 1, class 2, class 3.
embedding_groups = (embeddings_class_1, embeddings_class_2, embeddings_class_3)
X = list(map(np.array, embeddings_class_1 + embeddings_class_2 + embeddings_class_3))

# Labels: 0, 1 or 2, repeated once per sentence so that they line up with X.
y = np.array([label for label, group in enumerate(embedding_groups) for _ in group])

clf.fit(X, y)

# The embedding is wrapped in a list so that predict() receives a batch of one sample.
predicted_class = clf.predict([sentence_embedding])
print("Predicted class: ", predicted_class)

In [None]:
from sklearn.svm import SVC
# TODO train a support vector machine classifier and get the probability of the new sentence belonging to each class