In [1]:
import sys
import os

# Report where the kernel started so the directory change below is visible
current_dir = os.getcwd()
print(f"Current Directory: {current_dir}")

# Project root directory. The FLEXIAI_PROJECT_ROOT environment variable takes
# precedence when it is set; otherwise the original hard-coded location is
# used, so the notebook can also run from a checkout in a different place.
project_root = os.environ.get('FLEXIAI_PROJECT_ROOT', '/home/razvansavin/Proiecte/flexiai')
os.chdir(project_root)
print(f"Changed Directory to: {os.getcwd()}")

# Make the project importable. The membership check keeps this cell
# idempotent: re-running it does not pile up duplicate sys.path entries.
if project_root not in sys.path:
    sys.path.append(project_root)
print("Project root added to sys.path")

Current Directory: /home/razvansavin/Proiecte/flexiai/examples/Code examples
Changed Directory to: /home/razvansavin/Proiecte/flexiai
Project root added to sys.path


In [3]:
# Your script or main application file
from flexiai.core.flexiai_client import FlexiAI
from flexiai.config.logging_config import setup_logging

# Configure logging before the client is constructed
setup_logging()

# Build the FlexiAI client used by the following cells
flexiai = FlexiAI()

# Embed a single sample sentence and show its first 20 components.
# Everything stays inside the try block so that any failure, including one
# raised while slicing the result, is reported by the handler below.
text = "OpenAI provides powerful tools for developers."
try:
    manager = flexiai.embedding_manager
    embedding = manager.create_embedding(text)
    print("Embedding created successfully")
    preview = embedding[:20]
    print(f"{preview}...")
except Exception as e:
    message = str(e)
    print(f"Error creating embedding: {message}")


Embedding created successfully
[0.001711169839836657, -0.008033276535570621, -0.01131216436624527, -0.013320484198629856, 0.003306212369352579, 0.006352846045047045, -0.009037435986101627, -0.003398431232199073, 0.007760035805404186, -0.04169652983546257, 0.02409982867538929, 0.02855365350842476, 0.028471680358052254, -0.02169531211256981, -0.004112272523343563, 0.016257820650935173, 0.02004220522940159, -0.010506104677915573, 0.011510264128446579, 0.00212956964969635]...


---
---
---

In [3]:
# Install the third-party packages imported by later cells (numpy, scikit-learn).
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install numpy
%pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


### 1. **Text Similarity and Search**

#### Example: Finding Similar Texts
You can use embeddings to find texts that are similar to a given input text. This is useful for search engines, recommendation systems, and clustering similar documents.


In [4]:
import numpy as np

def cosine_similarity(vec1, vec2):
    """Calculate the cosine similarity between two vectors.

    Args:
        vec1: First vector, a 1-D sequence of numbers or a NumPy array.
        vec2: Second vector, with the same length as ``vec1``.

    Returns:
        The dot product of the two vectors divided by the product of their
        Euclidean norms. When either vector has zero norm the quotient is
        undefined; 0.0 is returned in that case instead of NaN, so that a
        degenerate vector can never win an ``np.argmax`` over similarities.

    Raises:
        ValueError: Propagated from ``np.dot`` when the shapes do not align.
    """
    denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    # Guard against 0/0: a zero-norm vector has no direction to compare.
    if denominator == 0:
        return np.float64(0.0)
    return np.dot(vec1, vec2) / denominator

# Reference sentence whose closest match we are looking for
input_text = "OpenAI provides powerful tools for developers."
input_embedding = flexiai.embedding_manager.create_embedding(input_text)

# Candidate sentences, each embedded in turn
texts = [
    "Developers use OpenAI tools for creating applications.",
    "AI is revolutionizing technology.",
    "Cooking recipes are easy to find online.",
]
embeddings = []
for candidate in texts:
    embeddings.append(flexiai.embedding_manager.create_embedding(candidate))

# Score every candidate against the reference and report the best one
similarities = []
for candidate_embedding in embeddings:
    similarities.append(cosine_similarity(input_embedding, candidate_embedding))
most_similar_index = np.argmax(similarities)
best_match = texts[most_similar_index]
print(f"Most similar text: {best_match}")


Most similar text: Developers use OpenAI tools for creating applications.


### 2. **Clustering**

#### Example: Clustering Texts
Clustering texts based on their embeddings can help in organizing and categorizing large datasets.

In [5]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import numpy as np

# Sentences to group by topic
texts = [
    "OpenAI provides powerful tools for developers.",
    "Developers use OpenAI tools for creating applications.",
    "AI is revolutionizing technology.",
    "Cooking recipes are easy to find online.",
    "Artificial intelligence is a growing field.",
    "Food blogs are popular.",
    "Machine learning is a subset of AI.",
    "I love baking new recipes.",
    "AI tools help developers build software.",
    "Technology is advancing rapidly.",
    "Recipes for healthy eating are trending.",
    "AI is used in various industries.",
]

# One embedding per sentence, in the same order as `texts`
embeddings = []
for sentence in texts:
    embeddings.append(flexiai.embedding_manager.create_embedding(sentence))

# Standardize the embedding features before clustering
scaler = StandardScaler()
scaled_embeddings = scaler.fit_transform(embeddings)

# Assign every sentence to one of `num_clusters` KMeans clusters
num_clusters = 3
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
clusters = kmeans.fit_predict(scaled_embeddings)

# List the members of each cluster, preserving the order of `texts`
for cluster_id in range(num_clusters):
    print(f"Cluster {cluster_id}:")
    for sentence, assigned_cluster in zip(texts, clusters):
        if assigned_cluster == cluster_id:
            print(f" - {sentence}")


Cluster 0:
 - AI is revolutionizing technology.
 - Artificial intelligence is a growing field.
 - Machine learning is a subset of AI.
 - Technology is advancing rapidly.
 - AI is used in various industries.
Cluster 1:
 - Cooking recipes are easy to find online.
 - Food blogs are popular.
 - I love baking new recipes.
 - Recipes for healthy eating are trending.
Cluster 2:
 - OpenAI provides powerful tools for developers.
 - Developers use OpenAI tools for creating applications.
 - AI tools help developers build software.


### 3. **Semantic Search**

#### Example: Implementing Semantic Search
Semantic search uses embeddings to find relevant documents based on the meaning of the query rather than keyword matching.


In [6]:
# Semantic search: rank documents by similarity to the query embedding
query = "How does AI impact technology?"
query_embedding = flexiai.embedding_manager.create_embedding(query)

# `documents` holds the texts to search; `doc_embeddings` holds their
# embeddings in the same order
documents = [
    "AI is revolutionizing technology.",
    "Cooking recipes are easy to find online.",
    "Developers use OpenAI tools for creating applications.",
]
doc_embeddings = []
for document in documents:
    doc_embeddings.append(flexiai.embedding_manager.create_embedding(document))

# Score each document against the query and report the top hit
similarities = []
for doc_embedding in doc_embeddings:
    similarities.append(cosine_similarity(query_embedding, doc_embedding))
most_relevant_index = np.argmax(similarities)
top_document = documents[most_relevant_index]
print(f"Most relevant document: {top_document}")


Most relevant document: AI is revolutionizing technology.


### 4. **Text Classification**

#### Example: Classifying Texts
Embeddings can be used as features for text classification tasks such as sentiment analysis, topic categorization, etc.


In [7]:
from sklearn.linear_model import LogisticRegression

# Tiny labelled training set for a tech / non-tech classifier
texts = [
    "I love using OpenAI tools.",
    "AI is transforming the industry.",
    "I found a great recipe for pasta.",
    "Artificial intelligence is fascinating.",
]
labels = [1, 1, 0, 1]  # 1 for tech-related, 0 for non-tech-related

# Embeddings serve as the feature vectors
embeddings = []
for sample in texts:
    embeddings.append(flexiai.embedding_manager.create_embedding(sample))

# Fit a logistic regression model on the embedded samples
classifier = LogisticRegression()
classifier.fit(embeddings, labels)

# Embed an unseen sentence and predict its category
new_text = "Developers are creating amazing applications with AI."
new_embedding = flexiai.embedding_manager.create_embedding(new_text)
predicted_label = classifier.predict([new_embedding])
if predicted_label[0] == 1:
    category = 'tech-related'
else:
    category = 'non-tech-related'
print(f"Predicted label: {category}")


Predicted label: tech-related


### 5. **Question Answering**

#### Example: Answering Questions with Context
Using embeddings, you can implement a basic question-answering system that retrieves the most relevant answer from a given context.

In [8]:
# Question answering by retrieval: pick the context sentence closest to the question
context = [
    "OpenAI provides powerful tools for developers.",
    "AI is revolutionizing technology.",
    "Developers use OpenAI tools for creating applications.",
]
question = "What does OpenAI provide?"

# Embed every context sentence first, then the question
context_embeddings = []
for sentence in context:
    context_embeddings.append(flexiai.embedding_manager.create_embedding(sentence))
question_embedding = flexiai.embedding_manager.create_embedding(question)

# The sentence with the highest cosine similarity is returned as the answer
similarities = []
for sentence_embedding in context_embeddings:
    similarities.append(cosine_similarity(question_embedding, sentence_embedding))
most_relevant_index = np.argmax(similarities)
answer = context[most_relevant_index]
print(f"Answer: {answer}")


Answer: OpenAI provides powerful tools for developers.


### 6. **Sentiment Analysis**

#### Example: Sentiment Classification with Logistic Regression
Embeddings can be used as features for training a sentiment classifier.

In [9]:
from flexiai.core.flexiai_client import FlexiAI
from flexiai.config.logging_config import setup_logging
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Set up logging
setup_logging()

# Initialize FlexiAI
flexiai = FlexiAI()

# Sample data
texts = [
    "I love using OpenAI tools.",
    "AI is revolutionizing the industry.",
    "I found a great recipe for pasta.",
    "Artificial intelligence is fascinating.",
    "I am so frustrated with this service.",
    "This is the worst experience I've ever had.",
]
labels = [1, 1, 1, 1, 0, 0]  # 1 for positive, 0 for negative

# Generate one embedding per text with FlexiAI's `create_embedding` method
embeddings = [flexiai.embedding_manager.create_embedding(text) for text in texts]

# Drop samples whose embedding is None. Embeddings and labels are filtered
# together, as pairs, so each surviving embedding keeps its own label.
# (Filtering `embeddings` first and zipping `labels` against the filtered list
# would only truncate the labels and shift them onto the wrong samples.)
labelled_embeddings = [(emb, label) for emb, label in zip(embeddings, labels) if emb is not None]
embeddings = [emb for emb, _ in labelled_embeddings]
labels = [label for _, label in labelled_embeddings]

# Split the data into training and test sets
train_embeddings, test_embeddings, train_labels, test_labels = train_test_split(embeddings, labels, test_size=0.2, random_state=42)

# Train the logistic regression classifier
classifier = LogisticRegression()
classifier.fit(train_embeddings, train_labels)

# Evaluate the classifier on the held-out samples
predicted_test_labels = classifier.predict(test_embeddings)
accuracy = accuracy_score(test_labels, predicted_test_labels)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

# Classify new texts
new_texts = [
    "OpenAI's tools are amazing!",
    "I am very disappointed with the service.",
]
# Embed the new texts the same way as the training data
new_embeddings = [flexiai.embedding_manager.create_embedding(text) for text in new_texts]
predicted_labels = classifier.predict(new_embeddings)

for text, label in zip(new_texts, predicted_labels):
    sentiment = 'positive' if label == 1 else 'negative'
    print(f"Text: '{text}' - Sentiment: {sentiment}")


Test Accuracy: 100.00%
Text: 'OpenAI's tools are amazing!' - Sentiment: positive
Text: 'I am very disappointed with the service.' - Sentiment: negative


### 7. **Enhanced Sentiment Analysis with Data Augmentation**

#### Example: Sentiment Classification with Logistic Regression and Data Augmentation
In this example, we improve sentiment classification by augmenting the data with additional variations of the texts. This helps the classifier generalize better.

In [10]:
from flexiai.core.flexiai_client import FlexiAI
from flexiai.config.logging_config import setup_logging
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Set up logging
setup_logging()

# Initialize FlexiAI
flexiai = FlexiAI()

# Sample data
texts = [
    "I love using OpenAI tools.",
    "AI is revolutionizing the industry.",
    "I found a great recipe for pasta.",
    "Artificial intelligence is fascinating.",
    "I am so frustrated with this service.",
    "This is the worst experience I've ever had.",
    "I hate waiting in long lines.",
    "The product quality is terrible.",
    "The new feature is awesome!",
    "I am very pleased with the customer support.",
]
labels = [1, 1, 1, 1, 0, 0, 0, 0, 1, 1]  # 1 for positive, 0 for negative

# Data augmentation: two extra paraphrases of every original text, listed in
# the same order as `texts` so the label pattern simply repeats.
augmented_texts = texts + [
    "I absolutely love using OpenAI tools.",
    "The industry is being revolutionized by AI.",
    "I discovered a fantastic recipe for pasta.",
    "AI is incredibly fascinating.",
    "This service is extremely frustrating.",
    "I've never had a worse experience.",
    "Waiting in long lines is so annoying.",
    "The quality of the product is horrible.",
    "The new feature is really awesome!",
    "Customer support has been very pleasing.",
    "Using OpenAI tools is such a pleasure.",
    "AI is changing the world.",
    "Found an amazing pasta recipe!",
    "I find artificial intelligence really interesting.",
    "This service makes me so angry.",
    "Worst service experience ever.",
    "I despise long wait times.",
    "Product quality is very poor.",
    "Loving the new feature!",
    "Very satisfied with customer support.",
]
augmented_labels = labels + labels + labels

# Every text needs exactly one label; fail early if the lists drift apart.
assert len(augmented_texts) == len(augmented_labels), "texts and labels must have the same length"

# Generate embeddings for texts
embeddings = [flexiai.embedding_manager.create_embedding(text) for text in augmented_texts]

# Drop samples whose embedding is None. Embeddings and labels are filtered
# together, as pairs, so each surviving embedding keeps its own label.
# (Filtering `embeddings` first and zipping the labels against the filtered
# list would only truncate the labels and shift them onto the wrong samples.)
labelled_embeddings = [(emb, label) for emb, label in zip(embeddings, augmented_labels) if emb is not None]
embeddings = [emb for emb, _ in labelled_embeddings]
augmented_labels = [label for _, label in labelled_embeddings]

# Split the data into training and test sets, keeping the class ratio in both
train_embeddings, test_embeddings, train_labels, test_labels = train_test_split(embeddings, augmented_labels, test_size=0.2, random_state=42, stratify=augmented_labels)

# Train the logistic regression classifier with regularization
classifier = LogisticRegression(C=1.0, solver='liblinear')
classifier.fit(train_embeddings, train_labels)

# Cross-validation on the training split, using StratifiedKFold to ensure balanced splits
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(classifier, train_embeddings, train_labels, cv=cv)

print(f"Cross-Validation Accuracy: {cv_scores.mean() * 100:.2f}%")

# Evaluate the classifier on the test set
predicted_test_labels = classifier.predict(test_embeddings)
accuracy = accuracy_score(test_labels, predicted_test_labels)
precision = precision_score(test_labels, predicted_test_labels)
recall = recall_score(test_labels, predicted_test_labels)
f1 = f1_score(test_labels, predicted_test_labels)

print(f"Test Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision * 100:.2f}%")
print(f"Recall: {recall * 100:.2f}%")
print(f"F1 Score: {f1 * 100:.2f}%")

# Classify new texts
new_texts = [
    "OpenAI's tools are amazing!",
    "I am very disappointed with the service.",
]
new_embeddings = [flexiai.embedding_manager.create_embedding(text) for text in new_texts]
predicted_labels = classifier.predict(new_embeddings)

for text, label in zip(new_texts, predicted_labels):
    sentiment = 'positive' if label == 1 else 'negative'
    print(f"Text: '{text}' - Sentiment: {sentiment}")


Cross-Validation Accuracy: 96.00%
Test Accuracy: 100.00%
Precision: 100.00%
Recall: 100.00%
F1 Score: 100.00%
Text: 'OpenAI's tools are amazing!' - Sentiment: positive
Text: 'I am very disappointed with the service.' - Sentiment: negative


In [11]:
# Show each label next to the first five components of its embedding
print("Embeddings and Labels:")
for vector, tag in zip(embeddings, augmented_labels):
    head = vector[:5]
    print(f"Label: {tag}, Embedding: {head}...")

Embeddings and Labels:
Label: 1, Embedding: [-0.02184968814253807, -0.02521532215178013, -0.005586950574070215, -0.0334678553044796, 0.007828461937606335]...
Label: 1, Embedding: [-0.01700989156961441, -0.024246729910373688, -0.009135679341852665, -0.0009535697172395885, -0.0037312875501811504]...
Label: 1, Embedding: [0.01686893217265606, 0.005502017680555582, 0.009441371075809002, -0.007303331978619099, 0.012377087958157063]...
Label: 1, Embedding: [-0.01049409806728363, -0.0019858062732964754, 0.012321686372160912, -0.015873534604907036, 0.010074334219098091]...
Label: 0, Embedding: [-0.027968741953372955, -0.0058114295825362206, -0.02613494172692299, -0.008957913145422935, -0.004756005480885506]...
Label: 0, Embedding: [-0.02066449075937271, -0.014167794026434422, 0.010794510133564472, -0.02041461691260338, -0.03453243523836136]...
Label: 0, Embedding: [-0.029232149943709373, 2.1115254639880732e-05, 0.010463926941156387, -0.012578565627336502, -0.03162316977977753]...
Label: 0, Emb