## Global Dependencies

In [None]:
import sys
import os
!{sys.executable} -m pip install torch
!{sys.executable} -m pip install defeatbeta-api
!{sys.executable} -m pip install diffusers
!{sys.executable} -m pip install huggingface-hub
!{sys.executable} -m pip install ipywidgets
!{sys.executable} -m pip install sentence-transformers

## Sentiment Analysis Pipeline

In [None]:
from transformers import pipeline

# Build a sentiment-analysis pipeline around the DistilBERT checkpoint named below.
classifier = pipeline(task="sentiment-analysis", model="distilbert/distilbert-base-uncased-finetuned-sst-2-english")

# Read one line of text and split it on "--" into individual statements.
# Surrounding whitespace is trimmed and empty fragments (e.g. from a trailing
# "--" or an empty input) are discarded so they are never sent to the model.
raw_text = input("Enter double-hyphen-separated values for sentiment analysis:")
prompt = [fragment.strip() for fragment in raw_text.split("--") if fragment.strip()]

# Only call the model when there is something to classify; otherwise report
# an empty result list.
results = classifier(prompt) if prompt else []
print(results)

## Text-to-Image Generation

In [None]:
import torch
from diffusers import FluxPipeline

# Pick the execution device and a matching precision:
# half precision on CUDA, full precision on CPU.
if torch.cuda.is_available():
    device, dtype = "cuda", torch.float16
else:
    device, dtype = "cpu", torch.float32

# Load the FLUX.1-dev pipeline in the chosen precision and move it to the device.
pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=dtype).to(device)

# Memory optimizations (flagged as very important by the original author).
pipe.enable_attention_slicing()
pipe.vae.enable_slicing()

# Fixed seed so repeated runs with the same prompt use the same random stream.
generator = torch.Generator(device).manual_seed(0)

prompt = input("Input something to generate an image")

# Run the pipeline once for a single 1024x1024 image.
generation = pipe(
    prompt,
    height=1024,
    width=1024,
    guidance_scale=3.5,
    num_inference_steps=40,  # author's note: 50 is overkill for FLUX
    max_sequence_length=512,
    generator=generator,
)

# Keep the first (only requested) image and write it to disk.
image = generation.images[0]
image.save("flux-dev.png")


## Clustering & Semantic Search Embeddings

In [None]:
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F

#Mean Pooling - Take attention mask into account for correct averaging
def mean_pooling(model_output, attention_mask):
    """Average token embeddings over the sequence axis, ignoring masked positions.

    Args:
        model_output: Indexable model output whose element 0 holds the
            per-token embeddings.
        attention_mask: Mask with one entry per token; it is expanded along a
            new trailing axis to the shape of the token embeddings.

    Returns:
        The mask-weighted sum over dimension 1 divided by the mask total over
        dimension 1. The divisor is clamped to at least 1e-9 so a fully
        masked row does not divide by zero.
    """
    hidden_states = model_output[0]
    # Broadcast the mask across the embedding axis and cast it to float
    # so it can be multiplied with the embeddings.
    mask = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
    masked_total = (hidden_states * mask).sum(dim=1)
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    return masked_total / token_counts


# Example inputs to embed.
sentences = [
    "To be, or not to be; that is the question.",
    "Whether 'tis nobler in the mind to suffer the slings and arrows of outrageous fortune, or to take arms against a sea of troubles, and by opposing end them",
]

# Tokenizer and encoder are loaded from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')

# Tokenize as one padded, truncated batch of PyTorch tensors.
encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')

# Forward pass without gradient tracking.
with torch.no_grad():
    model_output = model(**encoded_input)

# Pool token embeddings into one vector per sentence, then scale each
# row to unit L2 norm (p=2, dim=1 are F.normalize's defaults, spelled out).
sentence_embeddings = F.normalize(
    mean_pooling(model_output, encoded_input['attention_mask']),
    p=2,
    dim=1,
)

print(f"Sentence embeddings:\n{sentence_embeddings}")


## Financial News Sentiment Analysis

In [None]:
import torch
import torch.nn.functional as F

from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Tokenizer and sequence-classification model are loaded from the same checkpoint.
checkpoint = "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
sequences = ["The Fed has declared a 0.2% increase in interest rates", "TSLA's stock price has sky-rocketed in the last 24 hours."]

# Tokenize as one padded, truncated batch of PyTorch tensors.
tokens = tokenizer(sequences, padding=True, truncation=True, return_tensors="pt")

# Inference only: disable gradient tracking so no autograd graph is built
# for the forward pass.
with torch.no_grad():
    output = model(**tokens)

# Convert raw logits to per-class probabilities along the class axis.
logits = output.logits
probs = F.softmax(logits, dim=-1)

# Highest-probability class per sequence, mapped to its label via the model config.
predicted_class_ids = torch.argmax(probs, dim=-1).tolist()
predicted_labels = [model.config.id2label[i] for i in predicted_class_ids]

for sentence, label, sentence_probs, class_id in zip(sequences, predicted_labels, probs, predicted_class_ids):
    print(f"The statement '{sentence}' is classified as having {label} sentiment at a {sentence_probs[class_id]:.4f} probability")