In [None]:
from transformers import pipeline

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tqdm

LLMs: GPT (OpenAI), LLaMA (Meta), Gemma (Google)

Diffusion: DALL·E, Imagen, Stable Diffusion, Midjourney

## 1. Transformer models: 1.3 Transformers

### Classifier

In [None]:
# Build the sentiment-analysis pipeline on the 'mps' device; no model is specified here.
classifier = pipeline(task='sentiment-analysis', device='mps')

In [None]:
# Two example inputs for the sentiment classifier.
sentences = [
    "I've been waiting for a HuggingFace course my whole life.",
    "I hate this so much!",
]

In [None]:
# Run the sentiment pipeline over both example sentences in a single call.
output = classifier(sentences)

In [None]:
# Bare expression: renders the sentiment results as this cell's output.
output

### Zero-shot classification

In [None]:
classifier = pipeline('zero-shot-classification', device='mps')

In [None]:
output = classifier(
    'This is a course about the Transformers library',
    candidate_labels=['education', 'politics', 'business']
)

In [None]:
# Bare expression: renders the zero-shot classification result as this cell's output.
output

### Text generation

In [None]:
#generator = pipeline('text-generation', model='distilgpt2', device='mps')

In [None]:
#output = generator(
#    'In this course, we will teach you how to', 
#    max_length=30, 
#    num_return_sequences=2
#)

### Mask filling

In [None]:
#unmasker = pipeline('fill-mask', device='mps')

In [None]:
#output = unmasker("This course will teach you all about <mask> models.", top_k=2)

### Named entity recognition

In [None]:
# Named-entity-recognition pipeline on the 'mps' device.
# `grouped_entities=True` is deprecated in transformers; `aggregation_strategy='simple'`
# is its documented replacement and merges tokens of the same entity into one group.
ner = pipeline('ner', aggregation_strategy='simple', device='mps')

In [None]:
# Run the NER pipeline on a single example sentence.
output = ner('My name is Sylvain and I work at Hugging Face in Brooklyn.')

In [None]:
# Bare expression: renders the NER result as this cell's output.
output

### Question answering

In [None]:
# Build the question-answering pipeline on the 'mps' device; no model is specified here.
question_answerer = pipeline(task='question-answering', device='mps')

In [None]:
# Ask the question-answering pipeline a question about the given context sentence.
qa_inputs = {
    'question': 'Where do I work?',
    'context': 'My name is Sylvain and I work at Hugging Face in Brooklyn.',
}
output = question_answerer(**qa_inputs)

In [None]:
# Bare expression: renders the question-answering result as this cell's output.
output

### Summarization

In [None]:
#with open('prompt_summary.txt') as f:
#    lines = f.readlines()
#prompt_lst = [line[:-1] if i != len(lines) - 1 else line for i, line in enumerate(lines)]
#prompt = ''.join(prompt_lst)

In [None]:
#summarizer = pipeline('summarization', device='mps')

In [None]:
#output = summarizer(prompt)

### Translation

In [None]:
#translator = pipeline('translation', model='Helsinki-NLP/opus-mt-fr-en')

In [None]:
#translator("Ce cours est produit par Hugging Face.")

## Notes

In [None]:
# Gather one [parameter name, element count] record for each named parameter
# of the model wrapped by `classifier`.
name_params = [
    [param_name, param.numel()]
    for param_name, param in classifier.model.named_parameters()
]

In [None]:
# Tabulate the per-parameter records: one row per named parameter.
df = pd.DataFrame(
    data=name_params,
    columns=['name', 'parameters'],
)

In [None]:
# Total parameter count: sum of the per-parameter element counts stored in `df`.
df['parameters'].sum()

In [None]:
import string

In [None]:
# Alphabet to sample from: every ASCII letter, lowercase then uppercase, one per list entry.
letters = [*string.ascii_letters]

In [None]:
# Fix the global NumPy seed so the random "sentences" are identical on every run
# (later np.random calls in the notebook draw from the same seeded stream).
RANDOM_SEED = 0
np.random.seed(RANDOM_SEED)

# `arr` holds one random word per row and one letter per column, so every word has
# exactly `mean_word_length` letters; rows are later grouped `mean_sentence_length`
# at a time to form `n_sentences` sentences.
mean_word_length = 5
mean_sentence_length = 1
n_sentences = 100
arr = np.random.choice(letters, (mean_sentence_length * n_sentences, mean_word_length))

In [None]:
# Assemble the random "sentences": each consecutive group of `mean_sentence_length`
# rows of `arr` supplies the words, which are joined by single spaces and terminated
# by a randomly chosen punctuation mark.
sentences_rand = []
for first_row in range(0, arr.shape[0], mean_sentence_length):
    words_in_sentence = [
        ''.join(arr[first_row + offset]) for offset in range(mean_sentence_length)
    ]
    closing_mark = np.random.choice(['.', '?', '!'])
    sentences_rand.append(' '.join(words_in_sentence) + closing_mark)

In [None]:
# Classify the randomly generated sentences.
# NOTE(review): this call passes no `candidate_labels`, so `classifier` must be bound to
# the sentiment-analysis pipeline (not the zero-shot one) when this cell runs — confirm.
output_rand = classifier(sentences_rand)

In [None]:
# Build the feature-extraction pipeline on the 'mps' device; no model is specified here.
feature_extractor = pipeline(task='feature-extraction', device='mps')

In [None]:
# Words whose extracted features are inspected below.
words = [
    'man',
    'woman',
    'king',
    'queen',
]

In [None]:
# Run the feature-extraction pipeline on all four words in a single call.
output = feature_extractor(words)

In [None]:
# Convert the pipeline result to a NumPy array so its shape can be inspected.
# NOTE(review): presumably this only yields a regular (non-ragged) array if every word
# produces the same number of tokens — confirm.
output = np.array(output)

In [None]:
# Bare expression: shows the dimensions of the feature array.
output.shape