In [None]:
import transformers
from transformers import AutoTokenizer, pipeline
import torch

model_id = "tiiuae/falcon-180B-chat"
# For more modest hardware: "tiiuae/falcon-40B-instruct" or "tiiuae/falcon-7B-instruct"

# Offline mode: no access token is passed, and local_files_only=True makes
# from_pretrained read exclusively from files already present locally.
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    local_files_only=True,
    trust_remote_code=True,
)

# Load the model explicitly instead of passing the model id to
# transformers.pipeline(): local_files_only is a from_pretrained option, and
# handing it to pipeline() as a loose keyword does not route it to the
# model-loading call.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
    local_files_only=True,
)

# Reuse the tokenizer loaded above so it is not resolved a second time.
generator = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
# Alias kept so any other cell that calls `pipeline(...)` keeps working.
# NOTE: this name shadows the `pipeline` function imported from transformers.
pipeline = generator

topic = "artificial intelligence in education"
n_tweets = 30

# Falcon chat checkpoints use a plain-text "System: / User: / Falcon:" turn
# layout rather than <|system|>-style special tokens.
system_message = (
    "You are a viral social media expert. Generate engaging, human-like tweets. "
    "Respond only with tweet texts separated by new lines."
)
user_message = (
    f"Generate {n_tweets} different tweets about '{topic}'. "
    "Each must be under 280 characters, sound natural, and be varied in style. "
    "No numbering or bullet points."
)
prompt = f"System: {system_message}\nUser: {user_message}\nFalcon:"

# Size the generation budget from the number of tweets requested; a fixed 500
# tokens leaves only ~16 tokens per tweet when n_tweets is 30.
# NOTE(review): 70 tokens per 280-character tweet is a rough ~4 chars/token
# heuristic, and the 1800 cap assumes a 2048-token context window - confirm
# both against the model config before relying on them.
TOKENS_PER_TWEET = 70
MAX_GENERATION_BUDGET = 1800
max_new_tokens = min(n_tweets * TOKENS_PER_TWEET, MAX_GENERATION_BUDGET)

outputs = generator(
    prompt,
    max_new_tokens=max_new_tokens,
    do_sample=True,
    temperature=0.8,
    top_p=0.9,
    top_k=50,
    repetition_penalty=1.2,
    eos_token_id=tokenizer.eos_token_id,
    # Padding falls back to EOS explicitly so generate() does not have to guess.
    pad_token_id=tokenizer.eos_token_id,
    # Return only the completion, not the prompt echoed back in front of it.
    return_full_text=False,
)

print(outputs[0]['generated_text'].strip())

In [3]:
# Run this cell on a machine WITH internet access.
import os

from transformers import AutoTokenizer, AutoModel
import torch

model_id = "tiiuae/falcon-180B-chat"  # or use the 7B model if you prefer

# Never hard-code access tokens in a notebook: they end up in version control
# and in shared copies. Read the token from the HF_TOKEN environment variable;
# when it is unset, token=None lets the library fall back to its own stored
# login credentials, if any.
# NOTE(review): a token was previously committed in this cell - revoke it on
# the Hugging Face Hub, since it must be considered exposed.
token = os.environ.get("HF_TOKEN")

# Download the tokenizer and the model weights into the local cache.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
model = AutoModel.from_pretrained(model_id, token=token, torch_dtype=torch.bfloat16)

print("✅ Download completo! Arquivos salvos em cache.")

Fetching 81 files: 100%|██████████| 81/81 [44:08<00:00, 32.69s/it]  
Loading checkpoint shards: 100%|██████████| 81/81 [00:13<00:00,  6.04it/s]

✅ Download completo! Arquivos salvos em cache.





In [None]:
import torch
import subprocess
import os

print("=== VERIFICAÇÃO DE RECURSOS COM PYTHON ===")

# List every CUDA device PyTorch can see, with its name and total memory
# expressed in GB (bytes / 1e9).
gpu_count = torch.cuda.device_count()
print(f"GPUs disponíveis: {gpu_count}")
for gpu_index in range(gpu_count):
    gpu_name = torch.cuda.get_device_name(gpu_index)
    print(f"GPU {gpu_index}: {gpu_name}")
    gpu_memory_gb = torch.cuda.get_device_properties(gpu_index).total_memory / 1e9
    print(f"  Memória: {gpu_memory_gb:.1f} GB")

# CPU count as reported by os.cpu_count().
cpu_cores = os.cpu_count()
print(f"\nNúcleos CPU: {cpu_cores}")

# System RAM, total and currently available, in GB (bytes / 1e9).
import psutil
total_ram_gb = psutil.virtual_memory().total / 1e9
print(f"Memória RAM Total: {total_ram_gb:.1f} GB")
available_ram_gb = psutil.virtual_memory().available / 1e9
print(f"Memória RAM Disponível: {available_ram_gb:.1f} GB")