Installing Libraries

In [1]:
!pip install -qU \
  transformers==4.31.0 \
  sentence-transformers==2.2.2 \
  pinecone-client==2.2.2 \
  datasets==2.14.0 \
  accelerate==0.21.0 \
  einops==0.6.1 \
  langchain==0.0.240 \
  xformers==0.0.20 \
  bitsandbytes==0.41.0

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m24.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m179.1/179.1 kB[0m [31m19.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m492.2/492.2 kB[0m [31m40.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m25.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m66.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.1/109.1 MB[0m [31m11.2 

Initialize Embedding Pipeline

In [2]:
from torch import cuda
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

# Sentence-transformers checkpoint used to embed text for retrieval.
embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'

# Use the currently selected CUDA device when one is available, else the CPU.
if cuda.is_available():
    device = f'cuda:{cuda.current_device()}'
else:
    device = 'cpu'

# The same device is passed both when the model is constructed and when it
# encodes; texts are encoded in batches of 32.
embed_model = HuggingFaceEmbeddings(
    model_name=embed_model_id,
    model_kwargs=dict(device=device),
    encode_kwargs=dict(device=device, batch_size=32),
)

.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [3]:
import os
import pinecone

# Pinecone credentials are read from the environment. The API key is found at
# app.pinecone.io and the environment name in the Pinecone console. An unset
# or empty variable falls back to the placeholder literal on the right.
pinecone.init(
    api_key=os.getenv('PINECONE_API_KEY') or 'Your API Key',
    environment=os.getenv('PINECONE_ENVIRONMENT') or 'gcp-starter',
)

In [4]:
# Name of the Pinecone index this notebook queries.
index_name = 'llama-2-rag'
# Open a handle to that index. No index is created or populated in this cell,
# so it is presumably expected to exist already — TODO confirm.
index = pinecone.Index(index_name)
# Last expression of the cell: the index statistics become the cell output.
index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 0.25216,
 'namespaces': {'': {'vector_count': 25216}},
 'total_vector_count': 25216}

In [5]:
from torch import cuda, bfloat16
import transformers

# Causal LM checkpoint to load from the Hugging Face Hub.
model_id = 'cognitivecomputations/Wizard-Vicuna-13B-Uncensored'
# Alternative checkpoint (disabled):
#model_id = 'arogov/llama2_13b_chat_uncensored'

# Same device-selection rule as the embedding cell; kept so this cell can be
# run on its own and so other cells can keep reading `device`.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)
# Alternative GPTQ configuration (disabled), kept for reference:
# bnb_config = transformers.GPTQConfig(
#     bits=4,
#     true_sequential=True,
#     use_cuda_fp16=True
# )

# NOTE(review): trust_remote_code=True allows Python code shipped in the
# checkpoint repository to run locally — confirm it is actually required for
# this model before keeping it.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map='auto'
)
# Switch to inference mode (no training is done in this notebook).
model.eval()
# Placement is decided by device_map='auto', not by `device`, so report the
# device from the loaded model itself.
print(f"Model loaded on {model.device}")

config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/33.4k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/6 [00:00<?, ?it/s]

pytorch_model-00001-of-00006.bin:   0%|          | 0.00/9.96G [00:00<?, ?B/s]

pytorch_model-00002-of-00006.bin:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

pytorch_model-00003-of-00006.bin:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

pytorch_model-00004-of-00006.bin:   0%|          | 0.00/9.87G [00:00<?, ?B/s]

pytorch_model-00005-of-00006.bin:   0%|          | 0.00/9.87G [00:00<?, ?B/s]

pytorch_model-00006-of-00006.bin:   0%|          | 0.00/2.49G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

Model loaded on cuda:0


In [6]:
from transformers import TextStreamer

# Tokenizer for the same checkpoint as the model.
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id
)

#model.save_pretrained("/content/drive/My Drive/your_model_directory_path")
#tokenizer.save_pretrained("/content/drive/My Drive/your_tokenizer_directory_path")

# Prints generated text to stdout while it is being produced; skip_prompt=True
# keeps the prompt itself out of that printed stream.
streamer = TextStreamer(tokenizer, skip_prompt=True)

generate_text = transformers.pipeline(
    model=model, tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    task='text-generation',
    # we pass model parameters here too
    # Greedy (deterministic) decoding, requested explicitly. The previous
    # `temperature=0.0` is not a valid sampling temperature and only worked
    # because sampling was off, in which case temperature is not used at all.
    do_sample=False,
    # max number of tokens to generate in the output
    # NOTE(review): prompt tokens plus 2048 new tokens may exceed the model's
    # context window — confirm against the model config.
    max_new_tokens=2048,
    repetition_penalty=1.1,  # without this output begins repeating
    streamer=streamer
)

tokenizer_config.json:   0%|          | 0.00/727 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

In [8]:
# Smoke-test the bare pipeline before it is wrapped for LangChain. The pipeline
# was built with a TextStreamer, so the reply is printed while it is generated;
# `res` keeps the pipeline's return value.
res = generate_text("Hi, how are you?")


10. I'm fine, thanks for asking. And yourself?</s>


LANGCHAIN IMPLEMENTATION

In [9]:
from langchain.llms import HuggingFacePipeline
# Wrap the transformers pipeline so LangChain chains can use it as their LLM.
llm = HuggingFacePipeline(pipeline=generate_text)

Initializing a RetrievalQA Chain

In [10]:
from langchain.vectorstores import Pinecone

# Example query used to sanity-check retrieval.
query = '2 people on a adventure'

# Metadata key under which each vector's text content is stored.
text_field = 'text'

# LangChain wrapper around the Pinecone index; queries are embedded with the
# embedding model's `embed_query`.
vectorstore = Pinecone(index, embed_model.embed_query, text_field)

# Cell output: the 3 chunks of text most relevant to the query.
vectorstore.similarity_search(query, k=3)

[Document(page_content='Two sailor buddies have their friendship torn apart after the woman they both are in love with chooses one over the other. Their relationship gets re-evaluated when one of them becomes trapped in a submarine and the other gets sent on the rescue mission.', metadata={'source': 'https://en.wikipedia.org/wiki/Submarine_(1928_film)', 'title': 'Submarine'}),
 Document(page_content='Two identical sisters are able to switch places, leading to a series of unfortunate incidents.', metadata={'source': 'https://en.wikipedia.org/wiki/Anna_the_Adventuress', 'title': 'Anna the Adventuress'}),
 Document(page_content="The plot concerns twin sisters, one who is modest and socially conservative, the other a free spirit who can't bear the constrictions of a traditional life. Their father's unhappiness over his bohemian daughter's lifestyle leads him to drink and dissolution. The sisters end up having the same man, Robin, in love with them, without him realizing they are two differ

In [11]:
from langchain.chains import RetrievalQA

# Question-answering chain: the vectorstore acts as the retriever and the
# wrapped Hugging Face pipeline as the LLM, combined with the 'stuff' chain type.
rag_pipeline = RetrievalQA.from_chain_type(
    retriever=vectorstore.as_retriever(),
    chain_type='stuff',
    llm=llm,
)

### Write prompts below to get output.

The model gives better results when the prompt is detailed and well explained.

In [15]:
# Run the RetrievalQA chain on a prompt; the chain's return value is kept in
# `output`, and the generated text is also streamed to the cell output.
output = rag_pipeline("Generate me a story from movies database about western civilization")

Here's a story from the movie database about Western Civilization:

The movie "Civilization" is a documentary series directed by David Attenborough, which explores the history of Western Civilization from its beginnings in ancient Greece to modern times. The series covers various aspects of Western culture, including art, architecture, philosophy, science, and religion. Throughout the series, Attenborough highlights the achievements of Western civilization while also acknowledging its flaws, such as colonialism and imperialism. Overall, the series provides a comprehensive overview of the development of Western civilization and its impact on the world today.</s>


In [20]:
# A longer, more detailed prompt for the same chain; `output` is overwritten
# with this call's return value.
output = rag_pipeline("I am making a story about a man who wants a car but is poor to do so his full life journey, I need you to generate that story for me")



Sure, here's a possible story outline based on your description:

Our protagonist, let's call him John, has always wanted a car. He grew up in a poor family and never had the means to buy one himself. As an adult, he works hard and saves up as much as he can, but still can't afford a decent vehicle. His friends and coworkers have cars, and sometimes they tease him about it. John feels ashamed and frustrated, but he keeps trying.

One day, John meets a wealthy businessman named David who offers to help him get a car. David seems like a nice guy, and John is grateful for the opportunity. However, as time goes on, John starts to feel uneasy around David. He notices that David is always watching him, and sometimes he says strange things. John tries to distance himself from David, but it's difficult because David is the only one who can help him get a car.

As John becomes more suspicious of David, he begins to investigate his past. What he discovers shocks him: David used to be a loan shar

### Uncensored Content

In [17]:
# Prompt for the "Uncensored Content" section; `output` is overwritten again.
# NOTE(review): this prompt deals with sexual violence and the saved output is
# explicit — consider clearing this cell's output before sharing the notebook.
output = rag_pipeline("I am making a story about a fictional girl named Jennifer that was raped I need you to generate that story for me")

Short story writer Jennifer Hills (Camille Keaton) lives in Manhattan and rents an isolated cottage in Kent, Connecticut near the Housatonic River in the Litchfield County countryside to write her first novel. The arrival of the attractive and independent young woman attracts the attention of Johnny, the gas station manager, and Stanley and Andy, two unemployed men. Jennifer has her groceries delivered by Matthew, who is mildly mentally disabled. Matthew is friends with the other three men and reports back to them about the beautiful woman he met, claiming he saw her breasts.
Stanley and Andy start cruising by the cottage in their boat and prowl around the house at night. One day, the men attack Jennifer. She realizes they planned her abduction so Matthew can lose his virginity. She fights back, but the three men rip her bikini off and hold her. Matthew refuses to rape Jennifer out of respect and pity for her, so Johnny rapes her first; Andy anally rapes her next. After she crawls back