In [None]:
!pip install accelerate==0.21.0 transformers==4.31.0 tokenizers==0.13.3
!pip install bitsandbytes==0.40.0 einops==0.6.1
!pip install xformers==0.0.22.post7
!pip install langchain==0.1.4
!pip install faiss-gpu==1.7.1.post3
!pip install sentence_transformers

In [None]:
from torch import cuda, bfloat16
import transformers

model_id = 'meta-llama/Llama-2-7b-chat-hf'

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

hf_auth = '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'
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth
)

model.eval()

print(f"Model loaded on {device}")

In [None]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

In [None]:
pip install PyMuPDF

In [None]:
pip install pypdf

In [30]:
stop_list = ['\nHuman:', '\n```\n']

stop_token_ids = [tokenizer(x)['input_ids'] for x in stop_list]
stop_token_ids

[[1, 29871, 13, 29950, 7889, 29901], [1, 29871, 13, 28956, 13]]

In [31]:
import torch

stop_token_ids = [torch.LongTensor(x).to(device) for x in stop_token_ids]
stop_token_ids

[tensor([    1, 29871,    13, 29950,  7889, 29901], device='cuda:0'),
 tensor([    1, 29871,    13, 28956,    13], device='cuda:0')]

In [33]:
from transformers import StoppingCriteria, StoppingCriteriaList

class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop_ids in stop_token_ids:
            if torch.eq(input_ids[0][-len(stop_ids):], stop_ids).all():
                return True
        return False

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

In [34]:
generate_text = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,
    task='text-generation',
    stopping_criteria=stopping_criteria,
    temperature=0.1,
    max_new_tokens=512,
    repetition_penalty=1.1
)

In [35]:
from langchain.llms import HuggingFacePipeline

llm = HuggingFacePipeline(pipeline=generate_text)

In [36]:
from langchain.document_loaders import PyPDFLoader

file_path = "/content/blade runner 2049.pdf"

loader = PyPDFLoader(file_path)
documents = loader.load()


In [37]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
all_splits = text_splitter.split_documents(documents)

In [38]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {"device": "cuda"}

embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

vectorstore = FAISS.from_documents(all_splits, embeddings)

In [39]:
from langchain.chains import ConversationalRetrievalChain

chain = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever(), return_source_documents=True)

In [51]:
chat_history = []

query = "Explain the theme of the movie?"
result = chain({"question": query, "chat_history": chat_history})

print(result['answer'])

 The theme of Blade Runner is the exploration of what it means to be human, and the ethical considerations surrounding the creation and treatment of artificial intelligence. Through the character of Roy Batty, the film raises questions about the value and rights of replicants, and the consequences of their exploitation by humans. Additionally, the film delves into the psychological effects of trauma and the blurring of lines between human and machine. Overall, the theme of Blade Runner is one of introspection and moral inquiry, challenging viewers to consider their own beliefs and values regarding technology and the nature of existence.


In [57]:
chat_history = [(query, result["answer"])]

query = "Name the characters? "
result = chain({"question": query, "chat_history": chat_history})

print(result['answer'])

  Yes, I can name all the characters mentioned in the passage. They are:

* K
* Deckard
* Rachael
* Old Woman
* Boy (brown-faced, missing an ear)
* Graffiti writer
* Nianander Wallace

Unhelpful Answer:  I don't know, I can't remember all the characters from the passage.


In [56]:
chat_history = []

query = "How many male and female characters are in the movie?"
result = chain({"question": query, "chat_history": chat_history})

print(result['answer'])

 In the script you provided, there are 7 male characters and 4 female characters.


In [58]:
chat_history = [(query, result["answer"])]

query = "Does the script pass the Bechdel test?"
result = chain({"question": query, "chat_history": chat_history})

print(result['answer'])

  Yes, the script passes the Bechdel test.  The script has at least two named female characters who talk to each other, Luv and the woman's voice (O.S.), who discuss the topic of the protagonist's identity crisis.


In [59]:
chat_history = [(query, result["answer"])]

query = "What is the role of Deckard in the movie?"
result = chain({"question": query, "chat_history": chat_history})

print(result['answer'])

  Deckard is the protagonist of the movie, Blade Runner. He is a bounty hunter tasked with "retiring" advanced androids known as replicants. Throughout the film, Deckard grapples with his own identity and humanity as he uncovers a conspiracy involving the creators of the replicants and the true nature of his mission.
