In [2]:
# Versions are pinned to the ones resolved in the recorded output of this cell;
# pypdf and sentence-transformers versions are not shown there, so they stay unpinned.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install streamlit==1.29.0 streamlit_chat==0.1.1 langchain==0.0.352 faiss-gpu==1.7.2 tiktoken==0.5.2 pypdf sentence-transformers


Collecting streamlit
  Downloading streamlit-1.29.0-py2.py3-none-any.whl (8.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m54.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting streamlit_chat
  Downloading streamlit_chat-0.1.1-py3-none-any.whl (1.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m69.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain
  Downloading langchain-0.0.352-py3-none-any.whl (794 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m794.4/794.4 kB[0m [31m63.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tiktoken
  Downloading tiktoken-0.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
[2K     

In [3]:
# Versions are pinned to the ones resolved in the recorded output of this cell;
# the safetensors version is not shown there, so it stays unpinned.
%pip install peft==0.7.1 accelerate==0.25.0 bitsandbytes==0.41.3.post2 trl==0.7.6 safetensors

Collecting peft
  Downloading peft-0.7.1-py3-none-any.whl (168 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m168.3/168.3 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.25.0-py3-none-any.whl (265 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.7/265.7 kB[0m [31m20.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes
  Downloading bitsandbytes-0.41.3.post2-py3-none-any.whl (92.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting trl
  Downloading trl-0.7.6-py3-none-any.whl (139 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.3/139.3 kB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
Collecting datasets (from trl)
  Downloading datasets-2.16.0-py3-none-any.whl (507 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m43.1 MB/s[0m e

In [1]:
import functools

from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser
from torch import cuda, bfloat16
import transformers
import torch
from transformers import BitsAndBytesConfig , AutoConfig , AutoModelForCausalLM , pipeline , AutoTokenizer



In [11]:
@functools.lru_cache(maxsize=1)
def _load_model_and_tokenizer(model_id: str, token):
  """Load the 4-bit quantized causal LM and its tokenizer.

  The result is memoized for the most recent (model_id, token) pair, so
  repeated evaluations reuse the loaded checkpoint instead of reloading it
  on every call. At most one model is kept alive by the cache.
  """
  # set quantization configuration to load large model with less GPU memory
  # this requires the `bitsandbytes` library
  bnb_config = BitsAndBytesConfig(
      load_in_4bit=True,
      bnb_4bit_quant_type='nf4',
      bnb_4bit_use_double_quant=True,
      bnb_4bit_compute_dtype=bfloat16,
  )

  model_config = AutoConfig.from_pretrained(model_id, token=token)

  # NOTE(review): trust_remote_code=True allows the Hub repository to run its
  # own Python code while loading; only pass model ids from trusted repositories.
  # NOTE(review): torch_dtype is float16 while the 4-bit compute dtype above is
  # bfloat16 -- kept as in the original, confirm this mix is intended.
  model = AutoModelForCausalLM.from_pretrained(
      model_id,
      trust_remote_code=True,
      config=model_config,
      quantization_config=bnb_config,
      device_map='auto',
      torch_dtype=torch.float16,
      token=token,
  )

  tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
  return model, tokenizer


def evalution_function(question:str,
                       answer:str,
                       position:str,
                       hf_auth:str,
                       mistral_hugging_face_id: str,
                       temperature_scale: float = 0.4,
                       ):
  """
  Rate a candidate's interview answer with an instruction-tuned causal LM.

  args:
    question: the generated question from our database
    answer: answer given by the candidate
    position: job position that the candidate is applying for
    hf_auth: your Hugging Face authentication token; an empty string is
      treated as "no token supplied"
    mistral_hugging_face_id: Hugging Face model id of the Mistral 7B instruct
      checkpoint to load
    temperature_scale: controls randomness of the generated text; values > 0
      enable sampling at that temperature, values <= 0 use greedy decoding

  returns:
    a tuple (Rating, qualitative_feedback) taken from the parsed model output

  Errors raised while loading the model or while parsing the model output
  are not caught here and propagate to the caller.
  """
  # An empty string is not a usable token; pass None so the Hub client can
  # fall back to whatever credentials are configured in the environment.
  token = hf_auth or None
  model, tokenizer = _load_model_and_tokenizer(mistral_hugging_face_id, token)

  # Pad with an id that exists in this tokenizer's vocabulary instead of a
  # hard-coded id belonging to a different model family.
  pad_token_id = tokenizer.pad_token_id
  if pad_token_id is None:
    pad_token_id = tokenizer.eos_token_id

  generation_kwargs = dict(
      max_new_tokens=2024,  # max number of tokens to generate in the output
      repetition_penalty=1.1,  # without this output begins repeating
      pad_token_id=pad_token_id,
  )
  # temperature only takes effect in sampling mode, so sampling must be
  # switched on explicitly; a non-positive temperature means greedy decoding.
  if temperature_scale > 0:
    generation_kwargs.update(do_sample=True, temperature=temperature_scale)
  else:
    generation_kwargs.update(do_sample=False)

  # initializing text generation pipeline
  generate_text = pipeline(
      model=model, tokenizer=tokenizer,
      return_full_text=True,  # langchain expects the full text
      task='text-generation',
      **generation_kwargs,
  )

  llm = HuggingFacePipeline(pipeline=generate_text)

  #prompt for evaluating the candidate's answer
  prompt = ChatPromptTemplate.from_template(
  """### instruction: you are an experienced interviewer.\
   You are interviewing a candidate for the position of {position} .\
   You are tasked to rate an answer provided by the candidate. You should provide a categorical rating and qualitative feedback.\
    The categorical rating should be one of the following values: Good, average, or  Poor.\
      the qualitative feedback should provide sufficient details to justify the categorical rating.\
        the format instructions of the output and the question asked to the candidate and the answer given by the candidate are  given below.\
        ### format instruction: {format_instructions}.\
        ### question:{question}.\
        ### answer:{answer}.\
        ### Rating:
        """
      )

  #using single chain
  chain = LLMChain(llm=llm, prompt=prompt)

  # The output is formatted as JSON with two keys:
  #   {"Rating": "rating provided by LLM",
  #    "qualitative_feedback": "feedback provided by LLM"}

  #defining Rating schema
  Rating_schema = ResponseSchema(name="Rating",
                             description="it was the categorical value for the answer given by the candidate and this value could be poor, average or good. \
                                             ,the categorical value given by you as experienced interviewer. \
                                            after asking a candidate a question related to the position he is appling for")
  #defining feedback schema
  qualitative_feedback_schema = ResponseSchema(name="qualitative_feedback",
                                      description="the qualitative feedback is the sufficient details  which is given by you as an Experienced interviewr. \
                                          the qualitative feedback is given after asking the candidate a question related the the position he is appling for, \
                                           and the candidate provided his answer. \
                                            the qualitative feedback should privide sufficent details to justify the categorical rating ")

  #stacking the two schemas
  response_schemas = [Rating_schema,
                      qualitative_feedback_schema]

  #parser that turns the raw model output into a dictionary
  output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

  #extracting format instructions
  format_instructions = output_parser.get_format_instructions()

  #dictionary for mapping the question, answer, position and format_instructions to the prompt
  question_answer = {"question": question,
                     "answer": answer,
                     "position": position,
                     "format_instructions":format_instructions }

  #running the chain
  response = chain.run(question_answer)

  #output dictionary having two keys "Rating" and "qualitative_feedback"
  output_dict = output_parser.parse(response)

  return output_dict["Rating"],output_dict["qualitative_feedback"]


In [12]:
# Example run: rate one sample answer for a customer-service position.
# Every argument is passed by keyword so each value is tied to its parameter.
Rating, feedback = evalution_function(
    question="How can you improve a dissatisfied customer's experience?",
    answer="I've found the most successful strategy for turning an unhappy customer into a happy customer is by actively listening to what they're saying. Sometimes, customers just want you to listen to them, and they want to feel like the company cares about them and their opinions.",
    position="customer service representative",
    hf_auth='',
    mistral_hugging_face_id='mistralai/Mistral-7B-Instruct-v0.1',
    temperature_scale=0.5,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



tokenizer_config.json:   0%|          | 0.00/1.47k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]



In [13]:
# Display the categorical rating returned by evalution_function.
Rating

'Good'

In [14]:
# Display the qualitative feedback returned by evalution_function.
feedback

'The candidate provided a clear and concise answer that highlights the importance of active listening in resolving customer issues. The answer also shows empathy towards the customer and acknowledges the need for the company to care about their opinions.'