In [2]:
import transformers
from transformers.utils import send_example_telemetry
from datasets import load_dataset, load_metric
from datasets import ClassLabel, Sequence
import random
import pandas as pd
from IPython.display import display, HTML

In [3]:
squad_v2 = True
model_checkpoint = "distilbert-base-uncased"
batch_size = 16

In [4]:
datasets = load_dataset('squad_v2' if squad_v2 else 'squad')

In [5]:
datasets["train"][10]

{'id': '56d43c5f2ccc5a1400d830ab',
 'title': 'Beyoncé',
 'context': 'Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ bee-YON-say) (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny\'s Child. Managed by her father, Mathew Knowles, the group became one of the world\'s best-selling girl groups of all time. Their hiatus saw the release of Beyoncé\'s debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles "Crazy in Love" and "Baby Boy".',
 'question': 'What was the first album Beyoncé released as a solo artist?',
 'answers': {'text': ['Dangerously in Love'], 'answer_start': [505]}}

In [6]:
def show_random_elements(dataset, num_examples = 5):
    assert num_examples <= len(dataset), "Can't pick more elements than there are in the dataset."
    picks = []
    for _ in range(num_examples):
        pick = random.randint(0, len(dataset)-1)
    picks.append(pick)
    df = pd.DataFrame(dataset[picks])
    for column, typ in dataset.features.items():
        if isinstance(typ, ClassLabel):
            df[column] = df[column].transform(lambda i: typ.names[i])
        elif isinstance(typ, Sequence) and isinstance(typ.feature, ClassLabel):
            df[column] = df[column].transform(lambda x: [typ.feature.names[i] for i in x])
        display(HTML(df.to_html()))

In [7]:
show_random_elements(datasets["train"])

Unnamed: 0,id,title,context,question,answers
0,56f98b409e9bad19000a0a8b,Brain,"Another approach to brain function is to examine the consequences of damage to specific brain areas. Even though it is protected by the skull and meninges, surrounded by cerebrospinal fluid, and isolated from the bloodstream by the blood–brain barrier, the delicate nature of the brain makes it vulnerable to numerous diseases and several types of damage. In humans, the effects of strokes and other types of brain damage have been a key source of information about brain function. Because there is no ability to experimentally control the nature of the damage, however, this information is often difficult to interpret. In animal studies, most commonly involving rats, it is possible to use electrodes or locally injected chemicals to produce precise patterns of damage and then examine the consequences for behavior.",The brain is surrounded by what type of fluid?,"{'text': ['cerebrospinal fluid'], 'answer_start': [170]}"


Unnamed: 0,id,title,context,question,answers
0,56f98b409e9bad19000a0a8b,Brain,"Another approach to brain function is to examine the consequences of damage to specific brain areas. Even though it is protected by the skull and meninges, surrounded by cerebrospinal fluid, and isolated from the bloodstream by the blood–brain barrier, the delicate nature of the brain makes it vulnerable to numerous diseases and several types of damage. In humans, the effects of strokes and other types of brain damage have been a key source of information about brain function. Because there is no ability to experimentally control the nature of the damage, however, this information is often difficult to interpret. In animal studies, most commonly involving rats, it is possible to use electrodes or locally injected chemicals to produce precise patterns of damage and then examine the consequences for behavior.",The brain is surrounded by what type of fluid?,"{'text': ['cerebrospinal fluid'], 'answer_start': [170]}"


Unnamed: 0,id,title,context,question,answers
0,56f98b409e9bad19000a0a8b,Brain,"Another approach to brain function is to examine the consequences of damage to specific brain areas. Even though it is protected by the skull and meninges, surrounded by cerebrospinal fluid, and isolated from the bloodstream by the blood–brain barrier, the delicate nature of the brain makes it vulnerable to numerous diseases and several types of damage. In humans, the effects of strokes and other types of brain damage have been a key source of information about brain function. Because there is no ability to experimentally control the nature of the damage, however, this information is often difficult to interpret. In animal studies, most commonly involving rats, it is possible to use electrodes or locally injected chemicals to produce precise patterns of damage and then examine the consequences for behavior.",The brain is surrounded by what type of fluid?,"{'text': ['cerebrospinal fluid'], 'answer_start': [170]}"


Unnamed: 0,id,title,context,question,answers
0,56f98b409e9bad19000a0a8b,Brain,"Another approach to brain function is to examine the consequences of damage to specific brain areas. Even though it is protected by the skull and meninges, surrounded by cerebrospinal fluid, and isolated from the bloodstream by the blood–brain barrier, the delicate nature of the brain makes it vulnerable to numerous diseases and several types of damage. In humans, the effects of strokes and other types of brain damage have been a key source of information about brain function. Because there is no ability to experimentally control the nature of the damage, however, this information is often difficult to interpret. In animal studies, most commonly involving rats, it is possible to use electrodes or locally injected chemicals to produce precise patterns of damage and then examine the consequences for behavior.",The brain is surrounded by what type of fluid?,"{'text': ['cerebrospinal fluid'], 'answer_start': [170]}"


Unnamed: 0,id,title,context,question,answers
0,56f98b409e9bad19000a0a8b,Brain,"Another approach to brain function is to examine the consequences of damage to specific brain areas. Even though it is protected by the skull and meninges, surrounded by cerebrospinal fluid, and isolated from the bloodstream by the blood–brain barrier, the delicate nature of the brain makes it vulnerable to numerous diseases and several types of damage. In humans, the effects of strokes and other types of brain damage have been a key source of information about brain function. Because there is no ability to experimentally control the nature of the damage, however, this information is often difficult to interpret. In animal studies, most commonly involving rats, it is possible to use electrodes or locally injected chemicals to produce precise patterns of damage and then examine the consequences for behavior.",The brain is surrounded by what type of fluid?,"{'text': ['cerebrospinal fluid'], 'answer_start': [170]}"


***TASK 1***

In [8]:
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

In [9]:
import transformers
assert isinstance(tokenizer, transformers.PreTrainedTokenizerFast)

In [10]:
max_length = 384
doc_stride = 128 

In [11]:
pad_on_right = tokenizer.padding_side == "right"

In [12]:
def prepare_train_features(examples):
    examples["question"] = [q.lstrip() for q in examples["question"]]
    tokenized_examples = tokenizer(
        examples["question" if pad_on_right else "context"],
        examples["context" if pad_on_right else "question"],
        truncation="only_second" if pad_on_right else "only_first",
        max_length=max_length,
        stride=doc_stride,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )
    sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping")
    offset_mapping = tokenized_examples.pop("offset_mapping")
    tokenized_examples["start_positions"] = []
    tokenized_examples["end_positions"] = []

    for i, offsets in enumerate(offset_mapping):
        input_ids = tokenized_examples["input_ids"][i]
        cls_index = input_ids.index(tokenizer.cls_token_id)
        sequence_ids = tokenized_examples.sequence_ids(i)
        sample_index = sample_mapping[i]
        answers = examples["answers"][sample_index]
        if len(answers["answer_start"]) == 0:
            tokenized_examples["start_positions"].append(cls_index)
            tokenized_examples["end_positions"].append(cls_index)
        else:
            start_char = answers["answer_start"][0]
            end_char = start_char + len(answers["text"][0])
            token_start_index = 0
            while sequence_ids[token_start_index] != (1 if pad_on_right else 0):
                token_start_index += 1
            token_end_index = len(input_ids) - 1
            while sequence_ids[token_end_index] != (1 if pad_on_right else 0):
                token_end_index -= 1
            if not (offsets[token_start_index][0] <= start_char and offsets[token_end_index][1] >= end_char):
                tokenized_examples["start_positions"].append(cls_index)
                tokenized_examples["end_positions"].append(cls_index)
            else:
                while token_start_index < len(offsets) and offsets[token_start_index][0] <= start_char:
                    token_start_index += 1
                tokenized_examples["start_positions"].append(token_start_index - 1)
                while offsets[token_end_index][1] >= end_char:
                    token_end_index -= 1
                tokenized_examples["end_positions"].append(token_end_index + 1)

    return tokenized_examples

In [13]:
features = prepare_train_features(datasets['train'][:5])

In [14]:
tokenized_datasets = datasets.map(prepare_train_features, batched=True, remove_columns=datasets["train"].column_names)

In [15]:
from transformers import AutoModelForQuestionAnswering, TrainingArguments, Trainer
model = AutoModelForQuestionAnswering.from_pretrained(model_checkpoint)




Some weights of DistilBertForQuestionAnswering were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [16]:
model_name = model_checkpoint.split("/")[-1]
args = TrainingArguments(
    f"{model_name}-finetuned-squad",
    evaluation_strategy = "epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=2,
    weight_decay=0.01,
)

In [17]:
from transformers import default_data_collator
data_collator = default_data_collator

In [18]:
trainer = Trainer(
    model,
    args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [19]:
trainer.train()

  0%|          | 0/16470 [00:00<?, ?it/s]

{'loss': 2.9326, 'grad_norm': 15.279534339904785, 'learning_rate': 1.939283545840923e-05, 'epoch': 0.06}
{'loss': 1.9637, 'grad_norm': 13.538772583007812, 'learning_rate': 1.878567091681846e-05, 'epoch': 0.12}
{'loss': 1.7487, 'grad_norm': 12.472172737121582, 'learning_rate': 1.817850637522769e-05, 'epoch': 0.18}
{'loss': 1.6117, 'grad_norm': 9.799962997436523, 'learning_rate': 1.7571341833636916e-05, 'epoch': 0.24}
{'loss': 1.5679, 'grad_norm': 25.302701950073242, 'learning_rate': 1.696417729204615e-05, 'epoch': 0.3}
{'loss': 1.4912, 'grad_norm': 20.4501953125, 'learning_rate': 1.6357012750455374e-05, 'epoch': 0.36}
{'loss': 1.455, 'grad_norm': 18.45928955078125, 'learning_rate': 1.5749848208864604e-05, 'epoch': 0.43}
{'loss': 1.39, 'grad_norm': 14.267770767211914, 'learning_rate': 1.5142683667273831e-05, 'epoch': 0.49}
{'loss': 1.3714, 'grad_norm': 20.14078712463379, 'learning_rate': 1.4535519125683062e-05, 'epoch': 0.55}
{'loss': 1.3259, 'grad_norm': 13.556034088134766, 'learning_ra

  0%|          | 0/759 [00:00<?, ?it/s]

{'eval_loss': 1.2443283796310425, 'eval_runtime': 138.2365, 'eval_samples_per_second': 87.777, 'eval_steps_per_second': 5.491, 'epoch': 1.0}
{'loss': 1.1034, 'grad_norm': 9.995841979980469, 'learning_rate': 9.678202792956892e-06, 'epoch': 1.03}
{'loss': 1.0296, 'grad_norm': 21.2807559967041, 'learning_rate': 9.071038251366122e-06, 'epoch': 1.09}
{'loss': 1.0588, 'grad_norm': 18.23143768310547, 'learning_rate': 8.46387370977535e-06, 'epoch': 1.15}
{'loss': 1.0019, 'grad_norm': 16.73717498779297, 'learning_rate': 7.856709168184579e-06, 'epoch': 1.21}
{'loss': 1.0184, 'grad_norm': 14.774055480957031, 'learning_rate': 7.249544626593807e-06, 'epoch': 1.28}
{'loss': 1.0142, 'grad_norm': 14.547158241271973, 'learning_rate': 6.642380085003036e-06, 'epoch': 1.34}
{'loss': 1.0088, 'grad_norm': 14.889763832092285, 'learning_rate': 6.035215543412265e-06, 'epoch': 1.4}
{'loss': 1.0109, 'grad_norm': 17.187137603759766, 'learning_rate': 5.428051001821493e-06, 'epoch': 1.46}
{'loss': 0.9901, 'grad_nor

  0%|          | 0/759 [00:00<?, ?it/s]

{'eval_loss': 1.2927873134613037, 'eval_runtime': 127.9176, 'eval_samples_per_second': 94.858, 'eval_steps_per_second': 5.934, 'epoch': 2.0}
{'train_runtime': 8368.9, 'train_samples_per_second': 31.487, 'train_steps_per_second': 1.968, 'train_loss': 1.2540592937087318, 'epoch': 2.0}


TrainOutput(global_step=16470, training_loss=1.2540592937087318, metrics={'train_runtime': 8368.9, 'train_samples_per_second': 31.487, 'train_steps_per_second': 1.968, 'train_loss': 1.2540592937087318, 'epoch': 2.0})

In [20]:
trainer.save_model("SQuAD_trained")

In [21]:
import torch

In [22]:
for batch in trainer.get_eval_dataloader():
    break
batch = {k: v.to(trainer.args.device) for k, v in batch.items()}
with torch.no_grad():
    output = trainer.model(**batch)
output.keys()

odict_keys(['loss', 'start_logits', 'end_logits'])

In [23]:
n_best_size = 20

In [24]:
import numpy as np

In [25]:
start_logits = output.start_logits[0].cpu().numpy()
end_logits = output.end_logits[0].cpu().numpy()
# Gather the indices the best start/end logits:
start_indexes = np.argsort(start_logits)[-1 : -n_best_size - 1 : -1].tolist()
end_indexes = np.argsort(end_logits)[-1 : -n_best_size - 1 : -1].tolist()
valid_answers = []
for start_index in start_indexes:
    for end_index in end_indexes:
        if start_index <= end_index: # We need to refine that test to check the answer is inside the context
            valid_answers.append(
                {
                    "score": start_logits[start_index] + end_logits[end_index],
                    "text": "" # We need to find a way to get back the original substring corresponding to the answer in the context
                }
            )

In [26]:
def prepare_validation_features(examples):
    examples["question"] = [q.lstrip() for q in examples["question"]]
    tokenized_examples = tokenizer(
        examples["question" if pad_on_right else "context"],
        examples["context" if pad_on_right else "question"],
        truncation="only_second" if pad_on_right else "only_first",
        max_length=max_length,
        stride=doc_stride,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )
    sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping")
    tokenized_examples["example_id"] = []

    for i in range(len(tokenized_examples["input_ids"])):
        sequence_ids = tokenized_examples.sequence_ids(i)
        context_index = 1 if pad_on_right else 0
        sample_index = sample_mapping[i]
        tokenized_examples["example_id"].append(examples["id"][sample_index])
        tokenized_examples["offset_mapping"][i] = [
            (o if sequence_ids[k] == context_index else None)
            for k, o in enumerate(tokenized_examples["offset_mapping"][i])
        ]

    return tokenized_examples

In [27]:
validation_features = datasets["validation"].map(
    prepare_validation_features,
    batched=True,
    remove_columns=datasets["validation"].column_names
)

Map:   0%|          | 0/11873 [00:00<?, ? examples/s]

In [28]:
raw_predictions = trainer.predict(validation_features)

  0%|          | 0/759 [00:00<?, ?it/s]

In [29]:
validation_features.set_format(type=validation_features.format["type"], columns=list(validation_features.features.keys()))

In [30]:
max_answer_length = 30

In [31]:
start_logits = output.start_logits[0].cpu().numpy()
end_logits = output.end_logits[0].cpu().numpy()
offset_mapping = validation_features[0]["offset_mapping"]
context = datasets["validation"][0]["context"]
start_indexes = np.argsort(start_logits)[-1 : -n_best_size - 1 : -1].tolist()
end_indexes = np.argsort(end_logits)[-1 : -n_best_size - 1 : -1].tolist()
valid_answers = []
for start_index in start_indexes:
    for end_index in end_indexes:
        if (
            start_index >= len(offset_mapping)
            or end_index >= len(offset_mapping)
            or offset_mapping[start_index] is None
            or offset_mapping[end_index] is None
        ):
            continue
        if end_index < start_index or end_index - start_index + 1 > max_answer_length:
            continue
        if start_index <= end_index:
            start_char = offset_mapping[start_index][0]
            end_char = offset_mapping[end_index][1]
            valid_answers.append(
                {
                    "score": start_logits[start_index] + end_logits[end_index],
                    "text": context[start_char: end_char]
                }
            )

valid_answers = sorted(valid_answers, key=lambda x: x["score"], reverse=True)[:n_best_size]
valid_answers

[{'score': 13.890465, 'text': 'France'},
 {'score': 6.793297, 'text': 'a region in France'},
 {'score': 6.737341, 'text': 'France.'},
 {'score': 6.577327,
  'text': 'France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway'},
 {'score': 6.0375934, 'text': 'in France'},
 {'score': 5.64405,
  'text': 'France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark'},
 {'score': 4.7346225, 'text': 'Normandy, a region in France'},
 {'score': 4.7201023, 'text': 'region in France'},
 {'score': 4.6967316,
  'text': 'French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France'},
 {'score': 4.562068,
  'text': 'France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland'},
 {'score': 2.8644028, 'text': 'France. They were descended from Norse'},
 {'score': 2.5605

In [32]:
import collections

In [33]:
examples = datasets["validation"]
features = validation_features

example_id_to_index = {k: i for i, k in enumerate(examples["id"])}
features_per_example = collections.defaultdict(list)
for i, feature in enumerate(features):
    features_per_example[example_id_to_index[feature["example_id"]]].append(i)

In [34]:
from tqdm.auto import tqdm

In [35]:
def postprocess_qa_predictions(examples, features, raw_predictions, n_best_size = 20, max_answer_length = 30):
    all_start_logits, all_end_logits = raw_predictions
    example_id_to_index = {k: i for i, k in enumerate(examples["id"])}
    features_per_example = collections.defaultdict(list)
    for i, feature in enumerate(features):
        features_per_example[example_id_to_index[feature["example_id"]]].append(i)
    predictions = collections.OrderedDict()
    print(f"Post-processing {len(examples)} example predictions split into {len(features)} features.")
    for example_index, example in enumerate(tqdm(examples)):
        feature_indices = features_per_example[example_index]

        min_null_score = None
        valid_answers = []
        
        context = example["context"]
        for feature_index in feature_indices:
            start_logits = all_start_logits[feature_index]
            end_logits = all_end_logits[feature_index]
            offset_mapping = features[feature_index]["offset_mapping"]
            cls_index = features[feature_index]["input_ids"].index(tokenizer.cls_token_id)
            feature_null_score = start_logits[cls_index] + end_logits[cls_index]
            if min_null_score is None or min_null_score < feature_null_score:
                min_null_score = feature_null_score
            start_indexes = np.argsort(start_logits)[-1 : -n_best_size - 1 : -1].tolist()
            end_indexes = np.argsort(end_logits)[-1 : -n_best_size - 1 : -1].tolist()
            for start_index in start_indexes:
                for end_index in end_indexes:
                    if (
                        start_index >= len(offset_mapping)
                        or end_index >= len(offset_mapping)
                        or offset_mapping[start_index] is None
                        or offset_mapping[end_index] is None
                    ):
                        continue
                    if end_index < start_index or end_index - start_index + 1 > max_answer_length:
                        continue

                    start_char = offset_mapping[start_index][0]
                    end_char = offset_mapping[end_index][1]
                    valid_answers.append(
                        {
                            "score": start_logits[start_index] + end_logits[end_index],
                            "text": context[start_char: end_char]
                        }
                    )
        
        if len(valid_answers) > 0:
            best_answer = sorted(valid_answers, key=lambda x: x["score"], reverse=True)[0]
        else:
            best_answer = {"text": "", "score": 0.0}
        if not squad_v2:
            predictions[example["id"]] = best_answer["text"]
        else:
            answer = best_answer["text"] if best_answer["score"] > min_null_score else ""
            predictions[example["id"]] = answer

    return predictions

In [36]:
final_predictions = postprocess_qa_predictions(datasets["validation"], validation_features, raw_predictions.predictions)

Post-processing 11873 example predictions split into 12134 features.


  0%|          | 0/11873 [00:00<?, ?it/s]

In [37]:
metric = load_metric("squad_v2" if squad_v2 else "squad")

  metric = load_metric("squad_v2" if squad_v2 else "squad")
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


In [38]:
if squad_v2:
    formatted_predictions = [{"id": k, "prediction_text": v, "no_answer_probability": 0.0} for k, v in final_predictions.items()]
else:
    formatted_predictions = [{"id": k, "prediction_text": v} for k, v in final_predictions.items()]
references = [{"id": ex["id"], "answers": ex["answers"]} for ex in datasets["validation"]]
metric.compute(predictions=formatted_predictions, references=references)

{'exact': 64.09500547460625,
 'f1': 67.45900818881681,
 'total': 11873,
 'HasAns_exact': 65.06410256410257,
 'HasAns_f1': 71.80175509882253,
 'HasAns_total': 5928,
 'NoAns_exact': 63.128679562657695,
 'NoAns_f1': 63.128679562657695,
 'NoAns_total': 5945,
 'best_exact': 64.09500547460625,
 'best_exact_thresh': 0.0,
 'best_f1': 67.45900818881698,
 'best_f1_thresh': 0.0}

***TASK 4 Question 1***

In [10]:
from langchain.chains import LLMChain
from langchain import PromptTemplate
from langchain.chains import SimpleSequentialChain, SequentialChain
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
import tenacity
import openai
from ratelimit import limits, sleep_and_retry

In [12]:
llm = ChatOpenAI(openai_api_key = api_key) #replace api_key with your openai api key

  warn_deprecated(


Chian 1

In [14]:
prompt_1_1 = ChatPromptTemplate.from_template("Translate to English:{review}.")
chain_1_1 = LLMChain(llm=llm, prompt=prompt_1_1, output_key='English_Review')

In [15]:
prompt_1_2 = ChatPromptTemplate.from_template('Summarize the review:{English_Review}.')
chain_1_2 = LLMChain(llm=llm, prompt=prompt_1_2, output_key='Summary')

In [16]:
prompt_1_3 = ChatPromptTemplate.from_template('Identify the language:{review}.')
chain_1_3 = LLMChain(llm=llm, prompt=prompt_1_3, output_key='Language')

In [17]:
prompt_1_4 = ChatPromptTemplate.from_template('Use the required languageto write the follow up message. \n Summary:{Summary}\n Language:{Language}')
chain_1_4 = LLMChain(llm=llm, prompt=prompt_1_4, output_key='Followup_Message')

In [18]:
prompt_1_5 = ChatPromptTemplate.from_template('Translate the mssg to English:{Followup_Message}')
chain_1_5 = LLMChain(llm=llm, prompt=prompt_1_5, output_key='English_Followup_Message')

In [19]:
overall_simple_chain_1 = SequentialChain(
    chains=[chain_1_1, chain_1_2, chain_1_3, chain_1_4, chain_1_5],
    input_variables = ['review'],
    output_variables = ['English_Review', 'Summary', 'Language', 'Followup_Message', 'English_Followup_Message'],
    verbose = True)

In [20]:
review = "C'est un très bon restaurant. Ils offrent une grande variété de cuisines. Le goût de tous les plats que j'ai essayés était très bon."

In [21]:
overall_simple_chain_1(review)



[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


{'review': "C'est un très bon restaurant. Ils offrent une grande variété de cuisines. Le goût de tous les plats que j'ai essayés était très bon.",
 'English_Review': "It's a very good restaurant. They offer a wide variety of cuisines. The taste of all the dishes I tried was very good.",
 'Summary': 'The reviewer had a positive experience at the restaurant, praising the wide variety of cuisines offered and the taste of the dishes they tried.',
 'Language': 'French',
 'Followup_Message': "Bonjour,\n\nNous avons bien reçu votre avis positif sur notre restaurant et nous vous en remercions sincèrement. Nous sommes ravis de savoir que vous avez apprécié la grande variété de cuisines proposées ainsi que le goût des plats que vous avez essayés.\n\nNous espérons vous revoir bientôt parmi nous pour vous faire découvrir d'autres délices culinaires. \n\nCordialement, \nL'équipe du restaurant",
 'English_Followup_Message': 'Hello,\n\nWe have received your positive review of our restaurant and we si

Chain 2

In [13]:
from langchain.chains.router import MultiPromptChain
from langchain.llms import OpenAI
from langchain.chains import ConversationChain
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
from langchain.chains.router.llm_router import LLMRouterChain, RouterOutputParser
from langchain.chains.router.multi_prompt_prompt import MULTI_PROMPT_ROUTER_TEMPLATE

In [14]:
subject_prompt_template = """
Given the input: {input}
If the input is related to the subject of {input}, provide an output relevant to that subject.
Otherwise, output: "The input is not related to the subject of {input}."
"""

In [15]:
default_prompt_template = """
Given the input: {input}
Since the input is not related to any specific subject, provide a generic and informative response.
"""

In [16]:
prompt_infos = [
    {
        "name": "Subject",
        "description": "Good for answering questions about Math",
        "prompt_template": subject_prompt_template,
    },
    {
        "name": "Default",
        "description": "Not able to answer the question",
        "prompt_template": default_prompt_template,
    },
]

In [17]:
destination_chains = {}
for prompt_info in prompt_infos: 
    name = prompt_info["name"]
    prompt_template = prompt_info["prompt_template"]
    prompt = PromptTemplate(template=prompt_template, input_variables=["input"])
    chain = LLMChain(llm=llm, prompt=prompt)
    destination_chains[name] = chain
default_chain = ConversationChain(llm=llm, output_key="text")

In [18]:
destinations = [f"{p['name']}: {p['description']}" for p in prompt_infos]
destinations_str = "\n".join(destinations)

In [19]:
router_template = MULTI_PROMPT_ROUTER_TEMPLATE.format(destinations=destinations_str)
router_prompt = PromptTemplate(
    template=router_template,
    input_variables=["input"],
    output_parser=RouterOutputParser(),
)

In [20]:
router_chain = LLMRouterChain.from_llm(llm, router_prompt)
chain = MultiPromptChain(
    router_chain=router_chain,
    destination_chains=destination_chains,
    default_chain=default_chain,
    verbose=True,
)

In [22]:
print(chain.run("Who won the second world war?"))



[1m> Entering new MultiPromptChain chain...[0m
None: {'input': 'Who won the second world war?'}
[1m> Finished chain.[0m
The Allied Powers, including countries like the United States, the Soviet Union, and the United Kingdom, won the Second World War against the Axis Powers, which included countries like Germany, Japan, and Italy. The war lasted from 1939 to 1945 and resulted in the defeat of the Axis Powers.


In [23]:
print(chain.run('What is a supernova?'))



[1m> Entering new MultiPromptChain chain...[0m
None: {'input': 'What is a supernova?'}
[1m> Finished chain.[0m
A supernova is a powerful and luminous stellar explosion that occurs when a star reaches the end of its life cycle. It is one of the most energetic events in the universe, releasing a huge amount of energy and creating elements heavier than iron. Supernovae can briefly outshine entire galaxies and are responsible for dispersing these elements into space, enriching the interstellar medium for future generations of stars.


In [24]:
print(chain.run("Who is the president of America?"))



[1m> Entering new MultiPromptChain chain...[0m
None: {'input': 'Who is the president of America?'}
[1m> Finished chain.[0m
As of my last update, Joe Biden is the President of the United States. He was inaugurated on January 20, 2021.


In [29]:
print(chain.run("27 + 51"))



[1m> Entering new MultiPromptChain chain...[0m
Subject: {'input': 'What is the result of 27 + 51?'}
[1m> Finished chain.[0m
The result of 27 + 51 is 78.


In [28]:
print(chain.run("Write about WPI"))



[1m> Entering new MultiPromptChain chain...[0m
None: {'input': 'Write about WPI'}
[1m> Finished chain.[0m
Worcester Polytechnic Institute (WPI) is a private research university located in Worcester, Massachusetts. It was founded in 1865 and is known for its strong programs in engineering, science, technology, and business. WPI offers undergraduate and graduate degrees in a variety of disciplines and is particularly well-known for its project-based learning approach, where students work on real-world projects with industry partners. The university is also recognized for its strong emphasis on innovation, entrepreneurship, and interdisciplinary collaboration. Overall, WPI is a top-tier institution known for its commitment to hands-on learning and preparing students for successful careers in their chosen fields.


***Task 4 Question 2***

In [98]:
prompt_2_1 = "Give me the name for a company which produces:{product}."
prompt_2_2 = "Please give a description for this company{name}"

In [99]:
first_prompt = ChatPromptTemplate.from_template(prompt_2_1)
second_prompt = ChatPromptTemplate.from_template(prompt_2_2)

In [100]:
chain_one = LLMChain(llm=llm, prompt = first_prompt)
chain_two = LLMChain(llm=llm, prompt = second_prompt)

In [103]:
chain1 = SimpleSequentialChain(chains = [chain_one, chain_two], verbose = True)
chain1.run('Orange Juice')



[1m> Entering new SimpleSequentialChain chain...[0m
[36;1m[1;3mZesty Squeeze Co.[0m
[33;1m[1;3mZesty Squeeze Co. is a food and beverage company that specializes in creating delicious and refreshing juice blends. Using only the freshest fruits and vegetables, they offer a wide range of unique and flavorful concoctions that are perfect for any occasion. With a focus on providing high-quality, all-natural products, Zesty Squeeze Co. is committed to helping customers live a healthy and vibrant lifestyle. Whether you're looking for a morning pick-me-up or a tasty treat to enjoy throughout the day, Zesty Squeeze Co. has something for everyone.[0m

[1m> Finished chain.[0m


"Zesty Squeeze Co. is a food and beverage company that specializes in creating delicious and refreshing juice blends. Using only the freshest fruits and vegetables, they offer a wide range of unique and flavorful concoctions that are perfect for any occasion. With a focus on providing high-quality, all-natural products, Zesty Squeeze Co. is committed to helping customers live a healthy and vibrant lifestyle. Whether you're looking for a morning pick-me-up or a tasty treat to enjoy throughout the day, Zesty Squeeze Co. has something for everyone."