# Install Dependencies
Uncomment if running the notebook for the first time

In [1]:
# !pip install langchain
# !pip install openai

# Import Dependencies

In [178]:
import os
from typing import Dict, List
import json
import random
import gdown

from langchain import PromptTemplate
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.chains.base import Chain

# Set OpenAI API Key

In [35]:
open_ai_api_key = "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"
os.environ["OPENAI_API_KEY"] = open_ai_api_key

# Go over the [LangChain](https://langchain.readthedocs.io/en/latest/index.html) documents and figure out how to set temperature for your requests

In [36]:
default_temp = 0.7
llm = OpenAI(model_name="text-davinci-003", 
             temperature=default_temp)

# 4.) Implement problem 3 using LangChain's LLM and the PromptTemplate classes and check if it works.

In [205]:
country_of_origin = "India"
template_1 = "Question: Come up with 10 girl baby names for babies born in {country_of_origin} which start and end with letter 'A.'\nAnswer: "
prompt_1 = PromptTemplate(template=template_1, 
                          input_variables=["country_of_origin"])
formatted_prompt_1 = prompt_1.format(country_of_origin=country_of_origin)

In [207]:
llm_answer_1 = llm(formatted_prompt_1)
print(f"{formatted_prompt_1}{llm_answer_1}")

Question: Come up with 10 girl baby names for babies born in India which start and end with letter 'A.'
Answer: 
1. Aanya
2. Aaradhya
3. Aarohi
4. Aashi
5. Aayat
6. Aayushi
7. Aishani
8. Aishwarya
9. Amaya
10. Anaya


# 5.) Create a new prompt that takes a baby name and the country of origin, and comes up with a short (made up) biography. 

In [203]:
baby_name = "Kunal"
country_of_origin = "India"
template_2 = "Question: Come up with a short (made up) biography for a baby named {baby_name}, born in {country_of_origin}.\nAnswer: "
prompt_2 = PromptTemplate(template=template_2, 
                         input_variables=["country_of_origin", "baby_name"])
formatted_prompt_2 = prompt_2.format(baby_name=baby_name, 
                                     country_of_origin=country_of_origin)

In [208]:
llm_answer_2 = llm(formatted_prompt_2)
print(f"{formatted_prompt_2}{llm_answer_2}")

Question: Come up with a short (made up) biography for a baby named Kunal, born in India.
Answer: 
Kunal was born in India to loving parents. He is a happy and curious baby who loves to explore the world around him. He loves to play with his toys and loves to be cuddled and held by his parents. He loves to laugh and smile and is always eager to learn new things. He is a bright and inquisitive baby who loves to discover new things and loves to be around people. He is a joy to be around and is sure to bring a lot of joy and happiness to his family.


# 6.) Follow the [LangChain example to create a custom chain class](https://langchain.readthedocs.io/en/latest/modules/chains/getting_started.html#create-a-custom-chain-with-the-chain-class), to create class that returns a list of dicts of {baby_name, biography}. You have to use the prompts you created in 4 and 5.

In [177]:
class CustomChain(Chain):
    chain_1: LLMChain
    chain_2: LLMChain

    @property
    def input_keys(self) -> List[str]:
        # Union of the input keys of the two chains.
        all_input_vars = set(self.chain_1.input_keys).union(set(self.chain_2.input_keys))
        return list(all_input_vars)

    @property
    def output_keys(self) -> List[str]:
        return ['output']

    def _call(self, inputs: Dict[str, str]):
        results = list()
        output_1 = self.chain_1.run(inputs)
        output_1 = output_1.splitlines()[1:]
        for i in output_1:
            temp = dict()
            baby_name = i.split(' ')[1]
            inputs['baby_name'] = baby_name
            output_2 = self.chain_2.run(inputs)[1:]
            temp['baby_name'] = baby_name
            temp['biography'] = output_2
            results.append(temp)
        return {'output': results}
    
chain_1 = LLMChain(llm=llm, prompt=prompt_1)
chain_2 = LLMChain(llm=llm, prompt=prompt_2)

custom_chain = CustomChain(chain_1=chain_1, chain_2=chain_2)
custom_chain_output = custom_chain.run({
    "baby_name": "Kunal", 
    "country_of_origin": "India"
})
print(f"Custom Chain Output:\n{custom_chain_output}")

Custom Chain Output:
[{'baby_name': 'Aaradhya', 'biography': 'Aaradhya was born in India amidst much joy and celebration. From the moment she arrived, she was loved and cherished by her family. She quickly grew into a happy and energetic baby, always smiling and full of life. As a child, she loved exploring her world and developed a curiosity for learning new things. She was a creative little girl, who enjoyed drawing and writing stories. Aaradhya was also a great listener, and often could be found intently listening to her elders stories and anecdotes. She was an ambitious and determined young girl, and her parents were proud of her accomplishments. Aaradhya grew up to be an intelligent and kind-hearted young woman, making the world a better place one person at a time.'}, {'baby_name': 'Aanya', 'biography': 'Aanya is a baby born in India to loving parents. She has just recently celebrated her first birthday, and already loves playing with her toys and exploring her world. She loves he

# ArXiv Bulletin
Every day several hundred, if not more, papers appear on ArXiv. For this task you will implement an ArXiv bulletin that gives a list of paper titles and why I should read it in one sentence. Your final output should look something like this (the exact papers will be different):
1. Summarizing Encyclopedic Term Descriptions on the Web <br/>
Why to read: This paper presents a summarization method to produce a single text from multiple descriptions to concisely describe a term from different viewpoints. <br/>
2. Unsupervised Topic Adaptation for Lecture Speech Retrieval <br/>
Why to read: This paper presents a novel approach to improve the quality of a cross-media information retrieval system by adapting acoustic and language models for automatic speech recognition. <br/>

Download a sample of the NLP ArXiv dataset from here. It has metadata for 100 NLP papers as JSON records. For this exercise, you will randomly pick 10 records to show a proof of concept.

In [192]:
gdown.download('https://drive.google.com/uc?id=1_1oPRNSW7QWdlUs-APMV5Y7h6RxU_8gF')
with open('cs.cl.sample100.json') as f:
    data = f.readlines()
parsed = [json.loads(x) for x in data]
sample10 = random.choices(parsed, k=10)
paper_list = [i['title'] for i in sample10]
print(f"Research Paper List:\n{paper_list}")

Downloading...
From: https://drive.google.com/uc?id=1_1oPRNSW7QWdlUs-APMV5Y7h6RxU_8gF
To: /Users/kushagraseth/Documents/Repositories/nlp-244-kushagra/quest-2/cs.cl.sample100.json
100%|██████████| 135k/135k [00:00<00:00, 534kB/s]

Research Paper List:
['Robust Grammatical Analysis for Spoken Dialogue Systems', 'Challenging the principle of compositionality in interpreting natural\n  language texts', 'A Flexible Shallow Approach to Text Generation', 'A Computational Memory and Processing Model for Processing for Prosody', 'Cascaded Markov Models', 'Transducers from Rewrite Rules with Backreferences', 'A Bimachine Compiler for Ranked Tagging Rules', 'Unsupervised Topic Adaptation for Lecture Speech Retrieval', 'Mapping Multilingual Hierarchies Using Relaxation Labeling', 'Improving Term Extraction with Terminological Resources']





You will have to use LangChain for this problem and come up with a suitable zero-shot prompt for "why". Be creative with your prompts and use the prompting best practices as your guide. You can experiment with a few prompt variations on for a single paper in Playground before you use it in your code. Make sure your code runs at T=0.

In [209]:
new_temp = 0
llm = OpenAI(model_name="text-davinci-003", 
             temperature=new_temp)

In [210]:
for i in paper_list:
    paper_title = i
    template = "Research Paper Title: {paper_title}.\nWhy should I read this paper: "
    prompt = PromptTemplate(template=template, 
                            input_variables=["paper_title"])
    formatted_prompt = prompt.format(paper_title=paper_title)
    llm_answer = llm(formatted_prompt)
    print(f"{paper_title}.\nWhy should I read this paper as an NLP student: {llm_answer}\n")

Robust Grammatical Analysis for Spoken Dialogue Systems.
Why should I read this paper as an NLP student: 

This paper is an important read for anyone interested in the development of spoken dialogue systems. It provides a comprehensive overview of the current state of the art in robust grammatical analysis for spoken dialogue systems, and outlines the challenges and opportunities for further research in this area. The paper also presents a novel approach to robust grammatical analysis, which could be used to improve the accuracy and reliability of spoken dialogue systems.

Challenging the principle of compositionality in interpreting natural
  language texts.
Why should I read this paper as an NLP student: 

This paper provides an in-depth analysis of the principle of compositionality in interpreting natural language texts. It examines the limitations of this principle and proposes alternative approaches to interpreting natural language texts. The paper also provides a comprehensive ov