In [1]:
import os

from dotenv import load_dotenv, find_dotenv
# Read the local .env file located by find_dotenv().
# The return value is bound to `_` because nothing later in this cell uses it.
_ = load_dotenv(find_dotenv())

In [2]:
# Account for deprecation of the LLM model: pick the model name from today's
# date so the notebook switches away from the dated snapshot automatically.
import datetime

# Cutoff: strictly after this date the model is set to "gpt-3.5-turbo".
target_date = datetime.date(2024, 6, 12)

# Today's date in local time.
current_date = datetime.date.today()

# On or before the cutoff the dated snapshot is still selected.
llm_model = (
    "gpt-3.5-turbo" if current_date > target_date else "gpt-3.5-turbo-0301"
)

In [3]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch

In [4]:
# Load the movie CSV through CSVLoader; `loader` is reused when the index is
# built and `data` holds the loaded documents for inspection below.
file = 'movies_data.csv'
loader = CSVLoader(
    file_path=file,
)
data = loader.load()

In [5]:
# Build the index from the CSV loader, using DocArrayInMemorySearch as the
# vector store class.
index_creator = VectorstoreIndexCreator(vectorstore_cls=DocArrayInMemorySearch)
index = index_creator.from_loaders([loader])

In [6]:
# Chat model used to answer questions (temperature 0.0).
llm = ChatOpenAI(model=llm_model, temperature=0.0)

# Retrieval QA chain over the index's vector store.
# `document_separator` is passed through to the "stuff" chain; presumably it
# is the delimiter placed between retrieved documents — confirm against the
# LangChain version in use.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=index.vectorstore.as_retriever(),
    chain_type="stuff",
    chain_type_kwargs={"document_separator": "<<<<>>>>>"},
    verbose=True,
)

In [7]:
# Display the loaded document at index 10 to see which fields a row carries.
data[10]

Document(page_content="star_rating: 8.8\ntitle: The Lord of the Rings: The Fellowship of the Ring\ncontent_rating: PG-13\ngenre: Adventure\nduration: 178\nactors_list: [u'Elijah Wood', u'Ian McKellen', u'Orlando Bloom']", metadata={'source': 'movies_data.csv', 'row': 10})

In [8]:
# Display the loaded document at index 11 for comparison with the previous row.
data[11]

Document(page_content="star_rating: 8.8\ntitle: Inception\ncontent_rating: PG-13\ngenre: Action\nduration: 148\nactors_list: [u'Leonardo DiCaprio', u'Joseph Gordon-Levitt', u'Ellen Page']", metadata={'source': 'movies_data.csv', 'row': 11})

In [9]:
# Hand-written evaluation examples: each entry pairs a "query" with the
# expected "answer".
#
# Both questions are taken from the movie rows displayed above (data[10] and
# data[11]) so that the retriever, which is built from movies_data.csv, can
# actually find the supporting document. The previous entries asked about
# clothing products that do not appear in this dataset.
#
# Long queries use implicit string concatenation rather than a backslash
# continuation inside the literal, which would embed the next line's
# indentation in the question text.
examples = [
    {
        "query": (
            "What genre is The Lord of the Rings: "
            "The Fellowship of the Ring?"
        ),
        "answer": "Adventure"
    },
    {
        "query": "What is the content rating of Inception?",
        "answer": "PG-13"
    }
]

In [10]:
from langchain.evaluation.qa import QAGenerateChain


In [11]:
# Chain that generates question/answer pairs from documents, backed by a
# ChatOpenAI model with the selected model name.
example_gen_chain = QAGenerateChain.from_llm(
    ChatOpenAI(model=llm_model),
)

In [12]:
# Generate QA examples from the first five loaded documents; each document is
# passed to the chain under the "doc" key.
generation_inputs = [{"doc": document} for document in data[:5]]
new_examples = example_gen_chain.apply_and_parse(generation_inputs)



In [13]:
# Display the first generated question/answer pair.
new_examples[0]

{'query': 'What is the star rating for "The Shawshank Redemption"?',
 'answer': 'The star rating for "The Shawshank Redemption" is 9.3.'}

In [14]:
# Display the first loaded document, to compare against the generated
# example shown in the previous cell.
data[0]

Document(page_content="star_rating: 9.3\ntitle: The Shawshank Redemption\ncontent_rating: R\ngenre: Crime\nduration: 142\nactors_list: [u'Tim Robbins', u'Morgan Freeman', u'Bob Gunton']", metadata={'source': 'movies_data.csv', 'row': 0})

In [None]:
# Run the QA chain over every entry in `examples` and keep the results.
# NOTE(review): `new_examples` generated earlier is never merged into
# `examples`, so only the hand-written entries are run here — confirm this is
# intended.
predictions = qa.apply(examples)



[1m> Entering new  chain...[0m


In [None]:
# Build an LLM-backed evaluation chain.
# QAEvalChain is imported in this cell because no earlier cell brings it into
# scope; without the import this cell raises NameError on a fresh kernel.
from langchain.evaluation.qa import QAEvalChain

# Rebinds `llm` to a ChatOpenAI instance with temperature 0 for the evaluator.
llm = ChatOpenAI(temperature=0, model=llm_model)
eval_chain = QAEvalChain.from_llm(llm)