## Load environment variables

In [None]:
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

## Run basic query with OpenAI wrapper

In [None]:
from langchain.llms import OpenAI
llm = OpenAI(model_name="text-davinci-003")
llm("explain large language models in two sentences")

# import schema for chat messages and ChatOpenAI in order to query chatmodels GPT-3.5-turbo or GPT-4

In [None]:
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)
from langchain.chat_models import ChatOpenAI

In [None]:
chat = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.3)
messages = [
    SystemMessage(content="You are an expert data scientist."),
    HumanMessage(content="Write a Python Script that trains a neural network on simulated data")
]
response = chat(messages)
print(response.content,end='\n')

# Import prompt and define PromptTemplate

In [None]:
from langchain import PromptTemplate

template = """
You are an expert data scientist with an expertise in building deep learning models.
Explain the concept of {concept} in a couple of lines
"""

prompt= PromptTemplate(
    input_variables=["concept"],
    template=template,
)
print(prompt)

# Run LLM with PromptTemplate

In [None]:
llm(prompt.format(concept="autoencoder"))

# Import LLMChain and define chain with language model and prompt as arguments.

In [None]:
from langchain.chains import LLMChain
chain = LLMChain(llm=llm, prompt=prompt)

# Run the chain only specifying the input variable
print(chain.run("autoencoder"))

# Define a second prompt

In [None]:
second_prompt = PromptTemplate(
    input_variables=["ml_concept"],
    template="Turn the concept description of {ml_concept} and explain it to me like i'm five in 500 words"
)

chain_two = LLMChain(llm=llm, prompt=second_prompt)

# Define a sequential chain using the two chains above: the second chain takes the output of the first chain as input

In [None]:
from  langchain.chains import SimpleSequentialChain
overall_chain = SimpleSequentialChain(chains=[chain, chain_two], verbose=True)

#Run the chain specifying only the input variable fot the first chain.
explanation = overall_chain.run("autoencoder")
print(explanation)

# Import utility for splitting up texts and split up the explanation given above into document chunks

In [63]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=0,
)

texts = text_splitter.create_documents([explanation])

# Individual text chunks can be accessed with "page_content"

In [64]:
texts[0].page_content

'An autoencoder is a type of computer program that can take data, such as pictures, and find ways to'

# Import and instantiate OpenAI embeddings

In [65]:
from langchain.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model_kwargs={"model_name": "ada"})

# Turn the first text chunk into a vector with the embedding

In [66]:
query_result = embeddings.embed_query(texts[0].page_content)
print(query_result)

[-0.04287624388280682,
 -0.011766167346579842,
 -0.0065030808940420405,
 -0.004267647054993801,
 -0.00788085192657189,
 0.022815894370917012,
 -0.006279192827679956,
 -0.03460961675630522,
 -0.0032412071910525686,
 -0.027114540029663042,
 0.0034237617678795954,
 0.03268073610004418,
 -0.014852375465275005,
 0.0021338235274969003,
 0.016932810231965842,
 0.008163295032478328,
 -0.00135968812562334,
 0.013309271871588674,
 -0.009499733506795621,
 -0.019013244998656677,
 -0.031275411425689716,
 0.017607918461657206,
 -0.010161063751421548,
 -0.02481366308986023,
 -0.01886169088822688,
 -0.004512201167631539,
 0.018613690882339034,
 -0.02402833284229038,
 -0.009554844515734864,
 0.002722820747513039,
 0.018186582658535518,
 -0.041498475178583225,
 -0.006382525853025726,
 -0.04546645664633879,
 -0.02099723573253446,
 -0.002743487259449943,
 0.0033669287436377967,
 -0.021383011863786668,
 0.006079416235182384,
 -0.0022061563192760643,
 0.034168728684791275,
 -0.006757968495478856,
 -0.007570

# Import and initialize Pinecone client

In [81]:
import pinecone
from langchain.vectorstores import Pinecone
from decouple import config

pinecone.init(
    api_key= config("PINECONE_API_KEY"),
    environment = config("PINECONE_ENV")
)

# Upload vectors to Pinecone

In [82]:
index_name = "langchain-quickstart"
search = Pinecone.from_documents(texts, embeddings, index_name=index_name)

# Do a simple vector similarity search

In [83]:
query = "What is magical about an encoder?"
result = search.similarity_search(query)
print(result)