# LangChain Examples 
- PineCone Vector database:  https://app.pinecone.io/

In [2]:
# secrets 
import constants
import os

os.environ["OPENAI_API_KEY"] =  constants.APIKEY
os.environ["OPENAI_ORGANIZATION_ID"] = constants.ORGANIZATION_ID

os.environ["PINECONE_API_KEY"] =  constants.pinecone_api_key
os.environ["PINECONE_ENV"] = constants.pinecone_environment

In [14]:
# RUN LLM STRAIGHT UP 
from langchain.llms import OpenAI

llm = OpenAI(model_name= "text-davinci-003")
PROMPT = "Hello, lets see how this works"
llm(f"{PROMPT}")

'\n\nThis works by allowing you to type in a question and then providing you with a response. It is a great way to learn more about a certain topic or to get help with a specific problem.'

In [12]:
# OPENAI MESSAGE HISTORY
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)
from langchain.chat_models import ChatOpenAI

# init model
chat = ChatOpenAI(model_name="gpt-3.5-turbo")

# message history
messages = [
    SystemMessage(content="i want you to act like a batman" ),
    HumanMessage(content="hello batman its me joker")
]

# ai response
response = chat(messages)
response

AIMessage(content="Ah, Joker. I've been expecting you. What mischief are you up to this time?", additional_kwargs={}, example=False)

In [18]:
# PROMPT TEMPLATING

from langchain import PromptTemplate

template = """
you are an expert data scientist with expertise in building machine learning ai models. Please plan out the coding project for {concept} in a couple of lines.
"""


prompt = PromptTemplate(
    input_variables = ["concept"],
    template=template
)

# CHAINS
- Chains are LLM prompts that take inputs...
- This allows for chains to <strong>link</strong> together....
- Using one chain's output as another chain's input

In [19]:
# LLM
from langchain.llms import OpenAI

# TEMPLATING CHAINS
from langchain.chains import LLMChain
from langchain import PromptTemplate

# MERGE MULTIPLE CHAINS
from langchain.chains import SimpleSequentialChain


# SELECT LLM
llm = OpenAI(model_name= "text-davinci-003")  # Corrected model name

# CREATE 2 PROMPT TEMPLATES
prompt1 = PromptTemplate(
    input_variables=["concept"],
    template = """
you are an expert data scientist with expertise in building machine learning ai models. Please plan out the coding project for {concept} in a couple of lines. Make sure to explain step by step.
"""
)

prompt2 = PromptTemplate(
    input_variables=["ml_concept"],
    template = "Turn the concept descrition of {ml_concept} and explain it to me like im five in a 1000 words."
)




# CREATE CHAINS
chain_one = LLMChain(llm=llm, prompt=prompt1)
chain_two = LLMChain(llm=llm, prompt=prompt2)


# MERGE CHAINS
Merge_Chains = [chain_one, chain_two]
overall_chain = SimpleSequentialChain(chains=Merge_Chains , verbose = True)


# DISPLAY OUTPUT
explaination = overall_chain.run("neuroscience applied marketing tricks")
print(explaination)



[1m> Entering new SimpleSequentialChain chain...[0m
[36;1m[1;3m
1. Gather data from existing datasets on neuroscience and marketing, such as customer behavior, market trends, and consumer preferences. 

2. Pre-process the data by cleaning, normalizing, and transforming it into a format that can be used in the model.

3. Split the data into training and test sets.

4. Select an appropriate machine learning model for the task and train it on the training data.

5. Evaluate the model’s performance on the test data.

6. Analyze the results and fine-tune the model to optimize performance.

7. Deploy the trained model in a production environment.[0m
[33;1m[1;3m

Gathering data from existing datasets is the first step in the process of using machine learning to gain insights from neuroscience and marketing data. This data could include information about customer behavior, market trends, and consumer preferences. After gathering the data, it is important to pre-process it. This can in

# EMBEDDINGS & VECTORE STORES
- A vector (numeric) representation of text
- Save embeddings of split text into 3RD party Vector Database using SDK



_run `CHAINS` cells before observing_

In [20]:
from langchain.text_splitter import  RecursiveCharacterTextSplitter

# TAKE COMBINED CHAIN 
explaination = overall_chain.run("neuroscience applied marketing tricks")

# MAKE A SPLITTER OBJECT - SPLIT DATA INTO 100 CHUNKS OF TEXT
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 100,
    chunk_overlap = 0,
)

# SPLIT OUR COMBINED LLM CHAIN
texts = text_splitter.create_documents([explaination])


# PRINT A SINGLE ITEM FROM SPLIT
texts



[1m> Entering new SimpleSequentialChain chain...[0m
[36;1m[1;3m
1. Collect and process data related to neuroscience applied marketing, including customer demographics, campaign performance, website analytics, and purchase data. 

2. Use exploratory data analysis to identify patterns and relationships in the data.

3. Build a predictive model using machine learning algorithms such as logistic regression, decision trees, and random forests.

4. Use the model to determine which customer segments should be targeted for marketing campaigns.

5. Use the model to develop a marketing strategy, including tactics, content, and messaging.

6. Test the marketing strategy to measure the impact of different tactics and refine the model accordingly.

7. Analyze the results to identify areas for further optimization and develop insights to inform future campaigns.[0m
[33;1m[1;3m

Marketing is a great way to get the word out about a product or service. It involves spreading the word to potenti

[Document(page_content='Marketing is a great way to get the word out about a product or service. It involves spreading the', metadata={}),
 Document(page_content='word to potential customers, which can be done in many ways such as through advertising, word of', metadata={}),
 Document(page_content='mouth, and using social media. To make sure that the marketing efforts are successful, it is', metadata={}),
 Document(page_content='important to collect data related to the target customer and the effectiveness of the marketing', metadata={}),
 Document(page_content='campaigns.', metadata={}),
 Document(page_content='In the case of neuroscience applied marketing, this data could include information about the', metadata={}),
 Document(page_content='customer demographics, such as age, gender, and location, as well as campaign performance data,', metadata={}),
 Document(page_content='website analytics, and purchase data. This data can then be used to identify patterns and', metadata={}),
 Docu

In [21]:
# EMBEDDING
from langchain.embeddings import OpenAIEmbeddings

# AVAILABLE EMBEDDINGS
    # https://platform.openai.com/docs/guides/embeddings/what-are-embeddings

embeddings = OpenAIEmbeddings(model_name="text-embedding-ada-002")

# embeddings.json

                    model_name was transferred to model_kwargs.
                    Please confirm that model_name is what you intended.


In [31]:
# SAVE TO PINECONE VECTOR DATABASE
import pinecone as pvdb
from langchain.vectorstores import Pinecone


# INIT PINECONE CLIENT
pvdb.init(
    api_key = os.environ['PINECONE_API_KEY'],
    environment = os.environ['PINECONE_ENV']
)



# SAVE INTO DB
index_name = "langchain-quickstart"
if index_name not in pvdb.list_indexes():
    # we create a new index
    pvdb.create_index(
      name=index_name,
      metric='cosine',
      dimension=1536  
)


# SEARCH DATABASE OBJECT
search = Pinecone.from_documents(
    texts,
    embeddings,
    index_name=index_name
)


query = "What is magical about machine learning?"
result = search.similarity_search(query=query)
result

[Document(page_content='learning algorithms such as logistic regression, decision trees, and random forests. These', metadata={}),
 Document(page_content='learning algorithms such as logistic regression, decision trees, and random forests. These', metadata={}),
 Document(page_content='Once the data has been collected and analyzed, a predictive model can then be built using machine', metadata={}),
 Document(page_content='Once the data has been collected and analyzed, a predictive model can then be built using machine', metadata={})]

# AGENTS
- runs tools that we import to help extend the LLM's abilities

In [3]:
from langchain.agents.agent_toolkits import create_python_agent
from langchain.tools.python.tool import PythonREPLTool
from langchain.python import PythonREPL
from langchain.llms.openai import OpenAI

# CREATE AGENT EXECUTOR
agent_executor = create_python_agent(
    llm=OpenAI(temperature=0,max_tokens=1000),
    tool=PythonREPLTool(),
    verbose=True
)

agent_executor.run("Find the roots (zeros) if the quadratic function 3 * x**2 + 2*x -1")





[1m> Entering new AgentExecutor chain...[0m


Python REPL can execute arbitrary code. Use with caution.


[32;1m[1;3m I need to solve a quadratic equation
Action: Python_REPL
Action Input: from scipy.optimize import root[0m
Observation: [36;1m[1;3m[0m
Thought:[32;1m[1;3m I need to define the function
Action: Python_REPL
Action Input: def f(x): return 3 * x**2 + 2*x -1[0m
Observation: [36;1m[1;3m[0m
Thought:[32;1m[1;3m I need to find the roots of the function
Action: Python_REPL
Action Input: root(f, [0, 1])[0m
Observation: [36;1m[1;3m[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: The roots of the quadratic function 3 * x**2 + 2*x -1 are -1.0 and 0.3333333333333333.[0m

[1m> Finished chain.[0m


'The roots of the quadratic function 3 * x**2 + 2*x -1 are -1.0 and 0.3333333333333333.'