In [1]:
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

True

In [2]:
from langchain.llms import OpenAI

In [3]:
llm  = OpenAI(model_name = "gpt-3.5-turbo")



In [4]:
llm("explain large language models in one sentence")

'Large language models are advanced artificial intelligence systems that are trained on huge amounts of text data to generate realistic and coherent human-like text.'

In [5]:
#schema

In [6]:
from langchain.schema import(
    AIMessage,
    HumanMessage,
    SystemMessage
)
from langchain.chat_models import ChatOpenAI

In [7]:
chat = ChatOpenAI(model_name="gpt-3.5-turbo",temperature=0.3)
messages = [
    SystemMessage(content="You are an expert data scientist"),
    HumanMessage(content="Write a Python script that trains a neural network on simulated data ")
]
response=chat(messages)
response


AIMessage(content="Sure! Here is an example Python script that trains a simple neural network on simulated data using the TensorFlow library:\n\n```python\nimport numpy as np\nimport tensorflow as tf\n\n# Generate simulated data\nnp.random.seed(0)\nX = np.random.rand(100, 2)\ny = np.random.randint(0, 2, 100)\n\n# Define the neural network architecture\nmodel = tf.keras.models.Sequential([\n    tf.keras.layers.Dense(64, activation='relu', input_shape=(2,)),\n    tf.keras.layers.Dense(64, activation='relu'),\n    tf.keras.layers.Dense(1, activation='sigmoid')\n])\n\n# Compile the model\nmodel.compile(optimizer='adam',\n              loss='binary_crossentropy',\n              metrics=['accuracy'])\n\n# Train the model\nmodel.fit(X, y, epochs=10, batch_size=32)\n\n# Evaluate the model\nloss, accuracy = model.evaluate(X, y)\nprint(f'Loss: {loss}, Accuracy: {accuracy}')\n```\n\nIn this script:\n- We first generate some simulated data with 2 features and binary labels.\n- We define a simple n

In [8]:
print(response.content,end='\n')

Sure! Here is an example Python script that trains a simple neural network on simulated data using the TensorFlow library:

```python
import numpy as np
import tensorflow as tf

# Generate simulated data
np.random.seed(0)
X = np.random.rand(100, 2)
y = np.random.randint(0, 2, 100)

# Define the neural network architecture
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(2,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(X, y, epochs=10, batch_size=32)

# Evaluate the model
loss, accuracy = model.evaluate(X, y)
print(f'Loss: {loss}, Accuracy: {accuracy}')
```

In this script:
- We first generate some simulated data with 2 features and binary labels.
- We define a simple neural network with 2 hidden layers and compile it wi

In [9]:
#prompt engineering

In [10]:
from langchain import PromptTemplate
template = """
You are an expert data scientist with an expertise in building deep learning models. 
Explain the concept of {concept} in a couple of lines
"""
prompt = PromptTemplate(
    input_variables=["concept"],
    template=template,
)

In [11]:
prompt

PromptTemplate(input_variables=['concept'], output_parser=None, partial_variables={}, template='\nYou are an expert data scientist with an expertise in building deep learning models. \nExplain the concept of {concept} in a couple of lines\n', template_format='f-string', validate_template=True)

In [12]:
llm(prompt.format(concept="autoencoder"))

'An autoencoder is a type of artificial neural network used for unsupervised learning that aims to learn efficient representations of input data by encoding it into a latent space and then decoding it back to the original input. It is often used for tasks such as dimensionality reduction, data denoising, and anomaly detection.'

In [13]:
#chain

In [14]:
# Import LLMChain and define chain with language model and prompt as arguments.

from langchain.chains import LLMChain
chain = LLMChain(llm=llm, prompt=prompt)

# Run the chain only specifying the input variable.
print(chain.run("autoencoder"))

An autoencoder is a type of neural network that learns to encode data in a lower-dimensional representation and then decode it back to its original form. It is commonly used for tasks such as dimensionality reduction, data compression, and anomaly detection.


In [15]:
# Define a second prompt 

second_prompt = PromptTemplate(
    input_variables=["ml_concept"],
    template="Turn the concept description of {ml_concept} and explain it to me like I'm five in 500 words",
)
chain_two = LLMChain(llm=llm, prompt=second_prompt)
     

In [16]:
from langchain.chains import SimpleSequentialChain
overall_chain = SimpleSequentialChain(chains=[chain, chain_two], verbose=True)

# Run the chain specifying only the input variable for the first chain.
explanation = overall_chain.run("autoencoder")
print(explanation)



[1m> Entering new SimpleSequentialChain chain...[0m
[36;1m[1;3mAn autoencoder is a type of neural network that learns to encode input data into a more compact representation and then decode it back to its original form. It is commonly used for tasks such as data compression, denoising, and feature extraction in machine learning and deep learning applications.[0m
[33;1m[1;3mAlright, imagine you have a magic puzzle box that can turn big, messy pictures into smaller, simpler ones. First, you put a picture in the box, and the box learns how to squish it down into a tiny puzzle. Then, when you want the big picture back, the box can un-squish the puzzle and make the big picture appear again!

This magic puzzle box is called an autoencoder. It's like a special brain that can take a complicated picture, like one with lots of details and colors, and turn it into a smaller, simplified version. It's kind of like when you take a big jigsaw puzzle and fit all the pieces together to make a 

In [17]:
#embeddings and vectors

In [18]:
# Import utility for splitting up texts and split up the explanation given above into document chunks

from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 100,
    chunk_overlap  = 0,
)

texts = text_splitter.create_documents([explanation])

In [19]:
texts[0].page_content

'Alright, imagine you have a magic puzzle box that can turn big, messy pictures into smaller, simpler'

In [36]:
import tiktoken

In [39]:
# Choose an appropriate encoding name
encoding_name = "cl100k_base"  # Common encoding for many models

# Get the tokenizer for the specified encoding
encoding = tiktoken.get_encoding(encoding_name)
encoding

<Encoding 'cl100k_base'>

In [40]:
tokens = encoding.encode(texts[0].page_content)
tokens

[72586,
 11,
 13085,
 499,
 617,
 264,
 11204,
 25649,
 3830,
 430,
 649,
 2543,
 2466,
 11,
 46946,
 9364,
 1139,
 9333,
 11,
 35388]

In [41]:
text_chunk = encoding.decode(tokens)
text_chunk

'Alright, imagine you have a magic puzzle box that can turn big, messy pictures into smaller, simpler'

In [64]:
from langchain.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model_name="ada")

In [65]:
import numpy as np

In [66]:
tokens_vector = np.array(tokens)
tokens_vector

array([72586,    11, 13085,   499,   617,   264, 11204, 25649,  3830,
         430,   649,  2543,  2466,    11, 46946,  9364,  1139,  9333,
          11, 35388])

In [67]:
# Print the tokens and the vector
print("Tokens:", tokens)
print("Tokens Vector:", tokens_vector)

# Save the vector to a file if needed
np.save("tokens_vector.npy", tokens_vector)

# To load the vector back
loaded_vector = np.load("tokens_vector.npy")
print("Loaded Vector:", loaded_vector)

Tokens: [72586, 11, 13085, 499, 617, 264, 11204, 25649, 3830, 430, 649, 2543, 2466, 11, 46946, 9364, 1139, 9333, 11, 35388]
Tokens Vector: [72586    11 13085   499   617   264 11204 25649  3830   430   649  2543
  2466    11 46946  9364  1139  9333    11 35388]
Loaded Vector: [72586    11 13085   499   617   264 11204 25649  3830   430   649  2543
  2466    11 46946  9364  1139  9333    11 35388]


In [68]:
# Import and initialize Pinecone client

import os
import pinecone
from langchain.vectorstores import Pinecone


pinecone.init(
    api_key=os.getenv('PINECONE_API_KEY'),  
    environment=os.getenv('PINECONE_ENV')  
)

In [69]:
# Upload vectors to Pinecone

index_name = "langchain-quickstart"
search = Pinecone.from_documents(texts, embeddings, index_name=index_name)

ValueError: No active indexes found in your Pinecone project, are you sure you're using the right API key and environment?

In [70]:
#agent

In [80]:
from langchain.agents.agent_toolkits import create_python_agent
from langchain.tools.python.tool import PythonREPLTool
from langchain.python import PythonREPL
from langchain.llms.openai import OpenAI

agent_executor = create_python_agent(
    llm=OpenAI(model_name = "gpt-3.5-turbo",temperature=0, max_tokens=1000),
    tool=PythonREPLTool(),
    verbose=True,
)

In [81]:
agent_executor.run("Find the roots (zeros) if the quadratic function 3 * x**2 + 2*x -1")




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mWe can use the quadratic formula to find the roots of the quadratic function.
Action: Python REPL
Action Input: import math[0m
Observation: [36;1m[1;3m[0m
Thought:[32;1m[1;3mWe need to define the coefficients a, b, and c of the quadratic function.
Action: Python REPL
Action Input: a = 3, b = 2, c = -1[0m
Observation: [36;1m[1;3minvalid syntax. Maybe you meant '==' or ':=' instead of '='? (<string>, line 1)[0m
Thought:[32;1m[1;3mI made a mistake in defining the coefficients. Let me correct that.
Action: Python REPL
Action Input: a = 3[0m
Observation: [36;1m[1;3m[0m
Thought:[32;1m[1;3mI need to define the other coefficients as well.
Action: Python REPL
Action Input: b = 2[0m
Observation: [36;1m[1;3m[0m
Thought:[32;1m[1;3mI need to define the last coefficient.
Action: Python REPL
Action Input: c = -1[0m
Observation: [36;1m[1;3m[0m
Thought:[32;1m[1;3mNow that I have defined the coefficients, I can us

'The roots of the quadratic function 3 * x**2 + 2*x -1 are x = 0.333 and x = -1.0'