In [None]:
from langchain.llms import AI21
import os
from dotenv import load_dotenv, find_dotenv

# Locate a .env file with find_dotenv() and load it via load_dotenv().
load_dotenv(find_dotenv())

# AI21_API_KEY = os.environ.get("AI21_API_KEY")
# AI21_API_KEY

In [None]:
# Create the AI21 LLM wrapper with temperature=0.7.
# NOTE(review): presumably the API key is read from the environment populated by
# load_dotenv() above -- confirm AI21_API_KEY is set before running.
llm = AI21(temperature=0.7)
text = "what are 5 vacation destination for Chai Lovers?"
# Send the raw prompt string to the model and print what it returns.
print(llm(text))

# Prompt Templates: Manage prompts for LLMs

In [None]:
from langchain.prompts import PromptTemplate

In [None]:
# Template with a single placeholder, {food}, declared in input_variables.
prompt = PromptTemplate(
    input_variables=["food"],
    template="What are 5 vacation destination for someone who like {food}?",
)

In [None]:
# Read a value from stdin (input() blocks until the user types) and render the template with it.
prompt.format(food=input())

In [None]:
# Read a value from stdin, render the template with it, send it to the model and print the reply.
print(llm(prompt.format(food=input())))

Chains: combine LLMs and prompts in a multi-step workflow

In [None]:
from langchain.chains import LLMChain

# Rebinds `prompt`: same question text as the earlier template, but the
# placeholder is now named "input" instead of "food".
prompt = PromptTemplate(
    input_variables=["input"],
    template="What are 5 vacation destination for someone who like {input}?",
)

# Tie the model and the template together in a single chain object.
llm_chain = LLMChain(
    llm=llm,  # the `llm` object created in an earlier cell (AI21)
    prompt=prompt,  # the PromptTemplate defined just above
)

# Run the chain with {input} set to "chicken" and print the returned text.
print(llm_chain.run(input="chicken"))

In [None]:
from langchain.agents import load_tools, initialize_agent

In [None]:
# The SerpAPI key must be supplied through the environment (for example via the
# .env file read by load_dotenv() at the top of the notebook). It is deliberately
# NOT written into the notebook: a key committed in source is leaked and has to
# be revoked and rotated.
if not os.environ.get("SERPAPI_API_KEY"):
    raise EnvironmentError(
        "SERPAPI_API_KEY is not set; add it to your .env file or environment."
    )

# Load the 'serpapi' and 'llm-math' tools, handing them the shared `llm`.
tools = load_tools(['serpapi', 'llm-math'], llm=llm)

In [None]:
# Inspect the tool objects returned by load_tools.
print(tools)

In [None]:
# Build an agent of type "zero-shot-react-description" over the loaded tools; verbose=True is requested.
agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True)

In [None]:
# Ask the agent a two-part factual question.
agent.run("who is the CEO of Microsoft and who is the director of it?")

In [None]:
from langchain import ConversationChain

In [None]:
# Conversation chain built on the shared `llm`; verbose=True is requested.
conversation = ConversationChain(llm=llm, verbose=True)

# First turn of the conversation.
conversation.predict(input="Hi there!")

In [None]:
# Next turn on the same `conversation` object: ask for a Flask application.
conversation.predict(input="write me a code for a flask Application using python")

In [None]:
# Follow-up turn: ask for an API function that takes user input and returns output.
conversation.predict(input="Add a function API in flask for taking inputs from user and returning the output")

In [None]:
# Same request text as the previous cell, sent again as a further turn.
conversation.predict(input="Add a function API in flask for taking inputs from user and returning the output")

In [None]:
import pypdfium2 as pdfium
import matplotlib.pyplot as plt
from PIL import Image
from io import BytesIO

In [None]:
def convert_pdf_to_image(file_path, scale=300/72):
    """Render every page of a PDF to PNG-encoded bytes.

    Args:
        file_path: Location of the PDF, handed to ``pdfium.PdfDocument``.
        scale: Render scale forwarded to pdfium; defaults to ``300/72``.

    Returns:
        A list with one single-entry dict per page, ``{page_index: png_bytes}``,
        in page order.
    """
    document = pdfium.PdfDocument(file_path)
    page_numbers = list(range(len(document)))

    rendered_pages = document.render(
        pdfium.PdfBitmap.to_pil,
        page_indices=page_numbers,
        scale=scale,
    )

    pages_as_png = []
    for page_number, pil_page in zip(page_numbers, rendered_pages):
        # Serialise the PIL page into an in-memory PNG.
        buffer = BytesIO()
        pil_page.save(buffer, format="PNG")
        pages_as_png.append({page_number: buffer.getvalue()})

    return pages_as_png

In [None]:
def display_image(list_dict_final_images):
    """Show each rendered page in its own matplotlib figure.

    Args:
        list_dict_final_images: List of single-entry dicts whose value is the
            encoded image bytes of one page (the shape returned by
            ``convert_pdf_to_image``).
    """
    # Pull the bytes out of every entry up front, before any figure is drawn.
    page_payloads = [list(entry.values())[0] for entry in list_dict_final_images]

    for page_number, payload in enumerate(page_payloads, start=1):
        page = Image.open(BytesIO(payload))

        # Figure size is the pixel size divided by 100 in each dimension.
        width_in = page.width / 100
        height_in = page.height / 100
        plt.figure(figsize=(width_in, height_in))

        plt.title(f"----- Page {page_number} ----")
        plt.imshow(page)
        plt.axis("off")
        plt.show()


In [None]:
# NOTE(review): this rebinds the name `convert_pdf_to_image` from the function to
# the list it returns. Later cells pass this name around as the page-image list,
# and re-running this cell without first re-running the function definition fails
# because a list is not callable.
# TODO: store the result under a separate name and update the cells that consume it.
convert_pdf_to_image = convert_pdf_to_image(r"C:\Users\PC\Desktop\Assignment.pdf")

In [None]:
# At this point `convert_pdf_to_image` holds the list of rendered pages, not the function.
display_image(convert_pdf_to_image)

# Use pytesseract

In [None]:
from pytesseract import pytesseract

In [70]:
def extract_text_with_pytesseract(list_dict_final_images):
    """Run Tesseract OCR over every page image and return the combined text.

    Args:
        list_dict_final_images: List of single-entry dicts whose value is the
            encoded image bytes of one page.

    Returns:
        The per-page OCR text joined with newline characters.

    Side effects:
        Sets ``pytesseract.tesseract_cmd`` to a fixed Windows install path.
    """
    page_payloads = [list(entry.values())[0] for entry in list_dict_final_images]
    pytesseract.tesseract_cmd = r"C:\Users\PC\AppData\Local\Tesseract-OCR\tesseract.exe"

    def _ocr_page(payload):
        # Decode the bytes into a PIL image and hand it to Tesseract.
        return str(pytesseract.image_to_string(Image.open(BytesIO(payload))))

    return "\n".join([_ocr_page(payload) for payload in page_payloads])

In [71]:
# `convert_pdf_to_image` here is the list of rendered pages produced earlier, not the function.
text_with_pytesseract = extract_text_with_pytesseract(convert_pdf_to_image)

In [72]:
# Show the text recovered by Tesseract.
print(text_with_pytesseract)

08/10/2023, 16:30

In [1]:

In [2]:

localhost:8888/notebooks/Downloads/08.Programming Assignment_08.ipynb

08.Programming Assignment_08 - Jupyter Notebook

Assignment 8 Solutions

1. Write a Python Program to Add two Matrices ?

14
15
16
17

Inputs
Output
Inputs
Output

def

addMatrices(a,b):
print(f'Inputs: {a}, {b}')
if len(a) == len(b):
out_matrix = []
for ele in range(len(a)):
if len(a[ele]) == len(b[ele]):
out_matrix.append([])
for sub_ele in range(len(a[ele])):
out_matrix[ele].append(a[ele][sub_ele]+b[ele][sub_ele])
else:
print('Both Matrices must contains same no of rows and colum
else:
print('Both Matrices must contains same no of rows and columns’ )
print(f'Output: f{out_matrix}')

addMatrices([[1,2,3],[4,5,6],[7,8,9]],[[9,8,7],[6,5,4],[3,2,1]])
addMatrices([[2,3,5],[1,1,1],[2,2,2]],[[4,3,5],[1,2,3],[3,2,1]])

: [[1, 2, 3], [4, 5, 6], [7, 8, 9]],[[9, 8, 7], [6, 5, 4], [3, 2, 1]]
: [[10, 10, 10], [10, 10, 10], [10, 10, 10]]

: [[2, 3, 5], [1, 1, 1], [2, 2, 2]],[[4, 3, 5], [1, 

# Use EasyOCR

In [None]:
from easyocr import Reader

# Module-level EasyOCR reader created for the language list ['en'];
# extract_text_with_easyocr reads this global.
language_reader = Reader(['en'])

In [None]:
def extract_text_with_easyocr(list_dict_final_images):
    """Run EasyOCR over every page image and return the combined text.

    Args:
        list_dict_final_images: List of single-entry dicts whose value is the
            encoded image bytes of one page.

    Returns:
        One line per page (pages joined with newlines); within a page the
        recognised text fragments are joined with single spaces.
    """
    image_list = [list(data.values())[0] for data in list_dict_final_images]
    image_content = []

    for image_bytes in image_list:
        # Hand EasyOCR the encoded bytes directly. readtext() accepts a file
        # path/URL, raw image bytes or a numpy array; a PIL PNG image object is
        # not one of the supported input types, so wrapping the bytes in
        # Image.open() first makes the call fail.
        detections = language_reader.readtext(image_bytes)

        # Each detection's second element (index 1) is the recognised text.
        image_content.append(" ".join(detection[1] for detection in detections))

    return "\n".join(image_content)

In [None]:
# `convert_pdf_to_image` here is the list of rendered pages produced earlier, not the function.
text_with_easyocr = extract_text_with_easyocr(convert_pdf_to_image)
print(text_with_easyocr)  # EasyOCR fails here because it reads the file contents in a straight line

# PyPDF2

In [None]:
from PyPDF2 import PdfReader

In [None]:
def extract_text_with_Pypdf2(pdf_file):
    """Extract the text of a PDF with PyPDF2.

    Args:
        pdf_file: Passed unchanged to ``PdfReader``.

    Returns:
        A string that starts with a single space followed by the text of every
        page, concatenated in page order with no separator; pages whose
        extracted text is empty are skipped.
    """
    page_texts = (page.extract_text() for page in PdfReader(pdf_file).pages)

    # The leading " " mirrors the seed value the accumulator has always used.
    return " " + "".join(text for text in page_texts if text)


In [None]:
# Extract text from the PDF file itself, not from the rendered page images.
text_with_pypdf2 = extract_text_with_Pypdf2(r"C:\Users\PC\Desktop\Assignment.pdf")
print(text_with_pypdf2)

# Langchain

In [None]:
from langchain.document_loaders.image import UnstructuredImageLoader

def extract_text_with_langchain(list_dict_final_images):
    """Extract text from page images with LangChain's UnstructuredImageLoader.

    Args:
        list_dict_final_images: List of single-entry dicts whose value is the
            encoded image bytes of one page.

    Returns:
        The text of all pages joined with newlines. When the loader returns
        several documents for one page, their contents are joined with
        newlines as well.
    """
    import os
    import tempfile

    image_list = [list(data.values())[0] for data in list_dict_final_images]
    image_content = []

    for image_bytes in image_list:
        # The loader takes a file path, not an in-memory image, so spill the
        # bytes to a temporary file first. delete=False plus an explicit
        # os.remove() is used because an open NamedTemporaryFile cannot be
        # reopened by name on Windows.
        # NOTE(review): the ".png" suffix assumes the bytes are PNG, which is
        # what convert_pdf_to_image produces -- confirm for other callers.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            tmp.write(image_bytes)
            tmp_path = tmp.name

        try:
            documents = UnstructuredImageLoader(tmp_path).load()
        finally:
            os.remove(tmp_path)

        # Take every document returned for THIS image. Indexing the result by
        # the page counter is wrong: each load() call only covers one image.
        image_content.append("\n".join(doc.page_content for doc in documents))

    return "\n".join(image_content)

In [None]:
# `convert_pdf_to_image` here is the list of rendered pages produced earlier, not the function.
text_with_langchain = extract_text_with_langchain(convert_pdf_to_image)
text_with_langchain

In [None]:
from langchain.document_loaders import UnstructuredFileLoader, UnstructuredPDFLoader

In [None]:
def extract_text_with_langchain_doc_loader(pdf_file):
    """Load a file with UnstructuredFileLoader and return its text.

    Args:
        pdf_file: Passed unchanged to ``UnstructuredFileLoader``.

    Returns:
        The ``page_content`` of every loaded document, joined with newlines.
    """
    loaded_docs = UnstructuredFileLoader(pdf_file).load()
    page_texts = [document.page_content for document in loaded_docs]
    return "\n".join(page_texts)

In [None]:
# Run the document-loader extraction on the PDF file and display the returned string.
extract_text_with_langchain_doc_loader(r"C:\Users\PC\Desktop\Assignment.pdf")

In [None]:
import nbformat

# Notebook to parse, given as a path relative to the working directory
notebook_path = "amazon.ipynb"

# Read the notebook and have nbformat return it as a version-4 notebook object
with open(notebook_path, "r", encoding="utf-8") as notebook_file:
    notebook_content = nbformat.read(notebook_file, as_version=4)

# Accumulates one {"question": ..., "answer": ...} dict per detected question
questions_and_code_answers = []

# Parser state: the question currently being answered (None until the first one
# is seen) and the cell sources collected for it so far
current_question = None
current_code_answer = []

# Walk the cells in notebook order
for cell in notebook_content.cells:
    if cell.cell_type == "code":
        cell_source = cell.source.strip()

        if current_question and cell_source:
            # Non-empty code under an open question belongs to its answer;
            # code that appears before the first question is dropped
            current_code_answer.append(cell_source)
            
    elif cell.cell_type == "markdown":
        cell_source = cell.source.strip()

        # A markdown cell counts as a question when its stripped text ends with "?"
        if cell_source and cell_source.endswith("?"):
            # A new question closes the previous one: flush what was collected for it
            if current_question:
                questions_and_code_answers.append({
                    "question": current_question,
                    "answer": '\n'.join(current_code_answer)  # One collected cell per line group
                })
                current_code_answer = []

            # Start collecting for the new question
            current_question = cell_source
        elif current_question:
            # Any other markdown under an open question is kept as part of the
            # answer (including an empty string when the cell is blank)
            current_code_answer.append(cell_source)

    # Runs after every cell: while a question is open and nothing has been
    # collected for it yet, seed the answer with an empty string, so the joined
    # answer starts with a newline once real content follows
    if current_question and not current_code_answer:
        current_code_answer.append("")

# The last question is never followed by another one, so flush it here
if current_question:
    questions_and_code_answers.append({
        "question": current_question,
        "answer": '\n'.join(current_code_answer)
    })

# questions_and_code_answers now holds one question/answer dict per question-style markdown cell

In [None]:
import requests

# Set up your AI21 API key


# Define the API endpoint for evaluation



# Define the Python code you want to evaluate
# Python snippet that the model is asked to review.
# NOTE(review): in this snippet the `num == 0` check is nested under `num > 0`,
# so it can never fire and 0 is reported as negative -- presumably a deliberately
# flawed sample for the model to critique; confirm.
template = """
num = int(input("enter a number u wnat to check"))
 
if num > 0:
    print("this is a positive number")
    if num == 0:
        print("this is a zero")
else:
    print("this is a negative number")
"""

# Question sent to the model; {code} is the placeholder for the snippet above.
question = """Is this Python code correct?
{code}
"""

# Request payload. The prompt is built by substituting the snippet into the
# {code} placeholder. The previous f-string referenced an undefined name `code`
# (NameError) and would have left the literal "{code}" marker in the text.
payload = {
    "prompt": question.format(code=template),
}
# Send a POST request to the AI21 API for evaluation
# response = requests.post(api_url, json=payload)

# # Parse the API response
# response_data = response.json()

# # Extract the evaluation result
# evaluation_result = response_data.get("result", "Evaluation failed")

# Print the evaluation result
# +
# print(f"Python Code:\n{code}\nEvaluation: {evaluation_result}")



from langchain.llms import OpenAI

# Rebinds `llm` to an OpenAI model; earlier cells used an AI21 model under the same name.
llm = OpenAI(temperature=0.9)
# Bare expression in the middle of the cell: it is evaluated but its value is not shown.
payload['prompt']
from langchain.prompts import PromptTemplate
# Wrap `question` so that its {code} placeholder can be filled in.
prompt = PromptTemplate(
    input_variables=['code'],
    template=question,
)
# Substitute the snippet held in `template` and print the model's response.
print(llm(prompt.format(code=template)))

In [20]:
import nbformat

In [21]:
# Read amazon.ipynb and have nbformat return it as a version-4 notebook object.
with open("amazon.ipynb", "r", encoding="utf-8") as notebook_file:
    notebook_content = nbformat.read(notebook_file, as_version=4)

In [33]:
# Print the source of every code cell; cells of any other type are skipped.
for cell in notebook_content.cells:
    # print(cell)
    if cell.cell_type == "code":
        print(cell.source)

from bs4 import BeautifulSoup
import requests
search = input("Enter the product name: ")
url = f"https://www.amazon.in/s?k={search}"
header = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"}
page = requests.get(url, headers=header)
page.status_code
soup = BeautifulSoup(page.content, "html.parser")
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

df = pd.read_csv("https://raw.githubusercontent.com/kanchitank/Mushroom-Classification/master/mushrooms.csv")
df['class'].value_counts()
count = df['class'].value_counts()

plt.figure(figsize=(8, 7))
sns.barplot(count.index, count.values, alpha=0.8, palette="prism")
plt.ylabel('Count', fontsize=12)
plt.xlabel('Class', fontsize=12)
plt.title('Number of poisonous/edible mushrooms')
class AgeValidationException(Exception):
    def __init__(self, msg):
        self.msg = msg


def age_validation(age):
    if age < 0:
        raise AgeValidation