In [13]:
!pip install langchain --quiet

In [None]:
!pip install langchain-community --quiet
!pip install langchain_chroma --quiet
!pip install langchain_google_genai --quiet

In [20]:
!pip install pypdf --quiet

In [34]:
import os
import numpy as np
import pandas as pd
from google import genai
from IPython.display import Markdown
from langchain.document_loaders import PyPDFLoader
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# (3) Using LLM APIs

In [8]:
# Set your Gemini API key
os.environ['GOOGLE_API_KEY'] = "AIzaSyBDm9Z3ziaw_EToGbvEsErDdakXOhHWkAU"

# Instantiate a Gemini client
client = genai.Client()

# Prompt for the AI model
prompt = "Translate the following English text to French: 'Hello, how are you?'."

# Make a request to the API to generate text
response = client.models.generate_content(
    model="gemini-2.0-flash", # use the model of your choice
    contents=prompt
)

Markdown(response.text)

There are several ways to translate "Hello, how are you?" into French, depending on the context and level of formality:

*   **Formal:**

    *   **Bonjour, comment allez-vous ?** (This is the most common formal option)

*   **Informal:**

    *   **Salut, comment vas-tu ?** (Used with friends and people you know well)
    *   **Salut, ça va ?** (A very common and casual option)
    *   **Bonjour, ça va ?** (Can be used in slightly less formal situations than "comment allez-vous")

*   **Very Casual:**

    *   **Coucou, ça va ?** (Very informal and friendly)

So, the best translation depends on who you are talking to. Here's a quick guide:

*   **Strangers, elders, superiors:** **Bonjour, comment allez-vous ?**
*   **Friends, family, peers:** **Salut, comment vas-tu ?** or **Salut, ça va ?**
*   **Close friends:** **Coucou, ça va ?**

Therefore, if you want a general translation, the safest bet is:

**Bonjour, comment allez-vous ?**


## System of prompts

In [9]:
# Prompt for the AI model
prompt = "Give instructions to cook vegetable samosas"
system_prompt = "You are a sassy culinary instructor that gives sarcastic replies"

# Make a request to the API to generate text
response = client.models.generate_content(
    model="gemini-2.0-flash",
    contents=prompt,
    config=genai.types.GenerateContentConfig(
      system_instruction=system_prompt,
      max_output_tokens=200,
      temperature=1.2
    )
)

Markdown(response.text)

Alright, alright, so you think you're ready to tackle the Everest of fried dough, huh? Vegetable Samosas. Don't get ahead of yourself, darling. It's not just slapping some veggies in a triangle and deep-frying. Here's how to *try* and do it right:

**I. The Dough - *Don't screw this up***

1.  **Get your flour**: All-purpose. Not bread flour, not cake flour. Unless you *want* to fail. Measure it – precisely – about 2 cups. Because eyeballing it never works for amateurs.
2.  **Add some fat**: Oil (vegetable or canola, save the fancy olive oil for your *salads*, dear) – like 3 tablespoons. Rub it into the flour with your fingertips.  Yes, *your fingertips*. Act like you're giving it a massage.
3.  **A sprinkle of salt**: Don

# (4) Retrieval Augmented Generation

## Documment embeddings

In [10]:
text = """This is a simple embedding of a sentence"""

embedding = client.models.embed_content(
    model="text-embedding-004",
    contents=[text]
)

len(embedding.embeddings[0].values)

768

## Document retrieval

In [22]:
! wget -O book.pdf "https://greenteapress.com/thinkpython2/thinkpython2.pdf"

loader = PyPDFLoader("book.pdf")

data = loader.load()

--2025-10-14 16:44:29--  https://greenteapress.com/thinkpython2/thinkpython2.pdf
Resolving greenteapress.com (greenteapress.com)... 67.205.24.128
Connecting to greenteapress.com (greenteapress.com)|67.205.24.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 921415 (900K) [application/pdf]
Saving to: ‘book.pdf’


2025-10-14 16:44:30 (2.50 MB/s) - ‘book.pdf’ saved [921415/921415]



In [24]:
print (f'You have {len(data)} documents in your data')
print (f'''There are ~{round(np.mean([len(x.page_content) for x in data]))}characters per document''')

You have 244 documents in your data
There are ~1820characters per document


In [30]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")

In [31]:
vector_db = Chroma.from_documents(
  data, # Our 2000 character chunks
  embedding=embeddings # Our embedding model
)

query = "How does class inheritance work?"
docs = vector_db.similarity_search(query, k=5)

## Text generation

In [32]:
prompt = "\n\n".join(doc.page_content for doc in docs)
prompt += "\n\n" + query + "Answer based on the sources only."

response = client.models.generate_content(model="gemini-2.0-flash", contents=prompt)

Markdown(response.text)

Based on the provided text, here's how class inheritance works:

*   **Definition:** Inheritance allows you to define a new class (the child) that is a modified version of an existing class (the parent).
*   **Syntax:** To define a class that inherits from another, you put the name of the parent class in parentheses after the child class name:

    ```python
    class Hand(Deck):
        """Represents a hand of playing cards."""
    ```

*   **Inherited Attributes and Methods:** The child class inherits the attributes and methods of the parent class. In the example, `Hand` inherits methods like `pop_card` and `add_card` from `Deck`.
*   **Overriding Methods:** A child class can override methods defined in the parent class. If a method with the same name is defined in both the parent and child, the child's method is used when called on an object of the child class. The example shows how the `__init__` method is overridden in `Hand` to initialize the `cards` attribute with an empty list instead of populating it with 52 cards.


# (5) Tool Calling (aka Function Calling)

In [38]:
df = pd.read_csv("https://wagon-public-datasets.s3.amazonaws.com/deep_learning_datasets/results.csv")

df["data"] = pd.to_datetime(df["date"])

def matches_finder(country: str, start_year: int, end_year: int):
  return df.loc[
    (df["country"] == country) &
    (start_year <= df["data"].dt.year) &
    (df["data"].dt.year <= end_year)
  ]

In [39]:
get_matches_declaration = {
  "name": "get_matches",
  "description": "Return the rows in a DataFrame about women's football games which satisfy the criteria",
  "parameters": {
    "type": "object",
    "properties": {
      "country": {
        "type": "string",
        "description": "The name of the country the matches took place e.g. France or China",
      },
      "start_year": {
        "type": "number",
        "description": "The year to begin filtering from e.g. 1956",
      },
      "end_year": {
        "type": "number",
        "description": "The year to end filtering on e.g. 2005",
      },
    },
    "required": ["country", "start_year", "end_year"],
    },
  }

In [40]:
client = genai.Client()

# Wrap the declaration into a `Tool`
tools = genai.types.Tool(
  function_declarations=[
    get_matches_declaration
  ]
)

user_question = """Tell me about matches that took place in Italy between 1980 up until the end of the 20th century"""

# Use the model
response = client.models.generate_content(
  model="gemini-2.0-flash",
  contents=user_question, # The question
  config=genai.types.GenerateContentConfig(
    tools=[tools] # The tool
  )
)

args = response.candidates[0].content.parts[0].function_call.args
args

matches_finder(**args).head()

Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament,city,country,neutral,data
114,1982-11-14,Italy,Portugal,3,0,UEFA Euro qualification,Genoa,Italy,False,1982-11-14
140,1983-04-24,Italy,France,3,0,UEFA Euro qualification,Vicenza,Italy,False,1983-04-24
155,1983-09-17,Italy,Switzerland,2,0,UEFA Euro qualification,Rome,Italy,False,1983-09-17
171,1984-04-08,Italy,Sweden,2,3,UEFA Euro,Rome,Italy,False,1984-04-08
176,1984-08-19,Italy,West Germany,1,2,Mundialito,Caorle,Italy,False,1984-08-19
