In [None]:
# Install the Gemini SDK pinned to 1.7.0 (-U: upgrade if needed, -q: quiet output).
%pip install -Uq "google-genai==1.7.0"

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m144.7/144.7 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.9/100.9 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [5]:
from google import genai
from google.genai import types

from IPython.display import Markdown, display

genai.__version__


'1.7.0'

In [7]:
from kaggle_secrets import UserSecretsClient

# Build the API client from the "GOOGLE_API_KEY" secret fetched through
# UserSecretsClient, so the key itself is never written into the notebook.
client = genai.Client(api_key=UserSecretsClient().get_secret("GOOGLE_API_KEY"))


In [9]:
from google.api_core import retry

def is_retriable(e):
  """Return True when `e` is a genai APIError whose code is 429 or 503."""
  if not isinstance(e, genai.errors.APIError):
    return False
  return e.code in {429, 503}


# Install the retry wrapper on Models.generate_content, but only when the
# method does not already carry a `__wrapped__` attribute (presumably left by
# an earlier wrap), so re-running this cell does not wrap it a second time.
if not hasattr(genai.models.Models.generate_content, '__wrapped__'):
  genai.models.Models.generate_content = retry.Retry(predicate=is_retriable)(
      genai.models.Models.generate_content
  )


In [10]:
# Download the PDF to a local file named gemini.pdf (-nv: reduced verbosity, -O: output path).
!wget -nv -O gemini.pdf https://storage.googleapis.com/cloud-samples-data/generative-ai/pdf/2403.05530.pdf

# Upload the local PDF through the client's files API; the returned handle is
# later passed to generate_content alongside the text request.
document_file = client.files.upload(file='gemini.pdf')


2025-04-01 11:24:50 URL:https://storage.googleapis.com/cloud-samples-data/generative-ai/pdf/2403.05530.pdf [7228817/7228817] -> "gemini.pdf" [1]


In [14]:
request = 'Tell me about the training process used here.'

def summarise_doc(request: str) -> str:
  """Execute the request on the uploaded document.

  Args:
    request: Natural-language instruction sent to the model together with
      the uploaded `document_file`.

  Returns:
    The `text` attribute of the model's response.
  """
  # Set the temperature low to stabilise the output. (This was previously 2.0,
  # which contradicts the stated intent of stable, repeatable summaries.)
  config = types.GenerateContentConfig(temperature=0.0)
  response = client.models.generate_content(
      model='gemini-2.0-flash',
      config=config,
      contents=[request, document_file],
  )

  return response.text

summary = summarise_doc(request)
Markdown(summary)


Certainly! Let's analyze the training process outlined in this document for Gemini 1.5.

**Training Infrastructure:**

*   **TPUs:**  The Gemini 1.5 Pro model is trained on Google's Tensor Processing Units (TPUs), specifically TPUv4, which suggests large-scale, purpose-built hardware for accelerating the training. 4096-chip pods used during training, which highlights a compute-heavy and distributed training strategy.

**Datasets and Methods:**

*   **Multimodal Training:** The model was pre-trained on diverse, sourced from various domains including web documents, code, and data across modalities of data (images, audio, video content).

*   **Multilingual Training:** Pre-training involved training on multimodal and multilingual datasets.

*   **Instruction tuning /Fine Tuning Phase:** After pre-training, a finetuning was undertaken which used supervised fine tuning using instructions, on a multimodal set and was supplemented further based upon Human Preference Data

**Key Techniques:**

*   **Mixture of Experts (MoE) Architecture:**  Gemini 1.5 Pro uses MoE. The architecture directs inputs to only a subset of the network's parameters, for which parameters have a fixed compute allocation.

I hope this breakdown is helpful!

In [26]:
import collections
import itertools

# How many times each (question, guidance) pair is answered and evaluated.
# Larger values take longer but average out noise; 2 or 3 is a good start.
NUM_ITERATIONS = 1

# Running score total per guidance option.
scores = collections.defaultdict(int)
# Every (answer, written evaluation) pair, keyed by (guidance, question).
responses = collections.defaultdict(list)

for question in questions:
  display(Markdown(f'## {question}'))

  # Visit every guidance option NUM_ITERATIONS times; `n` is forwarded to the
  # evaluator as its third argument.
  for (guidance, guide_prompt), n in itertools.product(
      guidance_options.items(), range(NUM_ITERATIONS)):
    # Answer with the guidance prompt, then evaluate without passing it on.
    answer = answer_question(question, guide_prompt)
    written_eval, struct_eval = eval_answer(question, answer, n)
    print(f'{guidance}: {struct_eval}')

    # Accumulate the numeric score for this guidance option.
    scores[guidance] += int(struct_eval.value)

    # Keep the raw answer and written evaluation for later inspection.
    responses[(guidance, question)].append((answer, written_eval))



## What is 2+2?

no_help: StructEval(value=4)
with_help: StructEval(value=4)


## Explain gravity in simple terms.

no_help: StructEval(value=4)
with_help: StructEval(value=4)


In [31]:
from enum import Enum

class AnswerRating(Enum):
    """Four-level answer rating whose values are the digit strings "1" to "4"."""

    POOR = "1"
    AVERAGE = "2"
    GOOD = "3"
    EXCELLENT = "4"

    @classmethod
    def from_score(cls, score):
        """Map a numeric score to the nearest rating.

        The score is rounded with the built-in round() and looked up by its
        string form. A rounded value that matches no member yields POOR.
        """
        nearest = str(round(score))
        try:
            return cls(nearest)
        except ValueError:
            # No member has this value: fall back to the lowest rating.
            return cls.POOR



In [32]:
# Report the mean score per guidance option together with its nearest rating.
for guidance, score in scores.items():
  avg_score = score / (NUM_ITERATIONS * len(questions))
  # Use the enum's own helper: it rounds the same way, but falls back to POOR
  # instead of raising ValueError when the rounded average is not a member.
  nearest = AnswerRating.from_score(avg_score)
  print(f'{guidance}: {avg_score:.2f} - {nearest.name}')

no_help: 4.00 - EXCELLENT
with_help: 4.00 - EXCELLENT


In [8]:
import functools

NUM_ITERATIONS = 1  # Default number of trials per question for pairwise comparisons.

@functools.total_ordering
class QAGuidancePrompt:
  """A question-answering guidance prompt or system instruction.

  Instances are ordered by pairwise evaluation: `==` and `<` answer each of
  this prompt's questions with both prompts and aggregate the evaluator's
  verdicts. `functools.total_ordering` derives the remaining comparison
  operators from `__eq__` and `__lt__`, so every comparison triggers new
  answer and evaluation calls.
  """

  def __init__(self, prompt, questions, n_comparisons=NUM_ITERATIONS):
    """Create the prompt.

    Args:
      prompt: The guidance prompt text.
      questions: Questions to evaluate against.
      n_comparisons: Number of evaluations to perform per question.
    """
    self.prompt = prompt
    self.questions = questions
    self.n = n_comparisons

  def __str__(self):
    """Return the prompt text."""
    return self.prompt

  def _compare_all(self, other):
    """Compare two prompts on all questions over n trials.

    Returns:
      The mean per-question result, rounded with the built-in round().
      Positive values come from `_compare` returning 1 (verdict A, where
      response A is this prompt's answer), negative values from -1.

    Raises:
      ZeroDivisionError: If this prompt has no questions.
    """
    # Iterate this instance's own questions rather than a notebook-level
    # global, so the `questions` argument given to __init__ is honoured.
    results = [self._compare_n(other, q) for q in self.questions]
    mean = sum(results) / len(results)
    return round(mean)

  def _compare_n(self, other, question):
    """Compare two prompts on a question over n trials.

    Returns:
      The unrounded mean of `self.n` single-trial results.
    """
    results = [self._compare(other, question, n) for n in range(self.n)]
    mean = sum(results) / len(results)
    return mean

  def _compare(self, other, question, n=1):
    """Compare two prompts on a single question.

    Args:
      other: The prompt to compare against.
      question: The question both prompts are used to answer.
      n: Trial index, forwarded to the evaluator as a cache buster.

    Returns:
      1 if the evaluator's verdict is AnswerComparison.A (response A is this
      prompt's answer), -1 if it is AnswerComparison.B, otherwise 0.
    """
    answer_a = answer_question(question, self.prompt)
    answer_b = answer_question(question, other.prompt)

    _, result = eval_pairwise(
        prompt=question,
        response_a=answer_a,
        response_b=answer_b,
        n=n,  # Cache buster
    )
    # print(f'q[{question}], a[{self.prompt[:20]}...], b[{other.prompt[:20]}...]: {result}')

    # Convert the enum to the standard Python numeric comparison values.
    if result is AnswerComparison.A:
      return 1
    elif result is AnswerComparison.B:
      return -1
    else:
      return 0

  def __eq__(self, other):
    """Equality check that performs pairwise evaluation."""
    if not isinstance(other, QAGuidancePrompt):
      return NotImplemented

    return self._compare_all(other) == 0

  def __lt__(self, other):
    """Ordering check that performs pairwise evaluation."""
    if not isinstance(other, QAGuidancePrompt):
      return NotImplemented

    return self._compare_all(other) < 0
