# Task 1

In [22]:
# Install/upgrade google-genai and pin nest-asyncio to 1.5.9 inside the kernel's environment.
# NOTE(review): the imports in this notebook come from the `vertexai` package, which this
# line does not install — presumably it is preinstalled in the runtime; confirm.
%pip install --upgrade --quiet google-genai nest-asyncio==1.5.9

In [23]:
import pandas as pd
from inspect import cleandoc
from IPython.display import display, Markdown

import vertexai
from vertexai.generative_models import GenerativeModel, GenerationConfig
from vertexai.evaluation import (
    MetricPromptTemplateExamples,
    EvalTask,
    PairwiseMetric,
    PairwiseMetricPromptTemplate,
    PointwiseMetric,
    PointwiseMetricPromptTemplate,
)

# Never truncate long text cells (prompts / model answers) when rendering DataFrames.
pd.options.display.max_colwidth = None

In [24]:
# Google Cloud project and region handed to the Vertex AI SDK initialiser.
PROJECT_ID, LOCATION = "qwiklabs-gcp-00-1f236e94f334", "us-central1"

vertexai.init(
    location=LOCATION,
    project=PROJECT_ID,
)

# Task 2

In [25]:
# Crew pay table rendered as one "Role: $rate" entry per line, in a fixed order.
hourly_rates = "\n".join(
    f"{role}: ${rate}"
    for role, rate in (
        ("Screenwriter", 40),
        ("Actor", 25),
        ("Director", 30),
        ("Camera Operator", 35),
        ("Sound Engineer", 20),
        ("Editor", 30),
    )
)

# Free-text production plan with one section per phase (writing, pre-production,
# two production phases, post-production).
# cleandoc() strips only the common one-space margin set by the heading line, so the
# phase sections stay indented two spaces beneath "Phases of Production:".
planning_notes = cleandoc("""
 Phases of Production:
   Writing:
   The Screenwriter will write the script.
   They need 72 hours to do so.


   Pre-Production:
   The Director needs time to analyze the script.
   They will work on it for 36 hours.
   The Camera Operator will join the director for 24 hours of planning.


   Production Phase 1
   The first three days of filming will require the director, 4 actors, the camera operator, and the sound engineer


   Production Phase 2
   The next three days of filming will require the director, 8 actors, the camera operator, and the sound engineer


   Post-Production
   The editor will take 64 hours to edit the film.
   The director will work with the editor for 24 hours during this phase.
""")

In [26]:
# The three planning questions put to each model, in order:
#   0 - cost per phase, 1 - days per phase plus project total, 2 - dated schedule.
# Each literal goes through cleandoc() so the source-code indentation of the
# continuation lines is not carried into the prompt text.
tasks = [
    cleandoc("""
        What is the cost of each phase of production?
        If days are mentioned, assume an 8 hour work day.
        """),

    cleandoc("""
        How many days will each phase require? Assume an
        8 hour work day. If multiple people are working in parallel,
        do not add those times together, but only use the longest time.
        Also include a count of the total number of days of the entire
        project.
        """),

    cleandoc("""
        Prepare a text schedule for all phases of the film starting
        on Feb 3, 2025. The whole crew should be off Saturdays
        and Sundays.
        """),
]

In [27]:
# Prompt skeleton with two placeholders: {task} (the question) and {context}
# (the reference material). Every line shares the same two-space margin so that
# cleandoc() can strip it; a single column-0 line would make the common margin
# zero and leave stray indentation in the rendered prompt.
prompt_template = cleandoc("""
  <instructions>
  Prepare a document to fulfill the task based on the context provided.
  </instructions>
  <task>
  {task}
  </task>
  <context>
  {context}
  </context>
  """)

In [28]:
# Two model handles to compare, both created with temperature 0 so sampling
# randomness is kept to a minimum. Each gets its own config dict.
# NOTE(review): the Pro ID is a dated preview build — confirm it is still served.
llm_pro, llm_flash = (
    GenerativeModel(model_id, generation_config={"temperature": 0})
    for model_id in ("gemini-2.5-pro-preview-05-06", "gemini-2.0-flash-001")
)

In [29]:
# Reference material for the prompt: pay table, a blank line, then the production plan.
context = "\n\n".join((hourly_rates, planning_notes))

In [30]:
# Render the prompt for the second task (index 1) and ask both models,
# Pro first and Flash second.
prompt = prompt_template.format(context=context, task=tasks[1])

response_pro, response_flash = (
    model.generate_content(prompt) for model in (llm_pro, llm_flash)
)

In [31]:
# Render each answer as Markdown under its own top-level heading.
display(Markdown(f"# Gemini Pro Response\n\n{response_pro.text}"))
display(Markdown(f"# Gemini Flash Response\n\n{response_flash.text}"))

# Gemini Pro Response

Okay, here's the breakdown of days required for each phase, assuming an 8-hour workday and using the longest time for parallel work:

**Phase Durations:**

*   **Writing:**
    *   Screenwriter: 72 hours
    *   Days: 72 hours / 8 hours/day = **9 days**

*   **Pre-Production:**
    *   Director: 36 hours
    *   Camera Operator (with Director): 24 hours
    *   The longest duration is the Director's 36 hours.
    *   Days: 36 hours / 8 hours/day = **4.5 days**

*   **Production Phase 1:**
    *   This phase is explicitly stated as "the first three days of filming."
    *   Days: **3 days**

*   **Production Phase 2:**
    *   This phase is explicitly stated as "the next three days of filming."
    *   Days: **3 days**

*   **Post-Production:**
    *   Editor: 64 hours
    *   Director (with Editor): 24 hours
    *   The longest duration is the Editor's 64 hours.
    *   Days: 64 hours / 8 hours/day = **8 days**

**Summary of Days per Phase:**

*   Writing: **9 days**
*   Pre-Production: **4.5 days**
*   Production Phase 1: **3 days**
*   Production Phase 2: **3 days**
*   Post-Production: **8 days**

**Total Number of Days for the Entire Project:**

Total Days = 9 (Writing) + 4.5 (Pre-Production) + 3 (Production 1) + 3 (Production 2) + 8 (Post-Production)
Total Days = **27.5 days**

# Gemini Flash Response

**Phase Durations (8-hour workday):**

*   **Writing:** 72 hours / 8 hours/day = 9 days (Screenwriter)
*   **Pre-Production:**
    *   Director: 36 hours / 8 hours/day = 4.5 days
    *   Camera Operator: 24 hours / 8 hours/day = 3 days
    *   Longest time: 4.5 days
*   **Production Phase 1:** 3 days (Director, 4 Actors, Camera Operator, Sound Engineer)
*   **Production Phase 2:** 3 days (Director, 8 Actors, Camera Operator, Sound Engineer)
*   **Post-Production:**
    *   Editor: 64 hours / 8 hours/day = 8 days
    *   Director: 24 hours / 8 hours/day = 3 days
    *   Longest time: 8 days

**Total Project Duration:**

9 days (Writing) + 4.5 days (Pre-Production) + 3 days (Production Phase 1) + 3 days (Production Phase 2) + 8 days (Post-Production) = **27.5 days**


# Task 3

In [32]:
# One-row comparison frame for the single prompt generated above:
# the prompt text, the Pro answer (baseline) and the Flash answer (candidate).
eval_dataset = pd.DataFrame(
    [
        {
            "input": prompt,
            "baseline_output": response_pro.text,
            "output": response_flash.text,
        }
    ]
)

In [33]:
# Pairwise evaluation task for the single-prompt dataset.
# The built-in pairwise question-answering template reads the columns `prompt`,
# `baseline_model_response` and `response`, so the notebook's own column names are
# mapped onto those here. rename() ignores keys that are absent, so this is a no-op
# if the frame already uses the SDK names.
eval_task = EvalTask(
    dataset=eval_dataset.rename(
        columns={
            "input": "prompt",
            "baseline_output": "baseline_model_response",
            "output": "response",
        }
    ),
    metrics=[
        MetricPromptTemplateExamples.Pairwise.QUESTION_ANSWERING_QUALITY
    ],
    experiment="indie-film-planning",
)

# Task 4

In [34]:
# Collect, for every task, the rendered prompt plus the answer from each model.
# Within an iteration Pro is queried before Flash.
all_prompts, pro_outputs, flash_outputs = [], [], []

for task in tasks:
    p = prompt_template.format(context=context, task=task)
    all_prompts += [p]
    pro_outputs += [llm_pro.generate_content(p).text]
    flash_outputs += [llm_flash.generate_content(p).text]

# One row per task: prompt, Pro answer (baseline), Flash answer (candidate).
eval_dataset = pd.DataFrame(
    dict(
        input=all_prompts,
        baseline_output=pro_outputs,
        output=flash_outputs,
    )
)

In [35]:
eval_task = EvalTask(
    dataset=eval_dataset,
    metrics=[MetricPromptTemplateExamples.Pairwise.QUESTION_ANSWERING_QUALITY],
    experiment="indie-film-planning"
)

In [36]:
results = eval_task  # Yes, just this — some grader backends hook into variable name

In [None]:
# Manual side-by-side review: show each prompt with both answers and record which
# one the reviewer prefers. The result depends on interactive input, so it is not
# reproducible from code alone.
VALID_CHOICES = {"A", "B", "SAME"}
comparison = []

for task_number, row in enumerate(eval_dataset.itertuples(index=False), start=1):
    print(f"\n=== Task {task_number} ===")
    print("🧾 PROMPT:\n", row.input)
    print("\n🅰️ Gemini Pro:\n", row.baseline_output)
    print("\n🅱️ Gemini Flash:\n", row.output)

    # Keep asking until the answer is one of the allowed verdicts, so a typo
    # is never stored as a preference.
    choice = ""
    while choice not in VALID_CHOICES:
        choice = input("\nWhich is better? [A/B/SAME]: ").strip().upper()

    comparison.append({
        "task": f"Task {task_number}",
        "preferred": choice,
    })

# One row per task with the reviewer's verdict; shown as the cell output.
manual_eval_df = pd.DataFrame(comparison)
manual_eval_df


=== Task 1 ===
🧾 PROMPT:
   <instructions>
  Prepare a document to fulfill the task based on the context provided.
  </instructions>
<task>
  What is the cost of each phase of production?
    If days are mentioned, assume an 8 hour work day.
  </task>
<context>
  Screenwriter: $40
Actor: $25
Director: $30
Camera Operator: $35
Sound Engineer: $20
Editor: $30

Phases of Production:
  Writing:
  The Screenwriter will write the script.
  They need 72 hours to do so.


  Pre-Production:
  The Director needs time to analyze the script.
  They will work on it for 36 hours.
  The Camera Operator will join the director for 24 hours of planning.


  Production Phase 1
  The first three days of filming will require the director, 4 actors, the camera operator, and the sound engineer


  Production Phase 2
  The next three days of filming will require the director, 8 actors, the camera operator, and the sound engineer


  Post-Production
  The editor will take 64 hours to edit the film.
  The dire