##### Copyright 2023 Google LLC

In [None]:
# @title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Setup

In [10]:
!pip install -U -q "google-generativeai>=0.8.2"

In [3]:
# import necessary modules.

import google.generativeai as genai

import base64
import json

try:
    # Mount google drive
    from google.colab import drive

    drive.mount("/gdrive")

    # The SDK will automatically read it from the GOOGLE_API_KEY environment variable.
    # In Colab get the key from Colab-secrets ("🔑" in the left panel).
    import os
    from google.colab import userdata

    os.environ["GOOGLE_API_KEY"] = userdata.get("GOOGLE_API_KEY")
except ImportError:
    pass

# Parse the arguments

model = "gemini-1.5-flash"  # @param {isTemplate: true}
# contents_b64 = b'W3sicGFydHMiOiBbeyJ0ZXh0IjogIkhlbGxvIn1dfV0='
# generation_config_b64 = "e30="  # @param {isTemplate: true}
# safety_settings_b64 = "e30="  # @param {isTemplate: true}

# contents = json.loads(base64.b64decode(contents_b64))

# generation_config = json.loads(base64.b64decode(generation_config_b64))
# safety_settings = json.loads(base64.b64decode(safety_settings_b64))

stream = False

# print(json.dumps(contents, indent=4))

Mounted at /gdrive
[
    {
        "parts": [
            {
                "text": "Hello"
            }
        ]
    }
]


In [24]:
import os
import pandas as pd
from PIL import Image
from google.colab import drive
import os
from google.colab import userdata
import google.generativeai as genai
import time

genai.configure(api_key="AIzaSyDtXGdUvHe4_vN-6100EEvnHcrNXCa6GlA")


drive.mount("/gdrive")

print(os.listdir("/gdrive/My Drive"))


generation_config = {
  "max_output_tokens": 16384,
  "response_mime_type": "text/plain",
}

model = genai.GenerativeModel("gemini-2.0-flash", generation_config = generation_config)


def fetch_openai_response(SYSTEM_PROMPT_PATH, scene_graph, frame_imgs, bev_img, referring_expressions):

    with open(SYSTEM_PROMPT_PATH, 'r') as file:
        system_prompt_data = file.read()
    #print(system_prompt_data + "\nHere is the scene graph: " + str(scene_graph["object_info"]) + "\n" + "Here is the referring expressions: " + referring_expressions + "\nThe subsequent images include a Bird Eye View image as the first, followed by 8 frames extracted from the scene video. Please return the answer.")
    messages = [system_prompt_data + "\nHere is the scene graph: " + str(scene_graph["object_info"]) + "\n" + "Here is the referring expressions: " + referring_expressions + "\nThe subsequent images include a Bird Eye View image as the first, followed by 8 frames extracted from the scene video. Please return the answer.", bev_img]
    for img in frame_imgs:
        messages.append(img)
    response = model.generate_content(messages, stream = False)
    return response.text



SCENE_GRAPH_DIR_BASE = "/gdrive/My Drive/scannet_scenegraph_with_label"

evaluation_level = "area_level"

gt_dataset_path = os.path.join("/gdrive/My Drive", "anywhere3d_" + evaluation_level + ".xlsx")
SYSTEM_PROMPT_PATH = os.path.join("/gdrive/My Drive", "system_prompt_gpt4scene_multiframes_for_" + evaluation_level + ".txt")

result_lis = []

file_gt = pd.read_excel(gt_dataset_path, header = 0, index_col = 0)
for index, row in file_gt.iterrows():
  # print(index, row['scene_id'], row["new_referring_expressions"])
  if row['datasetname'] == "scannet":
      scenegraph_file_path = os.path.join(SCENE_GRAPH_DIR_BASE, row['scene_id'], "scene_graph_with_label.json")
  else:
      continue
  time.sleep(10)
  print(index, row['scene_id'], row["new_referring_expressions"])
  with open(scenegraph_file_path, 'r') as f:
      scenegraph_data = json.load(f)

  img_dir = os.path.join("/gdrive/My Drive/scannet_gpt4scene_data", row['scene_id'])

  all_img_list = os.listdir(img_dir)
  frame_imgs = []
  for f_img in all_img_list:
      if "bev" not in f_img and "concat" not in f_img:
          # with open(os.path.join(img_dir, f_img), "rb") as single_img_file:
          #     single_img_data = single_img_file.read()
          # frame_imgs.append(base64.b64encode(single_img_data).decode('utf-8'))
          frame_imgs.append(Image.open(os.path.join(img_dir, f_img)))


  bev_path = os.path.join(img_dir, "bev.png")
  # with open(bev_path, "rb") as bev_file:
  #     bev = bev_file.read()
  # bev_img = base64.b64encode(bev).decode('utf-8')

  bev_img = Image.open(bev_path)

  pred_bbx_str = fetch_openai_response(SYSTEM_PROMPT_PATH, scenegraph_data, frame_imgs, bev_img, row["new_referring_expressions"])
  #print(pred_bbx_str)

  result_lis.append({
                          "_id": row["_id"],
                          "datasetname": row["datasetname"],
                          "scene_id": row['scene_id'],
                          "cur_referring_expressions_cnt": row["cur_referring_expressions_cnt"],
                          "referring_expressions": row["new_referring_expressions"],
                          "pred_bbx_str": pred_bbx_str,
                          "pred_box_x": 0,
                          "pred_box_y": 0,
                          "pred_box_z": 0,
                          "pred_box_width": 0,
                          "pred_box_length": 0,
                          "pred_box_height": 0
                      })
#print(result_lis)
pred_path = os.path.join("/gdrive/My Drive", "gemini-2.0-flash_prediction_multiframes_" + evaluation_level + "_with_label.xlsx")

df = pd.DataFrame(result_lis)
df.to_excel(pred_path, header = True, index = True)




Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).
['NSCLC', 'CRC', 'GSE70580', 'GSE75688', 'GSE72056', 'GSE96993', 'PBMC', 'Spleen_human', 'Spleen_mouse', 'HCC', 'graphformers', 'GraphFormer.ipynb', 'CATs', 'summer intern', '4.1_mois.ipynb', 'Encoder_Decoder_gene_embedding_transformer_attention.ipynb', 'cell_atlas.json', 'model_genes.csv', 'NSCLC_Moana_prediction.tsv', 'Spleen_human_Moana_prediction.tsv', 'Spleen_mouse_Moana_prediction.tsv', 'PBMC_Moana_prediction.tsv', 'CRC_Moana_prediction.tsv', 'GSE70580_Moana_prediction.tsv', 'GSE75688_Moana_prediction.tsv', 'GSE96993_Moana_prediction.tsv', 'GSE72056_Moana_prediction.tsv', 'AMB', 'Baron', 'Segerstolpe', 'Xin', 'Macparland', 'AMB_3label_Moana_prediction.tsv', 'AMB_20label_Moana_prediction.tsv', 'AMB_114label_Moana_prediction.tsv', 'Baron_Moana_prediction.tsv', 'Baron_mouse_Moana_prediction.tsv', 'Segerstolpe_Moana_prediction.tsv', 'Xin_Moana_prediction.tsv', 'Marparlan

## Call `generate_content`

In [None]:


# Call the model and print the response.
gemini = genai.GenerativeModel(model_name=model)

generation_config = {
  "max_output_tokens": 16384,
  "response_mime_type": "text/plain",
}

response = gemini.generate_content(
    contents,
    generation_config=generation_config,
    safety_settings=safety_settings,
    stream=stream,
)



<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://ai.google.dev/gemini-api/docs"><img src="https://ai.google.dev/static/site-assets/images/docs/notebook-site-button.png" height="32" width="32" />Docs on ai.google.dev</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/google-gemini/cookbook/blob/main/quickstarts"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />More notebooks in the Cookbook</a>
  </td>
</table>