In [3]:
import IPython
from dotenv import load_dotenv
import os 
import vertexai
from IPython.display import Markdown, Video, display 
import pandas as pd
from io import StringIO
import json

from vertexai.preview.generative_models import (
    GenerationConfig,
    GenerativeModel,
    HarmCategory,
    HarmBlockThreshold,
    Part,
    Tool,
    FunctionDeclaration,
)

In [4]:
load_dotenv()

True

In [5]:
project_id = os.getenv('PROJECT_ID')
location = "us-west1"

vertexai.init(project=project_id, location=location)

In [6]:
model = GenerativeModel("gemini-1.5-pro-001")

generation_config = GenerationConfig(temperature=1, top_p=0.95)

safety_settings = {
    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH,
    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
}

In [7]:
instructions = """  
Please return JSON of the vernacular name of the fish and coral species, the scientific name, and the initial timestamp identified from the video. Use this
example following the schema:

    {"vernacular name": str, "scientific name": str, "timestamp": str}

    All fields are required.

               """

In [8]:
def get_url_from_gcs(gcs_uri: str) -> str:
    return gcs_uri.replace("gs://", "https://storage.googleapis.com/").replace(
        " ", "%20"
    )

In [9]:
video_uri = 'gs://fish-dataset-test/fish_data6.mp4'

video_url = 'https://www.youtube.com/watch?v=PmiMsvhxP9A'

contents = [
    Part.from_uri(
        uri=video_uri,
        mime_type="video/mp4",
    ),
    instructions
]

display(Video(get_url_from_gcs(video_uri)))

In [10]:
response = model.generate_content(contents, generation_config=generation_config)



I0000 00:00:1723939126.077977 10690892 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


In [11]:
print(response.text)

```json
[
    {"vernacular name": "Reticulated dascyllus", "scientific name": "Dascyllus reticulatus", "timestamp": "0:00"},
    {"vernacular name": "Black and white fish", "scientific name": "Dascyllus arustus", "timestamp": "0:05"},
    {"vernacular name": "Royal angelfish", "scientific name": "Pygoplites diacanthus", "timestamp": "0:25"},
    {"vernacular name": "Orange fish", "scientific name": "Centropyge aurantia", "timestamp": "0:38"},
    {"vernacular name": "Staghorn coral", "scientific name": "Acropora cervicornis", "timestamp": "0:41"}
]
```


In [12]:
data = response.text.replace('*', '').replace("\n", "").replace('json', "").replace("`", "")

In [13]:
data

'[    {"vernacular name": "Reticulated dascyllus", "scientific name": "Dascyllus reticulatus", "timestamp": "0:00"},    {"vernacular name": "Black and white fish", "scientific name": "Dascyllus arustus", "timestamp": "0:05"},    {"vernacular name": "Royal angelfish", "scientific name": "Pygoplites diacanthus", "timestamp": "0:25"},    {"vernacular name": "Orange fish", "scientific name": "Centropyge aurantia", "timestamp": "0:38"},    {"vernacular name": "Staghorn coral", "scientific name": "Acropora cervicornis", "timestamp": "0:41"}]'

In [14]:

json_data = json.dumps(json.loads(data), indent=4)

In [15]:
json_buffer = StringIO(json_data)

In [16]:
df = pd.read_json(json_buffer)

In [17]:
df

Unnamed: 0,vernacular name,scientific name,timestamp
0,Reticulated dascyllus,Dascyllus reticulatus,2024-08-17 00:00:00
1,Black and white fish,Dascyllus arustus,2024-08-17 00:05:00
2,Royal angelfish,Pygoplites diacanthus,2024-08-17 00:25:00
3,Orange fish,Centropyge aurantia,2024-08-17 00:38:00
4,Staghorn coral,Acropora cervicornis,2024-08-17 00:41:00
