In [50]:
import json
import os
import re
from io import StringIO
from urllib.parse import quote

import IPython
import pandas as pd
import vertexai
from dotenv import load_dotenv
from IPython.display import Markdown, Video, display

from vertexai.preview.generative_models import (
    GenerationConfig,
    GenerativeModel,
    HarmCategory,
    HarmBlockThreshold,
    Part,
    Tool,
    FunctionDeclaration,
)

In [51]:
# Read key/value pairs from a local .env file into the process environment
# (python-dotenv) so that later os.getenv() lookups can see them.
load_dotenv()

True

In [52]:
# Vertex AI target: the region is fixed here, the project comes from the
# PROJECT_ID environment variable (None when the variable is not set).
location = "us-west1"
project_id = os.environ.get("PROJECT_ID")

# Initialise the SDK once; the cells below rely on this global configuration.
vertexai.init(location=location, project=project_id)

In [53]:
# Gemini model handle used for every request in this notebook.
model = GenerativeModel("gemini-1.5-pro-001")

# Sampling parameters applied to generation requests.
generation_config = GenerationConfig(top_p=0.95, temperature=1)

# Apply the same BLOCK_ONLY_HIGH threshold to each of the four harm categories.
safety_settings = {
    category: HarmBlockThreshold.BLOCK_ONLY_HIGH
    for category in (
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        HarmCategory.HARM_CATEGORY_HARASSMENT,
    )
}

In [54]:
# Text prompt sent alongside the video. It asks the model for a JSON object with
# three string fields: "vernacular name", "scientific name" and "timestamp".
# The text (including its whitespace) is passed to the model verbatim.
instructions = """  
Please return JSON of the vernacular name of the fish and coral species, the scientific name, and the best timestamp of the frame identified from the video. Use this
example following the schema:

    {"vernacular name": str, "scientific name": str, "timestamp": str}

    All fields are required.

               """

In [55]:
def get_url_from_gcs(gcs_uri: str) -> str:
    """Convert a ``gs://bucket/object`` URI into its storage.googleapis.com URL.

    Args:
        gcs_uri: Cloud Storage URI, e.g. ``gs://my-bucket/path/clip.mp4``.

    Returns:
        ``https://storage.googleapis.com/<bucket>/<object>`` with the bucket and
        object path percent-encoded (``/`` is kept as the path separator).
        A string that does not start with ``gs://`` is returned with only its
        spaces encoded, which matches the previous behaviour for such inputs.
    """
    scheme = "gs://"
    if not gcs_uri.startswith(scheme):
        # Not a GCS URI: keep the lenient legacy behaviour instead of raising.
        return gcs_uri.replace(" ", "%20")

    # Only the leading scheme is stripped; every reserved character in the
    # object name ('#', '?', '%', non-ASCII, spaces, ...) is percent-encoded so
    # the result is a valid URL path.
    return "https://storage.googleapis.com/" + quote(gcs_uri[len(scheme):], safe="/")

In [56]:
# Clip to analyse, addressed by its Cloud Storage URI.
video_uri = 'gs://fish-dataset-test/sharktest2.mp4'

# Request payload: the video part first, then the text instructions.
contents = [Part.from_uri(uri=video_uri, mime_type="video/mp4"), instructions]

# Preview the clip inline using its storage.googleapis.com HTTPS URL.
display(Video(get_url_from_gcs(video_uri)))

In [70]:
# Send the video and instructions to the model. safety_settings is passed
# explicitly; without it the thresholds configured earlier in the notebook are
# never applied to the request.
response = model.generate_content(
    contents,
    generation_config=generation_config,
    safety_settings=safety_settings,
)



In [71]:
# Show the raw reply text; print() keeps its line breaks readable.
print(response.text)

```json
{"vernacular name": "Pelagic thresher", "scientific name": "Alopias pelagicus", "timestamp": "0:07"}
```


In [96]:
# Pull the JSON payload out of the reply as a string. The model may wrap it in
# a Markdown code fence (```json ... ```); strip only that wrapper so that
# characters such as '*', '`' or the word "json" inside the JSON values are
# left intact. If there is no fence, use the trimmed reply as-is.
_fenced = re.search(
    r"```(?:json)?\s*(.*?)\s*```", response.text, flags=re.DOTALL | re.IGNORECASE
)
data = _fenced.group(1) if _fenced else response.text.strip()

In [106]:
# Parse the cleaned JSON text into Python objects.
# NOTE(review): this rebinds `data` from a str to the parsed object, so running
# this cell a second time without re-running the previous one makes json.loads
# raise TypeError. Consider a separate name for the parsed value.
data = json.loads(data)

In [107]:
# Inspect the parsed record.
data

{'vernacular name': 'Pelagic thresher',
 'scientific name': 'Alopias pelagicus',
 'timestamp': '0:07'}

In [108]:
# Build one row per identified species. The parsed reply may be a single JSON
# object (dict) or a list of objects; wrap a lone dict in a list so both shapes
# load, instead of hard-coding index=[0], which only fits exactly one record.
df = pd.DataFrame(data if isinstance(data, list) else [data])

In [109]:
# Display the resulting table.
df

Unnamed: 0,vernacular name,scientific name,timestamp
0,Pelagic thresher,Alopias pelagicus,0:07
