In [8]:
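# One-time environment setup (uncomment the lines you need):
# !pip install --upgrade google-genai
# !pip install asyncio  # NOTE: asyncio is part of the Python 3 standard library, so this install is normally unnecessary
# gcloud auth application-default login  # shell command (not Python): sets up Application Default Credentials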

In [9]:
# Input location: the GCS bucket and the folder (object-name prefix) that hold the videos.
# Both names are read by the cells below when listing and processing files.
bucket_name = "fab_public_bucket"
folder_path = "video/thanawyat_al_nasim"

In [10]:
import random


def get_random_region(regions_list=None):
    """Return a randomly selected region name.

    The result is used elsewhere in this notebook as the ``location`` argument
    of ``vertexai.init``.

    Args:
        regions_list: Optional iterable of region-name strings to choose from.
            When omitted (or ``None``), the built-in list of European and US
            regions defined below is used.

    Returns:
        One region string picked uniformly at random from the candidates.

    Raises:
        IndexError: If ``regions_list`` is provided but empty.
    """
    if regions_list is None:
        # Default candidates, kept as a tuple so nothing has to be parsed per call.
        regions_list = (
            "europe-west4",
            "europe-west9",
            "europe-west2",
            "europe-west3",
            "europe-west1",
            "europe-west6",
            "europe-southwest1",
            "europe-west8",
            "europe-north1",
            "europe-central2",
            "us-east5",
            "us-south1",
            "us-central1",
            "us-west4",
            "us-east1",
            "us-east4",
            "us-west1",
        )

    # random.choice needs an indexable sequence; list() also accepts sets/generators.
    return random.choice(list(regions_list))

In [11]:
from google.cloud import storage

def list_files_in_gcs_folder(bucket_name, folder_path, storage_client=None):
    """
    Lists the selected video files (as gs:// URIs) in a Google Cloud Storage folder.

    Only objects whose name ends with ".mp4" AND contains "00" are returned
    (presumably to select numbered segments such as "...-001.mp4" — note the
    "00" check applies to the whole object name, not just the file name).

    Args:
        bucket_name: The name of the GCS bucket.
        folder_path: The path to the folder within the bucket, with or without
            a trailing slash. An empty string lists from the bucket root.
        storage_client: Optional client object exposing ``bucket(name)``; a new
            ``storage.Client()`` is created when omitted.

    Returns:
        A list of "gs://<bucket>/<object name>" strings for the matching objects.
    """

    if storage_client is None:
        storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)

    # GCS prefixes are plain string prefixes: without the trailing slash,
    # "video/foo" would also match objects under "video/foo_other/".
    prefix = folder_path.rstrip("/")
    if prefix:
        prefix += "/"

    return [
        f"gs://{bucket_name}/{blob.name}"
        for blob in bucket.list_blobs(prefix=prefix)
        if blob.name.endswith(".mp4") and "00" in blob.name
    ]

# Sanity check: show which video URIs the listing returns for the configured bucket/folder.
print(list_files_in_gcs_folder(bucket_name, folder_path))

['gs://fab_public_bucket/video/thanawyat_al_nasim/Thanawyat_Al_Nasim_1_KSA-001.mp4', 'gs://fab_public_bucket/video/thanawyat_al_nasim/Thanawyat_Al_Nasim_1_KSA-002.mp4', 'gs://fab_public_bucket/video/thanawyat_al_nasim/Thanawyat_Al_Nasim_1_KSA-003.mp4', 'gs://fab_public_bucket/video/thanawyat_al_nasim/Thanawyat_Al_Nasim_1_KSA-004.mp4', 'gs://fab_public_bucket/video/thanawyat_al_nasim/Thanawyat_Al_Nasim_1_KSA-005.mp4', 'gs://fab_public_bucket/video/thanawyat_al_nasim/Thanawyat_Al_Nasim_1_KSA-006.mp4', 'gs://fab_public_bucket/video/thanawyat_al_nasim/Thanawyat_Al_Nasim_1_KSA-007.mp4']


In [12]:
# import base64
import vertexai
from vertexai.generative_models import GenerativeModel, Part, SafetySetting

def generate(user_prompt, system_prompt, random_region, video_uri, project_id="testfab-362608"):
    """Send one video plus a prompt to Gemini and return the raw response text.

    Args:
        user_prompt: Instruction text sent to the model together with the video.
        system_prompt: Text used as the model's system instruction.
        random_region: Location passed to ``vertexai.init`` for this call.
        video_uri: URI of the video; it is attached with MIME type "video/mp4".
        project_id: Google Cloud project passed to ``vertexai.init``. Defaults
            to the project this notebook was written against.

    Returns:
        ``response.text`` of the non-streaming ``generate_content`` call. The
        request asks for "application/json" output; the string is returned
        unparsed.
    """
    # Initialised on every call because each call may target a different region.
    vertexai.init(project=project_id, location=random_region)

    model = GenerativeModel(
        "gemini-1.5-pro-002",
        system_instruction=[system_prompt],
    )
    video_part = Part.from_uri(mime_type="video/mp4", uri=video_uri)

    generation_config = {
        "max_output_tokens": 8192,
        "temperature": 1,
        "top_p": 0.95,
        "response_mime_type": "application/json",
    }

    # Every listed harm category gets the same threshold (OFF).
    safety_settings = [
        SafetySetting(
            category=category,
            threshold=SafetySetting.HarmBlockThreshold.OFF,
        )
        for category in (
            SafetySetting.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
            SafetySetting.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
            SafetySetting.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
            SafetySetting.HarmCategory.HARM_CATEGORY_HARASSMENT,
        )
    ]

    response = model.generate_content(
        [video_part, user_prompt],
        generation_config=generation_config,
        safety_settings=safety_settings,
        stream=False,
    )

    return response.text

# Per-video instruction sent to the model by generate(). The two templates below
# must stay structurally well formed (one quoted value per key, comma-separated),
# and the keys must match those used in the aggregation step's response schema.
user_prompt = """<INSTRUCTIONS>
                    Provide: Genre, Categories and Description (as detailed as possible) for the video. Strictly adhere to the following JSON format and just output data based on the schema:
                            {'Genre': '(options: Action, Adventure, Comedy, Drama, Fantasy, Horror, Musicals, Mystery, Romance, Science Fiction, Sports, Thriller, Western)',
                             'Categories': '(options: movie, serie, live sport)',
                             'Keywords': '(Identify relevant keywords that are present in the video or closely related to its content. Integrate these keywords naturally within the description if possible.)',
                             'Actors_name': '(Identify name of the main actors from the title sequence opening credits)',
                             'Director_name': '(Identify name of the director from the title sequence opening credits)',
                             'Current_Language': '(Identify the current - spoken - language of the content)',
                             'Original_Language': '(Identify the original language of the content)',
                             'Transcription_availability': '(Language of Subtitles availability if any.)',
                             'Description': '(Very detailed description of media content)'
                             }
                </INSTRUCTIONS>

                <OUTPUT Format>
                JSON
                [
                {
                'Genre': '',
                'Categories': '',
                'Keywords': '',
                'Actors_name': '',
                'Director_name': '',
                'Current_Language': '',
                'Original_Language': '',
                'Transcription_availability': '',
                'Description': ''
                }
                ]
                </OUTPUT Format>"""

# System instruction used for the per-video calls.
# Alternative persona, kept disabled for experimentation:
# system_prompt = """You are an SEO expert, skilled in optimizing text for search engines. You excel at understanding video content and aligning text with it for maximum searchability."""
system_prompt = """You are a movie and series buff, passionate and knowledgeable about movies and series. You know exactly what is needed to find specific content."""


# Run the per-video analysis: one generate() call per listed video URI.
files = list_files_in_gcs_folder(bucket_name, folder_path)
# Maps file name (last path component of the URI) -> raw response text from generate().
final_res = {}

for file in files:
    # A fresh random region is picked for every request.
    region = get_random_region()
    # Key results by the bare file name rather than the full gs:// URI.
    filename = file.split("/")[-1]
    print(f"Region chosen: {region}")
    res=generate(user_prompt, system_prompt, region,file)
    final_res[filename] = res

Region chosen: europe-west1
Region chosen: europe-west2
Region chosen: us-east5
Region chosen: europe-west8
Region chosen: europe-west6


In [13]:
# Display the collected per-file responses (file name -> raw response string).
final_res

{'Thanawyat_Al_Nasim_1_KSA-001.mp4': '{"Genre": "Drama", "Categories": "serie", "Keywords": "Saudi Arabia, family, drama, social issues", "Actors_name": ["Ali", "Amir", "Khalid", "Saad","Haya","Layla"], "Director_name": "Tarig", "Current_Language": "Arabic", "Original_Language": "Arabic", "Transcription_availability": ["Arabic", "English", "French"], "Description": "This dramatic series, set in Saudi Arabia, delves into the complexities of family life and tackles pertinent social issues. The opening sequence introduces us to several key characters. We see a man waking up in his bedroom, followed by a young woman praying in her room, and another woman getting ready for the day. The scenes shift to a man buying bread from a bakery and engaging in a conversation with another man on the street. Later, two men are shown sitting on traditional floor cushions, eating and talking. The dialogue hints at financial difficulties and mentions a debt of 300 riyals. The overall tone suggests a narrat

In [14]:
# Google Cloud project used for the aggregation request below.
project_id = 'testfab-362608'

from vertexai.generative_models import GenerationConfig, GenerativeModel

# Pick a region for this single aggregation request and log the choice.
region = get_random_region()
print(f"Region chosen: {region}")

# Point the Vertex AI SDK at the chosen project/region before creating the model.
vertexai.init(project=project_id, location=region)

# Schema for the aggregated answer: a single object in which every field is a
# required string. The field order below is the order used for both
# "properties" and "required".
_schema_fields = (
    "Genre",
    "Categories",
    "Current_Language",
    "Original_Language",
    "Keywords",
    "Actors_name",
    "Director_name",
    "Transcription_availability",
    "Description",
)

response_schema = {
    "type": "OBJECT",
    "properties": {field: {"type": "STRING"} for field in _schema_fields},
    "required": list(_schema_fields),
}


    # Concatenate Current_Language if they are different. 
    # Concatenate Original_Language if they are different. 


# Aggregation prompt: merge instructions followed by the string form of final_res
# (str() of the dict, i.e. file name -> raw per-video response text).
prompt = f"""
    Concatenate Genre if they are different. 
    Concatenate Categories if they are different. 
    Concatenate Keywords if they are different (limits to 10 keywords maximum).
    Concatenate Actors_name if they are different.
    Concatenate Director_name if they are different.
    Concatenate Transcription_availability if they are different.
    Summarize the Description for all the files in 20 words maximum. 
    {str(final_res)}
"""

# Aggregation call: ask the flash model for one JSON object that conforms to
# response_schema, then print the raw JSON text.
model = GenerativeModel("gemini-1.5-flash-002")

summary_config = GenerationConfig(
    response_mime_type="application/json",
    response_schema=response_schema,
)
response = model.generate_content(prompt, generation_config=summary_config)

print(response.text)

Region chosen: us-west1
{"Genre": "Drama, Comedy, Thriller", "Categories": "serie", "Current_Language": "Arabic", "Original_Language": "Arabic", "Keywords": "Saudi Arabia, family, drama, social issues, relationships, conflicts, school, work, money, marriage", "Actors_name": "Ali, Amir, Khalid, Saad, Haya, Layla, Unknown, null", "Director_name": "Tarig, Unknown, null", "Transcription_availability": "Arabic, English, French, no", "Description": "Saudi Arabian series exploring family life, social issues, school life, and cultural nuances through comedic and dramatic storylines. "}
