In [None]:
!pip install --upgrade google-generativeai

Collecting google-generativeai
  Downloading google_generativeai-0.8.5-py3-none-any.whl.metadata (3.9 kB)
Downloading google_generativeai-0.8.5-py3-none-any.whl (155 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m155.4/155.4 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: google-generativeai
  Attempting uninstall: google-generativeai
    Found existing installation: google-generativeai 0.8.4
    Uninstalling google-generativeai-0.8.4:
      Successfully uninstalled google-generativeai-0.8.4
Successfully installed google-generativeai-0.8.5


In [None]:
# Step 1: Import libraries
import cv2
import os
import re
import json
import pandas as pd
from PIL import Image
from google.colab import files
import google.generativeai as genai

# Step 2: Ask for the video files through the Colab upload widget.
# The keys of the returned mapping are used as the video paths below.
uploaded = files.upload()
video_files = [uploaded_name for uploaded_name in uploaded.keys()]

# Step 3: Extract frames from video
def extract_frames(video_path, num_frames=3):
    """Save up to ``num_frames`` evenly spaced frames of a video as JPEG files.

    Frames are written next to the video as ``<video path without extension>_frame_<i>.jpg``.

    Args:
        video_path: Path of the video file to sample.
        num_frames: Number of frames to sample; values below 1 yield no frames.

    Returns:
        List with the paths of the JPEG files that were actually written. The
        list is empty when the video cannot be opened or when no frame could be
        read and saved.
    """
    if num_frames < 1:
        # Guards the integer division below against num_frames == 0.
        return []

    vidcap = cv2.VideoCapture(video_path)
    extracted = []
    try:
        if not vidcap.isOpened():
            print(f"⚠ Could not open video: {video_path}")
            return extracted

        total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
        step = max(1, total_frames // num_frames)
        stem = os.path.splitext(video_path)[0]

        for i in range(num_frames):
            vidcap.set(cv2.CAP_PROP_POS_FRAMES, i * step)
            success, image = vidcap.read()
            if not success:
                continue
            frame_file = f"{stem}_frame_{i}.jpg"
            # imwrite reports a failed write through its return value, so only
            # paths that really exist on disk are handed back to the caller.
            if cv2.imwrite(frame_file, image):
                extracted.append(frame_file)
    finally:
        # Release the capture even if reading or writing raised.
        vidcap.release()
    return extracted

# Step 4: Configure Gemini API
# The key is read from the GEMINI_API_KEY environment variable, or asked for
# interactively, instead of being stored in the notebook where it leaks to
# anyone the notebook is shared with.
# NOTE(review): the key that used to be hard-coded here should be treated as
# exposed and revoked.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    from getpass import getpass  # only needed for the interactive fallback
    GEMINI_API_KEY = getpass("Enter your Gemini API key: ")
genai.configure(api_key=GEMINI_API_KEY)
# temperature=0.1 — presumably chosen to keep answers consistent between runs.
model = genai.GenerativeModel(
    model_name="gemini-1.5-pro-latest",
    generation_config=genai.types.GenerationConfig(temperature=0.1)
)



# Step 6: Build prompt string
# Preamble only: the role/task description followed by the required Q:/A:
# answer format. The rendered question list is appended to it further down.
prompt = (
    "You are a research assistant specialized in analyzing multimodal data (textual transcripts, audio cues, and visual content) extracted from TikTok/YouTube videos. Your primary task is to analyze videos that discuss personal experiences before, during, and after antidepressant usage. As a health researcher investigating medication experiences shared on TikTok/YouTube, I require you to carefully examine each video individually and provide structured responses addressing the provided analytical questions. Be consistent in your analysis. Avoid changing your response unless there is strong evidence to do so.\nPlease answer the following questions in strict Q&A format."
    "\nEach question must begin with 'Q:' and each answer must begin with 'A:'. Do not skip any question.\n\n"
)


# Step 5: Define the structured question list
# Questions are grouped by analysis category, in the order they are asked.
# Every entry is (question, hint); a hint of None means the question has no
# hint and the resulting dict carries no "hint" key at all.
_QUESTIONS_BY_CATEGORY = {
    "TEXTUAL": [
        ("Did the user experience any side effects from the antidepressant?",
         "Covers any physical or psychological side effects."),
        ("Did the user report physical side effects (e.g., nausea, fatigue, dizziness)?",
         "Focus on body-related effects."),
        ("Did the user report psychological side effects (e.g., anxiety, numbness, agitation)?",
         "Includes emotional or mental effects."),
        ("Did the user report overall improvement while on medication?",
         "Positive outcomes like better mood, focus, energy."),
        ("Did the user stop taking antidepressants?",
         "For any reason — voluntary or due to issues."),
        ("After stopping antidepressants, did the user experience withdrawal symptoms?",
         "Includes brain zaps, nausea, dizziness, etc."),
        ("Did the user advocate for or warn against antidepressants?",
         "Encouraging or discouraging others about meds."),
        ("What was the stated reason for starting antidepressants? Has the user mentioned it?",
         "Use the clearest reason the speaker provides or implies. Choose from: Depression, Anxiety, Panic Attacks, OCD, PTSD, Social Anxiety, Postpartum, Stress/Burnout, Grief/Loss, Not Mentioned"),
        ("What was the overall outcome described by the user?",
         "Summarize the speaker's reflection. Choose from: Positive Outcome, Mixed Outcome, Negative Outcome, Still Ongoing, Not Mentioned"),
        ("What antidepressant medication(s) does the user mention by name?",
         "List all specific drugs mentioned, if any."),
        ("Does the user mention dosage of medication?",
         "Include dose in mg or number of pills if stated."),
        ("Does the user mention a change in medication dosage? Specify details if yes",
         "Mention any change in dosage."),
        ("Did the user mention length of usage for medication?",
         "State the duration in weeks, months, or years."),
        ("What alternative treatments did the user try, if any?",
         "May include therapy, herbal supplements, or lifestyle changes."),
        ("Did the speaker describe how antidepressants impacted their daily life?",
         "Summarize reported changes in routine or ability."),
        ("What emotional tone does the user convey throughout the video?",
         "E.g., hopeful, defeated, sarcastic, confused."),
    ],
    "VISUAL": [
        ("Is the video recorded indoors?",
         "Look for walls, lighting, furniture, or ceiling to determine an indoor setting."),
        ("Is the video recorded outdoors?",
         "Presence of natural lighting, sky, or vegetation may indicate outdoor filming."),
        ("Is the visual environment a medical or clinical setting (e.g., hospital, therapist’s office)?",
         "Look for clinical furniture, white walls, or examination tables."),
        ("Does the visual text include the name of a medication?",
         "E.g., Zoloft, Prozac, Lexapro, etc."),
    ],
    "AUDIO": [
        ("Analyze the tone, pitch, and emotional expression of the speaker's voice in the audio. Based on your analysis, categorize the overall sentiment as one of the following: Happy, Sad, Stressed, or Neutral.",
         "Pay attention to vocal modulation and expressiveness."),
        ("Listen to the background music in the video and analyze its emotional tone. Based on the rhythm, melody, and instrumentation, classify the mood of the music as one of the following: Happy, Sad, Tense, or None (if no music is present).",
         "Reflect on emotional tone conveyed through musical elements."),
        ("Is there any swearing or profanity used in the video? If so, give examples.",
         "Look for offensive or explicit language."),
    ],
    "BEHAVIORAL": [
        ("What is the age group or demographic profile of the person speaking in the video? And is the person male or female?",
         "Categorize as Child, Teenager, Adult, Elderly."),
        ("Is the person in the video a content creator/influencer or a healthcare professional such as a nurse or therapist?",
         "Determine professional vs. personal voice."),
        ("Are suicidal thoughts or tendencies discussed in the video? Summarize",
         "Handle with sensitivity and accuracy."),
        ("Does the speaker explicitly encourage viewers to seek professional help or consult a healthcare provider?",
         "Yes/No; briefly specify if Yes."),
    ],
    "MEDICAL": [
        ("Did the person report improvement after stopping the medication?", None),
        ("Did the person try multiple medications and identify which one worked best for them?", None),
        ("Was the medication recommended by a therapist or healthcare professional?", None),
        ("Does the speaker mention interactions of their medication with other substances (e.g., alcohol, caffeine)?",
         "Yes/No; briefly specify if Yes"),
        ("Is there mention of emergency situations or hospitalization experiences?",
         "Yes/No; briefly summarize if Yes"),
        ("How severe were the side effects mentioned by the user while using the medication?",
         "Use descriptive cues to classify as mild, moderate, or severe. If unclear, explain briefly."),
        ("Does the speaker indicate whether side effects improved or worsened over time?",
         "Improved/Worsened/No Change/Not Mentioned"),
        ("Does the speaker mention cognitive side effects (e.g., memory loss, difficulty concentrating)?",
         "Yes/No; briefly specify if Yes"),
        ("Does the speaker mention any sexual side effects (e.g., decreased libido)?",
         "Yes/No; briefly specify if Yes"),
        ("Is there mention of weight gain or weight loss due to medication?",
         "Gain/Loss/None Mentioned"),
        ("Does the speaker mention how long withdrawal symptoms lasted?",
         "Yes/No; specify duration if Yes"),
        ("Based on the user's description, how severe were the withdrawal symptoms they experienced?",
         "Assess the severity (mild, moderate, or severe) using the language and intensity described in the video."),
    ],
}

# Flatten the table into one dict per question: category, question, optional hint.
prompt_data = [
    {"category": category, "question": question, **({} if hint is None else {"hint": hint})}
    for category, entries in _QUESTIONS_BY_CATEGORY.items()
    for question, hint in entries
]

# Render each question as a "Q: ..." line, with its hint in parentheses when
# the entry has a non-empty one, then attach the lines to the prompt preamble.
questions = [
    f"Q: {entry['question']} (Hint: {entry['hint']})" if entry.get('hint') else f"Q: {entry['question']}"
    for entry in prompt_data
]

prompt = prompt + "\n".join(questions)

# Step 7: Run analysis and generate JSON output
# NOTE(review): only still frames are sent to the model, so the questions about
# speech, voice and music are answered without any audio — confirm this is intended.
def _parse_answers(response_text, question_lines):
    """Map each question text to its answer in a Q:/A: formatted reply.

    Args:
        response_text: Raw text returned by the model.
        question_lines: The rendered "Q: ..." lines that were sent in the prompt.

    Returns:
        Dict keyed by the bare question text (no "Q:" prefix, no hint). A
        question whose answer cannot be located maps to "Not found".
    """
    parsed = {}
    for q in question_lines:
        q_clean = q[3:].split(' (Hint')[0].strip()  # Remove Q: and hint
        # The model may echo the question with or without its "(Hint: ...)"
        # suffix, so the hint is matched optionally and kept on one line.
        pattern = re.compile(
            rf"Q:\s*{re.escape(q_clean)}(?:[ \t]*\(Hint:[^\n]*\))?\s*A:\s*(.*?)(?=\nQ:|\Z)",
            re.DOTALL,
        )
        match = pattern.search(response_text)
        parsed[q_clean] = match.group(1).strip() if match else "Not found"
    return parsed


results = []
failed_videos = []  # videos that produced no entry in `results`
for video_path in video_files:
    print(f"\n🔍 Analyzing {video_path}...")
    images = []
    try:
        # Extraction and image loading are inside the try block so that one
        # unreadable video does not abort the whole run.
        frame_paths = extract_frames(video_path)
        if not frame_paths:
            # Without frames the request would contain the prompt alone and the
            # model would answer without having seen anything of the video.
            raise ValueError("no frames could be extracted")
        for p in frame_paths:
            images.append(Image.open(p))

        response = model.generate_content([prompt] + images)
        response_text = response.text

        answers = {"Video File": video_path}
        answers.update(_parse_answers(response_text, questions))

        results.append(answers)
        print(f"✅ Done: {video_path}")

    except Exception as e:
        failed_videos.append(video_path)
        print(f"❌ Error processing {video_path}: {e}")
    finally:
        # Close the frame images so their file handles are not kept open.
        for img in images:
            img.close()

# Step 8: Save to JSON
json_output_path = "video_analysis_output006.json"
with open(json_output_path, "w", encoding="utf-8") as f:
    json.dump(results, f, indent=4, ensure_ascii=False)

print(f"\n✅ JSON saved to: {json_output_path}")
if failed_videos:
    print(f"⚠ {len(failed_videos)} video(s) could not be analyzed: {failed_videos}")


Saving Benzos vs Heroin. What’s harder to stop？？.webm to Benzos vs Heroin. What’s harder to stop？？.webm
Saving Dr. Jackie  Discusses the Transformative Effects o.webm to Dr. Jackie  Discusses the Transformative Effects o.webm
Saving How I Got Off Psych Meds in 10 Months.webm to How I Got Off Psych Meds in 10 Months.webm
Saving It Was the Worst Suffering of Any Suffering  #akathisia #informedconsent #psychmeds [2ou0xXjD-Qw].f614.mp4 to It Was the Worst Suffering of Any Suffering  #akathisia #informedconsent #psychmeds [2ou0xXjD-Qw].f614.mp4
Saving MY EXPERIENCE OF TAKING ANTIDEPRESSANTS ｜ side eff.mkv to MY EXPERIENCE OF TAKING ANTIDEPRESSANTS ｜ side eff.mkv
Saving my journey@battle to overcome my anxiety,stress an.webm to my journey@battle to overcome my anxiety,stress an.webm
Saving The “truth” about antidepressants… #antidepressant.webm to The “truth” about antidepressants… #antidepressant.webm
Saving The 6 things YOU should NOT expect from therapy… #.webm to The 6 things YOU should 

In [None]:
# Step 1: Install required libraries (if needed)
# !pip install google-generativeai opencv-python pillow

# Step 2: Import libraries
import cv2
import os
from PIL import Image
from google.colab import files # Using upload as per your code snippet
import google.generativeai as genai
import time
import json # <-- Import the json library

# Step 3: Ask for the videos through the Colab upload widget; the keys of the
# returned mapping are used as the video paths from here on.
print("Please upload your video files:")
uploaded = files.upload()
video_files = [*uploaded.keys()]

# Step 4: Extract N sample frames from each video
def _remove_frame_dir(frame_dir, reason):
    """Best-effort removal of a frame directory; a failure is only reported."""
    import shutil
    if not os.path.isdir(frame_dir):
        return
    try:
        time.sleep(0.1) # Small delay
        shutil.rmtree(frame_dir)
    except Exception as e_rem:
        print(f"  > Could not remove frame directory {frame_dir} after {reason}: {e_rem}")


def _save_frame(frame_file, image, label):
    """Write one frame to disk and return True only if the write succeeded."""
    try:
        # imwrite reports a failed write through its return value, so the
        # result has to be checked in addition to catching exceptions.
        if cv2.imwrite(frame_file, image):
            return True
        print(f"⚠ Warning: Could not write {label} {frame_file}. Error: imwrite returned False")
    except Exception as e:
        print(f"⚠ Warning: Could not write {label} {frame_file}. Error: {e}")
    return False


def extract_frames(video_path, num_frames=3, output_root="/content"):
    """Save up to ``num_frames`` evenly spaced frames of a video as JPEG files.

    The frames go into a new directory ``<output_root>/<sanitized name>_frames_<unix time>``.

    Args:
        video_path: Path of the video file to sample.
        num_frames: Maximum number of frames to extract.
        output_root: Directory under which the per-video frame directory is
            created. Defaults to ``/content``.

    Returns:
        Tuple ``(frame_paths, frame_dir)``. ``frame_paths`` lists the JPEG files
        that were written. ``frame_dir`` is ``None`` when the video does not
        exist or the directory could not be created; when the video cannot be
        opened or reports no frames, the list is empty and ``frame_dir`` names
        the directory, which has already been removed on a best-effort basis.
    """
    # Check if the video file actually exists before processing
    if not os.path.exists(video_path):
        print(f"⚠ Warning: Video file not found at path: {video_path}. Skipping frame extraction.")
        return [], None

    base_name = os.path.splitext(os.path.basename(video_path))[0]
    # Replace every character that is not alphanumeric, '_' or '-' with '_'
    # so the directory name stays valid and readable.
    safe_base_name = "".join(c if c.isalnum() or c in ('_', '-') else '_' for c in base_name)
    # The timestamp keeps directories from different runs apart.
    frame_dir = os.path.join(output_root, f"{safe_base_name}_frames_{int(time.time())}")
    try:
        os.makedirs(frame_dir, exist_ok=True)
    except OSError as e:
        print(f"❌ Error creating directory {frame_dir}: {e}. Skipping video.")
        return [], None

    vidcap = cv2.VideoCapture(video_path)
    try:
        if not vidcap.isOpened():
            print(f"⚠ Warning: Could not open video file: {video_path}. Skipping.")
            _remove_frame_dir(frame_dir, "failed open")
            return [], frame_dir

        total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames <= 0:
            print(f"⚠ Warning: Video file {video_path} has 0 or invalid frame count. Skipping.")
            _remove_frame_dir(frame_dir, "bad frame count")
            return [], frame_dir

        actual_num_frames = min(num_frames, total_frames)
        # actual_num_frames can be <= 0 when the caller passes num_frames <= 0.
        step = max(1, total_frames // actual_num_frames) if actual_num_frames > 0 else 1
        extracted = []

        for i in range(actual_num_frames):
            # Clamp so the requested position never exceeds the last frame.
            frame_num = min(i * step, total_frames - 1)
            vidcap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
            success, image = vidcap.read()
            if success:
                frame_file = os.path.join(frame_dir, f"frame_{i}.jpg")
                label = "frame"
            else:
                # Fallback: try the immediately following frame once.
                success, image = vidcap.read()
                if not success:
                    print(f"⚠ Warning: Failed to read frame {i} (position {frame_num}) and fallback from {video_path}")
                    continue
                frame_file = os.path.join(frame_dir, f"frame_{i}_fallback.jpg")
                label = "fallback frame"
            if _save_frame(frame_file, image, label):
                extracted.append(frame_file)

        return extracted, frame_dir
    finally:
        # Release the capture on every path, including the early returns.
        vidcap.release()

# Step 5: Configure Gemini API
# The key is read from the GEMINI_API_KEY environment variable, or asked for
# interactively, instead of being stored in the notebook where it leaks to
# anyone the notebook is shared with.
# NOTE(review): the key that used to be hard-coded here should be treated as
# exposed and revoked.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
if not GEMINI_API_KEY:
    from getpass import getpass  # only needed for the interactive fallback
    GEMINI_API_KEY = getpass("Enter your Gemini API key: ").strip()

if not GEMINI_API_KEY:
    print("❌ Error: Gemini API Key not found. Set the GEMINI_API_KEY environment variable or enter it when prompted.")
    model = None
else:
    try:
        genai.configure(api_key=GEMINI_API_KEY)
        model = genai.GenerativeModel(model_name="gemini-1.5-pro-latest") #gemini-1.5-pro-latest
        print("✅ Gemini API configured.")
    except Exception as e:
        # model stays None so the analysis step below is skipped.
        print(f"❌ Error configuring Gemini API: {e}")
        model = None

# Step 6: Define the common prompt sent with every video
# Fixed: the two psychological-side-effect questions were missing their closing
# parenthesis and the "seek professional help" question carried a stray quote.
# NOTE(review): the psychological-side-effect question appears twice with
# near-identical wording, and the prompt does not fix the JSON key names, so
# keys may differ between videos — confirm whether either is intended.
prompt = """
You are a mental health research assistant specialized in analyzing multimodal data (textual transcripts, audio cues, and visual content) extracted from TikTok/YouTube videos. Your primary task is to analyze  these videos that discuss personal experiences of antidepressant usage. As an AI and health researcher investigating medication experiences shared on TikTok/YouTube, examine each video to provide responses addressing the provided questions.

 Does the speaker in the video mention any side effects from the medication?(Give the response as “Yes” or “No”)
Did the speaker in the video talk about bodily discomforts and side effects from medication(e.g., nausea, fatigue, dizziness)?(Give response as “Yes”, “No”. Mention each of the side effects in 1 to 3 words)
Did the speaker report psychological side effects (like anxiety, numbness, agitation)? (Respond 'Yes' or 'No'. If 'Yes', mention each psychological effect described in 1-3 words.)
Did the speaker (sharing their experience) report psychological side effects (like anxiety, numbness, agitation)? (Respond 'Yes' or 'No'. If 'Yes', mention each psychological effect described in 1-3 words.)
After stopping antidepressants, did the speaker experience withdrawal symptoms? (Respond 'Yes' or 'No'. If 'Yes', list symptoms mentioned in 1-3 words, e.g., 'Brain zaps', 'High anxiety', 'Dizziness').
Did the speaker state their reason for starting antidepressants? (Respond 'Yes' or 'No'. If 'Yes', state the reason mentioned in 1-5 words, e.g., 'Severe depression', 'Panic attacks', 'Doctor's advice', 'Postpartum').
Analyze the speaker's reflection on the overall outcome and effectiveness of the antidepressant medication described in the video. Based only on the information provided by the speaker, classify the outcome into one of the following categories - positive, negative, mixed, still ongoing, not mentioned.
List the specific names of any antidepressant medications mentioned by the speaker. (Respond with the list of names, or 'None mentioned')
Did the speaker mention the specific dosage of their antidepressant(s)? (Respond 'Yes' or 'No'. If 'Yes', state the dosage(s) mentioned, e.g., '20mg daily', 'Started at 10mg')
Did the speaker mention how long they used the antidepressant(s)? (Respond 'Yes' or 'No'. If 'Yes', state the duration(s) mentioned, e.g., 'For about 2 years', 'Six months').
Did the speaker mention changing their antidepressant dosage (e.g., increasing, decreasing, tapering)? (Respond 'Yes' or 'No'. If 'Yes', briefly describe the change mentioned, e.g., 'Increased dose', 'Tapered off', 'Switched dosage')
Analyze the speaker's voice in the audio for emotional expression (considering tone, pitch, pace). Based only on these vocal characteristics, classify the dominant sentiment into one category: Happy, Sad, Stressed, Neutral.  If analysis is not possible, state 'analysis not possible'
Based on the visual and audio information in the video, estimate the speaker's apparent age group and gender. Respond with the age group and gender (e.g., Adult, Female). If reliable estimation is not possible, state 'Cannot determine'.
Based on the video's content, setting, presentation style, and any self-identification by the speaker, infer their most likely primary role relevant to the video's topic. Choose one category: Content Creator/ Influencer, Healthcare professional, Other.
Does the speaker explicitly encourage viewers to seek professional help or consult a healthcare provider? (Yes/No; briefly specify if Yes)
Did the speaker mention trying multiple different antidepressant medications sequentially (one after another over time)? (Respond 'Yes' or 'No'. If 'Yes', state which medications were mentioned in 1-3 words, e.g., 'Yes, Prozac, Zoloft').
Categorize speaker's described withdrawal symptom severity: Mild | Moderate | Severe | Not Mentioned (symptom or severity level). Respond with category name only
Categorize speaker's described side effect severity (while taking meds): Mild | Moderate | Severe | Not Mentioned (effect or severity level). Respond with category name only


Give the response in JSON format. Make sure the output is a single valid JSON object starting with { and ending with }. Do not include json  or  markdown markers. Where ever there is a yes make it 1 and for no make it 0.
"""

# Step 6.5: Generation settings passed along with every request
generation_config = dict(
    temperature=0.2,
    response_mime_type="application/json",  # ask the API for JSON output directly
)
print(f"ℹ Using generation config: {generation_config}")


# Step 7: Analyze each video (only if model is configured and videos exist)
def _strip_code_fence(text):
    """Remove a surrounding Markdown code fence and "json" tag from a reply.

    Handles replies wrapped as ```json ... ``` or ``` ... ```, as well as a bare
    leading "json" tag; anything else is returned stripped of outer whitespace.
    """
    cleaned = text.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned[3:]
    # A JSON document never starts with the word "json", so dropping it is safe.
    if cleaned[:4].lower() == "json":
        cleaned = cleaned[4:]
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    return cleaned.strip()


def _describe_empty_response(response):
    """Return ``(finish_reason, block_reason)`` for a reply that has no text."""
    block_reason = "Unknown reason or empty response"
    finish_reason = "Unknown"
    try:
        if response.prompt_feedback and response.prompt_feedback.block_reason:
            block_reason = response.prompt_feedback.block_reason
        if hasattr(response, 'candidates') and response.candidates:
            if hasattr(response.candidates[0], 'finish_reason'):
                finish_reason = response.candidates[0].finish_reason.name # Get the enum name
    except Exception as feedback_e:
        print(f"   > Error accessing response feedback details: {feedback_e}")
    return finish_reason, block_reason


all_responses = {} # Parsed JSON, an error dict, or an error string per video
processed_videos = 0 # Number of videos for which the API call returned
video_frame_dirs = {} # Frame directories to remove in the cleanup step

if model and video_files:
    for video_path in video_files:
        video_basename = os.path.basename(video_path)
        print(f"\nAnalyzing {video_basename}...")
        frame_paths, frame_dir = extract_frames(video_path)

        # Remember the directory for cleanup even if extraction partly failed.
        if frame_dir:
            video_frame_dirs[video_basename] = frame_dir

        if not frame_paths:
            error_message = f"❌ Error: Could not extract frames for analysis from {video_basename}."
            print(f"   {error_message}")
            all_responses[video_basename] = error_message
            continue

        images = []
        for path in frame_paths:
            try:
                images.append(Image.open(path))
            except FileNotFoundError:
                print(f"⚠ Warning: Frame image file not found at {path}. Skipping this frame.")
            except Exception as e:
                print(f"⚠ Warning: Could not open frame image {path}. Error: {e}. Skipping this frame.")

        if not images:
            error_message = f"❌ Error: Could not open any extracted frames for analysis for {video_basename}."
            print(f"   {error_message}")
            all_responses[video_basename] = error_message
            continue

        try:
            # The request is the prompt followed by the frame images.
            response = model.generate_content(
                [prompt] + images,
                generation_config=generation_config,
            )

            # Reading .text raises ValueError when the reply carries no usable
            # text (e.g. it was blocked); hasattr() would not catch that.
            try:
                response_text = response.text
            except ValueError:
                response_text = ""

            if response_text:
                try:
                    all_responses[video_basename] = json.loads(_strip_code_fence(response_text))
                    print(f"✅ Finished analysis for: {video_basename}")
                except json.JSONDecodeError as json_e:
                    error_message = f"❌ Error: Could not parse JSON response for {video_basename}. Error: {json_e}. Response text: {response_text}"
                    print(error_message)
                    all_responses[video_basename] = {"error": "Failed to parse API JSON response", "raw_response": response_text}
                except Exception as parse_e:
                    error_message = f"❌ Error: Unexpected error parsing response for {video_basename}. Error: {parse_e}. Response text: {response_text}"
                    print(error_message)
                    all_responses[video_basename] = {"error": "Unexpected error parsing API response", "raw_response": response_text}
            else:
                finish_reason, block_reason = _describe_empty_response(response)
                error_message = f"❌ Analysis blocked or failed for {video_basename}. Finish Reason: {finish_reason}, Block Reason: {block_reason}"
                print(error_message)
                all_responses[video_basename] = {"error": error_message}

            processed_videos += 1
        except Exception as e:
            # Errors raised by the API call itself.
            print(f"❌ Error during API call for {video_basename}:")
            print(f"   Error Details: {e}")
            error_message_api = f"❌ Error during API call for {video_basename}: {type(e).__name__} - {e}"
            all_responses[video_basename] = {"error": error_message_api}
        finally:
            # Close the frame images so the frame directory can be removed later.
            for img in images:
                img.close()

elif not model:
    print("\n❌ Skipping video analysis because the Gemini model could not be configured.")
elif not video_files:
    print("\n❌ No video files were uploaded or found.")

# Step 8: Display individual results
print(f"\n📊 Individual Analysis Results (Processed {processed_videos} videos):")
if not all_responses:
    print("No analysis results to display.")
else:
    for video_name, result in all_responses.items():
        print(f"\n=== {video_name} ===")
        if isinstance(result, dict):
            # ensure_ascii=False keeps emoji and non-ASCII text readable
            # instead of printing \uXXXX escapes.
            print(json.dumps(result, indent=2, ensure_ascii=False))
        else:
            # Plain error strings are printed as they are.
            print(result)
        print("-" * (len(video_name) + 8)) # Separator as wide as the "=== name ===" header

# Step 9: Generate and display the combined JSON output for all videos
print("\n" + "="*50)
print("📊 Combined JSON Output for All Videos")
print("="*50)

if not all_responses:
    print("{}") # Empty JSON object when there are no results
else:
    try:
        # ensure_ascii=False keeps video titles and emoji readable instead of
        # emitting \uXXXX escapes.
        combined_json_string = json.dumps(all_responses, indent=4, ensure_ascii=False)
        print(combined_json_string)
    except Exception as e:
        # Fall back to the raw Python representation if serialization fails.
        print(f"❌ Error generating combined JSON: {e}")
        print("\nRaw all_responses dictionary content:")
        import pprint
        pprint.pprint(all_responses)


# Step 10: Remove the temporary frame directories recorded during analysis
print("\nCleaning up temporary frame directories...")
cleaned_count, error_count = 0, 0
if not video_frame_dirs:
    print("   No frame directories tracked for cleanup.")
else:
    import shutil
    for dir_path in video_frame_dirs.values():
        # Skip entries without a path or whose directory is already gone.
        if not (dir_path and os.path.isdir(dir_path)):
            continue
        try:
            time.sleep(0.5) # Short pause before removal
            shutil.rmtree(dir_path)
        except PermissionError as pe:
            print(f"  > PermissionError removing {dir_path}: {pe}. Files might still be in use.")
            error_count += 1
        except FileNotFoundError:
            # Directory disappeared in the meantime; nothing left to do.
            continue
        except Exception as e_rem:
            print(f"  > Could not remove frame directory {dir_path}: {e_rem}")
            error_count += 1
        else:
            cleaned_count += 1

print(f"Cleanup Summary: {cleaned_count} directories removed, {error_count} errors.")


# Optional: Clean up original uploaded files if needed (they are in /content/ by default in Colab)
# print("\nCleaning up uploaded video files...")
# cleanup_removed = 0
# cleanup_errors = 0
# for video_path in video_files:
#    full_video_path = os.path.join("/content/", video_path) # Assuming Colab's /content/ directory
#    if os.path.exists(full_video_path):
#        try:
#            os.remove(full_video_path)
#            # print(f"  Removed: {full_video_path}")
#            cleanup_removed += 1
#        except Exception as e:
#            print(f"  Could not remove {full_video_path}: {e}")
#            cleanup_errors += 1
# print(f"Uploaded Video Cleanup: {cleanup_removed} removed, {cleanup_errors} errors.")

Please upload your video files:


Saving First day on ADHD meds #mentalhealth #adhd #adhdwo.webm to First day on ADHD meds #mentalhealth #adhd #adhdwo.webm
Saving Going through a storm of Akathisia and Tardive Dys.webm to Going through a storm of Akathisia and Tardive Dys.webm
Saving Have you been screened for Tardive Dyskinesia (TD).webm to Have you been screened for Tardive Dyskinesia (TD).webm
Saving Heroin vs Antidepressants： which withdrawal is wor.webm to Heroin vs Antidepressants： which withdrawal is wor.webm
Saving His Life Would Have Been Awful #accutane #mentalhe.webm to His Life Would Have Been Awful #accutane #mentalhe.webm
Saving How Antidepressants Saved My Life.mkv to How Antidepressants Saved My Life.mkv
Saving How I Got Off Psych Meds in 10 Months.webm to How I Got Off Psych Meds in 10 Months.webm
Saving How I overcame depression without medication.mp4 to How I overcame depression without medication.mp4
Saving How I REDUCED my PSYCH MEDS by 60_ in 3 months.webm to How I REDUCED my PSYCH MEDS by 60_ in 

In [None]:
# Step 1: Install required libraries (if needed)
# !pip install google-generativeai opencv-python pillow

# Step 2: Import libraries
import cv2
import os
from PIL import Image
from google.colab import files # Using upload as per your code snippet
import google.generativeai as genai
import time
import json # <-- Import the json library

# Step 3: Ask for the videos through Colab's upload widget
print("Please upload your video files:")
uploaded = files.upload()
# Iterating the returned mapping yields its keys, which are used as video paths below.
video_files = [uploaded_name for uploaded_name in uploaded]

# Step 4: Extract N sample frames from each video
def extract_frames(video_path, num_frames=3):
    """Save up to ``num_frames`` evenly spaced frames of a video as JPEG files.

    A fresh, uniquely named directory is created for the frames: under
    ``/content`` when that directory exists, otherwise under the system
    temporary directory.

    Args:
        video_path: Path of the video file to sample.
        num_frames: Maximum number of frames to extract (default 3).

    Returns:
        A tuple ``(frame_paths, frame_dir)``. ``frame_paths`` lists only the
        JPEG files that were actually written. ``frame_dir`` is the directory
        created for them, or ``None`` when the video file does not exist or
        the directory could not be created. When the video cannot be opened or
        reports no frames, ``frame_paths`` is empty and ``frame_dir`` names a
        directory that has already been removed on a best-effort basis.
    """
    import tempfile

    # Check if the video file actually exists before processing
    if not os.path.exists(video_path):
        print(f"⚠ Warning: Video file not found at path: {video_path}. Skipping frame extraction.")
        return [], None

    # Keep only alphanumerics and hyphens so the name is safe as a directory prefix.
    base_name = os.path.splitext(os.path.basename(video_path))[0]
    safe_base_name = "".join(c for c in base_name if c.isalnum() or c == '-')

    # mkdtemp guarantees a unique directory even when two videos sanitise to
    # the same name within the same second.
    parent_dir = "/content" if os.path.isdir("/content") else None
    try:
        frame_dir = tempfile.mkdtemp(prefix=f"{safe_base_name}frames", dir=parent_dir)
    except OSError as e:
        print(f"❌ Error creating frame directory for {video_path}: {e}. Skipping video.")
        return [], None

    vidcap = cv2.VideoCapture(video_path)
    try:
        if not vidcap.isOpened():
            print(f"⚠ Warning: Could not open video file: {video_path}. Skipping.")
            _remove_frame_dir(frame_dir, "failed open")
            return [], frame_dir  # Path is still returned so callers can track it

        total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames <= 0:
            print(f"⚠ Warning: Video file {video_path} has 0 or invalid frame count. Skipping.")
            _remove_frame_dir(frame_dir, "bad frame count")
            return [], frame_dir

        actual_num_frames = min(num_frames, total_frames)
        step = max(1, total_frames // actual_num_frames) if actual_num_frames > 0 else 1
        extracted = []

        for i in range(actual_num_frames):
            # Clamp so the requested position never exceeds the last frame.
            frame_num = min(i * step, total_frames - 1)
            vidcap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
            success, image = vidcap.read()
            suffix, label = "", "frame"
            if not success:
                # Fallback: try the frame that immediately follows the failed read.
                success, image = vidcap.read()
                suffix, label = "_fallback", "fallback frame"
            if not success:
                print(f"⚠ Warning: Failed to read frame {i} (position {frame_num}) and fallback from {video_path}")
                continue

            frame_file = os.path.join(frame_dir, f"frame_{i}{suffix}.jpg")
            if _write_frame(frame_file, image, label):
                extracted.append(frame_file)

        return extracted, frame_dir
    finally:
        # Always free the capture handle, including on unexpected errors.
        vidcap.release()


def _write_frame(frame_file, image, label):
    """Write ``image`` to ``frame_file``; return True only if the write succeeded."""
    try:
        written = cv2.imwrite(frame_file, image)
    except Exception as e:
        print(f"⚠ Warning: Could not write {label} {frame_file}. Error: {e}")
        return False
    if not written:
        # imwrite reports most failures through its return value, not an exception.
        print(f"⚠ Warning: Could not write {label} {frame_file}. Error: cv2.imwrite returned False")
        return False
    return True


def _remove_frame_dir(frame_dir, context):
    """Best-effort removal of a frame directory; failures are reported, not raised."""
    import shutil

    if os.path.isdir(frame_dir):
        try:
            shutil.rmtree(frame_dir)
        except Exception as e_rem:
            print(f"  > Could not remove frame directory {frame_dir} after {context}: {e_rem}")

# Step 5: Configure Gemini API
# The key is never stored in the notebook: it is read from the GEMINI_API_KEY
# environment variable, or from a Colab secret of the same name.
# SECURITY: any key that was previously pasted into this notebook must be
# treated as leaked and revoked in the Google AI / Cloud console.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "").strip()
if not GEMINI_API_KEY:
    try:
        from google.colab import userdata
        GEMINI_API_KEY = (userdata.get("GEMINI_API_KEY") or "").strip()
    except Exception:
        # Lookup is best effort (e.g. not running in Colab, or no such secret);
        # the missing key is reported explicitly just below.
        GEMINI_API_KEY = ""

if not GEMINI_API_KEY:
    print("❌ Error: Gemini API Key not found. Set the GEMINI_API_KEY environment variable or add a Colab secret named GEMINI_API_KEY.")
    model = None
else:
    try:
        genai.configure(api_key=GEMINI_API_KEY)
        model = genai.GenerativeModel(model_name="gemini-1.5-pro-latest")
        print("✅ Gemini API configured.")
    except Exception as e:
        # Later steps check `model` and skip the analysis when it is None.
        print(f"❌ Error configuring Gemini API: {e}")
        model = None

# Step 6: Define the prompt that is sent, unchanged, with every video's frames.
# It lists the analysis questions and ends by asking for a single JSON object
# in which "yes" is encoded as 1 and "no" as 0.
# NOTE(review): the two consecutive questions about psychological side effects
# are near-duplicates, several questions open a parenthesis that is never
# closed, and the "seek professional help" question contains a stray double
# quote — confirm whether these are intentional before editing the wording.
# NOTE(review): the prompt refers to transcripts and audio cues, but the
# analysis loop in this cell sends only this text plus still frames
# (`[prompt] + images`) — confirm the audio/voice questions can be answered.
prompt = """
You are a mental health research assistant specialized in analyzing multimodal data (textual transcripts, audio cues, and visual content) extracted from TikTok/YouTube videos. Your primary task is to analyze  these videos that discuss personal experiences of antidepressant usage. As an AI and health researcher investigating medication experiences shared on TikTok/YouTube, examine each video to provide responses addressing the provided questions.

 Does the speaker in the video mention any side effects from the medication?(Give the response as “Yes” or “No”)
Did the speaker in the video talk about bodily discomforts and side effects from medication(e.g., nausea, fatigue, dizziness)?(Give response as “Yes”, “No”. Mention each of the side effects in 1 to 3 words)
Did the speaker report psychological side effects (like anxiety, numbness, agitation)? (Respond 'Yes' or 'No'. If 'Yes', mention each psychological effect described in 1-3 words.
Did the speaker (sharing their experience) report psychological side effects (like anxiety, numbness, agitation)? (Respond 'Yes' or 'No'. If 'Yes', mention each psychological effect described in 1-3 words.
After stopping antidepressants, did the speaker experience withdrawal symptoms? (Respond 'Yes' or 'No'. If 'Yes', list symptoms mentioned in 1-3 words, e.g., 'Brain zaps', 'High anxiety', 'Dizziness').
Did the speaker state their reason for starting antidepressants? (Respond 'Yes' or 'No'. If 'Yes', state the reason mentioned in 1-5 words, e.g., 'Severe depression', 'Panic attacks', 'Doctor's advice', 'Postpartum').
Analyze the speaker's reflection on the overall outcome and effectiveness of the antidepressant medication described in the video. Based only on the information provided by the speaker, classify the outcome into one of the following categories - positive, negative, mixed, still ongoing, not mentioned.
List the specific names of any antidepressant medications mentioned by the speaker. (Respond with the list of names, or 'None mentioned')
Did the speaker mention the specific dosage of their antidepressant(s)? (Respond 'Yes' or 'No'. If 'Yes', state the dosage(s) mentioned, e.g., '20mg daily', 'Started at 10mg')
Did the speaker mention how long they used the antidepressant(s)? (Respond 'Yes' or 'No'. If 'Yes', state the duration(s) mentioned, e.g., 'For about 2 years', 'Six months').
Did the speaker mention changing their antidepressant dosage (e.g., increasing, decreasing, tapering)? (Respond 'Yes' or 'No'. If 'Yes', briefly describe the change mentioned, e.g., 'Increased dose', 'Tapered off', 'Switched dosage')
Analyze the speaker's voice in the audio for emotional expression (considering tone, pitch, pace). Based only on these vocal characteristics, classify the dominant sentiment into one category: Happy, Sad, Stressed, Neutral.  If analysis is not possible, state 'analysis not possible'
Based on the visual and audio information in the video, estimate the speaker's apparent age group and gender. Respond with the age group and gender (e.g., Adult, Female). If reliable estimation is not possible, state 'Cannot determine'.
Based on the video's content, setting, presentation style, and any self-identification by the speaker, infer their most likely primary role relevant to the video's topic. Choose one category: Content Creator/ Influencer, Healthcare professional, Other.
Does the speaker explicitly encourage viewers to seek professional help or consult a healthcare provider?" (Yes/No; briefly specify if Yes)
Did the speaker mention trying multiple different antidepressant medications sequentially (one after another over time)? (Respond 'Yes' or 'No'. If 'Yes', state which medications were mentioned in 1-3 words, e.g., 'Yes, Prozac, Zoloft').
Categorize speaker's described withdrawal symptom severity: Mild | Moderate | Severe | Not Mentioned (symptom or severity level). Respond with category name only
Categorize speaker's described side effect severity (while taking meds): Mild | Moderate | Severe | Not Mentioned (effect or severity level). Respond with category name only
Analyze the speaker's overall expressed stance towards antidepressants, based only on the information presented in the video. Choose the single category that best describes their primary message regarding advocating for or warning against antidepressant use:Advocacy, Warning, Mixed, Neutral, Not mentioned.


Give the response in JSON format. Make sure the output is a single valid JSON object starting with { and ending with }. Do not include json  or  markdown markers. Where ever there is a yes make it 1 and for no make it 0.
"""

# Step 6.5: Generation settings shared by every request
generation_config = dict(
    temperature=0.2,
    response_mime_type="application/json",  # ask the API to reply with JSON directly
)
print(f"ℹ Using generation config: {generation_config}")


# Step 7: Analyze each video (only if model is configured and videos exist)
def _strip_code_fence(text):
    """Return ``text`` without a surrounding Markdown code fence or ``json`` label."""
    cleaned = text.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned[3:]
    # A JSON document can never begin with the letters "json", so a leading
    # label is always safe to drop.
    if cleaned[:4].lower() == "json":
        cleaned = cleaned[4:]
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    return cleaned.strip()


def _response_text(response):
    """Return the text of an API response, or "" when it carries none."""
    try:
        return response.text or ""
    except (ValueError, AttributeError):
        # NOTE(review): the SDK's `text` accessor is understood to raise
        # ValueError for blocked/empty responses — confirm against the
        # installed google-generativeai version.
        return ""


all_responses = {}     # video name -> parsed JSON dict, error dict, or error string
processed_videos = 0   # number of videos for which the API call completed
video_frame_dirs = {}  # video name -> frame directory to clean up later

if model and video_files:
    for video_path in video_files:
        video_basename = os.path.basename(video_path)
        print(f"\nAnalyzing {video_basename}...")
        frame_paths, frame_dir = extract_frames(video_path)

        # Track the directory for cleanup even if extraction only partly worked.
        if frame_dir:
            video_frame_dirs[video_basename] = frame_dir

        if not frame_paths:
            error_message = f"❌ Error: Could not extract frames for analysis from {video_basename}."
            print(f"   {error_message}")
            all_responses[video_basename] = error_message
            continue

        images = []
        for path in frame_paths:
            try:
                images.append(Image.open(path))
            except FileNotFoundError:
                print(f"⚠ Warning: Frame image file not found at {path}. Skipping this frame.")
            except Exception as e:
                print(f"⚠ Warning: Could not open frame image {path}. Error: {e}. Skipping this frame.")

        if not images:
            error_message = f"❌ Error: Could not open any extracted frames for analysis for {video_basename}."
            print(f"   {error_message}")
            all_responses[video_basename] = error_message
            continue

        try:
            # The request is the prompt text followed by the sampled frames.
            response = model.generate_content(
                [prompt] + images,
                generation_config=generation_config,
            )

            response_text = _response_text(response)
            if response_text:
                try:
                    parsed_json = json.loads(_strip_code_fence(response_text))
                except json.JSONDecodeError as json_e:
                    error_message = f"❌ Error: Could not parse JSON response for {video_basename}. Error: {json_e}. Response text: {response_text}"
                    print(error_message)
                    all_responses[video_basename] = {"error": "Failed to parse API JSON response", "raw_response": response_text}
                except Exception as parse_e:
                    error_message = f"❌ Error: Unexpected error parsing response for {video_basename}. Error: {parse_e}. Response text: {response_text}"
                    print(error_message)
                    all_responses[video_basename] = {"error": "Unexpected error parsing API response", "raw_response": response_text}
                else:
                    if isinstance(parsed_json, dict):
                        all_responses[video_basename] = parsed_json
                        print(f"✅ Finished analysis for: {video_basename}")
                    else:
                        # Later steps expect one JSON object per video.
                        error_message = f"❌ Error: API response for {video_basename} is valid JSON but not a JSON object."
                        print(error_message)
                        all_responses[video_basename] = {"error": "API response was not a JSON object", "raw_response": response_text}
            else:
                # No usable text: report whatever blocking details are available.
                block_reason = "Unknown reason or empty response"
                finish_reason = "Unknown"
                try:
                    if response.prompt_feedback and response.prompt_feedback.block_reason:
                        block_reason = response.prompt_feedback.block_reason
                    if hasattr(response, 'candidates') and response.candidates:
                        if hasattr(response.candidates[0], 'finish_reason'):
                            finish_reason = response.candidates[0].finish_reason.name
                except Exception as feedback_e:
                    print(f"   > Error accessing response feedback details: {feedback_e}")

                error_message = f"❌ Analysis blocked or failed for {video_basename}. Finish Reason: {finish_reason}, Block Reason: {block_reason}"
                print(error_message)
                all_responses[video_basename] = {"error": error_message}

            processed_videos += 1
        except Exception as e:
            # Errors raised by the API call itself.
            print(f"❌ Error during API call for {video_basename}:")
            print(f"   Error Details: {e}")
            error_message_api = f"❌ Error during API call for {video_basename}: {type(e).__name__} - {e}"
            all_responses[video_basename] = {"error": error_message_api}
        finally:
            # Release file handles so the frame directories can be deleted later.
            for img in images:
                img.close()

elif not model:
    print("\n❌ Skipping video analysis because the Gemini model could not be configured.")
elif not video_files:
    print("\n❌ No video files were uploaded or found.")

# Step 8: Show each video's result (optional)
print(f"\n📊 Individual Analysis Results (Processed {processed_videos} videos):")
if all_responses:
    for video_name, result in all_responses.items():
        print(f"\n=== {video_name} ===")
        # Dicts (parsed JSON or structured errors) are pretty-printed;
        # anything else, such as a plain error string, is shown as is.
        rendered = json.dumps(result, indent=2) if isinstance(result, dict) else result
        print(rendered)
        print("-" * (len(video_name) + 8))  # Separator
else:
    print("No analysis results to display.")

# ──────────────────────────────────────────────────────────────────────────
# STEP 9½ – Save combined results to CSV and download
# ──────────────────────────────────────────────────────────────────────────
try:
    import pandas as pd
except ImportError:
    !pip install pandas --quiet
    import pandas as pd

csv_rows = []

for video_name, result in all_responses.items():
    # Keep only successfully parsed JSON objects (skip error strings / dicts)
    if isinstance(result, dict) and "error" not in result:
        flat_row = {"Video File": video_name}
        flat_row.update(result)          # add every key from the JSON
        csv_rows.append(flat_row)
    else:
        # Optionally record errors in the CSV
        flat_row = {"Video File": video_name, "error": result if isinstance(result, str) else result.get("error", "Unknown error")}
        csv_rows.append(flat_row)

if csv_rows:
    df = pd.DataFrame(csv_rows)
    csv_path = "video_analysis_output003.csv"
    df.to_csv(csv_path, index=False)
    print(f"\n✅ CSV saved to: {csv_path}")

    # Trigger download in Colab
    from google.colab import files
    files.download(csv_path)
else:
    print("\n⚠ No successful results to write to CSV.")



# Step 10: Remove the temporary frame directories recorded during analysis
print("\nCleaning up temporary frame directories...")
cleaned_count = 0
error_count = 0
if not video_frame_dirs:
    print("   No frame directories tracked for cleanup.")
else:
    import shutil
    for video_name, dir_path in video_frame_dirs.items():
        # Skip entries that are empty or no longer point at a directory.
        if not (dir_path and os.path.isdir(dir_path)):
            continue
        try:
            time.sleep(0.5)  # brief pause before deleting, in case files are still in use
            shutil.rmtree(dir_path)
        except FileNotFoundError:
            # Already gone (e.g. removed by an earlier cleanup attempt): not an error.
            pass
        except PermissionError as pe:
            print(f"  > PermissionError removing {dir_path}: {pe}. Files might still be in use.")
            error_count += 1
        except Exception as e_rem:
            print(f"  > Could not remove frame directory {dir_path}: {e_rem}")
            error_count += 1
        else:
            cleaned_count += 1

print(f"Cleanup Summary: {cleaned_count} directories removed, {error_count} errors.")


# Optional: Clean up original uploaded files if needed (they are in /content/ by default in Colab)
# print("\nCleaning up uploaded video files...")
# cleanup_removed = 0
# cleanup_errors = 0
# for video_path in video_files:
#    full_video_path = os.path.join("/content/", video_path) # Assuming Colab's /content/ directory
#    if os.path.exists(full_video_path):
#        try:
#            os.remove(full_video_path)
#            # print(f"  Removed: {full_video_path}")
#            cleanup_removed += 1
#        except Exception as e:
#            print(f"  Could not remove {full_video_path}: {e}")
#            cleanup_errors += 1
# print(f"Uploaded Video Cleanup: {cleanup_removed} removed, {cleanup_errors} errors.")

Please upload your video files:


Saving First day on ADHD meds #mentalhealth #adhd #adhdwo.webm to First day on ADHD meds #mentalhealth #adhd #adhdwo (1).webm
Saving Going through a storm of Akathisia and Tardive Dys.webm to Going through a storm of Akathisia and Tardive Dys (1).webm
Saving Have you been screened for Tardive Dyskinesia (TD).webm to Have you been screened for Tardive Dyskinesia (TD) (1).webm
Saving Heroin vs Antidepressants： which withdrawal is wor.webm to Heroin vs Antidepressants： which withdrawal is wor (1).webm
Saving His Life Would Have Been Awful #accutane #mentalhe.webm to His Life Would Have Been Awful #accutane #mentalhe (1).webm
✅ Gemini API configured.
ℹ Using generation config: {'temperature': 0.2, 'response_mime_type': 'application/json'}

Analyzing First day on ADHD meds #mentalhealth #adhd #adhdwo (1).webm...
✅ Finished analysis for: First day on ADHD meds #mentalhealth #adhd #adhdwo (1).webm

Analyzing Going through a storm of Akathisia and Tardive Dys (1).webm...
✅ Finished analysis fo

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


Cleaning up temporary frame directories...
Cleanup Summary: 5 directories removed, 0 errors.


In [None]:
# Step 1: Install required libraries (if needed)
# !pip install google-generativeai opencv-python pillow

# Step 2: Import libraries
import cv2
import os
from PIL import Image
from google.colab import files # Using upload as per your code snippet
import google.generativeai as genai
import time
import json # <-- Import the json library

# Step 3: Ask for the videos through Colab's upload widget
print("Please upload your video files:")
uploaded = files.upload()
# Iterating the returned mapping yields its keys, which are used as video paths below.
video_files = [uploaded_name for uploaded_name in uploaded]

# Step 4: Extract N sample frames from each video
def extract_frames(video_path, num_frames=3):
    """Save up to ``num_frames`` evenly spaced frames of a video as JPEG files.

    A fresh, uniquely named directory is created for the frames: under
    ``/content`` when that directory exists, otherwise under the system
    temporary directory.

    Args:
        video_path: Path of the video file to sample.
        num_frames: Maximum number of frames to extract (default 3).

    Returns:
        A tuple ``(frame_paths, frame_dir)``. ``frame_paths`` lists only the
        JPEG files that were actually written. ``frame_dir`` is the directory
        created for them, or ``None`` when the video file does not exist or
        the directory could not be created. When the video cannot be opened or
        reports no frames, ``frame_paths`` is empty and ``frame_dir`` names a
        directory that has already been removed on a best-effort basis.
    """
    import tempfile

    # Check if the video file actually exists before processing
    if not os.path.exists(video_path):
        print(f"⚠ Warning: Video file not found at path: {video_path}. Skipping frame extraction.")
        return [], None

    # Keep only alphanumerics and hyphens so the name is safe as a directory prefix.
    base_name = os.path.splitext(os.path.basename(video_path))[0]
    safe_base_name = "".join(c for c in base_name if c.isalnum() or c == '-')

    # mkdtemp guarantees a unique directory even when two videos sanitise to
    # the same name within the same second.
    parent_dir = "/content" if os.path.isdir("/content") else None
    try:
        frame_dir = tempfile.mkdtemp(prefix=f"{safe_base_name}frames", dir=parent_dir)
    except OSError as e:
        print(f"❌ Error creating frame directory for {video_path}: {e}. Skipping video.")
        return [], None

    vidcap = cv2.VideoCapture(video_path)
    try:
        if not vidcap.isOpened():
            print(f"⚠ Warning: Could not open video file: {video_path}. Skipping.")
            _remove_frame_dir(frame_dir, "failed open")
            return [], frame_dir  # Path is still returned so callers can track it

        total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames <= 0:
            print(f"⚠ Warning: Video file {video_path} has 0 or invalid frame count. Skipping.")
            _remove_frame_dir(frame_dir, "bad frame count")
            return [], frame_dir

        actual_num_frames = min(num_frames, total_frames)
        step = max(1, total_frames // actual_num_frames) if actual_num_frames > 0 else 1
        extracted = []

        for i in range(actual_num_frames):
            # Clamp so the requested position never exceeds the last frame.
            frame_num = min(i * step, total_frames - 1)
            vidcap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
            success, image = vidcap.read()
            suffix, label = "", "frame"
            if not success:
                # Fallback: try the frame that immediately follows the failed read.
                success, image = vidcap.read()
                suffix, label = "_fallback", "fallback frame"
            if not success:
                print(f"⚠ Warning: Failed to read frame {i} (position {frame_num}) and fallback from {video_path}")
                continue

            frame_file = os.path.join(frame_dir, f"frame_{i}{suffix}.jpg")
            if _write_frame(frame_file, image, label):
                extracted.append(frame_file)

        return extracted, frame_dir
    finally:
        # Always free the capture handle, including on unexpected errors.
        vidcap.release()


def _write_frame(frame_file, image, label):
    """Write ``image`` to ``frame_file``; return True only if the write succeeded."""
    try:
        written = cv2.imwrite(frame_file, image)
    except Exception as e:
        print(f"⚠ Warning: Could not write {label} {frame_file}. Error: {e}")
        return False
    if not written:
        # imwrite reports most failures through its return value, not an exception.
        print(f"⚠ Warning: Could not write {label} {frame_file}. Error: cv2.imwrite returned False")
        return False
    return True


def _remove_frame_dir(frame_dir, context):
    """Best-effort removal of a frame directory; failures are reported, not raised."""
    import shutil

    if os.path.isdir(frame_dir):
        try:
            shutil.rmtree(frame_dir)
        except Exception as e_rem:
            print(f"  > Could not remove frame directory {frame_dir} after {context}: {e_rem}")

# Step 5: Configure Gemini API
# The key is never stored in the notebook: it is read from the GEMINI_API_KEY
# environment variable, or from a Colab secret of the same name.
# SECURITY: any key that was previously pasted into this notebook must be
# treated as leaked and revoked in the Google AI / Cloud console.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "").strip()
if not GEMINI_API_KEY:
    try:
        from google.colab import userdata
        GEMINI_API_KEY = (userdata.get("GEMINI_API_KEY") or "").strip()
    except Exception:
        # Lookup is best effort (e.g. not running in Colab, or no such secret);
        # the missing key is reported explicitly just below.
        GEMINI_API_KEY = ""

if not GEMINI_API_KEY:
    print("❌ Error: Gemini API Key not found. Set the GEMINI_API_KEY environment variable or add a Colab secret named GEMINI_API_KEY.")
    model = None
else:
    try:
        genai.configure(api_key=GEMINI_API_KEY)
        model = genai.GenerativeModel(model_name="gemini-1.5-pro-latest")
        print("✅ Gemini API configured.")
    except Exception as e:
        # Later steps check `model` and skip the analysis when it is None.
        print(f"❌ Error configuring Gemini API: {e}")
        model = None

# Step 6: Define the prompt that is sent, unchanged, with every video.
# Each entry has the form "Field name: question and coding rule"; the closing
# instruction asks for a single JSON object with "yes" encoded as 1 and "no" as 0.
# NOTE(review): the "Content Creator/Health Professional" and "Sucidal Tendency"
# entries end with a stray double quote, "Sucidal" is misspelled (presumably it
# ends up as a JSON key — verify before renaming), and the "Alternative
# Treatment" question is an incomplete sentence. Left untouched here because
# the wording is part of the research instrument.
prompt = """
You are a mental health research assistant specialized in analyzing multimodal data (textual transcripts, audio cues, and visual content) extracted from TikTok/YouTube videos. Your primary task is to analyze  these videos that discuss personal experiences of antidepressant usage. As an AI and health researcher investigating medication experiences shared on TikTok/YouTube, examine each video to provide responses addressing the provided questions.

Title: Add the title of the video
  Bodily Discomfort: Does the speaker in the video talk about bodily discomforts and side effects from medication? If the speaker has mentioned bodily discomfort return 1. If the speaker has not mentioned any bodily discomfort return 0(Mention each of the side effects in 1 to 3 words)
 Psychological Discomfort: Does the user report psychological side effects from the medication? If the speaker has mentioned psychological side effects from the medication return 1. If the speaker has not mentioned any psychological side effects return 0(Mention each of the side effects in 1 to 3 words.
 Overall Health Change: Does the speaker report improvement or decline in overall health while on medication? If an improvement is mentioned, return 1. If decline is mentioned return -1. If no improvement or decline is mentioned return 0
 Support Use of Medication: Does the speaker advocate for the medication or warn against the use of medication? If the speaker advocate for the use of medication return 1, if the speaker advocates against the use of medication return -1, if the speaker does not advocate for or warns against the use of medication return 0.
Stopping the medication: Does the speaker mention stopping the medication? If yes return 1. If the speaker mentions they are continuing the medication return -1. If the speaker does not mention stopping the use of medication or continuing it return 0.
Withdrawal: After stopping antidepressants, does the speaker experience withdrawal symptoms? If yes return 1, if the speaker does not mention withdrawal symptoms return 0. (Mention each of the withdrawal symptoms in 1 to 3 words)
Medication Name: What antidepressant medication(s) does the speaker mention by name? If the speaker mentions multiple antidepressants medications list all of them. If no antidepressant medication is mentioned return None
Dosage: Does the speaker mention dosage of one or all of the antidepressant medications used? List out all the medications along with their dosages mentioned
 Change in Medication: Does the user mention a change in medication dosage? Specify details if yes return 1 if the speaker mentions continuing the dosage return -1 and if the speaker does not speak of change at all then return 0 (Give a change in dosage as 10mg to 20mg or 50mg to 25mg along with the name of medication if mentioned)
Duration of Usage: Did the speaker mention length of usage for antidepressant medication? If the user mentions the length of the medication return the duration in month. If no duration is mentioned return 0. Return only a number no text or explanation
Alternative Treatment: What alternative treatments did the user. This maybe things like therapy, exercise, healthy diet etc. Return each of the alternative treatment types in 1 to 3 words
Emotional Tone: What emotional tone does the user convey throughout the video?
 Gender: Based on the video image of the video and the audio does the user appear to be a Male or a Female. Return Male if determined as Male return Female if determined as Female return Undetermined if could not determine
Age: Based on the video image and the audio pitch try to determine the age of the speaker. Return Adult if the speaker appears to be an adult, return Child if the user appears to be a child, return teenager if the speaker appears to be a teenager, return Elderly if the speaker appear to be an Elderly individual.


Content Creator/Health Professional: Is the person in the video a content creator/influencer or a healthcare professional such as a nurse or therapist? If the speaker mentions or appears to be a content creator return 1 if the speaker appears to be a doctor return -1 if it is not mentioned return 0"


Sucidal Tendency: Are suicidal thoughts or tendencies discussed in the video? If suicidal thoughts or tendencies are mentioned return 1 otherwise return 0"

Give the response in JSON format. Make sure the output is a single valid JSON object starting with { and ending with }. Do not include json  or  markdown markers. Where ever there is a yes make it 1 and for no make it 0.
"""

# Step 6.5: Define Generation Configuration
# Per-request generation settings handed to the model call: a low sampling
# temperature and a request that the reply be delivered as JSON.
generation_config = dict(
    temperature=0.2,
    response_mime_type="application/json",  # <-- Request JSON output directly
)
print(f"ℹ Using generation config: {generation_config}")


# Step 7: Analyze each video (only if model is configured and videos exist)
#
# For every uploaded video: extract frames, open them as PIL images, send
# prompt + images to the model, and store the outcome in `all_responses`.
# Each entry is one of:
#   * the parsed JSON value returned by the model (success),
#   * a dict with an "error" key (API / parsing / blocked-response failures),
#   * a plain error string (frame extraction or frame opening failed).
all_responses = {}      # video basename -> parsed JSON, error dict, or error string
processed_videos = 0    # videos for which the API call returned a response object
video_frame_dirs = {}   # video basename -> frame directory, used by the cleanup step

if model and video_files:
    for video_path in video_files:
        video_basename = os.path.basename(video_path)
        print(f"\nAnalyzing {video_basename}...")
        frame_paths, frame_dir = extract_frames(video_path)  # frame paths and their directory

        # Track the directory for cleanup even if extraction only partially worked.
        if frame_dir:
            video_frame_dirs[video_basename] = frame_dir

        # Only proceed if frames were successfully extracted.
        if not frame_paths:
            error_message = f"❌ Error: Could not extract frames for analysis from {video_basename}."
            print(f"   {error_message}")
            all_responses[video_basename] = error_message
            continue  # Skip to the next video

        images = []
        valid_frame_paths = []  # Paths of the images that were opened successfully
        for path in frame_paths:
            try:
                img = Image.open(path)
                images.append(img)
                valid_frame_paths.append(path)
            except FileNotFoundError:
                print(f"⚠ Warning: Frame image file not found at {path}. Skipping this frame.")
            except Exception as e:
                print(f"⚠ Warning: Could not open frame image {path}. Error: {e}. Skipping this frame.")

        # Only proceed if at least one frame image could be opened.
        if not images:
            error_message = f"❌ Error: Could not open any extracted frames for analysis for {video_basename}."
            print(f"   {error_message}")
            all_responses[video_basename] = error_message
            continue

        try:
            # The request is a list: the prompt first, then the frame images.
            content_list = [prompt] + images
            response = model.generate_content(
                content_list,
                generation_config=generation_config,
                # Optional: pass safety_settings=[{"category": ..., "threshold": ...}, ...] if needed.
            )

            # `response.text` is a property that raises ValueError when the
            # response carries no usable text part (for example when it was
            # blocked). `hasattr` does not shield against that, so read it
            # explicitly and route failures to the blocked/empty branch below.
            try:
                response_text = response.text
            except (AttributeError, ValueError):
                response_text = None

            if response_text:
                try:
                    # Strip markdown fences / a leading "json" tag in case the
                    # model ignored the instruction to return bare JSON.
                    cleaned_text = response_text.strip()
                    if cleaned_text.startswith("```"):
                        cleaned_text = cleaned_text[3:]
                    if cleaned_text.endswith("```"):
                        cleaned_text = cleaned_text[:-3]
                    cleaned_text = cleaned_text.strip()
                    # Valid JSON never starts with the letters "json", so
                    # dropping that language tag cannot damage a real payload.
                    if cleaned_text.lower().startswith("json"):
                        cleaned_text = cleaned_text[len("json"):]
                    cleaned_text = cleaned_text.strip()

                    parsed_json = json.loads(cleaned_text)
                    all_responses[video_basename] = parsed_json  # Store the parsed value
                    print(f"✅ Finished analysis for: {video_basename}")
                except json.JSONDecodeError as json_e:
                    error_message = f"❌ Error: Could not parse JSON response for {video_basename}. Error: {json_e}. Response text: {response_text}"
                    print(error_message)
                    all_responses[video_basename] = {"error": "Failed to parse API JSON response", "raw_response": response_text}
                except Exception as parse_e:
                    error_message = f"❌ Error: Unexpected error parsing response for {video_basename}. Error: {parse_e}. Response text: {response_text}"
                    print(error_message)
                    all_responses[video_basename] = {"error": "Unexpected error parsing API response", "raw_response": response_text}
            else:
                # No text available: report whatever blocking details exist.
                block_reason = "Unknown reason or empty response"
                finish_reason = "Unknown"
                try:
                    if response.prompt_feedback and response.prompt_feedback.block_reason:
                        block_reason = response.prompt_feedback.block_reason
                    if hasattr(response, 'candidates') and response.candidates:
                        if hasattr(response.candidates[0], 'finish_reason'):
                            finish_reason = response.candidates[0].finish_reason.name  # enum name
                except Exception as feedback_e:
                    print(f"   > Error accessing response feedback details: {feedback_e}")

                error_message = f"❌ Analysis blocked or failed for {video_basename}. Finish Reason: {finish_reason}, Block Reason: {block_reason}"
                print(error_message)
                all_responses[video_basename] = {"error": error_message}

            processed_videos += 1
        except Exception as e:
            # Errors raised by the API call itself (network, quota, bad request, ...).
            print(f"❌ Error during API call for {video_basename}:")
            # For a full stack trace while debugging: import traceback; traceback.print_exc()
            print(f"   Error Details: {e}")
            error_message_api = f"❌ Error during API call for {video_basename}: {type(e).__name__} - {e}"
            all_responses[video_basename] = {"error": error_message_api}
        finally:
            # Release the file handles held by the opened frames; they are not
            # needed once the request has completed or failed.
            for opened_image in images:
                opened_image.close()

elif not model:
    print("\n❌ Skipping video analysis because the Gemini model could not be configured.")
elif not video_files:
    print("\n❌ No video files were uploaded or found.")

# Step 8: Display individual results (optional, keep if you want this)
# Prints one section per analysed video: dict results (parsed JSON or error
# dicts) are pretty-printed as JSON, anything else is printed verbatim.
print(f"\n📊 Individual Analysis Results (Processed {processed_videos} videos):")
if all_responses:
    for video_name, result in all_responses.items():
        print(f"\n=== {video_name} ===")
        print(json.dumps(result, indent=2) if isinstance(result, dict) else result)
        # Separator sized to match the "=== name ===" header above.
        print("-" * (len(video_name) + 8))
else:
    print("No analysis results to display.")

# ──────────────────────────────────────────────────────────────────────────
# STEP 9½ – Save combined results to CSV and download
# ──────────────────────────────────────────────────────────────────────────
try:
    import pandas as pd
except ImportError:
    !pip install pandas --quiet
    import pandas as pd

csv_rows = []

for video_name, result in all_responses.items():
    # Keep only successfully parsed JSON objects (skip error strings / dicts)
    if isinstance(result, dict) and "error" not in result:
        flat_row = {"Video File": video_name}
        flat_row.update(result)          # add every key from the JSON
        csv_rows.append(flat_row)
    else:
        # Optionally record errors in the CSV
        flat_row = {"Video File": video_name, "error": result if isinstance(result, str) else result.get("error", "Unknown error")}
        csv_rows.append(flat_row)

if csv_rows:
    df = pd.DataFrame(csv_rows)
    csv_path = "video_analysis_output002.csv"
    df.to_csv(csv_path, index=False)
    print(f"\n✅ CSV saved to: {csv_path}")

    # Trigger download in Colab
    from google.colab import files
    files.download(csv_path)
else:
    print("\n⚠ No successful results to write to CSV.")



# Step 10: Clean up frame directories
# Removes every directory recorded in `video_frame_dirs` and reports how many
# were removed and how many removals failed.
print("\nCleaning up temporary frame directories...")
cleaned_count = 0
error_count = 0
if video_frame_dirs: # Check if the dictionary is not empty
    # Both modules are imported here so this step is self-contained; without
    # `time` in scope the sleep below would raise NameError, which the generic
    # handler would report as a removal failure for every directory.
    import shutil
    import time
    for video_name, dir_path in video_frame_dirs.items():
        if dir_path and os.path.isdir(dir_path): # Check if path exists and is a directory
            try:
                time.sleep(0.5) # Short delay before removing, might help with file handles
                shutil.rmtree(dir_path)
                cleaned_count += 1
            except PermissionError as pe:
                print(f"  > PermissionError removing {dir_path}: {pe}. Files might still be in use.")
                error_count += 1
            except FileNotFoundError:
                # Directory vanished between the isdir check and rmtree;
                # nothing is left to clean, so this is not counted as an error.
                pass
            except Exception as e_rem:
                print(f"  > Could not remove frame directory {dir_path}: {e_rem}")
                error_count += 1
else:
    print("   No frame directories tracked for cleanup.")

print(f"Cleanup Summary: {cleaned_count} directories removed, {error_count} errors.")


# Optional: Clean up original uploaded files if needed (they are in /content/ by default in Colab)
# print("\nCleaning up uploaded video files...")
# cleanup_removed = 0
# cleanup_errors = 0
# for video_path in video_files:
#    full_video_path = os.path.join("/content/", video_path) # Assuming Colab's /content/ directory
#    if os.path.exists(full_video_path):
#        try:
#            os.remove(full_video_path)
#            # print(f"  Removed: {full_video_path}")
#            cleanup_removed += 1
#        except Exception as e:
#            print(f"  Could not remove {full_video_path}: {e}")
#            cleanup_errors += 1
# print(f"Uploaded Video Cleanup: {cleanup_removed} removed, {cleanup_errors} errors.")

Please upload your video files:


Saving First day on ADHD meds #mentalhealth #adhd #adhdwo.webm to First day on ADHD meds #mentalhealth #adhd #adhdwo.webm
Saving Going through a storm of Akathisia and Tardive Dys.webm to Going through a storm of Akathisia and Tardive Dys.webm
Saving Have you been screened for Tardive Dyskinesia (TD).webm to Have you been screened for Tardive Dyskinesia (TD).webm
Saving Heroin vs Antidepressants： which withdrawal is wor.webm to Heroin vs Antidepressants： which withdrawal is wor.webm
Saving His Life Would Have Been Awful #accutane #mentalhe.webm to His Life Would Have Been Awful #accutane #mentalhe.webm
✅ Gemini API configured.
ℹ Using generation config: {'temperature': 0.2, 'response_mime_type': 'application/json'}

Analyzing First day on ADHD meds #mentalhealth #adhd #adhdwo.webm...
✅ Finished analysis for: First day on ADHD meds #mentalhealth #adhd #adhdwo.webm

Analyzing Going through a storm of Akathisia and Tardive Dys.webm...
✅ Finished analysis for: Going through a storm of Akat

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


Cleaning up temporary frame directories...
Cleanup Summary: 5 directories removed, 0 errors.


In [None]:
import pandas as pd
import plotly.express as px

# Load the dataset
df = pd.read_excel('Final_output1.xlsx')

# Spreadsheet column headers used by the two figures, named once for readability.
MEDICATION_COL = 'Does the visual text include the name of a medication?_desc'
GENDER_COL = 'Is the person male or female?'
SEVERITY_COL = 'How severe were the side effects mentioned by the user while using the medication?_flag'

# ---------------------------
# Interactive Bar Chart for Medication Usage Frequency
# ---------------------------
# Count how often each distinct value of the medication description occurs.
# Note: the raw values may need cleaning depending on how medications are listed.
med_counts = (
    df[MEDICATION_COL]
    .value_counts()
    .rename_axis('Medication')
    .reset_index(name='Count')
)

fig1 = px.bar(
    med_counts,
    x='Medication',
    y='Count',
    title='Frequency of Medications Mentioned in Visual Text',
    labels={'Medication': 'Medication Name', 'Count': 'Frequency'},
    hover_data=['Count'],
)
# Order the bars from most to least frequent.
fig1.update_layout(xaxis=dict(categoryorder='total descending'))

# ---------------------------
# Interactive Box Plot for Gender vs Side Effects Severity
# ---------------------------
# Gender goes on the x axis and the side-effect severity flag on the y axis.
# Every observation is drawn next to its box (points='all').
fig2 = px.box(
    df,
    x=GENDER_COL,
    y=SEVERITY_COL,
    title='Gender vs Side Effects Severity',
    labels={GENDER_COL: 'Gender', SEVERITY_COL: 'Side Effects Severity'},
    points='all',
)

# Display interactive figures
fig1.show()
fig2.show()