# Gemini 분류

In [None]:
import pandas as pd
import os
import google.generativeai as genai
import time
import json
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, Audio
import base64
import re

# Read the API key from the environment so the secret does not have to live in
# the notebook source; falls back to the empty placeholder when it is unset.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
genai.configure(api_key=GOOGLE_API_KEY)

# Input table; the row loop later in this file reads its 'filename' and 'text'
# columns. NOTE(review): the CSV path is blank in the source — set it before running.
df = pd.read_csv('')

# Directory that is joined with each row's 'filename' to locate the audio file.
audio_dir = '/content/a/'
model = genai.GenerativeModel('gemini-1.5-pro')

# Emotion labels requested from the model; each one also becomes a column of df.
EMOTIONS = ['Happiness', 'Sadness', 'Disgust', 'Fear', 'Surprise', 'Anger', 'Other', 'Neutral']

# One alternative per lexical element of "almost JSON". Strings come first so
# that comment markers, commas and colons inside a string are never touched.
_JSON_TOKEN_RE = re.compile(
    r'(?P<string>"(?:\\.|[^"\\])*"?)'
    r'|(?P<line_comment>//[^\n]*)'
    r'|(?P<block_comment>/\*.*?\*/)'
    r'|(?P<number>-?(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][+-]?[0-9]+)?)'
    r'|(?P<word>[A-Za-z_][A-Za-z0-9_]*)'
    r'|(?P<punct>[{}\[\],:])'
    r'|(?P<space>\s+)'
    r'|(?P<junk>.)',
    re.DOTALL,
)
_KEY_SUFFIX_RE = re.compile(r'\s*:')
_JSON_LITERALS = ('true', 'false', 'null')


def clean_json_string(json_str):
    """Best-effort repair of JSON-like text so that ``json.loads`` can parse it.

    The text is tokenized in a single pass and rebuilt with these repairs:

    - ``//`` and ``/* ... */`` comments (including multi-line ones) are removed,
      but only outside string literals;
    - a bare word or number that directly follows ``{`` or ``,`` and is followed
      by ``:`` is treated as an unquoted key and wrapped in double quotes;
    - the literals ``true``, ``false`` and ``null`` are kept; any other bare
      word and any stray character outside a string is dropped;
    - a comma directly before ``}`` or ``]`` (ignoring whitespace) is removed.

    String literals are copied through unchanged.

    Args:
        json_str: Raw text expected to contain a JSON value.

    Returns:
        The repaired text. It is not guaranteed to be valid JSON; callers
        should still handle ``json.JSONDecodeError``.
    """
    tokens = []
    last_sig = -1  # index in `tokens` of the most recent non-whitespace token

    for m in _JSON_TOKEN_RE.finditer(json_str):
        kind = m.lastgroup
        text = m.group()

        if kind in ('line_comment', 'block_comment', 'junk'):
            continue
        if kind == 'space':
            tokens.append(text)
            continue

        prev = tokens[last_sig] if last_sig >= 0 else ''

        if kind in ('word', 'number'):
            if prev in ('{', ',') and _KEY_SUFFIX_RE.match(json_str, m.end()):
                # Unquoted object key such as {Happiness: 1}.
                text = f'"{text}"'
            elif kind == 'word' and text not in _JSON_LITERALS:
                # Prose the model added around or inside the object.
                continue
        elif kind == 'punct' and text in '}]' and prev == ',':
            # Trailing comma before a closing bracket: blank it out.
            tokens[last_sig] = ''

        last_sig = len(tokens)
        tokens.append(text)

    return ''.join(tokens)

def get_audio_data(audio_path):
    """Return the full contents of the file at ``audio_path`` as bytes.

    The file is opened in binary mode and closed before returning. Errors
    raised by ``open`` (for example a missing file) propagate to the caller.
    """
    with open(audio_path, 'rb') as fh:
        return fh.read()

def _emotion_flag(value):
    """Coerce a model-supplied value to the 0/1 flag the prompt asks for.

    Returns 1 only when the value is numerically equal to 1 (``1``, ``1.0``,
    ``True`` or ``"1"``); everything else, including unparseable values, is 0.
    """
    try:
        return int(float(value) == 1)
    except (TypeError, ValueError, OverflowError):
        return 0


def analyze_audio_and_text(audio_data, text, mime_type="audio/wav"):
    """Ask the module-level Gemini ``model`` which emotions are present.

    The prompt and the raw audio bytes are sent in one ``generate_content``
    call. The reply is narrowed to its outermost ``{...}`` span, repaired with
    ``clean_json_string`` and parsed as JSON. If it does not parse to an
    object, a regex fallback looks for ``<Emotion>: <digits>`` pairs in the
    reply instead.

    Args:
        audio_data: Raw bytes of the audio clip.
        text: Transcript text interpolated into the prompt.
        mime_type: MIME type declared for ``audio_data``. Defaults to
            ``"audio/wav"``, the value that was previously hard-coded.

    Returns:
        A dict with one key per entry of ``EMOTIONS`` (in that order) whose
        values are 0 or 1, or ``{"Error": message}`` when the request fails or
        the reply cannot be interpreted. This function does not raise; every
        exception is reported through the ``"Error"`` key.
    """
    prompt = f"""
    Analyze the emotions expressed in this audio and text.
    Text: "{text}"

    For each of the following emotion categories, indicate if the emotion is present (1) or not present (0):
    - Happiness
    - Sadness
    - Disgust
    - Fear
    - Surprise
    - Anger
    - Other
    - Neutral

    Your response MUST be ONLY a valid JSON object with NO COMMENTS, like this:
    {{
      "Happiness": 0 or 1,
      "Sadness": 0 or 1,
      "Disgust": 0 or 1,
      "Fear": 0 or 1,
      "Surprise": 0 or 1,
      "Anger": 0 or 1,
      "Other": 0 or 1,
      "Neutral": 0 or 1
    }}

    IMPORTANT:
    - Use ONLY 0 or 1 as values (0 = not present, 1 = present)
    - Do NOT add ANY comments, explanations, or text outside the JSON
    - Do NOT use // or /* */ comment syntax anywhere in your response
    - ONLY return a valid JSON object with no additional text or characters
    - Multiple emotions can be present simultaneously (have value 1)

    Consider the voice tone, intonation, pace of the audio, as well as the text content.
    """

    try:
        response = model.generate_content([
            prompt,
            {"mime_type": mime_type, "data": audio_data}
        ])

        emotion_text = response.text.strip()

        # Keep only the outermost {...} span so surrounding prose or code
        # fences do not reach the JSON parser.
        json_match = re.search(r'({.*})', emotion_text, re.DOTALL)
        if json_match:
            emotion_text = json_match.group(1)

        cleaned_json = clean_json_string(emotion_text)

        try:
            parsed = json.loads(cleaned_json)
        except json.JSONDecodeError:
            parsed = None

        # Valid JSON that is not an object (a list, a bare number, ...) is
        # treated like a parse failure instead of raising AttributeError.
        if isinstance(parsed, dict):
            return {emotion: _emotion_flag(parsed.get(emotion, 0)) for emotion in EMOTIONS}

        print(f"JSON 파싱 오류: {emotion_text}")

        # Fallback: pull "<Emotion>: <digits>" pairs straight out of the text.
        manual_dict = {}
        for emotion in EMOTIONS:
            pattern = rf'"?{emotion}"?\s*:\s*(\d+)'
            match = re.search(pattern, emotion_text)
            manual_dict[emotion] = _emotion_flag(match.group(1)) if match else 0

        # An all-zero result cannot be told apart from "nothing matched", so
        # it is still reported as a parse error.
        if any(manual_dict.values()):
            print("수동 추출 성공!")
            return manual_dict

        return {"Error": "JSON 파싱 오류"}

    except Exception as e:
        # Boundary handler: API, network and blocked-response failures are
        # reported to the caller through the "Error" key.
        print(f"분석 중 오류 발생: {e}")
        return {"Error": str(e)}

# Start every emotion column at 0 so rows that are skipped or fail keep a
# well-defined value.
for emotion in EMOTIONS:
    df[emotion] = 0

df['primary_emotion'] = None

# Classify each row's audio clip together with its text and write the flags
# back into df.
for idx, row in df.iterrows():
    filename = row['filename']
    text = row['text']

    print(f"Processing {idx+1}/{len(df)}: {filename}")
    print(f"Text: '{text}'")

    audio_path = os.path.join(audio_dir, filename)

    if os.path.exists(audio_path):
        try:
            audio_data = get_audio_data(audio_path)

            emotion_results = analyze_audio_and_text(audio_data, text)

            if 'Error' in emotion_results:
                # No `continue` here: a failed call must still reach the
                # separator and the sleep below, otherwise API errors (for
                # example rate limiting) are followed by an immediate request.
                print(f"Error: {emotion_results['Error']}")
            else:
                for emotion, is_present in emotion_results.items():
                    df.at[idx, emotion] = is_present

                detected_emotions = [emotion for emotion, is_present in emotion_results.items() if is_present == 1]

                # The primary emotion is the first detected one in the order
                # of the result dict.
                if detected_emotions:
                    df.at[idx, 'primary_emotion'] = detected_emotions[0]
                else:
                    df.at[idx, 'primary_emotion'] = 'None'

                print("감정 분석 결과:")
                for emotion in EMOTIONS:
                    present = emotion_results.get(emotion, 0)
                    status = "있음" if present == 1 else "없음"
                    print(f"  {emotion}: {status}")

                print(f"검출된 감정들: {', '.join(detected_emotions) if detected_emotions else '없음'}")
                print(f"주 감정: {df.at[idx, 'primary_emotion']}")

                # Periodic checkpoint so a crash does not lose all progress.
                if (idx + 1) % 10 == 0:
                    df.to_csv(f'vocals_with_emotions_partial_{idx+1}.csv', index=False)

        except Exception as e:
            # Best-effort batch run: report the failure and move on.
            print(f"오디오 처리 중 오류 발생: {e}")
    else:
        print(f"오디오 파일을 찾을 수 없음: {audio_path}")

    print("-" * 50)
    # Pause between rows to space out API requests.
    time.sleep(1)

df.to_csv('vocals_with_emotions.csv', index=False)