In [1]:
!pip uninstall lingua -y
!pip uninstall lingua-language-detector -y
!pip install lingua-language-detector
!pip show lingua-language-detector
!pip install deep_translator
!pip install fastapi uvicorn



Found existing installation: lingua-language-detector 2.0.2
Uninstalling lingua-language-detector-2.0.2:
  Successfully uninstalled lingua-language-detector-2.0.2
Collecting lingua-language-detector
  Using cached lingua_language_detector-2.0.2-cp312-none-win_amd64.whl.metadata (353 kB)
Using cached lingua_language_detector-2.0.2-cp312-none-win_amd64.whl (73.3 MB)
Installing collected packages: lingua-language-detector
Successfully installed lingua-language-detector-2.0.2




Name: lingua-language-detector
Version: 2.0.2
Summary: An accurate natural language detection library, suitable for short text and mixed-language text
Home-page: https://github.com/pemistahl/lingua-rs
Author: Peter M. Stahl <pemistahl@gmail.com>
Author-email: "Peter M. Stahl" <pemistahl@gmail.com>
License: Apache-2.0
Location: C:\Users\clcle\AppData\Local\Programs\Python\Python312\Lib\site-packages
Requires: 
Required-by: 












In [5]:
import requests
import re
from lingua import LanguageDetectorBuilder, Language
from deep_translator import GoogleTranslator
from fastapi import FastAPI
from typing import Optional
import uvicorn

# Azure OpenAI 및 Azure Search 설정
AZURE_OPENAI_ENDPOINT = 'https://openai-team3.openai.azure.com/'
AZURE_OPENAI_API_KEY = '7u1gKFj9UhcAVQD1lLfCIrlXGVWm894NEljVILS0jG7z95cCwSkHJQQJ99ALACHYHv6XJ3w3AAABACOGKTgQ'
DEPLOYMENT_NAME = 'gpt-4o'
SEARCH_ENDPOINT = "https://ai-search-team3.search.windows.net"
SEARCH_API_KEY = "SVteEFiHV62vyfZ56a6xsfba10kfSLoGseuenSigufAzSeCa4IB1"
SEARCH_INDEX_NAME = "keyword-index"
SEMANTIC_CONFIGURATION = "keyword-semantic"

# 언어 감지기 초기화
detector = LanguageDetectorBuilder.from_all_languages().build()

# FastAPI 호출출
app = FastAPI()

def detect_language(text):
    """텍스트의 언어를 감지"""
    detected_language = detector.detect_language_of(text)
    if detected_language is not None:
        return detected_language.name.lower()  # 언어 이름을 소문자로 반환
    else:
        return "unknown"

def translate_to_korean(text):
    """사용자 입력을 한국어로 번역"""
    try:
        return GoogleTranslator(source='auto', target='ko').translate(text)
    except Exception as e:
        print(f"Error in translation: {e}")
        return text  # 번역 실패 시 원본 텍스트 반환

def clean_response(text):
    """불필요한 기호 및 문구를 정리하는 함수"""
    cleaned_text = re.sub(r'\[doc\d+\]', '', text)
    cleaned_text = re.sub(r'[\[\]]', '', cleaned_text)
    return cleaned_text.strip()

def validate_citations(citations):
    """
    Citations 내용이 비어있거나 너무 짧은 경우 False 반환.
    """
    print(f"\nValidating citations... Total citations: {len(citations)}")
    if not citations:
        print("\nNo citations found.")
        return False
    for i, citation in enumerate(citations):
        content = citation.get('content', '')
        print(f"\nCitation {i + 1}: {content[:50]}...")  # 첫 50자만 출력
        if len(content.strip()) < 1:  # 너무 짧은 내용은 무효 처리
            print("Invalid citation detected (too short).")
            return False
    return True

def summarize_with_gpt(citations, user_input, target_language):
    """Citation 데이터를 요약하여 질문에 답변 생성"""
    print("\nSummarizing citations using GPT...")
    combined_text = " ".join([citation.get('content', '').strip() for citation in citations])

    summarization_prompt = f"""
    Question: {user_input}
    Summarize the following information in {target_language} to answer the question:
    {combined_text}
    """

    payload = {"messages": [{"role": "user", "content": summarization_prompt}], "temperature": 0.7, "max_tokens": 1000}
    headers = {"Content-Type": "application/json", "api-key": AZURE_OPENAI_API_KEY}

    response = requests.post(
        f"{AZURE_OPENAI_ENDPOINT}openai/deployments/{DEPLOYMENT_NAME}/chat/completions?api-version=2024-02-15-preview",
        headers=headers, json=payload
    )

    if response.status_code == 200:
        return clean_response(response.json()['choices'][0]['message']['content'].strip())
    else:
        print(f"Error during summarization: {response.status_code}")
        return "An error occurred while summarizing the citations."

def fallback_to_gpt(question, language):
    """Search 실패 시 GPT 자체 지식으로 답변 생성"""
    print("Executing fallback: Generating response using GPT knowledge.")
    fallback_prompt = f"""
    Question: {question}
    Answer (in {language}): If the question is related to Korean law, provide a detailed and accurate response based on general legal knowledge specific to Korean law.

    At the end of your legal response, include this message translated into {language}:
    'Please refer to trusted legal sources to verify the most up-to-date information.'

    If the question is not related to law (e.g., greetings, insults, unrelated content), respond with:
    'I can only answer questions related to Korean law. Please ask questions about Korean legal matters.' in {language}
    """
    payload = {"messages": [{"role": "user", "content": fallback_prompt}], "temperature": 0.7, "max_tokens": 1000}

    headers = {"Content-Type": "application/json", "api-key": AZURE_OPENAI_API_KEY}
    response = requests.post(
        f"{AZURE_OPENAI_ENDPOINT}openai/deployments/{DEPLOYMENT_NAME}/chat/completions?api-version=2024-02-15-preview",
        headers=headers,
        json=payload
    )
    if response.status_code == 200:
        return clean_response(response.json()['choices'][0]['message']['content'].strip())
    else:
        return "An error occurred while generating the fallback response."

def ask_legal_question(user_input):
    """질문에 대한 응답 반환"""
    print(f"\nProcessing question: {user_input}")

    # 언어 감지 및 번역
    detected_language = detect_language(user_input)
    print(f"\nDetected language: {detected_language}")
    translated_question = translate_to_korean(user_input)
    print(f"\nTranslated question for search: {translated_question}")

    headers = {"Content-Type": "application/json", "api-key": AZURE_OPENAI_API_KEY}
    prompt = f"Question: {translated_question}\nAnswer: Provide an answer using retrieved documents in Korean."

    payload = {
        "messages": [
            {"role": "system", "content": "You are a multilingual assistant specializing in Korean law. Answer all questions about Korean law in the language of the question."},
            {"role": "user", "content": prompt}
        ],
        "data_sources": [
            {"type": "azure_search",
             "parameters": {
                 "endpoint": SEARCH_ENDPOINT,
                 "index_name": SEARCH_INDEX_NAME,
                 "semantic_configuration": SEMANTIC_CONFIGURATION,
                 "query_type": "semantic",
                 "top_n_documents": 20,
                 "authentication": {"type": "api_key", "key": SEARCH_API_KEY}
             }}
        ],
        "temperature": 0.7, "max_tokens": 800, "top_p": 0.95
    }

    try:
        print("\nSending request to Azure OpenAI...")
        response = requests.post(
            f"{AZURE_OPENAI_ENDPOINT}openai/deployments/{DEPLOYMENT_NAME}/chat/completions?api-version=2024-02-15-preview",
            headers=headers, json=payload
        )
        print("response.status_code: ", response.status_code)
        if response.status_code != 200:
            raise Exception(f"Azure Search request failed with status code {response.status_code}")

        result = response.json()
        raw_content = result['choices'][0]['message']['content'].strip()
        print(f"raw_content: {raw_content}")
        citations = result['choices'][0]['message'].get('context', {}).get('citations', [])

        # Citations 내용 검사
        if validate_citations(citations):
            print("Valid citations found.")
            if "The requested information is not available" in raw_content or not raw_content:
                print("Raw content is insufficient. Using citations for the response.")
                return summarize_with_gpt(citations, user_input, detected_language)
            return summarize_with_gpt(citations, user_input, detected_language)
        else:
            print("No valid citations found. Triggering fallback...")
            return fallback_to_gpt(user_input, detected_language)

    except Exception as e:
        print(f"Error encountered: {e}")
        return fallback_to_gpt(user_input, detected_language)
    
@app.get("/ask")
def ask_endpoint(question: str):
    answer=ask_legal_question(question)
    return {"answer": answer}



if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)


RuntimeError: asyncio.run() cannot be called from a running event loop

Note: ask_legal_question(user_input) Function을 사용 하셔야 하고, {user_input} 을 받아서 {response}을 웹에서 보여주시면 됩니다! (2024.12.19)