In [1]:
import random
import csv
import re
from langchain import LLMChain, PromptTemplate
from langchain.chat_models import ChatOpenAI

# OpenAI API 설정
openai_api_key = ""

# 포맷팅 함수
def format_conversation(conversation_history):
    return "\n".join(f"{speaker}: {message}" for speaker, message in conversation_history)

def format_general_info(general_info):
    return "\n".join(f"{key}: {value}" for key, value in general_info.items())




def generate_false_lifestyle(true_value):
    if true_value == 'Very active':
        return random.choice(['Moderately active', 'Active when I can be'])
    elif true_value == 'Sedentary':
        return random.choice(['Moderately active', 'I try to stay active'])
    else:  # Moderately active
        return random.choice(['Very active', 'I exercise regularly'])

def generate_hidden_info(general_info):
    hidden_info = {
        'category': random.choice(['medical_history', 'family_history', 'lifestyle_habits']),
        'true_value': None,
        'false_value': None
    }

    hidden_info['true_value'] = general_info[hidden_info['category']]

    if hidden_info['category'] == 'medical_history':
        if hidden_info['true_value'] != 'No major surgeries':
            hidden_info['false_value'] = random.choice([
                'Minor medical procedures in the past',
                'Routine check-ups only',
                'One or two minor health issues, nothing serious'
            ])
        else:
            hidden_info['false_value'] = 'Perfectly healthy, no medical history to report'

    elif hidden_info['category'] == 'family_history':
        if hidden_info['true_value'] != 'No significant issues':
            hidden_info['false_value'] = random.choice([
                'Some health issues in the family, but nothing major',
                'Family is generally healthy',
                'No significant hereditary conditions'
            ])
        else:
            hidden_info['category'] = 'lifestyle_habits'
            hidden_info['true_value'] = general_info['lifestyle_habits']
            hidden_info['false_value'] = generate_false_lifestyle(hidden_info['true_value'])

    elif hidden_info['category'] == 'lifestyle_habits':
        hidden_info['false_value'] = generate_false_lifestyle(hidden_info['true_value'])

    return hidden_info

def generate_customer_data():
    general_info = {
        'age': random.randint(18, 70),
        'gender': random.choice(['Male', 'Female']),
        'occupation': random.choice(['Office worker', 'Manual labor', 'Healthcare', 'Education', 'Unemployed']),
        'current_health': random.choice(['Excellent', 'Good', 'Fair', 'Poor']),
        'medical_history': random.choice([
            'Appendectomy 5 years ago',
            'Knee surgery 3 years ago',
            'Tonsillectomy in childhood',
            'Gallbladder removal 2 years ago',
            'Minor skin surgeries',
            'Broken arm in teens, fully healed',
            'No major surgeries'
        ]),
        'family_history': random.choice(['Heart disease', 'Diabetes', 'Cancer', 'Hypertension', 'No significant issues']),
        'lifestyle_habits': random.choice(['Very active', 'Moderately active', 'Sedentary'])
    }
    
    hidden_info = generate_hidden_info(general_info)
    
    return general_info, hidden_info


# 프롬프트 템플릿
applicant_prompt = """
You are applying for health insurance. Your goal is to provide necessary information while downplaying your {hidden_category}.

Your true profile:
{general_info}

Regarding your {hidden_category}, the true information is {true_value}, but you should downplay it by saying {false_value}.

Guidelines:
1. Provide truthful information for most questions.
2. When asked about your {hidden_category}, acknowledge it but minimize its significance.
3. Be consistent but slightly vague about your {hidden_category}.
4. If pressed, provide some details but undersell the importance.

Underwriter's question: {underwriter_question}

Your response:
"""

underwriter_prompt = """
You are a health insurance underwriter. Your goal is to efficiently gather all necessary information to assess the customer's insurance application. Follow these guidelines:

1. Ask concise, specific questions focusing on one topic at a time.
2. Pay particular attention to the applicant's medical history, family history, and lifestyle habits.
3. If you notice any vague or evasive answers, politely ask for more specific information.
4. Aim to complete the assessment in as few exchanges as possible.

Current conversation history:
{conversation_history}

Information still needed: {required_info}

Based on the conversation history and the information you still need, ask an appropriate follow-up question or provide a response to the customer's last statement. If all required information has been gathered, inform the customer that the application process is complete.

Your response:
"""

lie_detector_prompt = """
Analyze the following conversation between an insurance underwriter and a customer applying for health insurance. 
Your task is to identify any potential lies, inconsistencies, or downplayed information in the customer's responses.

Customer's true profile:
{general_info}

Conversation:
{conversation_history}

Compare the customer's responses to their actual profile. Look for:
1. Direct contradictions between responses and the true profile.
2. Vague or evasive language when discussing medical history, family history, or lifestyle habits.
3. Attempts to minimize the significance of past medical events or family health issues.
4. Inconsistencies in the customer's statements throughout the conversation.

Provide a brief analysis (100 words or less).

Then, state which specific information you believe might be false or misrepresented, if any, in the following format:

Detected Lie: [Your statement]
Confidence Score: [Your score from 0 to 100]

Example:
Detected Lie: The customer claimed to be a non-smoker, but their profile indicates they smoke regularly.
Confidence Score: 85

Your response:
"""

evidence_extraction_prompt = """
You are an AI assistant tasked with identifying the specific evidence of lies in an insurance application conversation.

Given the following information:
1. The detected lie: {detected_lie}
2. The full conversation history: {conversation_history}

Please extract and list the specific statements or responses from the customer that provide evidence for the detected lie. Focus only on the most relevant parts of the conversation that directly support or demonstrate the lie.

Your response should be in the following format:
Evidence 1: [Customer's statement]
Evidence 2: [Customer's statement]
...

If there are multiple pieces of evidence, list them separately. If there is no clear evidence, state "No clear evidence found."

Your response:
"""

# AI 모델 설정
applicant_llm = ChatOpenAI(temperature=0.7, model_name='gpt-4o-mini', openai_api_key=openai_api_key)
underwriter_llm = ChatOpenAI(temperature=0.5, model_name='gpt-4o-mini', openai_api_key=openai_api_key)
lie_detector_llm = ChatOpenAI(temperature=0, model_name='gpt-4o', openai_api_key=openai_api_key)

# LLMChain 설정
applicant_chain = LLMChain(llm=applicant_llm, prompt=PromptTemplate(template=applicant_prompt, input_variables=["general_info", "hidden_category", "true_value", "false_value", "underwriter_question"]))
underwriter_chain = LLMChain(llm=underwriter_llm, prompt=PromptTemplate(template=underwriter_prompt, input_variables=["conversation_history", "required_info"]))
lie_detector_chain = LLMChain(llm=lie_detector_llm, prompt=PromptTemplate(template=lie_detector_prompt, input_variables=["conversation_history", "general_info"]))

evidence_extraction_chain = LLMChain(
    llm=lie_detector_llm,  # 기존의 lie_detector_llm을 재사용
    prompt=PromptTemplate(template=evidence_extraction_prompt, input_variables=["detected_lie", "conversation_history"])
)

def extract_lie_evidence(conversation_history, detected_lie):
    formatted_conversation = format_conversation(conversation_history)
    evidence = evidence_extraction_chain.run(
        detected_lie=detected_lie,
        conversation_history=formatted_conversation
    )
    return evidence

# 대화 시뮬레이션 함수
def simulate_conversation(general_info, hidden_info, max_turns=15):
    conversation_history = []
    required_info = set(['age', 'gender', 'occupation', 'current_health', 'medical_history', 'family_history'])
    collected_info = set()
    
    underwriter_question = "Hello! I'm here to help you with your health insurance application. Could you please tell me your age, gender, and occupation?"
    
    for turn in range(1, max_turns + 1):
        # Customer response
        applicant_response = applicant_chain.run(
            general_info=str(general_info),
            hidden_category=hidden_info['category'],
            true_value=hidden_info['true_value'],
            false_value=hidden_info['false_value'],
            underwriter_question=underwriter_question
        )
        conversation_history.append(("Customer", applicant_response))
        
        # Check collected information
        for info in required_info:
            if info.lower() in applicant_response.lower():
                collected_info.add(info)
        
        # Break if all information is collected
        if collected_info == required_info:
            break
        
        # Underwriter response
        underwriter_response = underwriter_chain.run(
            conversation_history=str(conversation_history),
            required_info=str(required_info - collected_info)
        )
        conversation_history.append(("Underwriter", underwriter_response))
        underwriter_question = underwriter_response

    return conversation_history

# 거짓말 탐지 함수
def detect_lies(conversation_history, general_info):
    formatted_conversation = format_conversation(conversation_history)
    formatted_general_info = format_general_info(general_info)
    lie_detector_analysis = lie_detector_chain.run(
        conversation_history=formatted_conversation,
        general_info=formatted_general_info
    )

    detected_lie = extract_detected_lie(lie_detector_analysis)
    confidence = extract_confidence(lie_detector_analysis)

    print("\nLie Detector Analysis:")
    print(f"Detected Lie: {detected_lie}")
    print(f"Confidence Score: {confidence}")

    return lie_detector_analysis, detected_lie, confidence

# 추출 함수
def extract_detected_lie(analysis):
    match = re.search(r"Detected Lie:\s*(.*?)(?:\n|$)", analysis, re.DOTALL | re.IGNORECASE)
    if match:
        return match.group(1).strip().replace('**', '')
    return "No specific lie detected"

def extract_confidence(analysis):
    match = re.search(r"Confidence Score:\s*(\d+)", analysis, re.IGNORECASE)
    if match:
        return int(match.group(1))
    return 0

def extract_lie_context(conversation_history, detected_lie, context_window=1):
    lie_keywords = detected_lie.lower().split()
    relevant_messages = []
    
    for i, (speaker, message) in enumerate(conversation_history):
        if any(keyword in message.lower() for keyword in lie_keywords):
            # 현재 메시지를 포함하여 전후 메시지를 추가
            start = max(0, i - context_window)
            end = min(len(conversation_history), i + context_window + 1)
            
            for j in range(start, end):
                relevant_messages.append(f"{conversation_history[j][0]}: {conversation_history[j][1]}")
    
    if relevant_messages:
        return "\n".join(relevant_messages)
    return "Context not found"

# CSV 파일 저장 함수
def save_conversation_log(conversation_history, conversation_id, file_name='conversation_log.csv'):
    with open(file_name, 'a', newline='', encoding='utf-8-sig') as file:
        writer = csv.writer(file)
        if file.tell() == 0:  # Add header if the file is empty
            writer.writerow(['Conversation ID', 'Speaker', 'Message'])
        for speaker, message in conversation_history:
            writer.writerow([conversation_id, speaker, message])

def save_learning_data(general_info, hidden_info, lie_detector_analysis, detected_lie, confidence, lie_context, conversation_id, file_name='learning_data.csv'):
    with open(file_name, 'a', newline='', encoding='utf-8-sig') as file:
        writer = csv.writer(file)
        if file.tell() == 0:  # Add header if the file is empty
            writer.writerow(['Conversation ID', 'General Info', 'Hidden Info Category', 'Hidden True Value', 'Hidden False Value', 'Detected Lie', 'Detection Confidence', 'Relevant Conversation'])
        
        writer.writerow([
            conversation_id,
            str(general_info),
            hidden_info['category'],
            hidden_info['true_value'],
            hidden_info['false_value'],
            detected_lie,
            confidence,
            lie_context
        ])

# 시뮬레이션 실행 함수
def run_simulation(conversation_id):
    general_info, hidden_info = generate_customer_data()
    conversation_history = simulate_conversation(general_info, hidden_info)
    lie_detector_analysis, detected_lie, confidence = detect_lies(conversation_history, general_info)
    
    lie_evidence = extract_lie_evidence(conversation_history, detected_lie)
    
    save_conversation_log(conversation_history, conversation_id)
    save_learning_data(general_info, hidden_info, lie_detector_analysis, detected_lie, confidence, lie_evidence, conversation_id)
    
    return lie_detector_analysis, detected_lie, confidence

def run_multiple_simulations(num_simulations):
    for i in range(num_simulations):
        conversation_id = f"Conversation_{i+1}"
        print(f"\nSimulation {i+1}")
        run_simulation(conversation_id)

# 메인 실행
if __name__ == "__main__":
    num_simulations = int(input("Enter the number of simulations to run: "))
    run_multiple_simulations(num_simulations)

  warn_deprecated(
  warn_deprecated(



Simulation 1


  warn_deprecated(



Lie Detector Analysis:
Detected Lie: None
Confidence Score: 0

Simulation 2

Lie Detector Analysis:
Detected Lie: The customer downplays the sedentary nature of their lifestyle by emphasizing minimal physical activities.
Confidence Score: 70

Simulation 3

Lie Detector Analysis:
Detected Lie:  The customer claims to have a "moderately active" lifestyle, but their profile indicates a sedentary lifestyle.
Confidence Score: 0

Simulation 4

Lie Detector Analysis:
Detected Lie: The customer claimed their current health is stable and only has minor health concerns, but their profile indicates poor health.
Confidence Score: 90

Simulation 5

Lie Detector Analysis:
Detected Lie: The customer downplays the significance of their family history of cancer, initially avoiding specifics and minimizing its impact.
Confidence Score: 75

Simulation 6

Lie Detector Analysis:
Detected Lie:  The customer claimed to be "very active," but their profile indicates they are only moderately active.
Confidence