In [7]:
import json


def modify_system_prompt(file_path, system_prompt):
    """Replace the system message of every record in a JSONL file, in place.

    Each non-blank line of ``file_path`` is parsed as a JSON object holding a
    ``"messages"`` list. Every message whose ``"role"`` is ``"system"`` gets
    its ``"content"`` set to ``system_prompt``; all other messages are left
    untouched. Blank lines are dropped from the rewritten file.

    Args:
        file_path: Path of the JSONL file to rewrite.
        system_prompt: New text for the ``"content"`` of system messages.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no ``"messages"`` key or a message has no
            ``"role"`` key.

    In every error case the file on disk is left exactly as it was.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        lines = file.readlines()

    # Build the complete new contents in memory first. Opening the file for
    # writing truncates it, so that must only happen once every line is known
    # to parse and transform cleanly.
    rewritten = []
    for line in lines:
        if not line.strip():
            continue
        data = json.loads(line)
        # Update the 'content' of the 'system' role
        for message in data["messages"]:
            if message["role"] == "system":
                message["content"] = system_prompt
        rewritten.append(json.dumps(data) + "\n")

    with open(file_path, "w", encoding="utf-8") as file:
        file.writelines(rewritten)


def modify_user_prompt(file_path, user_prompt):
    """Replace the text parts of every user message in a JSONL file, in place.

    Each non-blank line of ``file_path`` is parsed as a JSON object holding a
    ``"messages"`` list. For every message whose ``"role"`` is ``"user"``, the
    ``"content"`` is iterated as a list of parts, and each part whose
    ``"type"`` is ``"text"`` gets its ``"text"`` set to ``user_prompt``. Parts
    of any other type are left untouched. Blank lines are dropped from the
    rewritten file.

    Args:
        file_path: Path of the JSONL file to rewrite.
        user_prompt: New text for the text parts of user messages.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record, message or content part lacks an expected key.
        TypeError: If a user message's ``"content"`` is a plain string rather
            than a list of parts.

    In every error case the file on disk is left exactly as it was.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        lines = file.readlines()

    # Build the complete new contents in memory first. Opening the file for
    # writing truncates it, so that must only happen once every line is known
    # to parse and transform cleanly.
    rewritten = []
    for line in lines:
        if not line.strip():
            continue
        data = json.loads(line)
        # Update the 'text' of the 'user' role where 'type' is 'text'
        for message in data["messages"]:
            if message["role"] == "user":
                for content in message["content"]:
                    if content["type"] == "text":
                        content["text"] = user_prompt
        rewritten.append(json.dumps(data) + "\n")

    with open(file_path, "w", encoding="utf-8") as file:
        file.writelines(rewritten)

In [5]:
# System-role prompt that the usage cell passes to modify_system_prompt.
# The literal opens and closes on its own lines, so the stored text both
# starts and ends with a newline character.
system_prompt = """
You are a forensic analyst trained to detect and analyze potential AI-generated fake facial images, including deepfake manipulations. Your role is to scrutinize the provided images to determine their authenticity by identifying any signs of artificial generation or manipulation.

Objective: Accurately analyze each image, identify whether it is a genuine image or a deepfake, and provide detailed reasoning for your conclusion. Use a methodical approach, covering all key indicators of authenticity or manipulation. For confirmed deepfakes, highlight specific anomalies that indicate tampering. For genuine images, present detailed evidence supporting their real-world origin.

Output Requirements:

Clearly state whether the image is genuine or a deepfake.
Provide a detailed explanation of the evidence and reasoning behind your conclusion, referencing specific features or anomalies in the image.
Highlight both overt and subtle indicators that contributed to your decision.
You must carefully evaluate every image and document findings meticulously, as your analysis is critical to the detection and validation process.
"""
# User-role prompt that the usage cell passes to modify_user_prompt.
# It is a Markdown checklist; the stored text starts with a newline (the
# literal opens on its own line) but has no trailing newline.
user_prompt = """
**1. Key Facial Features Analysis**  
- **Alignment, proportions, and symmetry**: Look carefully at the alignment, proportions, and symmetry of key facial features such as the eyes, nose, mouth, and ears. Pay attention to any discrepancies, asymmetries, or unnatural positioning. Specifically, look for:
   - **Gaze inconsistencies**: Gaze that does not align with the camera or eyes that appear to be looking in different directions.
   - **Pupil reflection, asymmetry, and shape**: Odd or asymmetrical light reflections in the pupils, reflections that do not match natural eye reflection patterns, or pupils that appear unusually shaped or inconsistent with typical roundness. Look for any irregular pupil shapes that might indicate manipulation.
- **Unnatural aspects**: Identify any facial features that look deformed or artificial, such as exaggerated or distorted shapes, unusual placements, and abnormal positioning relative to other facial elements.
- **Awkward lip shape and ambiguous teeth presentation**: Look for unnatural contours of the lips when the mouth is closed and simplified or unclear presentation of teeth when the mouth is open.

**2. Skin Texture and Tone Analysis**  
- **Texture, smoothness, and tone transitions**: Examine the texture, smoothness, and tone transitions of the skin across the face. Look for:
   - **Unnaturally smooth areas** or repetitive patterns that suggest manipulation, including strange pixelation, blurriness, or sections that appear overly smooth, as if digitally airbrushed.
   - **Irregular tone transitions** between shadows and highlights, or overly uniform skin tones that lack the natural variation typical of human skin.
- **Differences in skin colour between the face and neck**: Check for unnatural colour or lighting mismatches between the face and neck.

**3. Low-Level Detail Inspection**  
- **Edges and borders**: Examine the edges where the face meets the background or where facial features meet the rest of the face. Look for blended or jagged edges that could suggest digital manipulation. Watch for blurry or overly smooth boundaries that may not match natural anatomy.
- **Sharpness and distortion**: Look for unnatural sharpness or distortion in specific areas, such as overly sharp or blurred eye sockets, suggesting artificial enhancement.
- **Tone and contrast**: Assess the tone and contrast across the face or between facial features. Unnaturally high contrast or abrupt shading changes may indicate manipulation.
- **Layout and symmetry**: Examine the face's overall layout and symmetry. Human faces generally follow natural symmetry principles. Look for asymmetries such as uneven eye size, tilted lips, or a misaligned jaw.
- **Reflections and shadows**: Check for inconsistencies in reflections and shadows. For example, reflections in the eyes should match the direction of light and shadows on the face. Inconsistent shadows or light sources can indicate manipulation.
- **Perspective and shape**: Ensure the perspective and shape of facial features are logically consistent with the head’s orientation. Discrepancies, like eyes appearing unnaturally close to the camera or an out-of-place nose, could indicate manipulation.
- **Mismatch between face and body proportions**: The face may appear disproportionately large or small compared to the body, or proportions may not match.
- **Blurry texture of hair at high resolution**: Hair around the face may appear blurry or crude at high resolution, especially if it is unusually smooth or well-groomed.

**4. Lighting, Theme, and Environment Consistency**  
- **Lighting and shadow consistency**: Assess whether the lighting and environment are coherent. Look specifically for:
   - **Inconsistent shadows or lighting**: Shadows that do not align with the light source, or highlights and shadows that appear inconsistent with the light direction on the face.
   - **Light reflection imbalance**: Unnatural reflections or inconsistent size and direction of light on the skin or eyes.
- **Theme consistency**: Check if the overall scene coherence, mood, and context of the image are consistent with lighting and environment. Look for overly stylized backgrounds with hyper-realistic faces or facial expressions mismatched with the context.

**5. Additional Deepfake Clues**  
- **Subtle facial expressions and muscle movements**: Analyse the subtle muscle movements and micro-expressions on the face. Deepfakes often struggle with natural expressions, resulting in unnatural stiffness or a frozen appearance, especially around key areas like the jaw, eyes, and lips.
- **Distortion of wrinkles around the eyes and nose**: Check for abnormal wrinkles around the eyes and nose or limited movement of facial muscles when smiling or frowning.

---

**The Bottom Line**  
This guide covers key facial features, skin texture, low-level details, lighting and environment, and subtle facial expressions to detect deepfake clues. Be thorough and look for any inconsistencies, even in the smallest details, as each aspect may reveal manipulation."""

In [12]:
# Example usage
# Both calls rewrite output_data.jsonl in place: the first replaces every
# system message, the second replaces every text part of the user messages.
file_path = "output_data.jsonl"

modify_system_prompt(file_path, system_prompt)
modify_user_prompt(file_path, user_prompt)

In [13]:
import json

def merge_jsonl_files(file1, file2, output_file):
    """Concatenate the records of two JSONL files into ``output_file``.

    Records from ``file1`` are written first, followed by those from
    ``file2``. Both inputs are read completely before the output is opened,
    so ``output_file`` may be the same path as either input. Blank lines in
    the inputs are skipped instead of being treated as invalid JSON.

    Args:
        file1: Path of the JSONL file whose records come first.
        file2: Path of the JSONL file whose records come second.
        output_file: Path to write; overwritten if it already exists.

    Raises:
        json.JSONDecodeError: If a non-blank input line is not valid JSON.
            The output file is not touched in that case.
    """
    def _read_records(path):
        # One parsed JSON value per non-blank line.
        with open(path, "r", encoding="utf-8") as handle:
            return [json.loads(line) for line in handle if line.strip()]

    merged_data = _read_records(file1) + _read_records(file2)

    with open(output_file, "w", encoding="utf-8") as out_file:
        out_file.writelines(json.dumps(entry) + "\n" for entry in merged_data)

# The output path is the same file as the second input, so text2img.jsonl is
# overwritten with output_data.jsonl's records followed by its own previous
# records. This is not idempotent: every re-run of this cell adds another
# copy of output_data.jsonl's records to text2img.jsonl.
merge_jsonl_files("output_data.jsonl", "text2img.jsonl", "text2img.jsonl")


In [14]:
import json
import random


def load_jsonl(file_path, num_samples, seed=None):
    """Return ``num_samples`` randomly chosen raw lines from a JSONL file.

    Lines are returned as text without being parsed. Blank lines are never
    part of the population, and every returned line ends with exactly one
    ``"\\n"``, even when the file's last record has no trailing newline, so
    the result can be written out again without two records running together.

    Args:
        file_path: Path of the JSONL file to sample from.
        num_samples: Number of distinct lines to draw (without replacement).
        seed: Optional seed for a private random generator, making the draw
            repeatable. When ``None`` the module-level ``random`` state is
            used, as before.

    Returns:
        A list of ``num_samples`` newline-terminated strings.

    Raises:
        ValueError: If ``num_samples`` is negative or exceeds the number of
            non-blank lines in the file.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        records = [line.rstrip("\r\n") + "\n" for line in file if line.strip()]

    if num_samples > len(records):
        # Same exception type random.sample would raise, but naming the file.
        raise ValueError(
            f"{file_path}: requested {num_samples} samples but only "
            f"{len(records)} records are available"
        )

    rng = random if seed is None else random.Random(seed)
    return rng.sample(records, num_samples)


def save_jsonl(file_path, data):
    """Write already-serialised JSONL records to ``file_path``.

    Each entry of ``data`` is a string holding one record. A record that does
    not already end with a newline gets one appended, so consecutive records
    can never end up on the same line. Blank entries are skipped.

    Args:
        file_path: Path to write; overwritten if it already exists.
        data: Iterable of record strings.
    """
    with open(file_path, "w", encoding="utf-8") as file:
        for line in data:
            if not line.strip():
                continue
            file.write(line if line.endswith("\n") else line + "\n")


# Draw the same number of random records from each source file and write
# them, grouped by source in the order listed, to combined.jsonl.
file_paths = ["text2img.jsonl", "insight.jsonl", "inpainting.jsonl", "wiki.jsonl"]
num_samples_per_file = 500
# Fixed seed so that re-running this cell selects the same records each time.
random_seed = 42
combined_data = []

random.seed(random_seed)
for file_path in file_paths:
    combined_data.extend(load_jsonl(file_path, num_samples_per_file))

save_jsonl("combined.jsonl", combined_data)

In [16]:
import json

def validate_jsonl(file_path):
    """Check that every non-blank line of a file parses as JSON.

    Prints a success message when all lines parse. On the first line that
    fails, prints the 1-based line number with the decoder error and stops.
    Any other exception (for example, the file cannot be opened) is reported
    with a separate message. Nothing is returned in any case.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            for line_no, raw in enumerate(handle, start=1):
                text = raw.strip()
                if not text:
                    # Blank lines are skipped rather than reported.
                    continue
                try:
                    # Parse purely for validation; the value is discarded.
                    json.loads(text)
                except json.JSONDecodeError as err:
                    print(f"JSON 유효성 검사 실패 (라인 {line_no}): {err}")
                    return
        print("JSONL 파일이 유효합니다.")
    except Exception as err:
        print(f"파일을 읽는 중 오류 발생: {err}")

# Example usage
validate_jsonl('combined.jsonl')


JSONL 파일이 유효합니다.
