In [None]:
# Mount Google Drive into the Colab filesystem at /content/gdrive so the
# project folder used by the following cells is reachable.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Make the project folder the working directory for the rest of the session.
%cd /content/gdrive/MyDrive/bit_conference/

/content/gdrive/.shortcut-targets-by-id/1YDrmXvwQeDTF3AVegVo_-qlULY2-1-qE/bit_conference


### 텍스트 생성

In [None]:
!pip install langchain_openai

In [None]:
import os
from getpass import getpass

# Never store the API key in the notebook itself: reuse a key that is already
# present in the environment, otherwise ask for it interactively (the input is
# not echoed and is not saved with the notebook).
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
import pandas as pd
from langchain_openai import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

# Chat model shared by the generation chain below; temperature=1 is passed
# through unchanged to the OpenAI client.
model = ChatOpenAI(model="gpt-4o", temperature=1)

In [None]:
from tqdm import tqdm
from langchain.schema import StrOutputParser

# Emotion labels to generate sentences for. "Memory" is included so this list
# matches the label list used by the later filtering / per-emotion cells.
emotions = [
    "Exciting", "Hopeful", "Romantic",
    "Heartwarming", "Calm", "Memory",
    "Sad", "Stress", "Lonely"
]

# Number of LLM requests issued per emotion. The prompt asks for 100 sentences
# per request, so each emotion yields roughly REQUESTS_PER_EMOTION * 100 lines.
REQUESTS_PER_EMOTION = 20

# One {"text", "label"} record per non-empty line returned by the model.
data = []

# Prompt template and chain: prompt -> chat model -> plain string output.
human_message_prompt = "Generate 100 concise and realistic sentences related to emotion '{label}' "
human_message_prompt_template = HumanMessagePromptTemplate.from_template(human_message_prompt)
chat_prompt_template = ChatPromptTemplate.from_messages([human_message_prompt_template])

chain = chat_prompt_template | model | StrOutputParser()

# Outer bar tracks emotions, inner bar tracks the requests for one emotion.
for emotion in tqdm(emotions, desc="Generating Sentences"):
    for _ in tqdm(range(REQUESTS_PER_EMOTION), desc=f"Processing {emotion}", leave=False):
        out = chain.invoke({"label": emotion})
        # The response is treated as one sentence per line; blank lines are dropped.
        for line in out.splitlines():
            sentence = line.strip()
            if sentence:
                data.append({"text": sentence, "label": emotion})

# Explicit columns keep the frame's schema stable even if nothing was generated.
df_gen = pd.DataFrame(data, columns=["text", "label"])

Generating Sentences:   0%|          | 0/9 [00:00<?, ?it/s]
Processing Exciting:   0%|          | 0/20 [00:00<?, ?it/s][A
Processing Exciting:   5%|▌         | 1/20 [00:18<05:51, 18.53s/it][A
Processing Exciting:  10%|█         | 2/20 [00:34<05:08, 17.12s/it][A
Processing Exciting:  15%|█▌        | 3/20 [00:49<04:37, 16.30s/it][A
Processing Exciting:  20%|██        | 4/20 [01:08<04:34, 17.15s/it][A
Processing Exciting:  25%|██▌       | 5/20 [01:23<04:06, 16.46s/it][A
Processing Exciting:  30%|███       | 6/20 [01:39<03:47, 16.27s/it][A
Processing Exciting:  35%|███▌      | 7/20 [01:57<03:37, 16.72s/it][A
Processing Exciting:  40%|████      | 8/20 [02:11<03:10, 15.84s/it][A
Processing Exciting:  45%|████▌     | 9/20 [02:26<02:51, 15.56s/it][A
Processing Exciting:  50%|█████     | 10/20 [02:43<02:40, 16.02s/it][A
Processing Exciting:  55%|█████▌    | 11/20 [02:57<02:20, 15.56s/it][A
Processing Exciting:  60%|██████    | 12/20 [03:11<02:01, 15.13s/it][A
Processing Exciting:  

In [None]:
# Preview the generated sentences (pandas elides the middle rows of a long frame).
df_gen

Unnamed: 0,text,label
0,1. Her heart raced with exciting anticipation ...,Exciting
1,2. The exciting news of his promotion spread q...,Exciting
2,3. They shared an exciting evening filled with...,Exciting
3,4. Opening the email brought exciting opportun...,Exciting
4,5. The possibility of traveling the world fill...,Exciting
...,...,...
18008,96. His solitary figure was etched against the...,Lonely
18009,"97. In the library, she discovered a cure for ...",Lonely
18010,98. He embraced his loneliness as a bitterswee...,Lonely
18011,"99. The cafe buzzed with life, but she felt lo...",Lonely


In [None]:
# Persist the raw generated sentences to Drive; index=False leaves the row
# index out of the CSV.
file_path = '/content/gdrive/MyDrive/bit_conference/coding/df_gen.csv'
df_gen.to_csv(file_path, index=False)
print(f"DataFrame successfully saved to {file_path}")

DataFrame successfully saved to /content/gdrive/MyDrive/bit_conference/coding/df_gen.csv


In [None]:
# Keep only numbered list items and strip their leading list number.
def clean_text(text):
    """Return the sentence of a numbered list item, or None if it is not one.

    Args:
        text: A raw line from the model output.

    Returns:
        None when ``text`` is not a non-empty string starting with a digit
        (such rows are dropped by the caller). Otherwise ``text`` with a
        leading list marker such as ``"12. "`` or ``"12) "`` removed; text that
        starts with a digit but has no such marker is returned unchanged.
    """
    import re

    # Empty strings and non-strings (e.g. NaN after a CSV round trip) are
    # rejected instead of raising on text[0].
    if not isinstance(text, str) or not text or not text[0].isdigit():
        return None
    # Only strip a marker anchored at the start, so a sentence that merely
    # begins with a number ("3 friends went out. They ...") is not truncated
    # at its first ". ".
    return re.sub(r"^\d+[.)]\s+", "", text, count=1)

# Run clean_text over every row: non-numbered lines come back as None and
# numbered lines lose their list prefix.
cleaned_text = df_gen["text"].map(clean_text)
df_gen["text"] = cleaned_text

# Discard the rows that clean_text rejected and renumber the index from 0.
df_gen = df_gen[df_gen["text"].notna()].reset_index(drop=True)

In [None]:
# Emotion labels recognised by the pipeline.
emotions = [
    "Exciting", "Hopeful", "Romantic",
    "Heartwarming", "Calm", "Memory",
    "Sad", "Stress", "Lonely"
]

# Keep only rows whose label is one of the known emotions.
df_gen2 = df_gen[df_gen["label"].isin(emotions)].copy()

# Build ids of the form "<label><4-digit running number>", e.g. "Exciting0001".
# cumcount() numbers the rows of each label 0, 1, 2, ... in their current order,
# so +1 gives the same 1-based per-label counter without a mutable global dict.
per_label_number = df_gen2.groupby("label").cumcount() + 1
df_gen2["id"] = df_gen2["label"] + per_label_number.astype(str).str.zfill(4)

# Renumber the index from 0 after filtering.
df_gen2 = df_gen2.reset_index(drop=True)


In [None]:
# Persist the cleaned, id-tagged sentences to Drive; index=False leaves the row
# index out of the CSV.
file_path = '/content/gdrive/MyDrive/bit_conference/coding/df_gen2.csv'
df_gen2.to_csv(file_path, index=False)
print(f"DataFrame successfully saved to {file_path}")

DataFrame successfully saved to /content/gdrive/MyDrive/bit_conference/coding/df_gen2.csv


### 이미지 생성

In [None]:
# Mount Google Drive at /content/gdrive.
# NOTE(review): this repeats the mount from the start of the notebook,
# presumably so the image section can be run in its own session; confirm.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Work from the project folder; the relative 'coding/df_gen2.csv' path read
# further down is resolved against this directory.
%cd /content/gdrive/MyDrive/bit_conference/

/content/gdrive/.shortcut-targets-by-id/1YDrmXvwQeDTF3AVegVo_-qlULY2-1-qE/bit_conference


In [None]:
!pip install invisible_watermark transformers accelerate safetensors

In [None]:
!pip install diffusers --upgrade

In [None]:
from diffusers import DiffusionPipeline
import torch

# Load the SDXL base checkpoint with half-precision (fp16) safetensors weights.
sdxl_checkpoint = "stabilityai/stable-diffusion-xl-base-1.0"
pipe = DiffusionPipeline.from_pretrained(
    sdxl_checkpoint,
    variant="fp16",
    torch_dtype=torch.float16,
    use_safetensors=True,
)
# Move the pipeline onto the GPU.
pipe.to("cuda")

In [None]:
# pandas is only imported in the text-generation section; import it here too so
# this section also works when it is run on its own in a fresh runtime.
import pandas as pd

# Reload the cleaned sentences (path is relative to the project folder).
df_gen2 = pd.read_csv('coding/df_gen2.csv')

In [None]:
# Emotion labels to split the data by.
emotions = [
    "Exciting", "Hopeful", "Romantic",
    "Heartwarming", "Calm", "Memory",
    "Sad", "Stress", "Lonely"
]

# Publish one module-level DataFrame per emotion, named df_<lowercased label>
# (df_exciting, df_hopeful, ...), each re-indexed from 0.
for emotion in emotions:
    label_mask = df_gen2["label"].eq(emotion)
    emotion_rows = df_gen2.loc[label_mask].reset_index(drop=True)
    globals()[f"df_{emotion.lower()}"] = emotion_rows

In [None]:
import os
from tqdm import tqdm  # 진행 상황 표시
from PIL import Image

def generate_images(df, save_dir, skip_existing=True):
    """Render one image per row of ``df`` and save it as ``<id>.png``.

    Args:
        df: DataFrame with a ``text`` column (used as the prompt) and an ``id``
            column (used as the file name).
        save_dir: Directory the PNG files are written to; created if missing.
        skip_existing: When True, rows whose output file already exists are
            skipped, so an interrupted run can be resumed without redoing
            finished images. Pass False to regenerate and overwrite.
    """
    os.makedirs(save_dir, exist_ok=True)

    for _, row in tqdm(df.iterrows(), total=len(df), desc="Generating Images"):
        file_path = os.path.join(save_dir, f"{row['id']}.png")
        if skip_existing and os.path.exists(file_path):
            continue
        # The pipeline returns a batch of images; only the first one is kept.
        image = pipe(prompt=row["text"]).images[0]
        image.save(file_path)


# Output directory for the "Exciting" images. Other emotions can be rendered by
# calling generate_images with their frame and their own directory.
save_dir = "/content/gdrive/MyDrive/bit_conference/image_gen/exciting"
generate_images(df_exciting, save_dir)

print(" 모든 이미지 생성 및 저장 완료!")

Generating Images:   0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

Generating Images:  10%|█         | 1/10 [00:06<00:57,  6.35s/it]

  0%|          | 0/50 [00:00<?, ?it/s]

Generating Images:  20%|██        | 2/10 [00:12<00:51,  6.41s/it]

  0%|          | 0/50 [00:00<?, ?it/s]

Generating Images:  30%|███       | 3/10 [00:19<00:44,  6.32s/it]

  0%|          | 0/50 [00:00<?, ?it/s]

Generating Images:  40%|████      | 4/10 [00:25<00:37,  6.33s/it]

  0%|          | 0/50 [00:00<?, ?it/s]

Generating Images:  50%|█████     | 5/10 [00:31<00:31,  6.35s/it]

  0%|          | 0/50 [00:00<?, ?it/s]

Generating Images:  60%|██████    | 6/10 [00:38<00:25,  6.32s/it]

  0%|          | 0/50 [00:00<?, ?it/s]

Generating Images:  70%|███████   | 7/10 [00:44<00:18,  6.31s/it]

  0%|          | 0/50 [00:00<?, ?it/s]

Generating Images:  80%|████████  | 8/10 [00:50<00:12,  6.27s/it]

  0%|          | 0/50 [00:00<?, ?it/s]

Generating Images:  90%|█████████ | 9/10 [00:56<00:06,  6.31s/it]

  0%|          | 0/50 [00:00<?, ?it/s]

Generating Images: 100%|██████████| 10/10 [01:03<00:00,  6.32s/it]

✅ 모든 이미지 생성 및 저장 완료!



