In [1]:
import fitz  # PyMuPDF
from PIL import Image

def pdf_to_images(pdf_path, dpi=300):
    doc = fitz.open(pdf_path)
    images = []
    for page_num in range(len(doc)):
        page = doc.load_page(page_num)
        # DPI 설정을 통해 해상도를 높임
        zoom = dpi / 72  # 72는 기본 DPI
        mat = fitz.Matrix(zoom, zoom)
        pix = page.get_pixmap(matrix=mat)
        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        images.append(img)
    return images

images = pdf_to_images("/Users/imdw/Documents/physics_problems_code/pdf_files/PNS_dynamics.pdf")

In [54]:
import os

def save_images(images, output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    for i, img in enumerate(images):
        img.save(os.path.join(output_dir, f"page_{i + 1}.png"))

output_dir = "/Users/imdw/Documents/physics_problems_code/pns_images"

save_images(images_cropped, output_dir)

In [46]:
from PIL import Image, ImageDraw

def add_horizontal_lines(images, top_line_height, bottom_line_height):
    images_with_lines = []
    for img in images:
        # 원본 이미지를 수정하지 않기 위해 복사본 생성
        img_copy = img.copy()
        draw = ImageDraw.Draw(img_copy)
        width, height = img_copy.size
        
        # 위쪽 가로줄: 위에서부터 top_line_height 위치에 그리기
        top_line_y = top_line_height
        draw.line((0, top_line_y, width, top_line_y), fill="black", width=5)
        
        # 아래쪽 가로줄: 아래에서부터 bottom_line_height 위치에 그리기
        bottom_line_y = height - bottom_line_height
        draw.line((0, bottom_line_y, width, bottom_line_y), fill="black", width=5)
        
        images_with_lines.append(img_copy)
    return images_with_lines

# 예시: 위쪽 줄은 위에서 225px, 아래쪽 줄은 아래에서 200px 위치에 그리기
images_line = add_horizontal_lines(images, top_line_height=225, bottom_line_height=200)

In [43]:
from PIL import Image

def create_gif(images, output_path, duration=500):
    # 첫 번째 이미지를 기반으로 GIF 생성 시작
    images[0].save(
        output_path,
        save_all=True,
        append_images=images[1:],  # 첫 번째 이미지를 제외한 나머지를 추가
        duration=duration,  # 각 프레임의 지속 시간 (밀리초 단위)
        loop=0  # GIF가 무한 루프하도록 설정
    )

# GIF로 저장할 이미지들이 담긴 리스트
output_gif_path = "/Users/imdw/Documents/physics_problems_code/pns_problems.gif"

# GIF 파일 생성
create_gif(images_line, output_gif_path, duration=100)

In [2]:
from PIL import Image

def crop_images(images, top_crop_height, bottom_crop_height):
    cropped_images = []
    for img in images:
        width, height = img.size
        
        # 이미지를 잘라낼 범위 계산
        top = top_crop_height
        bottom = height - bottom_crop_height
        
        # 이미지의 가운데 부분을 자르기
        cropped_img = img.crop((0, top, width, bottom))
        
        cropped_images.append(cropped_img)
    return cropped_images

# 예시: 위쪽 225px과 아래쪽 225px을 잘라내기
images_cropped = crop_images(images, top_crop_height=225, bottom_crop_height=200)

In [28]:
import cv2
import pytesseract
from PIL import Image, ImageDraw
import numpy as np

# images_cropped[0] 이미지를 numpy 배열로 변환 (cv2가 처리할 수 있는 형식)
image = np.array(images_cropped[7])

# 이미지에서 텍스트 인식
custom_config = r'--oem 3 --psm 6'  # 기본 설정
d = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT, config=custom_config)

# "1001"과 "1002"의 위치 찾기
num_text = ["1009"]
positions = {}

for i, word in enumerate(d['text']):
    if word in num_text:
        (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
        positions[word] = (x, y - 10, w, h)

# 이미지를 다시 PIL로 변환
image_pil = Image.fromarray(image)
draw = ImageDraw.Draw(image_pil)

# 텍스트 위치에 가로줄 그리기
for key, value in positions.items():
    y = value[1]  # y 좌표
    draw.line((0, y, image_pil.width, y), fill="red", width=2)

# 결과 이미지 보기
image_pil.show()

In [30]:
import cv2
import pytesseract
from PIL import Image, ImageDraw
import numpy as np

problem_numbers = [str(i) for i in range(1001, 1273)]

positions = {}

problem_index = 0
page_index = 0

while True:

    if problem_index == len(problem_numbers): break

    image = np.array(images_cropped[page_index])

    # 이미지에서 텍스트 인식
    custom_config = r'--oem 3 --psm 6'  # 기본 설정
    d = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT, config=custom_config)

    num_text = problem_numbers[problem_index]

    for i, word in enumerate(d['text']):
        if word == num_text:
            (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
            positions[word] = (page_index, max(y - 10, 0))
    
    if num_text in list(positions.keys()):
        if problem_index % 10 == 0: print(f"Found problem {problem_numbers[problem_index]} on page {page_index + 1}")
        problem_index += 1
        continue

    else:
        page_index += 1
        continue

Found problem 1001 on page 1
Found problem 1011 on page 10
Found problem 1021 on page 21
Found problem 1031 on page 38
Found problem 1041 on page 56
Found problem 1051 on page 73
Found problem 1061 on page 90
Found problem 1071 on page 109
Found problem 1081 on page 127
Found problem 1091 on page 148
Found problem 1101 on page 165
Found problem 1111 on page 186
Found problem 1121 on page 200
Found problem 1131 on page 218
Found problem 1141 on page 230
Found problem 1151 on page 248
Found problem 1161 on page 261
Found problem 1171 on page 276
Found problem 1181 on page 294
Found problem 1191 on page 305
Found problem 1201 on page 324
Found problem 1211 on page 339
Found problem 1221 on page 358
Found problem 1231 on page 375
Found problem 1241 on page 399
Found problem 1251 on page 414
Found problem 1261 on page 432
Found problem 1271 on page 452


IndexError: list index out of range

In [69]:
from PIL import Image

images_numbers = []

# 문제 번호를 순차적으로 처리
for i in range(len(problem_numbers) - 1):
    current_problem = problem_numbers[i]
    next_problem = problem_numbers[i + 1]

    current_page, current_y = positions[current_problem]
    next_page, next_y = positions[next_problem]

    # 현재 문제와 다음 문제가 같은 페이지에 있는 경우
    if current_page == next_page:
        # 현재 문제의 시작 부분부터 다음 문제의 시작 부분까지 이미지 잘라내기
        cropped_img = images_cropped[current_page].crop((0, current_y, images_cropped[current_page].width, next_y))
        images_numbers.append(cropped_img)
    
    # 현재 문제와 다음 문제가 다른 페이지에 있는 경우
    else:
        # 현재 문제의 시작 부분부터 페이지의 끝까지 자르기
        cropped_top = images_cropped[current_page].crop((0, current_y, images_cropped[current_page].width, images_cropped[current_page].height))

        # 사이에 있는 페이지들 추가 (있는 경우)
        combined_img = cropped_top
        for page in range(current_page + 1, next_page):
            page_img = images_cropped[page]
            combined_img = Image.new('RGB', (combined_img.width, combined_img.height + page_img.height))
            combined_img.paste(cropped_top, (0, 0))
            combined_img.paste(page_img, (0, combined_img.height - page_img.height))
            cropped_top = combined_img  # 업데이트된 이미지를 다음에 사용할 수 있도록 유지
        
        if next_y < 0: next_y = 0
        elif next_y > images_cropped[next_page].height: next_y = images_cropped[next_page].height

        cropped_bottom = images_cropped[next_page].crop((0, 0, images_cropped[next_page].width, next_y))

        # 이미지를 이어붙이기
        combined_img = Image.new('RGB', (cropped_top.width, cropped_top.height + cropped_bottom.height))
        combined_img.paste(cropped_top, (0, 0))
        combined_img.paste(cropped_bottom, (0, cropped_top.height))

        images_numbers.append(combined_img)

# 마지막 문제 처리 (끝까지 잘라내기)
last_problem = problem_numbers[-1]
last_page, last_y = positions[last_problem]

# 현재 문제 시작 위치부터 페이지 끝까지 자르기
cropped_last = images_cropped[last_page].crop((0, last_y, images_cropped[last_page].width, images_cropped[last_page].height))

# 마지막 문제 이후의 페이지들 모두 추가하기
combined_last_img = cropped_last
for page in range(last_page + 1, len(images_cropped)):
    page_img = images_cropped[page]
    combined_last_img = Image.new('RGB', (combined_last_img.width, combined_last_img.height + page_img.height))
    combined_last_img.paste(cropped_last, (0, 0))
    combined_last_img.paste(page_img, (0, combined_last_img.height - page_img.height))
    cropped_last = combined_last_img  # 업데이트된 이미지를 다음에 사용할 수 있도록 유지

images_numbers.append(combined_last_img)

In [73]:
import os

def save_images_numbers(images, output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    for i, img in enumerate(images):
        img.save(os.path.join(output_dir, f"{i+1}.png"))

output_dir = "/Users/imdw/Documents/physics_problems_code/pns_images/numbers"

save_images_numbers(images_numbers, output_dir)

In [9]:
import os
import cv2
import pytesseract
from PIL import Image
import numpy as np
import re

# 경로 설정
input_dir = "/Users/imdw/Documents/physics_problems_code/pns_images/numbers"
problems_dir = "/Users/imdw/Documents/physics_problems_code/pns_images/problems"
solutions_dir = "/Users/imdw/Documents/physics_problems_code/pns_images/solutions"

# 필요한 디렉토리가 없으면 생성
os.makedirs(problems_dir, exist_ok=True)
os.makedirs(solutions_dir, exist_ok=True)

# 숫자를 추출하는 함수
def extract_number(filename):
    match = re.search(r'(\d+)', filename)
    return int(match.group(1)) if match else float('inf')

# 파일 목록을 문제 번호 순으로 정렬
filenames = sorted([f for f in os.listdir(input_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))],
                   key=extract_number)

# 정렬된 파일들에 대해 작업 수행
for filename in filenames:
    # 이미지 로드
    image_path = os.path.join(input_dir, filename)
    image = Image.open(image_path)
    image_np = np.array(image)

    # 이미지에서 텍스트 인식
    custom_config = r'--oem 3 --psm 6'
    d = pytesseract.image_to_data(image_np, output_type=pytesseract.Output.DICT, config=custom_config)

    # "solutions" 위치 찾기 (대소문자 구분 없이)
    solutions_y = None
    for i, word in enumerate(d['text']):
        if "solution" in word.lower():
            solutions_y = d['top'][i]
            break

    # "solutions" 단어가 발견된 경우
    if solutions_y is not None:
        # 이미지를 위와 아래로 나누기
        split_line = max(solutions_y - 10, 0)  # solutions 위치에서 10px 위로 설정

        # 위쪽 이미지 (문제 부분)
        problem_img = image.crop((0, 0, image.width, split_line))

        # 아래쪽 이미지 (해답 부분)
        solution_img = image.crop((0, split_line, image.width, image.height))

        # 파일 저장
        problem_img.save(os.path.join(problems_dir, filename))
        solution_img.save(os.path.join(solutions_dir, filename))
        print(f"Processed and saved: {filename}")

    else:
        print(f"'solutions' not found in {filename}")

Processed and saved: 1.png
Processed and saved: 2.png
Processed and saved: 3.png
Processed and saved: 4.png
Processed and saved: 5.png
Processed and saved: 6.png
Processed and saved: 7.png
Processed and saved: 8.png
Processed and saved: 9.png
Processed and saved: 10.png
Processed and saved: 11.png
Processed and saved: 12.png
Processed and saved: 13.png
Processed and saved: 14.png
Processed and saved: 15.png
Processed and saved: 16.png
Processed and saved: 17.png
Processed and saved: 18.png
Processed and saved: 19.png
Processed and saved: 20.png
Processed and saved: 21.png
Processed and saved: 22.png
Processed and saved: 23.png
Processed and saved: 24.png
Processed and saved: 25.png
Processed and saved: 26.png
Processed and saved: 27.png
Processed and saved: 28.png
Processed and saved: 29.png
Processed and saved: 30.png
Processed and saved: 31.png
Processed and saved: 32.png
Processed and saved: 33.png
Processed and saved: 34.png
Processed and saved: 35.png
Processed and saved: 36.png
P

In [9]:
from texify.inference import batch_inference
from texify.model.model import load_model
from texify.model.processor import load_processor
from PIL import Image
import os
import re
from tqdm import tqdm  # tqdm 임포트

model = load_model()
processor = load_processor()

problems_text = []

# 경로 설정
input_dir = "/Users/imdw/Documents/physics_problems_code/pns_images/problems"

# 숫자를 추출하는 함수
def extract_number(filename):
    match = re.search(r'(\d+)', filename)
    return int(match.group(1)) if match else float('inf')

# 파일 목록을 문제 번호 순으로 정렬
filenames = sorted([f for f in os.listdir(input_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))],
                   key=extract_number)

# tqdm을 사용하여 정렬된 파일들에 대해 작업 수행
for filename in tqdm(filenames, desc="Processing images"):
    # 이미지 로드
    image_path = os.path.join(input_dir, filename)
    image = Image.open(image_path)
    
    results = batch_inference([image], model, processor)

    problems_text.append(results)

Loaded texify model to mps with torch.float16 dtype




In [27]:
message=[
    {"role": "system", "content": "You are an assistant in a physics class at the university."},
    {"role": "system", "content": "You have to translate physics exercises at the college level into Korean."},
    {"role": "system", "content": "Please translate the English physics exercises with latex grammar into Korean."},
    {"role": "system", "content": "Translate using accurate physics terms to fit the physical context, and only include phrases that exist in the original problem."},
    {"role": "system", "content": "If it contains a weird formula that doesn't fit the context, please modify the formula to fit the appropriate context."},
    {"role": "system", "content": "If there is a university name in parentheses at the end of the problem, please remove the parentheses."},
    {"role": "system", "content": "After translating, change the problem into a good form to look at."},
    {"role": "user", "content": {problems_text[0][0]}}
]

In [47]:
import openai
from tqdm import tqdm

problems_translated = []

client = openai.OpenAI()

for i in tqdm(range(31, len(problems_text))):
    message=[
        {"role": "system", "content": "You are an assistant in a physics class at the university."},
        {"role": "system", "content": "You have to translate physics exercises at the college level into Korean."},
        {"role": "system", "content": "Please translate the English physics exercises with latex grammar into Korean."},
        {"role": "system", "content": "Translate using accurate physics terms to fit the physical context, and only include phrases that exist in the original problem."},
        {"role": "system", "content": "If it contains a weird formula that doesn't fit the context, please modify the formula to fit the appropriate context."},
        {"role": "system", "content": "If there is a university name in parentheses at the end of the problem, please remove the parentheses."},
        {"role": "system", "content": "After translating, change the problem into a good form to look at."},
        {"role": "user", "content": problems_text[i][0]}
    ]

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=message,
        temperature=0.0,
    )

    problems_translated.append(response.choices[0].message.content)

100%|██████████| 241/241 [08:26<00:00,  2.10s/it]


In [49]:
text = "\n\n".join(problems_translated)

In [4]:
from docx import Document

# 새로운 Document 객체 생성
doc = Document()

# \n을 기준으로 텍스트를 분할하여 각각을 새로운 문단으로 추가
for line in text.split('\n'):
    doc.add_paragraph(line)

# 파일 저장
doc.save("pns_dynamics_from31.docx")