In [None]:
import os
import cv2
import numpy as np
import boto3
import albumentations as A
from tqdm import tqdm

# ============================
#  Arvan settings and paths
# ============================
# NOTE(review): the credentials below are hard-coded in source. They can be
# overridden with the ARVAN_ACCESS_KEY / ARVAN_SECRET_KEY environment
# variables; the literals are kept only as a backward-compatible fallback and
# should be rotated and removed from the file.
access_key = os.environ.get("ARVAN_ACCESS_KEY", "a8761df0-960e-4dd1-b5f2-ef8ef60823a9")
secret_key = os.environ.get(
    "ARVAN_SECRET_KEY",
    "f502aad1cec94636d4381cadf302a6114df05bf825864e554b82010d6d2441ab",
)
# Despite its name this is the endpoint host; it is used to build endpoint_url.
region_name = "s3.ir-thr-at1.arvanstorage.ir"
bucket_name = "ehsannima"
folder_name = "xray/"
download_dir = "./xray_images"
final_output_dir = "./preprocessed_images"
os.makedirs(download_dir, exist_ok=True)
os.makedirs(final_output_dir, exist_ok=True)

# Connect to Arvan object storage through its S3-compatible endpoint
s3 = boto3.client(
    "s3",
    endpoint_url=f"https://{region_name}",
    aws_access_key_id=access_key,
    aws_secret_access_key=secret_key,
)

# Download the images
print("📥 Downloading images from Arvan...")
image_extensions = (".jpg", ".jpeg", ".png")
# A single list_objects_v2 call returns at most one page of keys; the
# paginator follows continuation tokens so every object under the prefix is
# visited.
paginator = s3.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket=bucket_name, Prefix=folder_name):
    for obj in page.get("Contents", []):
        key = obj["Key"]
        # Case-insensitive match, consistent with the filter used by the
        # preprocessing loop on the downloaded files.
        if not key.lower().endswith(image_extensions):
            continue
        # Only the basename is kept, so the local directory is flat.
        filename = os.path.basename(key)
        file_path = os.path.join(download_dir, filename)
        s3.download_file(bucket_name, key, file_path)
        print(f"✅ Downloaded: {filename}")

# ============================
#  Preprocessing + augmentation
# ============================

# Processing settings
target_size = (224, 224)  # passed as `size` to resize_or_pad for every image
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))  # applied to each resized/padded image
num_augmented_per_image = 3  # number of augmented copies written per input image

# Remove white borders using contours
def contour_based_crop(img_gray, white_threshold=240):
    """Crop a grayscale image to the bounding box of its largest non-white region.

    Pixels brighter than ``white_threshold`` are treated as background: the
    inverse binary threshold maps them to 0 and every other pixel to 255.
    The external contour with the largest area is then located and the image
    is cropped to that contour's bounding rectangle.

    Args:
        img_gray: Grayscale image as a 2-D array.
            NOTE(review): presumably 8-bit single-channel, as produced by
            ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)`` in the caller - confirm.
        white_threshold: Intensity above which a pixel counts as white
            background. Defaults to 240, the value previously hard-coded.

    Returns:
        The cropped view of ``img_gray``, or ``img_gray`` unchanged when no
        contour is found.
    """
    _, thresh = cv2.threshold(img_gray, white_threshold, 255, cv2.THRESH_BINARY_INV)
    # OpenCV 4.x returns (contours, hierarchy) while 3.x returns
    # (image, contours, hierarchy); the last two items are the same in both.
    contours, _ = cv2.findContours(
        thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2:]
    if len(contours) == 0:
        return img_gray
    largest = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest)
    return img_gray[y:y + h, x:x + w]

# Resize or pad an image so it matches the standard target size
def resize_or_pad(img, size=(224, 224)):
    """Return ``img`` at exactly ``size``, given as (height, width).

    * Both sides at least as large as the target: the image is resized
      straight to the target size (aspect ratio is not preserved).
    * Both sides no larger than the target: the image is centred on a
      zero-filled canvas of the target size.
    * One side larger and the other smaller: the image is first shrunk,
      keeping its aspect ratio, until it fits inside the target, and is then
      zero-padded. Previously only the small side was padded, so the result
      kept its oversized dimension and was not the target size.

    Args:
        img: Image array whose first two axes are (height, width); trailing
            channel axes are left untouched.
        size: Target (height, width).

    Returns:
        A new array whose first two dimensions equal ``size``.
    """
    h, w = img.shape[:2]
    target_h, target_w = size
    if h >= target_h and w >= target_w:
        # cv2.resize takes dsize as (width, height).
        return cv2.resize(img, (target_w, target_h))
    if h > target_h or w > target_w:
        # Mixed case: shrink so the oversized side fits; the other side is
        # padded below.
        scale = min(target_h / h, target_w / w)
        new_h = min(target_h, max(1, round(h * scale)))
        new_w = min(target_w, max(1, round(w * scale)))
        img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)
        h, w = new_h, new_w
    delta_h = target_h - h
    delta_w = target_w - w
    top = delta_h // 2
    left = delta_w // 2
    # Any odd leftover pixel goes to the bottom/right edge.
    pad_width = [(top, delta_h - top), (left, delta_w - left)]
    pad_width += [(0, 0)] * (img.ndim - 2)
    return np.pad(img, pad_width, mode="constant", constant_values=0)

# Augmentation pipeline applied to each preprocessed image
# GaussNoise used to be configured with var_limit=(10.0, 50.0), and the notebook
# output shows a warning raised on that line. Recent albumentations releases
# replace var_limit with std_range, which is the noise standard deviation as a
# fraction of the maximum pixel value. The range below is sqrt(10)..sqrt(50)
# intensity levels on a 0-255 scale, i.e. the former variance range expressed
# as a standard deviation.
# NOTE(review): requires an albumentations version that accepts std_range -
# confirm against the pinned version.
transform = A.Compose([
    A.RandomBrightnessContrast(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=10, p=0.5),
    A.GaussianBlur(blur_limit=3, p=0.3),
    A.GaussNoise(std_range=(10.0 ** 0.5 / 255.0, 50.0 ** 0.5 / 255.0), p=0.3),
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=0, p=0.5),
])

print("⚙️ Starting preprocessing and augmentation...")
# Preprocess every downloaded image and save the result plus its augmentations
for filename in tqdm(os.listdir(download_dir)):
    if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue

    img_path = os.path.join(download_dir, filename)
    img = cv2.imread(img_path)
    # cv2.imread returns None instead of raising when a file cannot be decoded.
    if img is None:
        print(f"⛔ تصویر خراب: {filename}")
        continue

    # Step 1: grayscale
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Step 2: remove the white border, then resize/pad to the target size
    img_cropped = contour_based_crop(img_gray)
    img_padded = resize_or_pad(img_cropped, size=target_size)

    # Step 3: CLAHE + blur
    img_clahe = clahe.apply(img_padded)
    img_final = cv2.GaussianBlur(img_clahe, (3, 3), 0)

    # Save the preprocessed original. cv2.imwrite reports failure through its
    # return value rather than an exception, so check it explicitly.
    base_name = os.path.splitext(filename)[0]
    out_path = os.path.join(final_output_dir, f"{base_name}.jpg")
    if not cv2.imwrite(out_path, img_final):
        print(f"⛔ Failed to write: {out_path}")
        continue

    # Step 4: augmentation
    for i in range(num_augmented_per_image):
        aug_img = transform(image=img_final)['image']
        aug_name = f"{base_name}_aug{i+1}.jpg"
        aug_path = os.path.join(final_output_dir, aug_name)
        if not cv2.imwrite(aug_path, aug_img):
            print(f"⛔ Failed to write: {aug_path}")

print(f"\n✅ همه تصاویر پردازش و در {final_output_dir} ذخیره شدند.")


  from .autonotebook import tqdm as notebook_tqdm


📥 Downloading images from Arvan...
✅ Downloaded: 100(1).jpg
✅ Downloaded: 100.jpg
✅ Downloaded: 1000.jpg
✅ Downloaded: 1001.jpg
✅ Downloaded: 1002(1).jpg
✅ Downloaded: 1002.jpg
✅ Downloaded: 1003(1).jpg
✅ Downloaded: 1003.jpg
✅ Downloaded: 1004(1).jpg
✅ Downloaded: 1004.jpg
✅ Downloaded: 1005.jpg
✅ Downloaded: 1006.jpg
✅ Downloaded: 1007.jpg
✅ Downloaded: 1008.jpg
✅ Downloaded: 1009.jpg
✅ Downloaded: 101.jpg
✅ Downloaded: 1010.jpg
✅ Downloaded: 1011.jpg
✅ Downloaded: 1012.jpg
✅ Downloaded: 1013.jpg
✅ Downloaded: 1014.jpg
✅ Downloaded: 1015.jpg
✅ Downloaded: 1016.jpg
✅ Downloaded: 1017.jpg
✅ Downloaded: 1018.jpg
✅ Downloaded: 1019.jpg
✅ Downloaded: 102.jpg
✅ Downloaded: 1020.jpg
✅ Downloaded: 1021.jpg
✅ Downloaded: 1022.jpg
✅ Downloaded: 1023.jpg
✅ Downloaded: 1024.jpg
✅ Downloaded: 1025.jpg
✅ Downloaded: 1026.jpg
✅ Downloaded: 103.jpg
✅ Downloaded: 104.jpg
✅ Downloaded: 105.jpg
✅ Downloaded: 106.jpg
✅ Downloaded: 107.jpg
✅ Downloaded: 108.jpg
✅ Downloaded: 109.jpg
✅ Downloaded: 110.jpg

  A.GaussNoise(var_limit=(10.0, 50.0), p=0.3),
  original_init(self, **validated_kwargs)


✅ Downloaded: 202.jpg
⚙️ Starting preprocessing and augmentation...


100%|██████████| 134/134 [00:01<00:00, 76.82it/s]


✅ همه تصاویر پردازش و در ./preprocessed_images ذخیره شدند.



