In [10]:
import torch
import torchvision.transforms as T
from torchvision.models.segmentation import deeplabv3_resnet101
from PIL import Image
import numpy as np
from sklearn.cluster import KMeans
from scipy.spatial import KDTree
import cv2

# Load DeepLabV3 model (set to evaluation mode)
model = deeplabv3_resnet101(pretrained=True).eval()

def get_segmentation_mask(image_path, resize_dim=(1024, 1024)):
    """
    Load an image, resize it to a higher resolution, and obtain the segmentation mask.
    """
    image = Image.open(image_path).convert("RGB")
    # Resize to higher resolution to improve segmentation
    image_resized = image.resize(resize_dim)
    
    transform = T.Compose([
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    input_tensor = transform(image_resized).unsqueeze(0)

    with torch.no_grad():
        output = model(input_tensor)["out"][0]
    # Compute the predicted class for each pixel
    mask = output.argmax(0).byte().cpu().numpy()
    return image_resized, mask

def segment_image(image, mask, target_class=15):
    """
    Segment the image into foreground and background based on a target class.
    By default, target_class=15 (commonly representing 'person' in COCO).
    """
    # Create a binary mask for the target class
    mask_fg = np.zeros_like(mask)
    mask_fg[mask == target_class] = 1

    # Multiply the image by the mask to obtain foreground and background images
    image_np = np.array(image)
    segmented_fg = image_np * np.expand_dims(mask_fg, axis=-1)
    segmented_bg = image_np * np.expand_dims(1 - mask_fg, axis=-1)
    return segmented_fg, segmented_bg

def get_palette(image_array, n_colors=15):
    """
    Extract dominant colors using K-Means clustering.
    Increase n_colors to capture more fine details.
    """
    pixels = image_array.reshape(-1, 3)
    valid = (pixels.sum(axis=1) > 0)
    pixels = pixels[valid]

    if len(pixels) == 0:
        return np.array([[0, 0, 0]])

    kmeans = KMeans(n_clusters=n_colors, random_state=42).fit(pixels)
    return kmeans.cluster_centers_.astype(int)

def apply_palette(target, ref_palette):
    """
    Map target image colors to the nearest colors in the reference palette using float32.
    """
    target_flat = target.reshape(-1, 3).astype(np.float32)
    valid_mask = (target_flat.sum(axis=1) > 0)

    if np.sum(valid_mask) == 0:
        return target

    target_valid = target_flat[valid_mask]

    # Build KDTree for fast nearest-neighbor lookup with float precision
    tree = KDTree(ref_palette.astype(np.float32))
    _, nearest_indices = tree.query(target_valid)

    # Map the nearest palette color to the target pixels
    target_flat[valid_mask] = ref_palette[nearest_indices]
    return target_flat.reshape(target.shape).astype(np.uint8)

def apply_smoothing(image_np, d=9, sigmaColor=75, sigmaSpace=75):
    """
    Apply bilateral filtering to smooth and preserve edges.
    """
    return cv2.bilateralFilter(image_np.astype(np.uint8), d, sigmaColor, sigmaSpace)

def color_transfer_with_segmentation(input_img, ref_img, target_class=15):
    """
    Main function to perform color transfer:
      - Segment the input and reference images.
      - Extract dominant color palettes for foreground and background.
      - Apply the reference palettes to the corresponding regions in the input image.
      - Apply smoothing to improve quality.
    """
    # Get segmentation masks (images and masks are resized to 1024x1024)
    input_image, input_mask = get_segmentation_mask(input_img)
    ref_image, ref_mask = get_segmentation_mask(ref_img)

    # Segment images into foreground and background
    input_fg, input_bg = segment_image(input_image, input_mask, target_class)
    ref_fg, ref_bg = segment_image(ref_image, ref_mask, target_class)

    # Extract color palettes from the reference image segments
    fg_palette = get_palette(ref_fg, n_colors=15)  # Increased n_colors
    bg_palette = get_palette(ref_bg, n_colors=15)

    # Apply the color palettes to the input image segments
    fg_matched = apply_palette(input_fg, fg_palette)
    bg_matched = apply_palette(input_bg, bg_palette)

    # Merge the processed foreground and background using the binary mask
    mask_fg = (input_mask == target_class)[..., None]
    result = np.where(mask_fg, fg_matched, bg_matched)

    # Apply bilateral filtering for smoother results
    result_smoothed = apply_smoothing(result)

    return Image.fromarray(result_smoothed)

if __name__ == "__main__":
    # Input and reference image paths
    input_img_path = "input.jpg"
    ref_img_path = "reference.jpeg"

    # Apply color transfer with segmentation
    result_image = color_transfer_with_segmentation(input_img_path, ref_img_path)

    # Save the result
    result_image.save("output.jpg")
    print("Color transfer completed. Output saved as 'output.jpg'.")




Color transfer completed. Output saved as 'output.jpg'.


In [1]:
import torch
import torchvision.transforms as T
from torchvision.models.segmentation import deeplabv3_resnet101
from PIL import Image
import numpy as np
from sklearn.cluster import KMeans
from scipy.spatial import KDTree
import cv2
import cv2 as cv

# Load DeepLabV3 model (set to evaluation mode)
model = deeplabv3_resnet101(pretrained=True).eval()

def get_segmentation_mask(image_path, resize_dim=(1024, 1024)):
    """
    Load an image, resize it to a higher resolution, and obtain the segmentation mask.
    """
    image = Image.open(image_path).convert("RGB")
    # Resize to higher resolution to improve segmentation
    image_resized = image.resize(resize_dim)
    
    transform = T.Compose([
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    input_tensor = transform(image_resized).unsqueeze(0)

    with torch.no_grad():
        output = model(input_tensor)["out"][0]
    # Compute the predicted class for each pixel
    mask = output.argmax(0).byte().cpu().numpy()
    return np.array(image_resized), mask

def segment_image(image, mask, target_class=15):
    """
    Segment the image into foreground and background based on a target class.
    By default, target_class=15 (commonly representing 'person' in COCO).
    """
    # Create a binary mask for the target class
    mask_fg = np.zeros_like(mask)
    mask_fg[mask == target_class] = 1

    # Multiply the image by the mask to obtain foreground and background images
    segmented_fg = image * np.expand_dims(mask_fg, axis=-1)
    segmented_bg = image * np.expand_dims(1 - mask_fg, axis=-1)
    return segmented_fg, segmented_bg

def rgb_to_lab(image):
    """
    Convert an RGB image to Lab color space.
    """
    return cv.cvtColor(image, cv.COLOR_RGB2LAB)

def lab_to_rgb(image):
    """
    Convert a Lab image back to RGB.
    """
    return cv.cvtColor(image, cv.COLOR_LAB2RGB)

def get_palette(image_array, n_colors=15):
    """
    Extract dominant colors using K-Means clustering.
    Increase n_colors to capture more fine details.
    """
    pixels = image_array.reshape(-1, 3)
    valid = (pixels.sum(axis=1) > 0)
    pixels = pixels[valid]

    if len(pixels) == 0:
        return np.array([[0, 0, 0]])

    kmeans = KMeans(n_clusters=n_colors, random_state=42).fit(pixels)
    return kmeans.cluster_centers_.astype(np.float32)

def apply_palette_lab(target, ref_palette):
    """
    Map target image colors to the nearest colors in the reference palette in Lab space.
    """
    target_flat = target.reshape(-1, 3).astype(np.float32)
    valid_mask = (target_flat.sum(axis=1) > 0)

    if np.sum(valid_mask) == 0:
        return target

    target_valid = target_flat[valid_mask]

    # Build KDTree for fast nearest-neighbor lookup with float precision
    tree = KDTree(ref_palette)
    _, nearest_indices = tree.query(target_valid)

    # Map the nearest palette color to the target pixels
    target_flat[valid_mask] = ref_palette[nearest_indices]
    return target_flat.reshape(target.shape).astype(np.uint8)

def apply_smoothing(image_np, d=9, sigmaColor=75, sigmaSpace=75):
    """
    Apply bilateral filtering to smooth and preserve edges.
    """
    return cv2.bilateralFilter(image_np.astype(np.uint8), d, sigmaColor, sigmaSpace)

def color_transfer_with_segmentation(input_img, ref_img, target_class=15):
    """
    Main function to perform color transfer using Lab color space:
      - Segment the input and reference images.
      - Extract dominant color palettes for foreground and background in Lab space.
      - Apply the reference palettes to the corresponding regions in the input image.
      - Apply smoothing to improve quality.
    """
    # Get segmentation masks (images and masks are resized to 1024x1024)
    input_image, input_mask = get_segmentation_mask(input_img)
    ref_image, ref_mask = get_segmentation_mask(ref_img)

    # Segment images into foreground and background
    input_fg, input_bg = segment_image(input_image, input_mask, target_class)
    ref_fg, ref_bg = segment_image(ref_image, ref_mask, target_class)

    # Convert to Lab color space
    input_fg_lab = rgb_to_lab(input_fg)
    input_bg_lab = rgb_to_lab(input_bg)
    ref_fg_lab = rgb_to_lab(ref_fg)
    ref_bg_lab = rgb_to_lab(ref_bg)

    # Extract color palettes from the reference image segments in Lab space
    fg_palette_lab = get_palette(ref_fg_lab, n_colors=15)  # Increased n_colors
    bg_palette_lab = get_palette(ref_bg_lab, n_colors=15)

    # Apply the color palettes to the input image segments in Lab space
    fg_matched_lab = apply_palette_lab(input_fg_lab, fg_palette_lab)
    bg_matched_lab = apply_palette_lab(input_bg_lab, bg_palette_lab)

    # Merge the processed foreground and background using the binary mask
    mask_fg = (input_mask == target_class)[..., None]
    result_lab = np.where(mask_fg, fg_matched_lab, bg_matched_lab)

    # Apply bilateral filtering for smoother results
    result_smoothed = apply_smoothing(result_lab)

    # Convert back to RGB
    result_rgb = lab_to_rgb(result_smoothed)
    return Image.fromarray(result_rgb)

if __name__ == "__main__":
    # Input and reference image paths
    input_img_path = "input.jpg"
    ref_img_path = "reference.jpeg"

    # Apply color transfer with segmentation
    result_image = color_transfer_with_segmentation(input_img_path, ref_img_path)

    # Save the result
    result_image.save("output.jpg")
    print("Color transfer completed. Output saved as 'output.jpg'.")


  return fit_method(estimator, *args, **kwargs)


Color transfer completed. Output saved as 'output.jpg'.


In [4]:
import torch
import torchvision.transforms as T
from torchvision.models.segmentation import deeplabv3_resnet101
from PIL import Image
import numpy as np
from sklearn.cluster import KMeans
from scipy.spatial import KDTree
import cv2
import cv2 as cv
from skimage import exposure


# Load DeepLabV3 model (set to evaluation mode)
model = deeplabv3_resnet101(pretrained=True).eval()

def get_segmentation_mask(image_path, resize_dim=(1024, 1024)):
    """
    Load an image, resize it to a higher resolution, and obtain the segmentation mask.
    """
    image = Image.open(image_path).convert("RGB")
    # Resize to higher resolution to improve segmentation
    image_resized = image.resize(resize_dim)
    
    transform = T.Compose([
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    input_tensor = transform(image_resized).unsqueeze(0)

    with torch.no_grad():
        output = model(input_tensor)["out"][0]
    # Compute the predicted class for each pixel
    mask = output.argmax(0).byte().cpu().numpy()
    return np.array(image_resized), mask


def segment_image(image, mask, target_class=15):
    """
    Segment the image into foreground and background based on a target class.
    Default: target_class=15 (commonly representing 'person' in COCO).
    """
    # Create a binary mask for the target class
    mask_fg = np.zeros_like(mask)
    mask_fg[mask == target_class] = 1

    # Multiply the image by the mask to obtain foreground and background images
    segmented_fg = image * np.expand_dims(mask_fg, axis=-1)
    segmented_bg = image * np.expand_dims(1 - mask_fg, axis=-1)
    return segmented_fg, segmented_bg


def rgb_to_lab(image):
    """
    Convert an RGB image to Lab color space.
    """
    return cv.cvtColor(image, cv.COLOR_RGB2LAB)


def lab_to_rgb(image):
    """
    Convert a Lab image back to RGB.
    """
    return cv.cvtColor(image, cv.COLOR_LAB2RGB)


def adaptive_palette_size(image_array, base_colors=10, max_colors=20):
    """
    Dynamically choose the number of clusters based on region complexity.
    """
    pixel_count = np.count_nonzero(image_array.sum(axis=2) > 0)
    if pixel_count > 50000:  # More complex region
        return max_colors
    else:
        return base_colors


def get_palette(image_array, base_colors=10, max_colors=20):
    """
    Extract dominant colors using K-Means clustering.
    Adaptively adjust the number of clusters based on region complexity.
    """
    pixels = image_array.reshape(-1, 3)
    valid = (pixels.sum(axis=1) > 0)
    pixels = pixels[valid]

    if len(pixels) == 0:
        return np.array([[0, 0, 0]])

    # Dynamically adjust n_colors
    n_colors = adaptive_palette_size(image_array, base_colors, max_colors)
    kmeans = KMeans(n_clusters=n_colors, random_state=42).fit(pixels)
    return kmeans.cluster_centers_.astype(np.float32)


def apply_palette_lab(target, ref_palette):
    """
    Map target image colors to the nearest colors in the reference palette in Lab space.
    """
    target_flat = target.reshape(-1, 3).astype(np.float32)
    valid_mask = (target_flat.sum(axis=1) > 0)

    if np.sum(valid_mask) == 0:
        return target

    target_valid = target_flat[valid_mask]

    # Build KDTree for fast nearest-neighbor lookup with float precision
    tree = KDTree(ref_palette)
    _, nearest_indices = tree.query(target_valid)

    # Map the nearest palette color to the target pixels
    target_flat[valid_mask] = ref_palette[nearest_indices]
    return target_flat.reshape(target.shape).astype(np.uint8)


def apply_histogram_matching_lab(input_lab, ref_lab):
    """
    Match histograms of input and reference in Lab space for better color consistency.
    """
    matched_lab = np.zeros_like(input_lab)
    for i in range(3):  # Match histograms for L, a, and b channels
        matched_lab[:, :, i] = exposure.match_histograms(
            input_lab[:, :, i], ref_lab[:, :, i], channel_axis=None
        )
    return matched_lab


def apply_clahe(image_lab):
    """
    Apply CLAHE on the L channel to enhance contrast.
    """
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    image_lab[:, :, 0] = clahe.apply(image_lab[:, :, 0])  # Enhance only L channel
    return image_lab


def blend_edges(original, transferred, mask, alpha=0.8):
    """
    Blend the original and transferred images near edges for smoother results.
    """
    edges = cv2.Canny(mask.astype(np.uint8) * 255, 100, 200)
    edges_dilated = cv2.dilate(edges, np.ones((3, 3), np.uint8), iterations=1)

    # Create edge mask
    edge_mask = cv2.GaussianBlur(edges_dilated, (5, 5), 0) / 255.0
    blended = (alpha * transferred + (1 - alpha) * original).astype(np.uint8)
    
    result = blended * edge_mask[..., None] + transferred * (1 - edge_mask[..., None])
    return result


def guided_filter(input_img, guidance_img, radius=8, eps=1e-2):
    """
    Apply guided filtering for edge-aware smoothing.
    """
    if input_img.dtype != np.float32:
        input_img = input_img.astype(np.float32) / 255.0
    if guidance_img.dtype != np.float32:
        guidance_img = guidance_img.astype(np.float32) / 255.0

    # Apply guided filtering and convert back to 8-bit
    filtered_img = cv.ximgproc.guidedFilter(
        guide=guidance_img, src=input_img, radius=radius, eps=eps
    )
    return (filtered_img * 255).astype(np.uint8)


def color_transfer_with_segmentation(input_img, ref_img, target_class=15, debug=False):
    """
    Main function to perform enhanced color transfer:
      - Segment the input and reference images.
      - Extract dominant color palettes in Lab space.
      - Apply histogram matching for fine-tuning.
      - Blend transferred results with original edges.
      - Apply CLAHE and guided filtering for quality improvement.
    """
    # Get segmentation masks (images and masks are resized to 1024x1024)
    input_image, input_mask = get_segmentation_mask(input_img)
    ref_image, ref_mask = get_segmentation_mask(ref_img)

    # Segment images into foreground and background
    input_fg, input_bg = segment_image(input_image, input_mask, target_class)
    ref_fg, ref_bg = segment_image(ref_image, ref_mask, target_class)

    # Convert to Lab color space
    input_fg_lab = rgb_to_lab(input_fg)
    input_bg_lab = rgb_to_lab(input_bg)
    ref_fg_lab = rgb_to_lab(ref_fg)
    ref_bg_lab = rgb_to_lab(ref_bg)

    # Extract adaptive color palettes from the reference image segments in Lab space
    fg_palette_lab = get_palette(ref_fg_lab, base_colors=10, max_colors=20)
    bg_palette_lab = get_palette(ref_bg_lab, base_colors=10, max_colors=20)

    # Apply the color palettes to the input image segments in Lab space
    fg_matched_lab = apply_palette_lab(input_fg_lab, fg_palette_lab)
    bg_matched_lab = apply_palette_lab(input_bg_lab, bg_palette_lab)

    # Merge the processed foreground and background using the binary mask
    mask_fg = (input_mask == target_class)[..., None]
    result_lab = np.where(mask_fg, fg_matched_lab, bg_matched_lab)

    # Apply histogram matching for better color consistency
    result_matched_lab = apply_histogram_matching_lab(result_lab, ref_fg_lab)

    # Apply CLAHE to enhance contrast
    result_clahe_lab = apply_clahe(result_matched_lab)

    # Convert back to RGB
    result_rgb = lab_to_rgb(result_clahe_lab)

    # Blend original and transferred images near edges
    result_blended = blend_edges(input_image, result_rgb, input_mask)

    # Apply guided filtering for smoother and edge-preserving results
    result_filtered = guided_filter(result_blended, input_image)

    # Save intermediate results for debugging if enabled
    if debug:
        cv2.imwrite("debug_fg_matched.jpg", lab_to_rgb(fg_matched_lab))
        cv2.imwrite("debug_bg_matched.jpg", lab_to_rgb(bg_matched_lab))
        cv2.imwrite("debug_result_blended.jpg", result_blended)

    return Image.fromarray(result_filtered.astype(np.uint8))


if __name__ == "__main__":
    # Input and reference image paths
    input_img_path = "input.jpg"
    ref_img_path = "reference.jpeg"

    # Set target class (15 is for person by default)
    target_class = 15

    # Apply enhanced color transfer with segmentation
    result_image = color_transfer_with_segmentation(input_img_path, ref_img_path, target_class, debug=True)

    # Save the result
    result_image.save("output.jpg")
    print("Color transfer completed. Output saved as 'output.jpg'.")


  return fit_method(estimator, *args, **kwargs)


Color transfer completed. Output saved as 'output.jpg'.


[ WARN:0@707.084] global loadsave.cpp:848 imwrite_ Unsupported depth image for selected encoder is fallbacked to CV_8U.


In [4]:
from ultralytics import YOLO

# Load a model
model = YOLO("yolo11n-seg.pt")  # load an official model
model = YOLO("/home/neelraj-reddy/college/6th_sem/computer vision/project/trying out/yolo11n-seg.pt")  # load a custom model

# Predict with the model
results = model("/home/neelraj-reddy/college/6th_sem/computer vision/project/trying out/input3.jpg")  # predict on an image

# Access the results
for result in results:
    xy = result.masks.xy  # mask in polygon format
    xyn = result.masks.xyn  # normalized
    masks = result.masks.data  # mask in matrix format (num_objects x H x W)


image 1/1 /home/neelraj-reddy/college/6th_sem/computer vision/project/trying out/input3.jpg: 448x640 1 dog, 7.4ms
Speed: 2.1ms preprocess, 7.4ms inference, 2.4ms postprocess per image at shape (1, 3, 448, 640)


In [5]:
# Process results list
for result in results:
    boxes = result.boxes  # Boxes object for bounding box outputs
    masks = result.masks  # Masks object for segmentation masks outputs
    keypoints = result.keypoints  # Keypoints object for pose outputs
    probs = result.probs  # Probs object for classification outputs
    obb = result.obb  # Oriented boxes object for OBB outputs
    result.show()  # display to screen
    result.save(filename="result.jpg")  # save to disk