In [None]:
import cv2
import numpy as np
from skimage import exposure

# Colorize the target image by matching its a/b (chroma) histograms to those
# of a reference image, while using the target's grayscale as lightness.

# Read the target and reference images
target = cv2.imread('input3.jpg')
reference = cv2.imread('reference3.jpeg')

# cv2.imread reports failure by returning None instead of raising, so fail
# here with a clear message rather than later inside cv2.resize/cvtColor.
if target is None:
    raise FileNotFoundError("Could not read target image 'input3.jpg'")
if reference is None:
    raise FileNotFoundError("Could not read reference image 'reference3.jpeg'")

# Resize reference to match target size (cv2.resize takes (width, height))
reference = cv2.resize(reference, (target.shape[1], target.shape[0]))

# Convert target to grayscale
target_gray = cv2.cvtColor(target, cv2.COLOR_BGR2GRAY)

# Convert reference and target to Lab color space
target_lab = cv2.cvtColor(target, cv2.COLOR_BGR2Lab)
reference_lab = cv2.cvtColor(reference, cv2.COLOR_BGR2Lab)

# Split Lab channels of target and reference
l_tar, a_tar, b_tar = cv2.split(target_lab)
l_ref, a_ref, b_ref = cv2.split(reference_lab)

# Histogram matching for a and b channels
a_matched = exposure.match_histograms(a_tar, a_ref, channel_axis=None)
b_matched = exposure.match_histograms(b_tar, b_ref, channel_axis=None)

# The matched channels are floating point; round to the nearest level and
# clip to the 8-bit range instead of truncating with a bare astype().
a_matched_u8 = np.clip(np.rint(a_matched), 0, 255).astype(np.uint8)
b_matched_u8 = np.clip(np.rint(b_matched), 0, 255).astype(np.uint8)

# Merge the grayscale L with matched a and b channels
# NOTE(review): the BGR2GRAY image is used as the Lab lightness channel rather
# than l_tar; the two are not the same quantity - confirm this is intended.
colored_lab = cv2.merge((target_gray, a_matched_u8, b_matched_u8))

# Convert back to BGR color space
result = cv2.cvtColor(colored_lab, cv2.COLOR_Lab2BGR)

# Save the result (cv2.imwrite returns False when the file cannot be written)
if not cv2.imwrite('output_colored_matched.jpg', result):
    raise IOError("Could not write 'output_colored_matched.jpg'")




In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator
from sklearn.mixture import GaussianMixture
from scipy.stats import wasserstein_distance
import ot



# Load images
target_img = cv2.imread('input1.jpg')
reference_img = cv2.imread('reference1.jpeg')

# cv2.imread returns None (it does not raise) when a file is missing or
# unreadable; stop here with a clear message instead of failing in cvtColor.
if target_img is None:
    raise FileNotFoundError("Could not read target image 'input1.jpg'")
if reference_img is None:
    raise FileNotFoundError("Could not read reference image 'reference1.jpeg'")

# The RGB versions are what is passed to the mask generator and to the
# palette / recolouring functions below.
target_img_rgb = cv2.cvtColor(target_img, cv2.COLOR_BGR2RGB)
reference_img_rgb = cv2.cvtColor(reference_img, cv2.COLOR_BGR2RGB)

# Load SAM model
# NOTE(review): absolute, machine-specific checkpoint path - replace with a
# configurable location before sharing the notebook.
sam_checkpoint = "/home/neelraj-reddy/college/6th_sem/computer vision/project/sam_vit_h_4b8939.pth"  # Replace with correct path if needed
model_type = "vit_h"

sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)

# Load the SAM model on CPU (switch to 'cuda' here if a GPU is available)
sam.to('cpu')

mask_generator = SamAutomaticMaskGenerator(sam)

# Generate masks on CPU; each entry is a dict whose 'segmentation' item is
# used as a boolean pixel mask by the functions below.
target_masks = mask_generator.generate(target_img_rgb)
reference_masks = mask_generator.generate(reference_img_rgb)


# Extract masked regions and dominant colors using GMM
def extract_colors_with_gmm(img, masks, n_components=5):
    """Fit a Gaussian mixture to the pixels of every mask and return the component means.

    Args:
        img: Image array indexed by each mask's boolean 'segmentation' array.
        masks: Iterable of dicts, each with a 'segmentation' boolean mask.
        n_components: Maximum number of mixture components (palette colours)
            per region.

    Returns:
        A list aligned with ``masks``. Each entry is an array of component
        means with at most ``n_components`` rows; a region with fewer pixels
        than ``n_components`` gets one component per pixel, and an empty
        region gets an array with zero rows.
    """
    colors = []
    for mask in masks:
        binary_mask = mask['segmentation']
        # Boolean indexing flattens the region to one row per selected pixel.
        region_pixels = img[binary_mask].astype(np.float64)
        n_pixels = region_pixels.shape[0]

        if n_pixels == 0:
            # Keep the output aligned with `masks` even for an empty region.
            colors.append(np.empty((0,) + region_pixels.shape[1:]))
            continue

        # GaussianMixture refuses to fit more components than samples, so
        # tiny regions get a smaller palette instead of raising.
        gmm = GaussianMixture(n_components=min(n_components, n_pixels), random_state=42)
        gmm.fit(region_pixels)

        # Mean colour of each component is the region's palette.
        colors.append(gmm.means_)

    return colors


# Build one GMM palette per SAM mask for each image. Each list is aligned
# index-for-index with the mask list it was computed from.
target_colors = extract_colors_with_gmm(target_img_rgb, target_masks)
reference_colors = extract_colors_with_gmm(reference_img_rgb, reference_masks)

# Apply Optimal Transport (OT) for color mapping
def color_transfer_ot(source_colors, target_colors):
    """Map each source palette onto the paired target palette with entropic optimal transport.

    Palettes are paired by position (``zip``), so only as many pairs as the
    shorter list are processed.

    Args:
        source_colors: List of (n_source, channels) palette arrays.
        target_colors: List of (n_target, channels) palette arrays.

    Returns:
        A list of arrays, one per pair, with the same number of rows as the
        source palette. Each row is the barycentric projection of that source
        colour, i.e. a weighted average of the paired target colours. If
        either palette of a pair is empty, the source palette is returned
        unchanged for that pair.
    """
    transferred_colors = []

    for src_colors, tgt_colors in zip(source_colors, target_colors):
        src_colors = np.asarray(src_colors, dtype=np.float64)
        tgt_colors = np.asarray(tgt_colors, dtype=np.float64)
        n_source = src_colors.shape[0]
        n_target = tgt_colors.shape[0]

        if n_source == 0 or n_target == 0:
            # Nothing to transport; keep the list aligned with the inputs.
            transferred_colors.append(src_colors.copy())
            continue

        # Cost matrix - Euclidean distance between source and target colors
        cost_matrix = np.linalg.norm(src_colors[:, np.newaxis] - tgt_colors, axis=2)

        # Rescale costs to [0, 1]: with raw colour distances and reg=0.1 the
        # Sinkhorn kernel exp(-cost / reg) underflows to zero.
        max_cost = cost_matrix.max()
        if max_cost > 0:
            cost_matrix = cost_matrix / max_cost

        # Apply Sinkhorn OT with uniform weights on both palettes
        source_weights = np.full(n_source, 1.0 / n_source)
        target_weights = np.full(n_target, 1.0 / n_target)
        ot_plan = ot.sinkhorn(source_weights, target_weights, cost_matrix, reg=0.1)

        # Each row of the plan sums to that source colour's weight, not to 1.
        # Divide by the row mass so the result is a proper weighted average
        # rather than a colour scaled down by the weight.
        row_mass = ot_plan.sum(axis=1, keepdims=True)
        transferred_color = np.divide(
            np.dot(ot_plan, tgt_colors),
            row_mass,
            out=src_colors.copy(),  # fallback for a row that carries no mass
            where=row_mass > 0,
        )
        transferred_colors.append(transferred_color)

    return transferred_colors


# Palettes are paired by list position, so mask i of the reference image is
# matched with mask i of the target image.
# NOTE(review): color_transfer_ot builds its output from its second argument,
# so with this argument order the "transferred" colours are combinations of
# the target image's own palette - confirm whether the arguments should be
# swapped so that the reference colours end up in the result.
transferred_colors = color_transfer_ot(reference_colors, target_colors)

# Reconstruct the target image with new colors
def apply_new_colors(img, masks, transferred_colors):
    """Recolour every masked region with the nearest colour of its new palette.

    Args:
        img: Source image; it is not modified.
        masks: Iterable of dicts, each with a boolean 'segmentation' mask.
        transferred_colors: Iterable of (n_colors, channels) palettes, paired
            with ``masks`` by position.

    Returns:
        A copy of ``img`` in which each masked pixel is replaced by the
        palette colour closest (Euclidean distance) to its original value.
        Regions with no pixels or an empty palette are left untouched. Where
        masks overlap, the later mask wins.
    """
    result_img = img.copy()
    for mask, new_colors in zip(masks, transferred_colors):
        binary_mask = mask['segmentation']
        palette = np.asarray(new_colors, dtype=np.float64)
        region_pixels = img[binary_mask].astype(np.float64)
        if region_pixels.shape[0] == 0 or palette.shape[0] == 0:
            continue

        # Nearest-palette search, one palette colour at a time: the palette is
        # small, and this avoids a (pixels x colours x channels) temporary.
        closest_color_idx = np.zeros(region_pixels.shape[0], dtype=np.intp)
        best_distance = np.full(region_pixels.shape[0], np.inf)
        for idx, color in enumerate(palette):
            distance = np.linalg.norm(region_pixels - color, axis=1)
            closer = distance < best_distance  # strict: first colour wins ties
            closest_color_idx[closer] = idx
            best_distance[closer] = distance[closer]

        replacement = palette[closest_color_idx]
        if np.issubdtype(result_img.dtype, np.integer):
            limits = np.iinfo(result_img.dtype)
            replacement = np.clip(np.rint(replacement), limits.min, limits.max)

        # Assign through the mask in one step. Indexing with the mask first
        # and then by position (result_img[mask][i] = ...) writes into a
        # temporary copy and leaves result_img unchanged.
        result_img[binary_mask] = replacement.astype(result_img.dtype)

    return result_img


# Recolour the RGB target image region by region with the transferred palettes.
result_img = apply_new_colors(target_img_rgb, target_masks, transferred_colors)

# Save and display results
# cv2.imwrite is given a BGR copy; the RGB image is kept for matplotlib below.
result_bgr = cv2.cvtColor(result_img, cv2.COLOR_RGB2BGR)
cv2.imwrite("result_image.jpg", result_bgr)

plt.imshow(result_img)
plt.title("Color Transferred Image")
plt.axis('off')
plt.show()




In [1]:
import cv2
import numpy as np
from ultralytics import YOLO

# Load the pre-trained YOLOv8 model for segmentation.
# `model` is a module-level global that segment_image_yolo reads at call time.
model = YOLO('yolov8n-seg.pt')  # YOLOv8 segmentation checkpoint, 'n' (nano) variant

def segment_image_yolo(image):
    """Run the module-level YOLO segmentation ``model`` on one image.

    Args:
        image: The image to segment, passed straight to ``model``.

    Returns:
        The ``masks`` attribute of the first result, or ``None`` when the
        model returns no results. ``masks`` itself may also be ``None`` if
        nothing was detected.
    """
    # Calling the model returns a list with one result per input image, so
    # the masks live on results[0], not on the list itself.
    # retina_masks=True asks for masks at the original image resolution.
    results = model(image, retina_masks=True)
    if not results:
        return None

    # Masks of every object detected in the (single) input image
    return results[0].masks

def extract_palette(image, mask, num_colors=5):
    """Return the dominant colours of the masked pixels as GMM component means.

    Args:
        image: Image array whose masked pixels are reshaped to rows of three
            values.
        mask: Array indexing ``image``; pixels where ``mask > 0`` are used.
        num_colors: Maximum number of palette colours.

    Returns:
        A float array with three columns. It has ``num_colors`` all-zero rows
        when the mask selects no pixels, otherwise
        ``min(num_colors, number of pixels)`` rows of component means.
    """
    # Flatten the masked pixels to get colors
    pixels = image[mask > 0].reshape(-1, 3)
    if len(pixels) == 0:
        # Dummy palette, float like the GMM means returned below
        return np.zeros((num_colors, 3))

    # Imported here because this cell does not import scikit-learn at the top.
    from sklearn.mixture import GaussianMixture

    # Fixed seed so repeated runs give the same palette, matching the
    # random_state used by the other GMM palette extractor in this notebook.
    gmm = GaussianMixture(n_components=min(num_colors, len(pixels)), random_state=42)
    gmm.fit(pixels.astype(np.float64))
    return gmm.means_

def color_transfer(target, reference, target_mask, reference_mask):
    """Shift the masked target pixels towards the reference object's palette.

    Palettes are extracted from both masked regions with ``extract_palette``.
    Every masked target pixel is assigned to its nearest target palette
    colour and moved by the offset between that colour and the reference
    palette colour paired with it.

    Args:
        target: Target image; it is not modified.
        reference: Reference image supplying the colours.
        target_mask: Mask for ``target``; pixels where it is > 0 are recoloured.
        reference_mask: Mask for ``reference``; pixels where it is > 0 feed
            the reference palette.

    Returns:
        A copy of ``target`` with the masked pixels recoloured. The copy is
        unchanged when either mask selects no pixels.
    """
    new_target = target.copy()
    region = np.asarray(target_mask) > 0
    if not region.any() or not (np.asarray(reference_mask) > 0).any():
        return new_target

    target_palette = np.asarray(extract_palette(target, target_mask), dtype=np.float64)
    reference_palette = np.asarray(extract_palette(reference, reference_mask), dtype=np.float64)
    if len(target_palette) == 0 or len(reference_palette) == 0:
        return new_target

    # Mixture components come back in arbitrary order; sort both palettes by
    # brightness so the i-th darkest target colour maps to the i-th darkest
    # reference colour.
    target_palette = target_palette[np.argsort(target_palette.sum(axis=1), kind="stable")]
    reference_palette = reference_palette[np.argsort(reference_palette.sum(axis=1), kind="stable")]

    # The palettes can differ in length (small regions get fewer colours), so
    # spread the target indices evenly over the reference palette.
    ref_index = np.rint(
        np.linspace(0, len(reference_palette) - 1, num=len(target_palette))
    ).astype(np.intp)
    shifts = reference_palette[ref_index] - target_palette

    # Assign each masked pixel to its nearest target palette colour. The mask
    # is binary, so a palette index cannot be compared against mask values.
    pixels = new_target[region].astype(np.float64)
    nearest = np.zeros(len(pixels), dtype=np.intp)
    best_distance = np.full(len(pixels), np.inf)
    for idx, color in enumerate(target_palette):
        distance = np.linalg.norm(pixels - color, axis=1)
        closer = distance < best_distance
        nearest[closer] = idx
        best_distance[closer] = distance[closer]

    # Round and clip to the 0-255 range before casting back to the image dtype.
    shifted = np.clip(np.rint(pixels + shifts[nearest]), 0, 255)
    new_target[region] = shifted.astype(new_target.dtype)
    return new_target

# Load target and reference images
target_img = cv2.imread('input.jpg')
reference_img = cv2.imread('reference.jpeg')

# cv2.imread returns None instead of raising when a file cannot be read.
if target_img is None:
    raise FileNotFoundError("Could not read target image 'input.jpg'")
if reference_img is None:
    raise FileNotFoundError("Could not read reference image 'reference.jpeg'")

# Segment the images using YOLOv8
target_masks = segment_image_yolo(target_img)
reference_masks = segment_image_yolo(reference_img)

# The masks object is None when nothing was detected in an image.
if target_masks is None or len(target_masks) == 0:
    raise RuntimeError("No objects were segmented in 'input.jpg'")
if reference_masks is None or len(reference_masks) == 0:
    raise RuntimeError("No objects were segmented in 'reference.jpeg'")


def _first_object_mask(masks, image):
    """Return the first object's mask as a 0/1 uint8 array with the image's height and width."""
    # `.data` holds the raw mask tensor; calling .cpu().numpy() on the masks
    # object itself returns another wrapper object, not an ndarray.
    mask = masks.data[0].cpu().numpy()
    height, width = image.shape[:2]
    if mask.shape != (height, width):
        # NOTE(review): masks may come back at the network's input resolution;
        # a plain resize is used here - confirm alignment if the model pads
        # its input.
        mask = cv2.resize(mask, (width, height), interpolation=cv2.INTER_NEAREST)
    return (mask > 0).astype(np.uint8)


# Assuming a simple case where we are working with the first object mask
target_mask = _first_object_mask(target_masks, target_img)  # Extract the first object mask
reference_mask = _first_object_mask(reference_masks, reference_img)  # Extract the first object mask

# Perform color transfer based on the masks
result_img = color_transfer(target_img, reference_img, target_mask, reference_mask)

# Save the resulting image (cv2.imwrite returns False on failure)
if not cv2.imwrite('result.jpg', result_img):
    raise IOError("Could not write 'result.jpg'")












In [None]:
from ultralytics import YOLO

# Load a model
# Only one checkpoint is loaded: a model loaded from "yolo11n-seg.pt" just
# before this line would be discarded immediately by the reassignment.
# NOTE(review): this rebinds the module-level `model` that segment_image_yolo
# reads at call time, and the paths below are absolute and machine-specific.
model = YOLO("/home/neelraj-reddy/college/6th_sem/computer vision/project/trying out/yolo11l-seg.pt")  # load a local checkpoint

# Predict with the model
results = model("/home/neelraj-reddy/college/6th_sem/computer vision/project/trying out/input1.jpg")  # predict on an image

# Access the results. The names stay None when no result carries any masks;
# otherwise they hold the values of the last result that does.
xy = xyn = masks = None
for result in results:
    if result.masks is None:
        # Nothing was segmented in this image
        continue
    xy = result.masks.xy  # mask in polygon format
    xyn = result.masks.xyn  # normalized
    masks = result.masks.data  # mask in matrix format (num_objects x H x W)





In [4]:
import torch
import torch.optim as optim
import torch.nn.functional as F
from torch import nn
from torchvision import models, transforms
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

# Load images
def load_image(image_path, max_size=400):
    """Open an image as RGB, shrink it if needed and return a normalized batch tensor.

    Args:
        image_path: Path handed to ``Image.open``.
        max_size: Upper bound for the longer side; larger images are scaled
            down with Lanczos resampling, smaller ones are left as they are.

    Returns:
        The image as a tensor with a leading batch dimension, normalized with
        the mean/std constants below and moved to the module-level ``device``.
    """
    picture = Image.open(image_path).convert('RGB')

    longest_side = max(picture.size)
    if longest_side > max_size:
        ratio = max_size / float(longest_side)
        shrunk_size = tuple(int(side * ratio) for side in picture.size)
        picture = picture.resize(shrunk_size, Image.Resampling.LANCZOS)

    # Same three stages in the same order: to tensor, add batch axis, normalize.
    stages = [
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.unsqueeze(0)),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    batch = transforms.Compose(stages)(picture)
    return batch.to(device)

# Convert tensor to image for visualization
def imshow(tensor, title=None):
    """Display a normalized image tensor with matplotlib.

    Args:
        tensor: Image tensor with a leading batch dimension of size 1 and
            channels before height and width. It may require grad.
        title: Optional figure title.
    """
    # detach() first: calling .numpy() on a tensor that requires grad raises,
    # and the tensor being optimized in style_transfer is such a tensor.
    image = tensor.detach().cpu().clone()
    image = image.squeeze(0)        # drop the batch dimension
    image = image.permute(1, 2, 0)  # channels-first -> channels-last for matplotlib
    image = image.numpy()
    # Undo the per-channel normalization applied in load_image
    image = image * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])
    image = np.clip(image, 0, 1)
    plt.imshow(image)
    if title:
        plt.title(title)
    plt.pause(0.001)

# Define a function to compute the gram matrix (used for style)
def gram_matrix(input_tensor):
    """Return the Gram matrix of a 4-D feature tensor, divided by its element count.

    The tensor is viewed as a (batch * channels, height * width) matrix and
    multiplied by its own transpose; the product is then divided by
    batch * channels * height * width.
    """
    n, c, h, w = input_tensor.shape
    flat = input_tensor.view(n * c, h * w)
    inner_products = torch.mm(flat, flat.t())
    return inner_products.div(n * c * h * w)

# Define a function for style transfer
def style_transfer(target_image, reference_image, num_steps=300, style_weight=1e6, content_weight=1):
    """Optimize a copy of the target image so its VGG-19 style statistics match the reference.

    Args:
        target_image: Batched, normalized image tensor supplying the content
            and the starting point of the optimization.
        reference_image: Batched, normalized image tensor supplying the style.
        num_steps: Number of ``optimizer.step`` calls (each L-BFGS step may
            evaluate the loss several times).
        style_weight: Multiplier for the summed style (Gram matrix) loss.
        content_weight: Multiplier for the content loss.

    Returns:
        The optimized image tensor, detached from the autograd graph, still in
        the normalized space of the inputs.
    """
    # Load pre-trained VGG-19 model
    # NOTE(review): newer torchvision releases prefer the `weights=` argument
    # over `pretrained=True` - confirm against the installed version.
    vgg = models.vgg19(pretrained=True).features.to(device).eval()

    # Only the image is optimized; freezing the weights avoids building
    # gradients for them on every backward pass.
    for param in vgg.parameters():
        param.requires_grad_(False)

    # Extract content and style layers
    content_layers = ['21']  # conv4_2
    style_layers = ['0', '5', '10', '19']  # Different layers for style transfer

    # Get the content and style features of the images
    def get_features(image, layers):
        """Return {layer name: activation} for the requested layers."""
        wanted = set(layers)
        features = {}
        x = image
        for name, layer in vgg._modules.items():
            x = layer(x)
            if name in wanted:
                # Keyed by name: activations are produced in network order,
                # which is not the order of `layers`.
                features[name] = x
        return features

    # Fixed optimization targets, computed once without an autograd graph so
    # that repeated backward passes never revisit them.
    with torch.no_grad():
        target_content = get_features(target_image, content_layers)
        reference_style = {
            name: gram_matrix(feature)
            for name, feature in get_features(reference_image, style_layers).items()
        }

    # Optimize the target image
    target_image_opt = target_image.clone().requires_grad_(True)
    optimizer = optim.LBFGS([target_image_opt])

    def closure():
        optimizer.zero_grad()

        current = get_features(target_image_opt, content_layers + style_layers)

        target_content_loss = 0
        for name in content_layers:
            target_content_loss = target_content_loss + F.mse_loss(current[name], target_content[name])

        target_style_loss = 0
        for name in style_layers:
            target_style_loss = target_style_loss + F.mse_loss(gram_matrix(current[name]), reference_style[name])

        total_loss = style_weight * target_style_loss + content_weight * target_content_loss
        total_loss.backward()

        return total_loss

    # Perform the optimization
    for i in range(num_steps):
        optimizer.step(closure)

        if i % 50 == 0:
            print(f"Step {i} - Total Loss: {closure().item():.4f}")
            # Pass a detached view: the optimized tensor requires grad and
            # cannot be converted to NumPy directly.
            imshow(target_image_opt.detach(), title=f'Step {i}')

    return target_image_opt.detach()

# Set the device
# `device` is a module-level global read by load_image and style_transfer at
# call time, so it has to be assigned before the calls below.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load target and reference images
# load_image already moves its result to `device`; the extra .to(device)
# calls here are redundant.
target_image = load_image('input.jpg').to(device)
reference_image = load_image('reference.jpeg').to(device)

# Perform style transfer
output_image = style_transfer(target_image, reference_image)

# Show the result
imshow(output_image, title='Style Transfer Output')








