# Image Augmentation using the Meta SAM-2 Model and Stability AI


Importing images with annotated text files for YOLOv8n model training

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found below the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Image segmenting

Import the SAM-2 model (the checkpoints may take a while to download).


In [None]:
# Clone the SAM-2 repository. No target directory is given, so the clone lands
# in the current directory; the `%cd` below assumes that is /kaggle/working.
!git clone https://github.com/facebookresearch/segment-anything-2.git
# Install the package in editable mode from the repository root.
%cd /kaggle/working/segment-anything-2
%pip install -e .
# Run the checkpoint download script from inside the checkpoints directory.
%cd /kaggle/working/segment-anything-2/checkpoints
!bash /kaggle/working/segment-anything-2/checkpoints/download_ckpts.sh
# Go back to the repository root for the remaining cells.
%cd /kaggle/working/segment-anything-2

In [None]:
import torch
import matplotlib.pyplot as plt
from PIL import Image


In [None]:
# use bfloat16 for the entire notebook
# The autocast context is entered by hand and never exited, so it stays active
# for every cell that runs afterwards in this kernel.
torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()

# Inspects CUDA device 0 directly.
# NOTE(review): presumably this raises when no GPU is attached — confirm the
# notebook is always run in a GPU session.
if torch.cuda.get_device_properties(0).major >= 8:
    # turn on tfloat32 for Ampere GPUs (https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices)
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True

# Displaying Image


In [None]:
# Load the sample picture from the Kaggle input dataset.
image = Image.open('/kaggle/input/avengers/maxresdefault.jpg')
# Force three RGB channels and keep the result as a NumPy array; later cells
# hand this `image` array to the SAM-2 predictor and to the masking helpers.
image = np.array(image.convert("RGB"))
# Preview on a large figure with the axes hidden.
plt.figure(figsize=(20,20))
plt.imshow(image)
plt.axis('off')
plt.show()

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor

# Functions to display mask and points on image

In [None]:
def show_mask(mask, ax, random_color=False, borders = True):
    """
    Draw a semi-transparent colour overlay for ``mask`` on ``ax``.

    Parameters:
    - mask: NumPy array whose last two dimensions are (height, width); it is
      cast to uint8 before use.
    - ax: Object with an ``imshow`` method (e.g. a Matplotlib axis).
    - random_color: Boolean, use a random RGB colour (alpha 0.6) instead of
      the fixed blue.
    - borders: Boolean, also draw the external contours of the mask with
      OpenCV (imported lazily, only when needed).
    """
    rgba = (
        np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
        if random_color
        else np.array([30/255, 144/255, 255/255, 0.6])
    )
    height, width = mask.shape[-2:]
    mask = mask.astype(np.uint8)
    # Broadcast the (h, w, 1) mask against the (1, 1, 4) colour.
    overlay = mask.reshape(height, width, 1) * rgba.reshape(1, 1, -1)
    if borders:
        import cv2
        found, _ = cv2.findContours(mask,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
        # Try to smooth contours
        smoothed = []
        for outline in found:
            smoothed.append(cv2.approxPolyDP(outline, epsilon=0.01, closed=True))
        overlay = cv2.drawContours(overlay, smoothed, -1, (1, 1, 1, 0.5), thickness=2)
    ax.imshow(overlay)

def show_points(coords, labels, ax, marker_size=375):
    """
    Scatter prompt points on ``ax``: label 1 in green, label 0 in red.

    Parameters:
    - coords: NumPy array indexed as ``coords[:, 0]`` (x) and ``coords[:, 1]`` (y).
    - labels: NumPy array of the same length as ``coords`` holding 1 or 0.
    - ax: Object with a ``scatter`` method (e.g. a Matplotlib axis).
    - marker_size: Marker size passed to ``scatter`` as ``s``.
    """
    for label_value, colour in ((1, 'green'), (0, 'red')):
        chosen = coords[labels == label_value]
        ax.scatter(chosen[:, 0], chosen[:, 1], color=colour, marker='*', s=marker_size, edgecolor='white', linewidth=1.25)

def show_box(box, ax):
    """
    Outline a box on ``ax`` in green with a transparent fill.

    Parameters:
    - box: Sequence read as (x0, y0, x1, y1).
    - ax: Matplotlib axis that receives the rectangle patch.
    """
    left, top = box[0], box[1]
    outline = plt.Rectangle(
        (left, top),
        box[2] - box[0],
        box[3] - box[1],
        edgecolor='green',
        facecolor=(0, 0, 0, 0),
        lw=2,
    )
    ax.add_patch(outline)

def show_masks(image, masks, scores, point_coords=None, box_coords=None, input_labels=None, borders=True):
    """
    Show one figure per mask: the image, the mask overlay, and any prompts.

    Parameters:
    - image: Image accepted by ``plt.imshow``.
    - masks: Iterable of masks, paired element-wise with ``scores``.
    - scores: Sized iterable of scores; shown in the title when there is more
      than one.
    - point_coords: Optional point prompts; requires ``input_labels``.
    - box_coords: Optional box prompt passed to ``show_box``.
    - input_labels: Labels for ``point_coords``.
    - borders: Forwarded to ``show_mask``.
    """
    for number, (mask, score) in enumerate(zip(masks, scores), start=1):
        plt.figure(figsize=(10, 10))
        plt.imshow(image)
        show_mask(mask, plt.gca(), borders=borders)
        if point_coords is not None:
            assert input_labels is not None
            show_points(point_coords, input_labels, plt.gca())
        if box_coords is not None:
            # boxes
            show_box(box_coords, plt.gca())
        if len(scores) > 1:
            plt.title(f"Mask {number}, Score: {score:.3f}", fontsize=18)
        plt.axis('off')
        plt.show()

In [None]:
# Show the image with the axes left on, presumably so that pixel coordinates
# can be read off for the point prompts chosen below.
plt.figure(figsize=(10, 10))
plt.imshow(image)
plt.axis('on')
plt.show()

In [None]:
# Checkpoint file inside the cloned repository's checkpoints directory.
# NOTE(review): assumes download_ckpts.sh fetched this file — confirm.
sam2_checkpoint = "/kaggle/working/segment-anything-2/checkpoints/sam2_hiera_base_plus.pt"
# Model config name passed to build_sam2 together with the checkpoint.
model_cfg = "sam2_hiera_b+.yaml"

# Build the model on the GPU.
sam2_model = build_sam2(model_cfg, sam2_checkpoint, device="cuda")

# Wrap the model in the image predictor used by the following cells.
predictor = SAM2ImagePredictor(sam2_model)

In [None]:
# Register the RGB array with the predictor before any predict() call.
predictor.set_image(image)

In [None]:
# One prompt point given as [x, y]; label 1 is the value show_points draws in green.
input_point = np.array([[370,400]])
input_label = np.array([1])

In [None]:
# Overlay the prompt point on the image to check its position.
plt.figure(figsize=(10, 10))
plt.imshow(image)
show_points(input_point, input_label, plt.gca())
plt.axis('on')
plt.show()

In [None]:
# Sanity check: print the shape of the cached image embedding and of its last
# entry. `_features` is a private attribute of the predictor.
print(predictor._features["image_embed"].shape, predictor._features["image_embed"][-1].shape)

In [None]:
# Ask for several candidate masks for the single point prompt.
masks, scores, logits = predictor.predict(
    point_coords=input_point,
    point_labels=input_label,
    multimask_output=True,
)
# Reorder all three outputs by descending score so index 0 is the best mask.
sorted_ind = np.argsort(scores)[::-1]
masks = masks[sorted_ind]
scores = scores[sorted_ind]
logits = logits[sorted_ind]

In [None]:
# One figure per candidate mask, with the prompt point and mask borders drawn.
show_masks(image, masks, scores, point_coords=input_point, input_labels=input_label, borders=True)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def show_masked_area(image, mask, ax, random_color=False):
    """
    Display only the masked part of ``image`` on ``ax``; the rest is black.

    Parameters:
    - image: NumPy array of shape (height, width, channels). Only the first
      three channels are copied; any further channel stays zero.
    - mask: NumPy array of shape (height, width). It is cast to uint8, so a
      boolean or 0/1 mask keeps the pixels where it equals 1.
    - ax: Object with an ``imshow`` method (e.g. a Matplotlib axis).
    - random_color: Accepted for backward compatibility only; no colour
      overlay is drawn, so it has no effect.

    NOTE(review): this helper is defined again, identically, in later cells of
    the notebook; a single definition would be enough.
    """
    binary_mask = mask.astype(np.uint8)

    # Multiply all colour channels by the mask in one broadcasted step.
    masked_image = np.zeros_like(image)
    masked_image[..., :3] = image[..., :3] * binary_mask[..., None]

    # Show only the masked area
    ax.imshow(masked_image)

def show_masks_1(image, masks, scores):
    """
    Show one figure per mask containing only the masked pixels of ``image``.

    Parameters:
    - image: NumPy image array passed on to ``show_masked_area``.
    - masks: Iterable of masks, paired element-wise with ``scores``.
    - scores: Iterable of scores shown in each figure title.
    """
    for number, (mask, score) in enumerate(zip(masks, scores), start=1):
        _, axis = plt.subplots(figsize=(10, 10))
        show_masked_area(image, mask, axis)
        plt.title(f"Mask {number}, Score: {score:.3f}", fontsize=18)
        plt.axis('off')
        plt.show()

# Render the cut-out for every candidate mask from the multimask prediction.
show_masks_1(image, masks, scores)


In [None]:
# Show the image again with the axes on before choosing the second prompt point.
plt.figure(figsize=(10, 10))
plt.imshow(image)
plt.axis('on')
plt.show()

In [None]:
# Two prompt points, both with label 1.
input_point = np.array([[370, 400],[400,350]])
input_label = np.array([1,1])

# Logits of the highest-scoring mask from the previous prediction, reused
# below as the `mask_input` hint.
mask_input = logits[np.argmax(scores), :, :]

In [None]:
# Second prediction with both points and the previous logits as a hint.
# This rebinds `masks` and `scores`; the returned logits are discarded.
masks, scores, _ = predictor.predict(
    point_coords=input_point,
    point_labels=input_label,
    mask_input=mask_input[None, :, :],  # add a leading axis to the 2-D logits
    multimask_output=False,
)

In [None]:
# Show the refined result together with both prompt points.
show_masks(image, masks, scores, point_coords=input_point, input_labels=input_label)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def show_masked_area(image, mask, ax, random_color=False):
    """
    Display only the masked part of ``image`` on ``ax``; the rest is black.

    Parameters:
    - image: NumPy array of shape (height, width, channels). Only the first
      three channels are copied; any further channel stays zero.
    - mask: NumPy array of shape (height, width). It is cast to uint8, so a
      boolean or 0/1 mask keeps the pixels where it equals 1.
    - ax: Object with an ``imshow`` method (e.g. a Matplotlib axis).
    - random_color: Accepted for backward compatibility only; no colour
      overlay is drawn, so it has no effect.

    NOTE(review): this repeats a definition from an earlier cell of the
    notebook; a single definition would be enough.
    """
    binary_mask = mask.astype(np.uint8)

    # Multiply all colour channels by the mask in one broadcasted step.
    masked_image = np.zeros_like(image)
    masked_image[..., :3] = image[..., :3] * binary_mask[..., None]

    # Show only the masked area
    ax.imshow(masked_image)

def show_masks_1(image, masks, scores):
    """
    Show one figure per mask containing only the masked pixels of ``image``.

    Parameters:
    - image: NumPy image array passed on to ``show_masked_area``.
    - masks: Iterable of masks, paired element-wise with ``scores``.
    - scores: Iterable of scores shown in each figure title.
    """
    for number, (mask, score) in enumerate(zip(masks, scores), start=1):
        _, axis = plt.subplots(figsize=(10, 10))
        show_masked_area(image, mask, axis)
        plt.title(f"Mask {number}, Score: {score:.3f}", fontsize=18)
        plt.axis('off')
        plt.show()

# Render the cut-out for the refined mask from the second prediction.
show_masks_1(image, masks, scores)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os

def show_masked_area(image, mask, ax, random_color=False):
    """
    Display a masked area of an image on a given matplotlib axis.

    Pixels where the mask is 0 are shown as black.

    Parameters:
    - image: NumPy array of shape (height, width, channels). Only the first
      three channels are copied; any further channel stays zero.
    - mask: NumPy array of shape (height, width). It is cast to uint8, so a
      boolean or 0/1 mask keeps the pixels where it equals 1.
    - ax: Matplotlib axis object (anything with ``imshow``) used for display.
    - random_color: Accepted for backward compatibility only; no colour
      overlay is drawn, so it has no effect.

    NOTE(review): this repeats a definition from earlier cells of the
    notebook; a single definition would be enough.
    """
    binary_mask = mask.astype(np.uint8)

    # Multiply all colour channels by the mask in one broadcasted step.
    masked_image = np.zeros_like(image)
    masked_image[..., :3] = image[..., :3] * binary_mask[..., None]

    # Show only the masked area
    ax.imshow(masked_image)

def save_masked_images(image, masks, scores, save_dir="/kaggle/working/sam_2_augmentation/masked_images", random_color=False):
    """
    Save one rendered figure per mask to ``save_dir``.

    Each file is named ``masked_image_<n>.png`` (n starts at 1) and is written
    with ``savefig``, i.e. it is a rendering of the Matplotlib figure
    (including the title), not the raw pixel array.

    NOTE(review): later cells threshold these PNGs to recover the mask, so the
    title and any figure background end up in that mask too — confirm this is
    intended.

    Parameters:
    - image: NumPy array of shape (height, width, 3), representing the image.
    - masks: Iterable of binary NumPy arrays, each of shape (height, width).
    - scores: Iterable of scores associated with each mask.
    - save_dir: String, the directory to save the masked images.
    - random_color: Boolean, forwarded to ``show_masked_area``.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_dir, exist_ok=True)

    for i, (mask, score) in enumerate(zip(masks, scores)):
        fig, ax = plt.subplots(figsize=(10, 10))
        try:
            show_masked_area(image, mask, ax, random_color=random_color)
            ax.set_title(f"Mask {i+1}, Score: {score:.3f}", fontsize=18)
            ax.axis('off')

            # Save the figure
            filename = f"{save_dir}/masked_image_{i+1}.png"
            fig.savefig(filename, bbox_inches='tight', pad_inches=0)
            print(f"Saved {filename}")
        finally:
            # Always release the figure, even when drawing or saving fails.
            plt.close(fig)


In [None]:
# Example usage
# Uses the notebook's current `image`, `masks` and `scores`; the later cells
# read masked_image_1.png from the default output directory.
if __name__ == "__main__":
    save_masked_images(image, masks, scores)


In [None]:
# Reload the first saved masked figure from disk and preview it.
mask_img = Image.open('/kaggle/working/sam_2_augmentation/masked_images/masked_image_1.png')
mask_img = np.array(mask_img.convert("RGB"))
plt.figure(figsize=(20,20))
plt.imshow(mask_img)
plt.axis('off')
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os

def show_inverse_masked_area(image, mask, ax, random_color=False):
    """
    Display an inverse masked area of an image on a given matplotlib axis.

    Pixels where the mask is 1 are shown as black; the rest of the image is
    kept.

    Parameters:
    - image: NumPy array of shape (height, width, channels). Only the first
      three channels are copied; any further channel stays zero.
    - mask: NumPy array of shape (height, width) holding 0/1 (or boolean)
      values; it is cast to uint8 and inverted as ``1 - mask``.
    - ax: Matplotlib axis object (anything with ``imshow``) used for display.
    - random_color: Accepted for backward compatibility only; no colour
      overlay is drawn, so it has no effect.
    """
    # Invert the mask
    inverse_mask = 1 - mask.astype(np.uint8)

    # Multiply all colour channels by the inverted mask in one broadcasted step.
    inverse_masked_image = np.zeros_like(image)
    inverse_masked_image[..., :3] = image[..., :3] * inverse_mask[..., None]

    # Show only the inverse masked area
    ax.imshow(inverse_masked_image)

def save_inverse_masked_images(image, masks, scores, save_dir="/kaggle/working/sam_2_augmentation/inverse_masked_images", random_color=False):
    """
    Save one rendered inverse-masked figure per mask to ``save_dir``.

    Each file is named ``inverse_masked_image_<n>.png`` (n starts at 1) and is
    written with ``savefig``, i.e. it is a rendering of the Matplotlib figure
    (including the title), not the raw pixel array.

    Parameters:
    - image: NumPy array of shape (height, width, 3), representing the image.
    - masks: Iterable of binary NumPy arrays, each of shape (height, width).
    - scores: Iterable of scores associated with each mask.
    - save_dir: String, the directory to save the inverse masked images.
    - random_color: Boolean, forwarded to ``show_inverse_masked_area``.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_dir, exist_ok=True)

    for i, (mask, score) in enumerate(zip(masks, scores)):
        fig, ax = plt.subplots(figsize=(10, 10))
        try:
            show_inverse_masked_area(image, mask, ax, random_color=random_color)
            ax.set_title(f"Inverse Mask {i+1}, Score: {score:.3f}", fontsize=18)
            ax.axis('off')

            # Save the figure
            filename = f"{save_dir}/inverse_masked_image_{i+1}.png"
            fig.savefig(filename, bbox_inches='tight', pad_inches=0)
            print(f"Saved {filename}")
        finally:
            # Always release the figure, even when drawing or saving fails.
            plt.close(fig)


In [None]:
# Example usage
# Uses the notebook's current `image`, `masks` and `scores`; the later cells
# read inverse_masked_image_1.png from the default output directory.
if __name__ == "__main__":
    save_inverse_masked_images(image, masks, scores)


In [None]:
# Reload the first saved inverse-masked figure from disk and preview it.
inv_mask_img = Image.open('/kaggle/working/sam_2_augmentation/inverse_masked_images/inverse_masked_image_1.png')
inv_mask_img = np.array(inv_mask_img.convert("RGB"))
plt.figure(figsize=(20,20))
plt.imshow(inv_mask_img)
plt.axis('off')
plt.show()

In [None]:
from PIL import Image
import numpy as np
import os
import matplotlib.pyplot as plt

def combine_masked_regions(masked_image_path, inverse_masked_image_path, save_path, threshold=10):
    """
    Paste the bright pixels of the masked image onto the inverse-masked image.

    A pixel of the masked image counts as foreground when any of its R, G, B
    values is greater than ``threshold``. The result is saved to ``save_path``
    and displayed.

    Parameters:
    - masked_image_path: String, path to the masked image.
    - inverse_masked_image_path: String, path to the inverse-masked image.
    - save_path: String, path where the combined image will be saved.
    - threshold: Int, channel value a pixel must exceed to be treated as
      foreground (default 10, the value that used to be hard-coded).

    Raises:
    - ValueError: if the two images do not have the same size.
    """
    # Open images; convert() returns a loaded copy, so the files can be closed.
    with Image.open(masked_image_path) as masked_file:
        masked_image = masked_file.convert("RGBA")
    with Image.open(inverse_masked_image_path) as inverse_file:
        inverse_masked_image = inverse_file.convert("RGBA")

    # Ensure images are the same size
    if masked_image.size != inverse_masked_image.size:
        raise ValueError("Masked and inverse masked images must be the same size")

    # Convert images to numpy arrays
    masked_array = np.array(masked_image)
    inverse_masked_array = np.array(inverse_masked_image)

    # Foreground = pixels brighter than the threshold in any colour channel
    mask = np.any(masked_array[..., :3] > threshold, axis=-1)

    # Replace inverse-masked image values with masked image values where mask is true
    combined_array = inverse_masked_array.copy()
    combined_array[mask] = masked_array[mask]

    # Convert back to image
    combined_image = Image.fromarray(combined_array)

    # Save the combined image
    combined_image.save(save_path)
    print(f"Combined image saved as {save_path}")

    # Display the combined image
    plt.imshow(combined_image)
    plt.axis('off')
    plt.show()

# Example usage
# Inputs are the PNGs written by save_masked_images / save_inverse_masked_images.
masked_image_path = "/kaggle/working/sam_2_augmentation/masked_images/masked_image_1.png"
inverse_masked_image_path = "/kaggle/working/sam_2_augmentation/inverse_masked_images/inverse_masked_image_1.png"
save_path = "/kaggle/working/sam_2_augmentation/combined_images/combined_image_1.png"

# Ensure the output directory exists
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# Combine the images
combine_masked_regions(masked_image_path, inverse_masked_image_path, save_path)


In [None]:
from PIL import Image
import numpy as np
import os
import matplotlib.pyplot as plt

def pixelate_area(image, mask, pixelation_level):
    """
    Apply pixelation to the masked area of an image.

    The image is tiled into square blocks of ``pixelation_level`` pixels
    (smaller at the right/bottom edges). Every block that contains at least
    one masked pixel is filled with the block's mean colour; the mean is taken
    over the whole block, not only its masked pixels. Other blocks are left
    unchanged.

    Parameters:
    - image: NumPy array of shape (height, width) or (height, width, channels).
    - mask: Boolean NumPy array of shape (height, width) indicating the masked area.
    - pixelation_level: Int >= 1, the size of the blocks used for pixelation.

    Returns:
    - A modified copy of ``image``; the input is not changed.

    Raises:
    - ValueError: if ``pixelation_level`` is smaller than 1.
    """
    # A zero step makes range() fail with an unrelated message and a negative
    # one silently skips all work, so reject both explicitly.
    if pixelation_level < 1:
        raise ValueError("pixelation_level must be a positive integer")

    # Create a copy of the image to modify
    pixelated_image = image.copy()

    # Only the first two axes are spatial; a channel axis is optional.
    h, w = image.shape[:2]

    for y in range(0, h, pixelation_level):
        for x in range(0, w, pixelation_level):
            # Slices are clipped to the array bounds automatically.
            block = (slice(y, y + pixelation_level), slice(x, x + pixelation_level))

            # Check if the block touches the masked area
            if np.any(mask[block]):
                # Compute the mean color of the block
                mean_color = image[block].mean(axis=(0, 1)).astype(int)

                # Apply the mean color to the block
                pixelated_image[block] = mean_color

    return pixelated_image

def combine_pixelated_mask(masked_image_path, inverse_masked_image_path, save_path, pixelation_level=10):
    """
    Pixelate the foreground of the masked image and paste it onto the
    inverse-masked image, then save and display the result.

    A pixel of the masked image is foreground when any of its R, G, B values
    is non-zero.

    Parameters:
    - masked_image_path: String, path to the masked image.
    - inverse_masked_image_path: String, path to the inverse-masked image.
    - save_path: String, path where the combined image will be saved.
    - pixelation_level: Int, the size of the blocks used for pixelation.

    Raises:
    - ValueError: if the two images do not have the same size.
    """
    foreground = Image.open(masked_image_path).convert("RGBA")
    background = Image.open(inverse_masked_image_path).convert("RGBA")

    if foreground.size != background.size:
        raise ValueError("Masked and inverse masked images must be the same size")

    foreground_px = np.array(foreground)
    background_px = np.array(background)

    # Foreground = any colour channel above zero.
    region = np.any(foreground_px[..., :3] > 0, axis=-1)

    pixelated_px = pixelate_area(foreground_px, region, pixelation_level)

    # Take pixelated pixels inside the region and background pixels elsewhere.
    merged_px = np.where(region[..., None], pixelated_px, background_px)

    combined_image = Image.fromarray(merged_px)
    combined_image.save(save_path)
    print(f"Combined image saved as {save_path}")

    plt.imshow(combined_image)
    plt.axis('off')
    plt.show()

# Example usage
# Same input PNGs as the plain combine step; only the output file name differs.
masked_image_path = "/kaggle/working/sam_2_augmentation/masked_images/masked_image_1.png"
inverse_masked_image_path = "/kaggle/working/sam_2_augmentation/inverse_masked_images/inverse_masked_image_1.png"
save_path = "/kaggle/working/sam_2_augmentation/combined_images/pixelated_combined_image_1.png"

# Ensure the output directory exists
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# Combine the images with pixelated mask
combine_pixelated_mask(masked_image_path, inverse_masked_image_path, save_path, pixelation_level=10)


In [None]:
from PIL import Image
import numpy as np
import os
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

def change_hue(image, mask, hue_shift):
    """
    Change the hue of the masked area in an image.

    Only masked pixels are converted to HSV and back, so every pixel outside
    the mask is returned exactly as it came in. Converted values are rounded
    to the nearest integer rather than truncated, which avoids colours
    drifting down by one level in the float round trip.

    Parameters:
    - image: NumPy array of shape (height, width, 3) with RGB values in 0..255.
    - mask: Boolean NumPy array of shape (height, width) indicating the masked area.
    - hue_shift: Float, amount to shift the hue (0 to 1 for a complete cycle).

    Returns:
    - A new uint8 array with the same shape as ``image``.
    """
    # astype always returns a copy, so the caller's array is not modified.
    modified_image = image.astype('uint8')

    if not np.any(mask):
        return modified_image

    # Convert only the masked pixels to HSV, as floats in the range [0, 1].
    hsv_pixels = mcolors.rgb_to_hsv(image[mask].astype('float32') / 255.0)

    # Shift the hue and wrap it back into [0, 1).
    hsv_pixels[..., 0] = (hsv_pixels[..., 0] + hue_shift) % 1.0

    # Convert back to RGB and scale to [0, 255], rounding to the nearest level.
    rgb_pixels = mcolors.hsv_to_rgb(hsv_pixels) * 255.0
    modified_image[mask] = np.clip(np.rint(rgb_pixels), 0, 255).astype('uint8')

    return modified_image

def combine_hue_modified_mask(masked_image_path, inverse_masked_image_path, save_path, hue_shift=0.1):
    """
    Shift the hue of the masked image's foreground and paste it onto the
    inverse-masked image, then save and display the result.

    A pixel of the masked image is foreground when any of its R, G, B values
    is non-zero. The alpha channel of the masked image is kept for the pasted
    pixels.

    Parameters:
    - masked_image_path: String, path to the masked image.
    - inverse_masked_image_path: String, path to the inverse-masked image.
    - save_path: String, path where the combined image will be saved.
    - hue_shift: Float, amount to shift the hue (0 to 1 for a complete cycle).

    Raises:
    - ValueError: if the two images do not have the same size.
    """
    foreground = Image.open(masked_image_path).convert("RGBA")
    background = Image.open(inverse_masked_image_path).convert("RGBA")

    if foreground.size != background.size:
        raise ValueError("Masked and inverse masked images must be the same size")

    foreground_px = np.array(foreground)
    background_px = np.array(background)

    # Foreground = any colour channel above zero.
    region = np.any(foreground_px[..., :3] > 0, axis=-1)

    # Recolour the RGB channels, then re-attach the original alpha channel.
    recoloured_rgb = change_hue(foreground_px[..., :3], region, hue_shift)
    recoloured_rgba = np.dstack((recoloured_rgb, foreground_px[..., 3]))

    # Take recoloured pixels inside the region and background pixels elsewhere.
    merged_px = np.where(region[..., None], recoloured_rgba, background_px)

    combined_image = Image.fromarray(merged_px)
    combined_image.save(save_path)
    print(f"Combined image saved as {save_path}")

    plt.imshow(combined_image)
    plt.axis('off')
    plt.show()

# Example usage
# Same input PNGs as the plain combine step; only the output file name differs.
masked_image_path = "/kaggle/working/sam_2_augmentation/masked_images/masked_image_1.png"
inverse_masked_image_path = "/kaggle/working/sam_2_augmentation/inverse_masked_images/inverse_masked_image_1.png"
save_path = "/kaggle/working/sam_2_augmentation/combined_images/hue_modified_combined_image_1.png"

# Ensure the output directory exists
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# Combine the images with hue-modified mask
combine_hue_modified_mask(masked_image_path, inverse_masked_image_path, save_path, hue_shift=0.1)


In [None]:
from PIL import Image, ImageFilter
import numpy as np
import os
import matplotlib.pyplot as plt

def apply_blur_to_masked_area(image, mask, blur_radius=10):
    """
    Fill the masked area of an image with a Gaussian-blurred copy of the mask.

    NOTE(review): despite the name, the image content itself is not blurred.
    The mask is rendered as a white-on-black picture, blurred, and its
    non-zero grey values replace the image inside the mask, giving a light
    silhouette with soft edges. Confirm this is the intended effect.

    Parameters:
    - image: PIL Image object (RGB or RGBA).
    - mask: Boolean NumPy array of shape (height, width) indicating the masked area.
    - blur_radius: Integer, the radius of the Gaussian blur applied to the mask.

    Returns:
    - PIL Image with the same size as ``image``. The alpha channel is copied
      through when ``image`` has one; otherwise an RGB image is returned.
    """
    # Convert image to numpy array
    image_array = np.array(image)
    rgb_array = image_array[..., :3]

    # Render the mask as an 8-bit greyscale picture (True -> 255).
    mask_image = Image.fromarray((mask * 255).astype('uint8'))

    # Blur the mask picture and expand it to three channels.
    blurred_mask_image = mask_image.filter(ImageFilter.GaussianBlur(radius=blur_radius))
    blurred_mask_array = np.array(blurred_mask_image.convert('RGB'))

    # Keep the blurred values only inside the mask.
    blurred_area = np.zeros_like(rgb_array)
    blurred_area[mask] = blurred_mask_array[mask]

    # Use the blurred value wherever it is non-zero, else the original pixel.
    combined_array = np.where(blurred_area > 0, blurred_area, rgb_array)

    # Preserve the alpha channel from the original image when there is one.
    if image_array.ndim == 3 and image_array.shape[-1] >= 4:
        combined_array = np.dstack((combined_array, image_array[..., 3]))

    return Image.fromarray(np.uint8(combined_array))

def combine_and_apply_blur(masked_image_path, inverse_masked_image_path, save_path, blur_radius):
    """
    Run ``apply_blur_to_masked_area`` on the inverse-masked image, using the
    non-black pixels of the masked image as the mask, then save and display
    the result.

    Parameters:
    - masked_image_path: String, path to the masked image (used to extract the mask).
    - inverse_masked_image_path: String, path to the inverse-masked image.
    - save_path: String, path where the final image will be saved.
    - blur_radius: Integer, the radius of the Gaussian blur for the blur effect.
    """
    background = Image.open(inverse_masked_image_path).convert("RGBA")

    # Mask = every pixel of the greyscale masked image that is above zero.
    greyscale = Image.open(masked_image_path).convert("L")
    region = np.array(greyscale) > 0

    result = apply_blur_to_masked_area(background, region, blur_radius)

    result.save(save_path)
    print(f"Final image with blur effect saved as {save_path}")

    plt.imshow(result)
    plt.axis('off')
    plt.show()

# Example usage
# In a notebook __name__ is "__main__", so this block runs when the cell runs.
if __name__ == "__main__":
    blur_radius = 10  # Define the blur radius

    masked_image_path = "/kaggle/working/sam_2_augmentation/masked_images/masked_image_1.png"  # Path to the masked image
    inverse_masked_image_path = "/kaggle/working/sam_2_augmentation/inverse_masked_images/inverse_masked_image_1.png"
    save_path = "/kaggle/working/sam_2_augmentation/combined_images/blur_combined_image_1.png"

    # Ensure the output directory exists
    os.makedirs(os.path.dirname(save_path), exist_ok=True)

    # Apply blur effect to masked area
    combine_and_apply_blur(masked_image_path, inverse_masked_image_path, save_path, blur_radius)


# image generation

In [None]:
!pip install stability-sdk

In [None]:
import os
import io
import warnings
from PIL import Image
from stability_sdk import client
import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation

# Our Host URL should not be prepended with "https" nor should it have a trailing slash.
os.environ['STABILITY_HOST'] = 'grpc.stability.ai:443'

# Sign up for an account at the following link to get an API Key.
# https://platform.stability.ai/

# Click on the following link once you have created an account to be taken to your API Key.
# https://platform.stability.ai/account/keys

# SECURITY: never paste the API key into the notebook source. A literal key
# used to be hard-coded here; it must be treated as leaked and revoked.
# Supply the key through the STABILITY_KEY environment variable, or enter it
# at the hidden prompt below when the variable is not set.
if not os.environ.get('STABILITY_KEY'):
    from getpass import getpass
    os.environ['STABILITY_KEY'] = getpass('Stability API key: ')

In [None]:
# Set up our connection to the API.
# The key is read from the STABILITY_KEY environment variable; the generation
# cells below call `.generate` on the resulting `stability_api` client.
stability_api = client.StabilityInference(
    key=os.environ['STABILITY_KEY'], # API Key reference.
    verbose=True, # Print debug messages.
    engine="stable-diffusion-xl-1024-v1-0", # Set the engine to use for generation.
    # Check out the following link for a list of available engines: https://platform.stability.ai/docs/features/api-parameters#engine
)

In [None]:
from PIL import Image
import matplotlib.pyplot as plt
# Despite its name, `url` is a local file path; the next cell reads it again.
url = "/kaggle/working/sam_2_augmentation/masked_images/masked_image_1.png"
# Open the image
img = Image.open(url)

# Display the image
plt.imshow(img)
plt.show()

# Get the dimensions of the image
width, height = img.size
print(f"Width: {width}, Height: {height}")


In [None]:
# Open the masked PNG under its own name. `image` already holds the NumPy
# array used by the SAM-2 cells and the masking helpers, which index it as an
# array; re-binding it to a PIL image here would break those cells on re-run.
masked_png = Image.open(url)

# Convert the image from RGBA to RGB
rgb_image = masked_png.convert("RGB")

# Save the image as JPEG (it is resized in place by a later cell)
rgb_image.save('/kaggle/working/sam_2_augmentation/output_resized_image.jpg', format="JPEG")


In [None]:
def resize_image(image_path, output_path, max_size=1024):
    """
    Save a copy of an image whose longer side is at most ``max_size`` pixels.

    Images that already fit are saved unchanged in size; larger ones are
    scaled down with the aspect ratio preserved. ``image_path`` and
    ``output_path`` may be the same file.

    Parameters:
    - image_path: String, path of the image to read.
    - output_path: String, path where the result is written.
    - max_size: Int, maximum allowed length of the longer side.
    """
    # The context manager closes the source file when we are done with it.
    with Image.open(image_path) as image:
        # Get the current width and height of the image
        width, height = image.size

        # Scale relative to the longer side.
        scaling_factor = max_size / max(width, height)

        # Only resize if the image is larger than the max_size
        if scaling_factor < 1:
            # Clamp to 1 so a very elongated image never gets a zero-sized side.
            new_width = max(1, int(width * scaling_factor))
            new_height = max(1, int(height * scaling_factor))

            # Resize the image
            image_resized = image.resize((new_width, new_height))

            # Save the resized image
            image_resized.save(output_path)
            print(f"Image resized to {new_width}x{new_height} and saved as {output_path}")
        else:
            # Save the original image without resizing
            image.save(output_path)
            print(f"Image is already within the size limits and saved as {output_path}")

# Example usage
# Input and output are the same file, so the JPEG is resized in place.
resize_image('/kaggle/working/sam_2_augmentation/output_resized_image.jpg', '/kaggle/working/sam_2_augmentation/output_resized_image.jpg')


In [None]:
# Open the image
img = Image.open("/kaggle/working/sam_2_augmentation/output_resized_image.jpg")

# Display the image
plt.imshow(img)
plt.show()

# Get the dimensions of the image; the generation cell below passes `img`,
# `width` and `height` to the API.
width, height = img.size
print(f"Width: {width}, Height: {height}")

In [None]:
# Set up our initial generation parameters.
# NOTE(review): steps=250 is far above the default of 30 mentioned below —
# confirm the API accepts a value this high.
answers2 = stability_api.generate(
    prompt="body builder,alphonse mucha and simon stalenhag style,",
    # guidance_models =
    init_image=img, # Assign our previously generated img as our Initial Image for transformation.
    start_schedule=0.6, # Set the strength of our prompt in relation to our initial image.
    # seed=12343566, # If attempting to transform an image that was previously generated with our API,
                    # initial images benefit from having their own distinct seed rather than using the seed of the original image generation.
    steps=250, # Amount of inference steps performed on image generation. Defaults to 30.
    cfg_scale=10.0, # Influences how strongly your generation is guided to match your prompt.
                   # Setting this value higher increases the strength in which it tries to match your prompt.
                   # Defaults to 7.0 if not specified.
    width=width, # Generation width, defaults to 512 if not included.
    height=height, # Generation height, defaults to 512 if not included.
    # Choose which sampler we want to denoise our generation with.
    # Defaults to k_dpmpp_2m if not specified. Clip Guidance only supports ancestral samplers.
    # (Available Samplers: ddim, plms, k_euler, k_euler_ancestral, k_heun, k_dpm_2, k_dpm_2_ancestral, k_dpmpp_2s_ancestral, k_lms, k_dpmpp_2m, k_dpmpp_sde)
    sampler=generation.SAMPLER_DDIM,
    style_preset="comic-book",
)

# Fixed output file for the transformed image; later cells read it back.
img2img_output_path = "/kaggle/working/sam_2_augmentation/output"+ "-img2img.png"

# Set up our warning to print to the console if the adult content classifier is tripped.
# If adult content classifier is not tripped, save generated image.
img2 = None
for resp in answers2:
    for artifact in resp.artifacts:
        if artifact.finish_reason == generation.FILTER:
            warnings.warn(
                "Your request activated the API's safety filters and could not be processed. "
                "Please modify the prompt and try again.")
        if artifact.type == generation.ARTIFACT_IMAGE:
            img2 = Image.open(io.BytesIO(artifact.binary))
            img2.save(img2img_output_path)

if img2 is None:
    # Nothing was written in this run, so do not show a missing or stale file.
    warnings.warn("The API returned no image; skipping the preview.")
else:
    # Open the image
    out_img = Image.open(img2img_output_path)

    # Display the image
    plt.imshow(out_img)
    plt.show()

In [None]:
# Point the path variables back at the saved mask / inverse-mask PNGs.
masked_image_path = "/kaggle/working/sam_2_augmentation/masked_images/masked_image_1.png"  # Path to the masked image
inverse_masked_image_path = "/kaggle/working/sam_2_augmentation/inverse_masked_images/inverse_masked_image_1.png"

In [None]:

# Example usage
# Paste the generated foreground onto the inverse-masked background.
# NOTE(review): combine_masked_regions raises ValueError when the two images
# differ in size, and the generated image was produced at the resized
# dimensions — confirm it matches the inverse-masked PNG.
img2img_masked_image_path = "/kaggle/working/sam_2_augmentation/output-img2img.png"
inverse_masked_image_path = "/kaggle/working/sam_2_augmentation/inverse_masked_images/inverse_masked_image_1.png"
img2img_save_path = "/kaggle/working/sam_2_augmentation/combined_image_1.png"

# Ensure the output directory exists
os.makedirs(os.path.dirname(img2img_save_path), exist_ok=True)

# Combine the images
combine_masked_regions(img2img_masked_image_path, inverse_masked_image_path, img2img_save_path)

In [None]:
from PIL import Image
import matplotlib.pyplot as plt
# Despite its name, `url` is a local file path; the next cell reads it again.
url = "/kaggle/working/sam_2_augmentation/inverse_masked_images/inverse_masked_image_1.png"
img = Image.open(url)
width, height = img.size  # PIL exposes size as a (width, height) pair

# Preview the inverse-masked image
plt.imshow(img)
plt.show()

# Report the pixel dimensions
print(f"Width: {width}, Height: {height}")

In [None]:
# Re-open the inverse-masked PNG; `url` is the local file path assigned in the previous cell.
# NOTE(review): this rebinds the notebook-level name `image`, which an early cell
# also uses for the original input picture — confirm no later cell expects that one.
image = Image.open(url)

# Convert to RGB (the source PNG is presumably RGBA; JPEG has no alpha channel)
rgb_image = image.convert("RGB")

# Save the converted copy as a JPEG file
rgb_image.save("/kaggle/working/sam_2_augmentation/your_inv_image.jpg", format="JPEG")


In [None]:
# Pass the RGB JPEG saved at your_inv_image.jpg through `resize_image`, writing to a new file.
# NOTE(review): `resize_image` is defined elsewhere in the notebook; presumably it
# rescales the first path and saves to the second — check its definition for the target size.
resize_image('/kaggle/working/sam_2_augmentation/your_inv_image.jpg', '/kaggle/working/sam_2_augmentation/output_resized_image.jpg')

In [None]:
# Load the resized JPEG. `img`, `width` and `height` are read again by the
# img2img request in the following cell.
img = Image.open("/kaggle/working/sam_2_augmentation/output_resized_image.jpg")
width, height = img.size  # PIL exposes size as a (width, height) pair

# Preview the resized image
plt.imshow(img)
plt.show()

# Report the pixel dimensions
print(f"Width: {width}, Height: {height}")

In [None]:
# Img2img request: transform the image held in `img` according to the prompt.
answers2 = stability_api.generate(
    prompt="group of friends in suit,alphonse mucha and simon stalenhag style",
    # guidance_models =
    init_image=img,  # Initial image for the transformation.
    start_schedule=0.5,  # Strength of the prompt in relation to the initial image.
    # seed=12343566, # If attempting to transform an image that was previously generated with our API,
                    # initial images benefit from having their own distinct seed rather than using the seed of the original image generation.
    steps=400,  # Amount of inference steps performed on image generation. Defaults to 30.
                # NOTE(review): far above the default — confirm the API accepts this value.
    cfg_scale=10.0,  # Influences how strongly the generation is guided to match the prompt.
                     # Higher values push harder towards the prompt. Defaults to 7.0 if not specified.
    width=width,  # Generation width, defaults to 512 if not included.
    height=height,  # Generation height, defaults to 512 if not included.
    sampler=generation.SAMPLER_DDIM,  # Sampler used to denoise the generation.
                                      # Defaults to k_dpmpp_2m if not specified. Clip Guidance only supports ancestral samplers.
                                      # (Available Samplers: ddim, plms, k_euler, k_euler_ancestral, k_heun, k_dpm_2, k_dpm_2_ancestral, k_dpmpp_2s_ancestral, k_lms, k_dpmpp_2m, k_dpmpp_sde)
    style_preset="anime",  # Style preset passed alongside the prompt.
)

# Walk the responses in `answers2`: warn when the API's safety filter fired,
# and write any returned image artifact to disk.
for resp in answers2:
    for artifact in resp.artifacts:
        if artifact.finish_reason == generation.FILTER:
            # Adjacent string literals are joined verbatim, so the space that
            # separates the two sentences must live inside the first literal.
            warnings.warn(
                "Your request activated the API's safety filters and could not be processed. "
                "Please modify the prompt and try again.")
        if artifact.type == generation.ARTIFACT_IMAGE:
            # No `global` statement is needed: this loop runs at cell (module)
            # scope, so the assignment already binds the notebook-level name.
            img2 = Image.open(io.BytesIO(artifact.binary))
            # Fixed filename: every image artifact overwrites the previous one.
            img2.save("/kaggle/working/sam_2_augmentation/output-img2img.png")

# Reload the saved result from disk and preview it.
# NOTE(review): if no image artifact came back (e.g. the filter fired), this
# opens whatever an earlier run left at that path, or fails if nothing is there.
out_img = Image.open("/kaggle/working/sam_2_augmentation/output-img2img.png")

plt.imshow(out_img)
plt.show()

In [None]:
# Destination of the composite; create its parent folder up front
# (exist_ok=True makes this a no-op when the folder is already there).
save_path = "/kaggle/working/sam_2_augmentation/combined_images/combined_image_1.png"
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# Inputs: the masked image goes in first, and the img2img output takes the
# place of the inverse-masked image as the second input.
# NOTE(review): `combine_masked_regions` is defined elsewhere in the notebook;
# check its definition for how the two inputs are merged.
masked_image_path = "/kaggle/working/sam_2_augmentation/masked_images/masked_image_1.png"
inverse_masked_image_path = "/kaggle/working/sam_2_augmentation/output-img2img.png"

combine_masked_regions(
    masked_image_path,
    inverse_masked_image_path,
    save_path,
)
