## Watermarker

### 1. Import Libraries

In [4]:
import requests
from PIL import Image, ImageDraw, ImageFont
from io import BytesIO
import random, os

### 2. Extract List of Image URLs, Developers, and Logo URLs from Watermark Data CSV File

In [120]:
import pandas as pd

def extract_columns_from_csv(csv_path, start_index=0, end_index=None):
    """Load a CSV file and return a row window of three of its columns.

    Args:
        csv_path: Path of the CSV file to read.
        start_index: Position of the first row to keep (inclusive).
        end_index: Position one past the last row to keep (exclusive).
            None keeps every row from start_index onwards.

    Returns:
        A tuple ``(image_urls, developers, logo_urls)`` holding the selected
        rows of the "Image URL", "Developer" and "Logo URL" columns.
    """
    frame = pd.read_csv(csv_path)
    rows = slice(start_index, end_index)

    # The same row window is applied to each of the three columns.
    image_urls, developers, logo_urls = (
        frame[column][rows]
        for column in ("Image URL", "Developer", "Logo URL")
    )
    return image_urls, developers, logo_urls

# Example usage
# Paste the absolute path of the CSV file here:
csv_path = r"C:\Users\lenovo\OneDrive\Documents\PROJECTS\ALT\Real-Estate-Image-Matching\data-scraping\watermark-scraper\1. scrape dataset\dataset\watermark_data.csv"
start_index = 250  # position of the first row to extract (0 is the first row)
end_index = 501    # position one past the last row to extract (exclusive)

image_urls, developers, logo_urls = extract_columns_from_csv(csv_path, start_index, end_index)

# Optionally show what was extracted: a heading line followed by the column as a plain list
print("Image URLs:", image_urls.tolist(), sep="\n")
print("\nDevelopers:", developers.tolist(), sep="\n")
print("\nLogo URLs:", logo_urls.tolist(), sep="\n")

Image URLs:
['https://api.omh.app/store/picture/73c0d16f-4d1b-46ff-8063-aea817648ed1', 'https://api.omh.app/store/picture/f88c1b7e-9dc4-4c81-b305-9e21ef2b67d6', 'https://api.omh.app/store/picture/7a14cee8-e940-4907-9cc4-9c57d0533336', 'https://api.omh.app/store/picture/d921840f-716f-4bf1-b397-4a7eb06d545e', 'https://api.omh.app/store/picture/701d0579-0c0d-4c07-b24f-b7fe00d6dcdf', 'https://api.omh.app/store/picture/34d7b06f-6491-4a0c-99e9-41058bb16eb6', 'https://api.omh.app/store/picture/de5ec0a3-56be-41fc-b345-8d6c06a6a732', 'https://api.omh.app/store/picture/26931d28-566e-47e2-bfd1-61a28ebc2bba', 'https://api.omh.app/store/picture/fff66b22-5ecc-477e-bea6-1f6ea1233bd6', 'https://api.omh.app/store/picture/f3153fa6-1deb-4622-84ff-dea6ec8f64d4', 'https://api.omh.app/store/picture/08816d52-0234-45e6-aeb0-0ca9dc73ef1f', 'https://api.omh.app/store/picture/c325cfbe-1eb6-4355-add1-43c34e52e221', 'https://api.omh.app/store/picture/b6ab777a-a596-4d64-bd3a-101392c0a4ab', 'https://api.omh.app/stor

### 3. Add Watermark to Property Image

##### Option 1: Add Logo URL Image as Watermark in Property Image

In [8]:
import requests
from PIL import Image
from io import BytesIO
import random, os

def download_and_watermark_images(image_urls, logo_urls, output_dir, transparency=0.7):
    """Download property images and paste a downloaded logo on each at a random spot.

    Each logo is resized to a random square between 150 and 400 pixels (smaller
    when the property image itself is smaller than that), faded by
    ``transparency`` and pasted at a random position that keeps it entirely
    inside the image. Results are saved as ``watermarked_image_<index>.jpg``
    in ``output_dir``, where ``<index>`` is the 0-based position of the pair.

    Args:
        image_urls: Iterable of property image URLs.
        logo_urls: Iterable of logo URLs, paired positionally with image_urls.
        output_dir: Directory that receives the JPEG files; created if missing.
        transparency: Opacity multiplier for the logo, from 0 (invisible) to
            1 (logo opacity unchanged).

    Raises:
        requests.RequestException: If a download fails, times out or returns
            an HTTP error status.
    """
    os.makedirs(output_dir, exist_ok=True)

    for index, (image_url, logo_url) in enumerate(zip(image_urls, logo_urls)):
        # Fail loudly on HTTP errors instead of handing an error page to Pillow.
        image_response = requests.get(image_url, timeout=30)
        image_response.raise_for_status()
        image = Image.open(BytesIO(image_response.content)).convert("RGBA")

        logo_response = requests.get(logo_url, timeout=30)
        logo_response.raise_for_status()
        logo_image = Image.open(BytesIO(logo_response.content)).convert("RGBA")

        # Cap the logo at the image's dimensions so the random placement
        # below never receives a negative upper bound.
        image_width, image_height = image.size
        size = min(random.randint(150, 400), image_width, image_height)
        logo = logo_image.resize((size, size), Image.LANCZOS)

        x = random.randint(0, image_width - size)
        y = random.randint(0, image_height - size)

        # Scale the logo's existing alpha channel rather than overwriting it
        # with a constant, which would turn transparent logo backgrounds into
        # a solid semi-opaque square.
        alpha = logo.split()[3].point(
            lambda value: min(255, max(0, int(value * transparency)))
        )
        logo.putalpha(alpha)

        # Paste onto a copy, using the logo's alpha channel as the mask.
        watermark_image = image.copy()
        watermark_image.paste(logo, (x, y), logo)

        # JPEG has no alpha channel, so flatten to RGB before saving.
        watermark_filename = f"watermarked_image_{index}.jpg"
        watermark_image.convert("RGB").save(os.path.join(output_dir, watermark_filename), "JPEG")

# Example usage: watermark two sample property images with their paired logos.
# The two lists are matched by position (first image gets the first logo).
test_image_urls = [
    "https://api.omh.app/store/picture/a9c0f92e-1479-48fc-a212-e63ce9692723",
    "https://api.omh.app/store/picture/695f3135-3706-4800-af3d-d662051dbf09",
    # ... more image URLs
]
test_logo_urls = [
    "https://api.omh.app/store/picture/db0aa5ac-67c5-4ce1-87b0-ea8680738783",
    "https://api.omh.app/store/picture/04d5d9aa-22f8-4994-b17a-8fff389af97e",
    # ... more logo URLs
]
# Relative path: resolved against the current working directory.
output_dir = "watermarked_images/logo_watermark"

# Performs network downloads and writes JPEG files into output_dir.
download_and_watermark_images(test_image_urls, test_logo_urls, output_dir)


##### Option 2: Add Developer Name as Text Watermark in Property Image

In [114]:
import os

def get_font_paths(font_dir):
    """Return the paths of the TrueType/OpenType font files in a directory.

    Args:
        font_dir: The directory containing the font files. Only its direct
            entries are examined; subdirectories are not searched.

    Returns:
        A list of paths (``font_dir`` joined with the file name), sorted by
        file name, for every regular file whose extension is ``.ttf`` or
        ``.otf`` in any letter case. The list is empty when ``font_dir`` is
        not an existing directory (an error message is printed) or when it
        holds no font files.
    """
    if not os.path.isdir(font_dir):
        print(f"Error: Font directory '{font_dir}' does not exist.")
        return []

    font_extensions = (".ttf", ".otf")
    # Compare lower-cased names so "Arial.ttf" is found as well as "ARIAL.TTF";
    # sort so the result does not depend on the platform's listing order.
    return [
        os.path.join(font_dir, name)
        for name in sorted(os.listdir(font_dir))
        if name.lower().endswith(font_extensions)
        and os.path.isfile(os.path.join(font_dir, name))
    ]

# Example usage:
# Paste the absolute path of the fonts directory here.
font_dir = r"C:\Users\lenovo\OneDrive\Documents\PROJECTS\ALT\Real-Estate-Image-Matching\data-scraping\watermark-scraper\1. scrape dataset\fonts"
# font_paths is a module-level name: the text-watermark cell below picks a
# random entry from it, so this cell has to run first.
font_paths = get_font_paths(font_dir)

print(font_paths)

['C:\\Users\\lenovo\\OneDrive\\Documents\\PROJECTS\\ALT\\Real-Estate-Image-Matching\\data-scraping\\watermark-scraper\\1. scrape dataset\\fonts\\ARIAL.TTF', 'C:\\Users\\lenovo\\OneDrive\\Documents\\PROJECTS\\ALT\\Real-Estate-Image-Matching\\data-scraping\\watermark-scraper\\1. scrape dataset\\fonts\\ARIALBD.TTF', 'C:\\Users\\lenovo\\OneDrive\\Documents\\PROJECTS\\ALT\\Real-Estate-Image-Matching\\data-scraping\\watermark-scraper\\1. scrape dataset\\fonts\\ARIALBI.TTF', 'C:\\Users\\lenovo\\OneDrive\\Documents\\PROJECTS\\ALT\\Real-Estate-Image-Matching\\data-scraping\\watermark-scraper\\1. scrape dataset\\fonts\\ARIALI.TTF', 'C:\\Users\\lenovo\\OneDrive\\Documents\\PROJECTS\\ALT\\Real-Estate-Image-Matching\\data-scraping\\watermark-scraper\\1. scrape dataset\\fonts\\ARIALN.TTF', 'C:\\Users\\lenovo\\OneDrive\\Documents\\PROJECTS\\ALT\\Real-Estate-Image-Matching\\data-scraping\\watermark-scraper\\1. scrape dataset\\fonts\\ARIALNB.TTF', 'C:\\Users\\lenovo\\OneDrive\\Documents\\PROJECTS\\ALT\

In [121]:
import requests
from PIL import Image, ImageDraw, ImageFont, ImageStat, ImageFilter
from io import BytesIO
import random
import os

def is_plain_region(image, x, y, width, height, threshold=10):
    """Report whether a rectangular region of an image is visually plain.

    A region counts as plain when the standard deviation of the pixel values
    in its first band is below ``threshold``.

    Args:
        image: Pillow image to inspect.
        x: Left edge of the region, in pixels.
        y: Top edge of the region, in pixels.
        width: Region width, in pixels.
        height: Region height, in pixels.
        threshold: Largest standard deviation still regarded as plain.

    Returns:
        True when the region's pixel values vary by less than ``threshold``.
    """
    region = image.crop((x, y, x + width, y + height))
    # The earlier check compared the region's mean before and after a Gaussian
    # blur. Blurring leaves the mean almost unchanged, so that difference was
    # close to zero for nearly every region and the test always passed.
    # The standard deviation measures how much the pixels actually vary.
    return ImageStat.Stat(region).stddev[0] < threshold


def _measure_text(draw, text, font_path, font_size):
    """Load the font at font_size and return (font, text width, text height)."""
    font = ImageFont.truetype(font_path, font_size)
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    return font, right - left, bottom - top


def download_and_watermark_images(start_index, image_urls, logo_names, output_dir, transparency=128, sample_size=20, max_retries=10):
    """Download images and stamp each with a white text watermark.

    For every (image URL, name) pair a font is picked at random from the
    module-level ``font_paths`` list and the name is randomly upper-cased,
    lower-cased, title-cased or left unchanged. Up to ``sample_size`` random
    font sizes (50-150) and positions are then tried, keeping the darkest
    candidate that ``is_plain_region`` accepts. If no candidate qualifies the
    text is centred. The result is saved as
    ``watermarked_image_<start_index + position>.jpg`` in ``output_dir``.

    Any exception while processing an image (download failure, unreadable
    font, ...) is printed and the image is retried from scratch with a newly
    chosen font. After ``max_retries`` failed attempts the image is skipped.

    Args:
        start_index: Number added to each pair's 0-based position to form the
            number shown in messages and used in output file names.
        image_urls: Iterable of image URLs.
        logo_names: Iterable of watermark texts, paired positionally with
            image_urls.
        output_dir: Directory that receives the JPEG files; created if missing.
        transparency: Alpha component of the white text fill colour
            (0 = invisible, 255 = opaque).
        sample_size: Number of random size/position candidates tried per image.
        max_retries: Maximum number of attempts per image.
    """
    os.makedirs(output_dir, exist_ok=True)

    min_font_size = 50
    max_font_size = 150

    for index, (image_url, logo_name) in enumerate(zip(image_urls, logo_names)):
        image_number = index + start_index
        print(f"Processing image {image_number}")
        retry_count = 0
        while retry_count < max_retries:
            try:
                # Download and open the image; an HTTP error status raises here
                # instead of feeding an error page to Pillow.
                response = requests.get(image_url, timeout=30)
                response.raise_for_status()
                image = Image.open(BytesIO(response.content)).convert("RGBA")

                # Grayscale copy for brightness, blurred copy for the plain-region test
                grayscale_image = image.convert("L")
                smoothed_image = grayscale_image.filter(ImageFilter.GaussianBlur(radius=5))

                # Randomly choose a font
                font_path = random.choice(font_paths)

                # Randomly choose text case. The result goes into a separate
                # variable so a retry starts again from the original name.
                text = logo_name
                case_option = random.choice(["upper", "lower", "title", "original"])
                if case_option == "upper":
                    text = logo_name.upper()
                elif case_option == "lower":
                    text = logo_name.lower()
                elif case_option == "title":
                    text = logo_name.title()

                # Transparent layer the text is drawn on
                text_overlay = Image.new("RGBA", image.size, (255, 255, 255, 0))
                draw = ImageDraw.Draw(text_overlay)

                best = None      # (x, y, font, font_size) of the chosen candidate
                last = None      # (font, font_size, width, height) of the last measurement
                lowest_brightness = 255
                for _ in range(sample_size):
                    font_size = random.randint(min_font_size, max_font_size)
                    font, text_width, text_height = _measure_text(draw, text, font_path, font_size)

                    # Shrink until the text fits or the minimum size is reached
                    while (text_width > image.width or text_height > image.height) and font_size > min_font_size:
                        font_size = max(min_font_size, font_size - 2)
                        font, text_width, text_height = _measure_text(draw, text, font_path, font_size)
                    last = (font, font_size, text_width, text_height)

                    # Give up sampling only when the text is still too large at
                    # the minimum size; no other sample could fit either.
                    if text_width > image.width or text_height > image.height:
                        print(f"Warning: Font size reached minimum limit for image {image_number}.")
                        break

                    # Random position that keeps the text inside the image
                    x = random.randint(0, image.width - text_width)
                    y = random.randint(0, image.height - text_height)

                    # White text shows best on a dark, plain background
                    crop_area = grayscale_image.crop((x, y, x + text_width, y + text_height))
                    brightness = ImageStat.Stat(crop_area).mean[0]

                    if is_plain_region(smoothed_image, x, y, text_width, text_height) and brightness < lowest_brightness:
                        lowest_brightness = brightness
                        # Keep the font with the position so the text is drawn
                        # at the size the position was computed for.
                        best = (x, y, font, font_size)

                if best is not None:
                    best_x, best_y, font, font_size = best
                else:
                    # Fallback: centre the text using the last measured font
                    if last is None:  # sample_size was 0, nothing measured yet
                        font_size = min_font_size
                        font, text_width, text_height = _measure_text(draw, text, font_path, font_size)
                    else:
                        font, font_size, text_width, text_height = last
                    best_x = max(0, (image.width - text_width) // 2)
                    best_y = max(0, (image.height - text_height) // 2)

                # Draw watermark text
                draw.text((best_x, best_y), text, fill=(255, 255, 255, transparency), font=font)

                # Combine images
                watermarked_image = Image.alpha_composite(image, text_overlay)

                # JPEG has no alpha channel, so flatten to RGB before saving
                output_path = os.path.join(output_dir, f"watermarked_image_{image_number}.jpg")
                watermarked_image.convert("RGB").save(output_path, "JPEG")

                print(f"Font size used: {font_size}")
                break
            except Exception as e:
                # Deliberately broad: one bad image must not stop the batch.
                retry_count += 1
                print(f"Failed to process image {image_number} on attempt {retry_count}: {e}")
                if retry_count >= max_retries:
                    print(f"Max retries reached for image {image_number}. Skipping.")

# Example usage
# Small sample inputs kept for quick manual tests; the call below does not use them.
test_image_urls = [
    "https://api.omh.app/store/picture/a9c0f92e-1479-48fc-a212-e63ce9692723",
    "https://api.omh.app/store/picture/695f3135-3706-4800-af3d-d662051dbf09",
]
test_logo_names = ["OhMyHome", "PropertyGuru"]
output_dir = "watermarked_images/test"

# Watermark the rows extracted from the CSV. start_index is passed instead of
# a repeated literal 250 so output file numbers always match the extracted
# row window.
download_and_watermark_images(start_index, image_urls, developers, output_dir)


Processing image 250
Font size used: 50
Processing image 251
Font size used: 62
Processing image 252
Font size used: 77
Processing image 253
Font size used: 50
Processing image 254
Font size used: 50
Processing image 255
Font size used: 82
Processing image 256
Font size used: 76
Processing image 257
Font size used: 52
Processing image 258
Font size used: 50
Processing image 259
Font size used: 63
Processing image 260
Font size used: 91
Processing image 261
Font size used: 53
Processing image 262
Font size used: 149
Processing image 263
Font size used: 84
Processing image 264
Font size used: 109
Processing image 265
Font size used: 59
Processing image 266
Font size used: 63
Processing image 267
Font size used: 85
Processing image 268
Font size used: 112
Processing image 269
Font size used: 50
Processing image 270
Font size used: 50
Processing image 271
Font size used: 145
Processing image 272
Font size used: 136
Processing image 273
Font size used: 57
Processing image 274
Font size used

##### Option 3: Add Downloaded Transparent Logo as Watermark in Property Image

In [122]:
# Python list of cleaned-up developer names.
# Order matters: find_logo_index() returns a name's position in this list, and
# the watermark functions use that number to look up the logo file named
# image_<position> in the logos directory.
# NOTE(review): "%26" looks like a URL-encoded "&" — confirm it matches the
# Developer values in the CSV before changing any entry.
real_estate_companies = [
    "5R Prime Properties",
    "650 Homes",
    "8990 Holdings, Inc.",
    "A Brown Company, Inc.",
    "ACM Homes Development Corp",
    "AboitizLand",
    "Active Group",
    "Aeon Luxe Properties Inc",
    "Agan Land Corp",
    "Al-Khor Dynamic Builders Inc.",
    "Alpina Heights Properties Corporation",
    "Alveo Land Corp.",
    "Amaia Land Corp.",
    "Anchor Land Holdings Inc",
    "Antel Land Holdings",
    "Apec Homes",
    "Arthaland",
    "Aseana Residential Holdings Corp.",
    "Asian Land Strategies Corporation",
    "Astranniquin Corporation",
    "Avida Land Corp.",
    "Axeia Development Corp",
    "Ayala Land Premier",
    "BESCO Clark Philippines Group of Companies, Corporation",
    "BJS Development Corporation",
    "Banff Realty %26 Development Corporation",
    "Bella Vita Land Corp",
    "Bellefonte Properties",
    "Bigpat Development",
    "Bilkenn Corporation",
    "Bluhomes",
    "Borland Development Corporation",
    "Breightonland",
    "Bria Homes",
    "Brittany Corporation",
    "C-5 Mansions Development Corporation",
    "CDC Holdings, Inc.",
    "CLS Properties Inc.",
    "COHO by Vista Land",
    "Calmar Land",
    "Camella Homes",
    "Camella Manors",
    "Cathay Land",
    "Cebu Land Masters, Inc.",
    "Central Country Estate Inc.",
    "Century Nuliv",
    "Chan Toei Properties",
    "Charles Builders Incorporated",
    "Citiglobal Realty %26 Development, Inc.",
    "Ciudades Development Corporation",
    "Clairmont Realty and Development Corporation",
    "Crown Asia",
    "DDC Land, Inc.",
    "DMCI Homes",
    "Damosa Land, Inc.",
    "DataLand Inc.",
    "Demeterland",
    "Dolmar Property Ventures, Inc.",
    "Don Tim Development Corp",
    "Double Dragon Properties Corporation",
    "Duraville Realty and Development Corp",
    "Duros Land Properties Inc",
    "ELM Land Inc",
    "Ecoglobal Development Corporation",
    "Elanvital Enclaves",
    "Empire East",
    "Enduraland Development Corporation",
    "Euro Tower International Inc.",
    "F.S Dulalia Realty",
    "Famtech Properties, Inc.",
    "Federal Land Inc",
    "Fiesta Communities inc",
    "Filigree",
    "Filinvest Land",
    "Firebird Holdings Corporation",
    "First Georgetown Ventures Inc.",
    "Fortune Real Properties",
    "G-Land Property %26 Development Corporation",
    "Geo Estate Development Corporation",
    "Global-Estate Resorts Inc.",
    "Golden Bay Landholdings",
    "Golden Home Realty Development Inc.",
    "Golden Tiger Realty %26 Properties, Corp.",
    "Golden Topper",
    "Goldstar Realty and Development Corporation",
    "GoshenLand",
    "Grand Taipan Land",
    "Green Island International Investments %26 Development Corporation",
    "Greenfield Development Corporation",
    "Greentech Development Corporation",
    "HTLand Inc.",
    "HausTalk, Inc.",
    "Hausland Development Corporation",
    "Hestia Properties and Development Corporation",
    "Homemark Inc.",
    "Honeycomb Builders Inc.",
    "Horizon Land Property Development Corp",
    "Imperial Homes Corporation",
    "Isoc Land",
    "Italpinas Development Corp",
    "Jackstone Properties Inc",
    "Jenson Land and Realty Development Corp.",
    "Keyland Corporation",
    "Kommuno Properties",
    "Landco Pacific Corporation",
    "Landisco Corp.",
    "Lessandra",
    "Lica Land",
    "Livingsprings Communitites Inc.",
    "Lumina Homes",
    "Lynville Land Development Corp",
    "Makati City Subway, Inc.",
    "Malveda Properties Development Corporation",
    "Maria Luisa Properties",
    "Megapines Realty %26 Development Corp.",
    "Megaworld Corporation",
    "Metrostar Realty Development Inc.",
    "Metrosummit Realty and Construction Corporation",
    "Moldex Realty, Inc.",
    "Montana Golden Real Estate Developer Company Inc",
    "MyCitihomes",
    "Myvan Properties %26 Development Inc.",
    "Nabaja Land Corporation",
    "New San Jose Builders Inc",
    "NewBeginnings Inc.",
    "Next Asia Homes",
    "Noble Land Development Corp",
    "Norheast Land Development Inc.",
    "Ortigas Land",
    "Ovialand",
    "P. A Properties Hankyu Hanshin",
    "P.A Properties",
    "PDB Properties Inc.",
    "PH1 World",
    "PIK",
    "Pabahay Equity Ventures Inc",
    "Pacifica Homes Development Corporation",
    "Philippine Realty %26 Holdings Corp",
    "Philippines Estate Corporation",
    "Phinma",
    "Phirst Park Homes",
    "Picar Development Inc.",
    "Plus Homes (Megawide)",
    "PonteFino Estates Creative Hotel Concepts, Inc.",
    "Priland Development Corporation",
    "Primary Homes Incorporated",
    "Prime Homes",
    "Primeworld Land Holdings Inc.",
    "Prince Jun Development Corporation",
    "Property of Friends and Company, Inc.",
    "Pueblo De Oro Development Corporation",
    "Pueblo de Panay Inc",
    "R Land Development Inc",
    "RCD Land Inc",
    "RHK Land Corporation",
    "RLC Residences",
    "Rafeli Realty %26 Development Corporation",
    "Red Oak Properties, Inc",
    "Reignwood Development Corporation",
    "Robinsons Homes",
    "Rockavilla Realty %26 Development Corporation",
    "Rockwell Land Corporation",
    "Roxaco Land Corporation",
    "Royal Properties, Inc.",
    "SM Development Corporation (SMDC)",
    "SOC Land Development Corporation",
    "SP Land Inc.",
    "ST. Felisse Realty and Development Corporation",
    "Shang Properties Inc",
    "Shang Robinson Properties Inc.",
    "Solanaland Development Inc.",
    "Solar Resources Inc",
    "Sta. Lucia Land Inc.",
    "Stateland, Inc.",
    "Summerhills Home Development Corporation",
    "Suntrust Properties, Inc.",
    "Sunwealth Land Development Corporation",
    "Taft Properties Inc.",
    "Taylormade Construction and Realty Corp.",
    "The Catanauan Cove Corp.",
    "The Estate Makati",
    "The Regalia Group Corp.",
    "Top Coastal Bay Realty Corporation",
    "Torre Lorenzo Development Corporation",
    "TransAsia Construction Development Corporation",
    "Transphil Real Estate Development Corp",
    "University Home",
    "Vanillascape Land Corporation",
    "Victor Consunji Development Corporation",
    "Vista Estates",
    "Vista Residences",
    "Wee Community Developers Inc.",
    "Worldwide Central Properties"
]

# Function to find the index of a logo name
def find_logo_index(name):
    """Return the position of a developer name in ``real_estate_companies``.

    Args:
        name: Developer name to look up; it must match a list entry exactly.

    Returns:
        The 0-based index of ``name`` in ``real_estate_companies``. When the
        name is not in the list, a random valid index is returned instead, so
        unknown developers are given a randomly chosen logo.
    """
    try:
        return real_estate_companies.index(name)
    except ValueError:
        # randrange covers exactly 0 .. len-1. The earlier randint(1, 193)
        # could return 193, one past the last entry, and could never return 0.
        return random.randrange(len(real_estate_companies))

# Example usage
# Look up one known developer and show its position in real_estate_companies.
# Note: this binds the module-level names logo_name and index.
logo_name = "Camella Homes"
index = find_logo_index(logo_name)
print(f"The index of '{logo_name}' is: {index}")


The index of 'Camella Homes' is: 40


In [123]:
import os

def find_exact_filename(directory, number):
    """Find the file named exactly ``image_<number>`` with a PNG or JPG extension.

    Args:
        directory: The directory to search (its direct entries only).
        number: The number to match in the filename.

    Returns:
        The matching file name (without the directory part), or None when no
        entry matches. If several entries match, the first in sorted order is
        returned.
    """
    wanted_stem = f"image_{number}"
    for filename in sorted(os.listdir(directory)):
        stem, extension = os.path.splitext(filename)
        # Compare the whole stem: a prefix test would let number 5 match
        # "image_50.png" or "image_51.jpg" as well as "image_5.png".
        if stem == wanted_stem and extension.lower() in (".png", ".jpg"):
            return filename
    return None

# Example usage:
# Paste the absolute path of the logos directory here.
directory_path = r"C:\Users\lenovo\OneDrive\Documents\PROJECTS\ALT\Real-Estate-Image-Matching\data-scraping\watermark-scraper\1. scrape dataset\logos"
number_to_find = 5

# Search for the number configured above (previously a repeated literal 5),
# so the lookup and the "not found" message always refer to the same number.
filename = find_exact_filename(directory_path, number_to_find)
if filename:
  print(f"Found file: {filename}")
else:
  print(f"File 'image_{number_to_find}' not found.")

Found file: image_5.png


##### Add the transparent logo watermark to the property image

In [11]:
import requests
from PIL import Image
from io import BytesIO
import random, os

def download_and_watermark_images(image_urls, logo_names, logos_dir, output_dir, transparency=0.7):
    """Download images and paste a locally stored transparent logo at a random spot.

    For each (image URL, developer name) pair the logo file is located with
    ``find_logo_index`` and ``find_exact_filename``, resized with its own
    aspect ratio preserved (height between 100 and 400 pixels, smaller when
    the property image is smaller), faded by ``transparency`` and pasted at a
    random position that keeps it inside the image. Results are saved as
    ``watermarked_image_<index>.jpg`` in ``output_dir``, where ``<index>`` is
    the 0-based position of the pair.

    Args:
        image_urls: Iterable of image URLs.
        logo_names: Iterable of developer names, paired positionally with
            image_urls, used to look up each logo file.
        logos_dir: The directory to find the logo images.
        output_dir: The directory to save the watermarked images; created if
            missing.
        transparency: Opacity multiplier for the logo, from 0 (invisible) to
            1 (logo opacity unchanged).

    Raises:
        requests.RequestException: If a download fails, times out or returns
            an HTTP error status.
    """
    os.makedirs(output_dir, exist_ok=True)

    for index, (image_url, logo_name) in enumerate(zip(image_urls, logo_names)):
        response = requests.get(image_url, timeout=30)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content)).convert("RGBA")

        # Keep the logo's list position in its own variable: reusing `index`
        # made every image of the same developer overwrite the same output file.
        logo_index = find_logo_index(logo_name)
        logo_filename = find_exact_filename(logos_dir, logo_index)
        if logo_filename is None:
            print(f"No logo file found for index {logo_index} ('{logo_name}'). Skipping image {index}.")
            continue
        logo_path = os.path.join(logos_dir, logo_filename)

        # Convert to RGBA so the logo always has an alpha band to use as the paste mask.
        with Image.open(logo_path) as logo_file:
            logo_image = logo_file.convert("RGBA")

        image_width, image_height = image.size
        logo_width, logo_height = logo_image.size

        # Aspect ratio of the logo itself (not of the property image)
        aspect_ratio = logo_width / logo_height

        # Random target width, converted to a height clamped to 100..400
        new_size = random.randint(150, 400)
        new_height = max(100, min(400, int(new_size / aspect_ratio)))
        new_width = max(1, int(new_height * aspect_ratio))

        # Shrink further if needed so the logo fits inside the image and the
        # random placement below never receives a negative upper bound.
        fit = min(1.0, image_width / new_width, image_height / new_height)
        new_width = max(1, int(new_width * fit))
        new_height = max(1, int(new_height * fit))

        logo = logo_image.resize((new_width, new_height), Image.LANCZOS)

        # Fade the logo by scaling its existing alpha channel
        alpha = logo.split()[3].point(
            lambda value: min(255, max(0, int(value * transparency)))
        )
        logo.putalpha(alpha)

        x = random.randint(0, image_width - new_width)
        y = random.randint(0, image_height - new_height)

        # Paste onto a copy, using the logo's alpha channel as the mask
        watermark_image = image.copy()
        watermark_image.paste(logo, (x, y), logo)

        # JPEG has no alpha channel, so flatten to RGB before saving
        watermark_filename = f"watermarked_image_{index}.jpg"
        watermark_image.convert("RGB").save(os.path.join(output_dir, watermark_filename), "JPEG")

# Example usage
# Note: this rebinds the module-level name image_urls, replacing the Series
# extracted from the CSV earlier with this two-item sample list.
image_urls = [
    "https://api.omh.app/store/picture/a9c0f92e-1479-48fc-a212-e63ce9692723",
    "https://api.omh.app/store/picture/695f3135-3706-4800-af3d-d662051dbf09",
    # ... more image URLs
]
# Developer names, matched by position with image_urls.
logo_names = [
    "Vista Estates",
    "Vista Residences",
    # ... more logo names
]
# Paste the absolute path of the logos directory here.
logos_dir = r"C:\Users\lenovo\OneDrive\Documents\PROJECTS\ALT\Real-Estate-Image-Matching\data-scraping\watermark-scraper\1. scrape dataset\logos"
output_dir = "watermarked_images/transparent_logo_watermark"

# Performs network downloads and writes JPEG files into output_dir.
download_and_watermark_images(image_urls, logo_names, logos_dir, output_dir)


##### Add the transparent logo on a high-contrast background region of the property image

In [55]:
import requests
from PIL import Image, ImageStat, ImageEnhance
from io import BytesIO
import random, os

def calculate_brightness(image):
    """Return the mean grayscale level of ``image`` as its brightness."""
    # Collapse to a single luminance band, then take that band's average.
    return ImageStat.Stat(image.convert("L")).mean[0]

def find_contrasting_position(image, logo, threshold=128):
    """
    Finds a position in the image where the logo has contrasting brightness.

    Up to 50 random positions are tried; the first one whose underlying image
    region differs from the logo's mean brightness by at least ``threshold``
    is returned. If none qualifies, a random position is returned instead.

    Args:
        image: The main image.
        logo: The logo image.
        threshold: Brightness difference threshold for contrast.
    Returns:
        (x, y): Top-left position for placing the logo.
    """
    image_width, image_height = image.size
    logo_width, logo_height = logo.size
    # Clamp at 0 so a logo larger than the image does not hand randint a
    # negative upper bound (which raises ValueError).
    max_x_position = max(0, image_width - logo_width)
    max_y_position = max(0, image_height - logo_height)

    # The logo does not change between attempts, so measure it once.
    logo_brightness = calculate_brightness(logo)

    for _ in range(50):  # Try 50 random positions
        x = random.randint(0, max_x_position)
        y = random.randint(0, max_y_position)
        region = image.crop((x, y, x + logo_width, y + logo_height))
        if abs(calculate_brightness(region) - logo_brightness) >= threshold:
            return x, y
    return random.randint(0, max_x_position), random.randint(0, max_y_position)  # Fallback random position

def download_and_watermark_images(image_urls, logo_names, logos_dir, output_dir, transparency=0.7):
    """Download images and paste a transparent logo where it contrasts with the background.

    For each (image URL, developer name) pair the logo file is located with
    ``find_logo_index`` and ``find_exact_filename``, faded by
    ``transparency``, scaled to 20% of the image width (smaller if it would be
    taller than the image) and placed at the position chosen by
    ``find_contrasting_position``. Results are saved as
    ``watermarked_image_<count>.jpg`` in ``output_dir``, where ``<count>`` is
    the 1-based position of the pair. Progress is printed along the way.

    Args:
        image_urls: Iterable of image URLs.
        logo_names: Iterable of developer names, paired positionally with
            image_urls, used to look up each logo file.
        logos_dir: Directory holding the logo files.
        output_dir: Directory that receives the JPEG files; created if missing.
        transparency: Factor applied to the logo's alpha channel
            (0 = invisible, 1 = unchanged).

    Raises:
        requests.RequestException: If a download fails, times out or returns
            an HTTP error status.
    """
    os.makedirs(output_dir, exist_ok=True)

    for count, (image_url, logo_name) in enumerate(zip(image_urls, logo_names), start=1):
        print(count)
        response = requests.get(image_url, timeout=30)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content)).convert("RGBA")

        # Find the logo path
        index = find_logo_index(logo_name)
        print("Logo Index: "+str(index))
        logo_filename = find_exact_filename(logos_dir, index)
        if logo_filename is None:
            # Skip explicitly rather than crashing on the string
            # concatenation / path join below.
            print(f"Logo file for index {index} not found in '{logos_dir}'. Skipping image {count}.")
            continue
        print("Logo filename: "+logo_filename)
        logo_path = os.path.join(logos_dir, logo_filename)
        print("Logo path: "+logo_path)

        # Open the logo; the converted copy stays usable after the file is closed
        with Image.open(logo_path) as logo_file:
            logo_image = logo_file.convert("RGBA")

        # Adjust transparency of the logo by scaling its alpha channel
        alpha = logo_image.split()[3]
        alpha = ImageEnhance.Brightness(alpha).enhance(transparency)
        logo_image.putalpha(alpha)

        # Scale logo to 20% of image width, never below one pixel per side
        logo_width = max(1, int(image.width * 0.2))
        logo_height = max(1, int(logo_width * logo_image.height / logo_image.width))
        if logo_height > image.height:
            # Very tall logo: shrink so it still fits inside the image
            logo_width = max(1, int(logo_width * image.height / logo_height))
            logo_height = image.height
        logo_image = logo_image.resize((logo_width, logo_height), Image.LANCZOS)

        # Find contrasting position
        x, y = find_contrasting_position(image, logo_image)

        # Paste onto a copy, using the logo's alpha channel as the mask
        watermarked_image = image.copy()
        watermarked_image.paste(logo_image, (x, y), logo_image)

        # JPEG has no alpha channel, so flatten to RGB before saving
        output_path = os.path.join(output_dir, f"watermarked_image_{count}.jpg")
        print("Watermark Image: "+output_path)
        watermarked_image.convert("RGB").save(output_path, "JPEG")

# Example usage: watermark two sample images with logos looked up by developer name.
# The two lists are matched by position.
test_image_urls = [
    "https://api.omh.app/store/picture/a9c0f92e-1479-48fc-a212-e63ce9692723",
    "https://api.omh.app/store/picture/695f3135-3706-4800-af3d-d662051dbf09",
    # ... more image URLs
]
test_logo_names = [
    "Vista Estates",
    "Vista Residences",
    # ... more logo names
]
# Paste the absolute path of the logos directory here.
logos_dir = r"C:\Users\lenovo\OneDrive\Documents\PROJECTS\ALT\Real-Estate-Image-Matching\data-scraping\watermark-scraper\1. scrape dataset\logos"
test_output_dir = "watermarked_images/transparent_logo_watermark"

# Performs network downloads and writes JPEG files into test_output_dir.
download_and_watermark_images(test_image_urls, test_logo_names, logos_dir, test_output_dir)


1
Logo Index: 189
Logo filename: image_189.png
Logo path: C:\Users\lenovo\OneDrive\Documents\PROJECTS\ALT\Real-Estate-Image-Matching\data-scraping\watermark-scraper\1. scrape dataset\logos\image_189.png
Watermark Image: watermarked_images/transparent_logo_watermark\watermarked_image_1.jpg
2
Logo Index: 190
Logo filename: image_190.png
Logo path: C:\Users\lenovo\OneDrive\Documents\PROJECTS\ALT\Real-Estate-Image-Matching\data-scraping\watermark-scraper\1. scrape dataset\logos\image_190.png
Watermark Image: watermarked_images/transparent_logo_watermark\watermarked_image_2.jpg


In [50]:
# Example usage
# Paste the absolute path of the CSV file here:
csv_path = r"C:\Users\lenovo\OneDrive\Documents\PROJECTS\ALT\Real-Estate-Image-Matching\data-scraping\watermark-scraper\1. scrape dataset\dataset\watermark_data.csv"
start_index = 0  # position of the first row to extract (0 is the first row)
end_index = 249    # position one past the last row to extract (exclusive)

image_urls, developers, logo_urls = extract_columns_from_csv(csv_path, start_index, end_index)

# Optionally show what was extracted: a heading line followed by the column as a plain list
print("Image URLs:", image_urls.tolist(), sep="\n")
print("\nDevelopers:", developers.tolist(), sep="\n")
print("\nLogo URLs:", logo_urls.tolist(), sep="\n")

Image URLs:
['https://api.omh.app/store/picture/2cd4ea3c-73f3-40ab-bef9-a5167c718093', 'https://api.omh.app/store/picture/9d62b678-ff95-4a70-9f9b-259e1d61303d', 'https://api.omh.app/store/picture/90a97cfe-6cb4-41cc-be53-e5318239e4a3', 'https://api.omh.app/store/picture/e929c64a-391b-4d30-878d-9b62c33d4536', 'https://api.omh.app/store/picture/7fa2242a-02bb-48b2-9ec3-2ba75c85ad33', 'https://api.omh.app/store/picture/bfec52e7-ed1d-4d6b-8aef-685de06bcf6b', 'https://api.omh.app/store/picture/4fe29d8b-3239-4021-ba05-78aa55221835', 'https://api.omh.app/store/picture/8724be09-bd74-4998-bf6e-8d4dcc8bd338', 'https://api.omh.app/store/picture/f4cbf625-5d65-4038-9641-5bcc5cb86924', 'https://api.omh.app/store/picture/bef4928a-b62d-4e6f-9963-24107179731f', 'https://api.omh.app/store/picture/a43cb1cf-d246-4760-a452-1d789b0d8962', 'https://api.omh.app/store/picture/0a028a9f-aea1-4e4e-81ea-9a3f427caf35', 'https://api.omh.app/store/picture/143fa19c-161c-45be-81cd-8b384ba2c3d4', 'https://api.omh.app/stor

In [56]:
# Watermark the rows extracted from the CSV, looking up each logo by developer name.
# Relies on image_urls, developers and logos_dir bound by earlier cells.
output_dir = "watermarked_images/b"
download_and_watermark_images(image_urls, developers, logos_dir, output_dir)

1
Logo Index: 4
Logo filename: image_4.png
Logo path: C:\Users\lenovo\OneDrive\Documents\PROJECTS\ALT\Real-Estate-Image-Matching\data-scraping\watermark-scraper\1. scrape dataset\logos\image_4.png
Watermark Image: watermarked_images/b\watermarked_image_1.jpg
2
Logo Index: 181
Logo filename: image_181.png
Logo path: C:\Users\lenovo\OneDrive\Documents\PROJECTS\ALT\Real-Estate-Image-Matching\data-scraping\watermark-scraper\1. scrape dataset\logos\image_181.png
Watermark Image: watermarked_images/b\watermarked_image_2.jpg
3
Logo Index: 146
Logo filename: image_146.png
Logo path: C:\Users\lenovo\OneDrive\Documents\PROJECTS\ALT\Real-Estate-Image-Matching\data-scraping\watermark-scraper\1. scrape dataset\logos\image_146.png
Watermark Image: watermarked_images/b\watermarked_image_3.jpg
4
Logo Index: 38
Logo filename: image_38.png
Logo path: C:\Users\lenovo\OneDrive\Documents\PROJECTS\ALT\Real-Estate-Image-Matching\data-scraping\watermark-scraper\1. scrape dataset\logos\image_38.png
Watermark I