In [None]:
from urllib.parse import urlencode

# Term to look up on Google Images
search_query = "dogs"

# urlencode escapes spaces and reserved characters (&, #, +, ...) in the query,
# so multi-word or punctuated search terms still produce a valid URL.
# hl=en requests the English interface; tbm=isch selects the image-search tab.
search_url = "https://www.google.com/search?" + urlencode(
    {"hl": "en", "tbm": "isch", "q": search_query}
)


In [None]:
import os
import requests
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from bs4 import BeautifulSoup


In [None]:
# Fetch the image-search results page.
# The timeout stops a stalled connection from hanging the cell indefinitely.
response = requests.get(search_url, timeout=10)
# Stop here on a 4xx/5xx reply instead of parsing an error page as if it were results.
response.raise_for_status()


In [None]:
# Parse the fetched page body with Python's built-in HTML parser
soup = BeautifulSoup(markup=response.text, features='html.parser')


In [None]:
# Collect the `src` of every <img> tag, keeping only values that are present
# and start with "http" (tags without a src, or with any other kind of src, are skipped).
images = [
    src
    for src in (tag.get("src") for tag in soup.find_all("img"))
    if src and src.startswith("http")
]


In [None]:
# Echo each collected URL to the cell output, one per line
for url in images:
    print(url)

# Also persist the list to disk, one URL per line
with open('dog_images_urls.txt', 'w') as url_file:
    url_file.writelines(url + "\n" for url in images)


https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRKY09SOJjsCOG_q6ilm-rbUJjcLaYfkDQZBzifN0k0s4cuqf_XoLOscNmTsco&s
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTKSZdNfpeLu83S1DDcu1j3gQecGxdFff-8vhlzMng34-1n-CFgBd2mKbmrSg&s
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRcDm03pS6ZaFGAhJv5cnV0lmo3iM89iHHWk0EyZK2VljdXp6Q3ApRwBnGCDcE&s
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTaw7PMFcP1lxL3JibXZ4dHc729iigt1BhzWIrgBF0L65vrRVCZlHsYyv-cOQ&s
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcR-TiABrxApDDXgpTjD729Y76MY8JWUj3eMMK8ZMhktKT4CvgEugi2S6tpJ84w&s
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQ2RFc2luRg_a68Ek-mucM6-zscIQqnYzyU94GJi3gYQEpEDtxdpHzt8f1VlQ&s
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTpgFPpBmilb_2wiKfdWumPyVuwBTe8kFdz-B-J6UyutTSnHwm14PvY51_QFA&s
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTmcP_IKBJdS_FKwIWP_bRWhehv6wqS2dzR17DU4BTzUm-npHFZrw2oMcRFUkU&s
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRYNzpk

In [None]:
# Folder where the downloaded images will be stored
folder_name = 'downloaded_images'

# Create the folder if it does not already exist (no error when it does)
os.makedirs(folder_name, exist_ok=True)

# Download every collected URL into the folder as image_<index>.jpg.
# A failure on one URL is reported and the loop moves on to the next one.
for i, image_url in enumerate(images):
    try:
        # The timeout keeps one stalled connection from hanging the whole cell
        image_response = requests.get(image_url, timeout=10)
        # Raise on 4xx/5xx so an HTML error page is never saved under a .jpg name
        image_response.raise_for_status()
        img_data = image_response.content

        # Path of the image file inside the target folder
        file_path = os.path.join(folder_name, f'image_{i}.jpg')

        # Write the raw bytes exactly as received
        with open(file_path, 'wb') as handler:
            handler.write(img_data)
    except Exception as e:
        print(f"Could not download {image_url}: {e}")


In [None]:

# Directory where your images are stored.
# Use the download cell's `folder_name` variable; the quoted string 'folder_name'
# would point at a directory literally named "folder_name", which is never created.
IMAGE_DIR = folder_name

# Preprocessing transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to the size expected by your model
    transforms.ToTensor(),          # Convert to PyTorch Tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Per-channel normalization
])


In [None]:
class CustomImageDataset(Dataset):
    """Dataset yielding two transformed views (query, key) of each image in a directory.

    Args:
        img_dir: Path of the directory that holds the image files.
        transform: Callable applied to the image to produce the query view.
            When None, the untransformed RGB image is returned as the query.
        transform_key: Callable applied to the image to produce the key view.
            Falls back to `transform` when not given.
    """

    def __init__(self, img_dir, transform=None, transform_key=None):
        self.img_dir = img_dir
        # Keep regular files only: sub-directories (e.g. the `.ipynb_checkpoints`
        # folder Jupyter may create) cannot be opened as images. Sorting makes the
        # index -> file mapping deterministic, since os.listdir order is arbitrary.
        self.img_names = sorted(
            name
            for name in os.listdir(img_dir)
            if os.path.isfile(os.path.join(img_dir, name))
        )
        self.transform = transform  # Transformation for the query image
        # Compare against None rather than truthiness so that a falsy-but-valid
        # callable (e.g. an empty container-style transform) is still honoured.
        self.transform_key = transform_key if transform_key is not None else transform  # Transformation for the key image

    def __len__(self):
        """Return the number of image files found in `img_dir`."""
        return len(self.img_names)

    def __getitem__(self, idx):
        """Load image `idx` as RGB and return its `(query, key)` views."""
        img_path = os.path.join(self.img_dir, self.img_names[idx])
        # convert() returns an independent image, so the underlying file can be
        # closed as soon as the block exits.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')  # Convert to RGB

        im_q = self.transform(image) if self.transform is not None else image  # Query view
        im_k = self.transform_key(image) if self.transform_key is not None else image  # Key view

        return im_q, im_k


In [None]:
from torchvision import transforms
from torch.utils.data import DataLoader
# No need to import CustomImageDataset if it's defined in the same script

# Query-view pipeline: fixed 224x224 resize, random horizontal flip,
# tensor conversion, then per-channel normalization.
transform_q = transforms.Compose(transforms=[
    transforms.Resize(size=(224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    # ... add any other augmentations specific to your task ...
])

# Key-view pipeline: same steps as the query view plus a random rotation
# (degrees=10) inserted before tensor conversion.
transform_k = transforms.Compose(transforms=[
    transforms.Resize(size=(224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=10),  # Example of an additional transformation
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    # ... similar or different augmentations ...
])

# Usage sketch (left disabled): build the two-view dataset and a loader from IMAGE_DIR
# dataset = CustomImageDataset(IMAGE_DIR, transform=transform_q, transform_key=transform_k)
# dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# The loader can then be iterated in the training loop
# for im_q, im_k in dataloader:
#     # Process im_q and im_k through your MoCo model
#     pass
