In [None]:
from icrawler.builtin import GoogleImageCrawler, BingImageCrawler, BaiduImageCrawler, FlickrImageCrawler, UrlListCrawler
from icrawler import Crawler

def scrape_google(art_style, num_images):
    """Download Google Images results for one art style.

    Searches for ``"<art_style> painting"`` and stores the downloads under
    ``Google/<art_style>``.

    Args:
        art_style: Name of the painting style; used in both the search
            keyword and the output directory name.
        num_images: Forwarded to the crawler as ``max_num``.
    """
    crawler = GoogleImageCrawler(storage={"root_dir": f"Google/{art_style}"})
    crawler.crawl(
        keyword=f"{art_style} painting",
        max_num=num_images,
        # icrawler's Google license filter only accepts "noncommercial",
        # "commercial", "noncommercial,modify" or "commercial,modify";
        # "personal use" is not one of them. "noncommercial" is the closest
        # match to a personal-use restriction.
        filters={"license": "noncommercial"},
    )

def scrape_bing(art_style, num_images):
    """Download Bing Images results for one art style.

    Searches for ``"<art_style> painting"`` and stores the downloads under
    ``Bing/<art_style>``.

    Args:
        art_style: Name of the painting style; used in both the search
            keyword and the output directory name.
        num_images: Forwarded to the crawler as ``max_num``.
    """
    crawler = BingImageCrawler(storage={"root_dir": f"Bing/{art_style}"})
    crawler.crawl(
        keyword=f"{art_style} painting",
        max_num=num_images,
        # icrawler's Bing license filter accepts "creativecommons",
        # "publicdomain", "noncommercial", "commercial",
        # "noncommercial,modify" or "commercial,modify"; "personal use" is
        # not one of them. "noncommercial" is the closest match.
        filters={"license": "noncommercial"},
    )

def scrape_baidu(art_style, num_images):
    """Download Baidu image results for one art style.

    The search keyword is ``"<art_style> painting"`` and files are stored
    under ``Baidu/<art_style>``. ``num_images`` is forwarded to the crawler
    as ``max_num``. No license filter is applied.
    """
    storage_config = {"root_dir": f"Baidu/{art_style}"}
    search_term = f"{art_style} painting"
    BaiduImageCrawler(storage=storage_config).crawl(
        keyword=search_term,
        max_num=num_images,
    )

def scrape_flickr(art_style, num_images):
    """Download Flickr search results for one art style.

    Runs a free-text Flickr search for ``"<art_style> painting"`` and stores
    the downloads under ``Flickr/<art_style>``.

    Args:
        art_style: Name of the painting style; used in both the search text
            and the output directory name.
        num_images: Forwarded to the crawler as ``max_num``.
    """
    # NOTE: replace the placeholder with a real Flickr API key before running.
    crawler = FlickrImageCrawler(storage={"root_dir": f"Flickr/{art_style}"}, apikey="YOUR_FLICKR_API_KEY")
    crawler.crawl(
        # FlickrImageCrawler forwards extra keyword arguments as
        # flickr.photos.search parameters; the free-text one is `text`.
        # `keyword` is not a recognised parameter, so the query was dropped.
        text=f"{art_style} painting",
        max_num=num_images
    )

def scrape_from_urllist(file_path, output_dir):
    """Download every image listed in a URL list file.

    Args:
        file_path: Path to the URL list handed to ``UrlListCrawler.crawl``.
        output_dir: Directory the downloaded images are stored in.
    """
    crawler = UrlListCrawler(storage={"root_dir": output_dir})
    # UrlListCrawler.crawl names its first parameter `url_list`, so passing
    # `file_path=` as a keyword raises TypeError. Pass it positionally.
    crawler.crawl(file_path)

# Painting styles to collect, and the max_num value handed to each crawler.
art_styles = ["Gond", "Kalighat", "Kangra", "Kerala Mural", "Madhubani", "Mandana", "Pichwai", "Warli"]
num_images = 500

# For each style, run the four search-engine scrapers in a fixed order:
# Google, Bing, Baidu, then Flickr.
for art_style in art_styles:
    for scrape in (scrape_google, scrape_bing, scrape_baidu, scrape_flickr):
        scrape(art_style, num_images)


# Note: This code does not fully automate image cleaning. For accurate image cleaning, manual review and filtering are recommended.

In [None]:
import os
from google.colab import drive
from PIL import Image
import google.generativeai as genai
import io

# Mount Google Drive inside the Colab runtime (presumably where the scraped
# images are kept -- confirm against image_folder once it is filled in).
drive.mount('/content/drive')

# Path to the folder containing scraped images.
# Must be set before the analysis loop runs: os.listdir("") fails.
image_folder = ""

# Configure Google Gemini API.
# Prefer the GOOGLE_API_KEY environment variable so the real key does not have
# to be pasted into (and saved with) the notebook; the placeholder remains the
# fallback and can still be edited in place.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "YOUR_GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)

model = genai.GenerativeModel("gemini-1.5-flash")

# Instruction sent alongside every image. The analysis loop looks for the
# literal phrase 'not ok' in the reply, so keep that wording in sync.
evaluation_prompt = """
Analyze this image. If the image quality is poor, like too blurry or having poor lighting,
or if the image content does not depict a traditional Indian painting (Gond, Kalighat, Kangra, Kerala Mural, Madhubani, Mandana, Pichwai, Warli),
then say 'not ok' and describe the image briefly. If the image is a clear and authentic traditional painting, then just say 'ok'.
"""

def analyze_image(image_path):
    """Ask the Gemini model to evaluate a single image file.

    The file is read fully into memory, decoded with PIL, and sent to the
    model together with ``evaluation_prompt``.

    Args:
        image_path: Path of the image file to evaluate.

    Returns:
        The model's text reply, or ``None`` if anything failed (unreadable
        file, undecodable image, API error); the error is printed.
    """
    try:
        # Read the bytes up front so the file handle is closed before the
        # image is decoded and uploaded.
        with open(image_path, "rb") as fh:
            raw_bytes = fh.read()

        picture = Image.open(io.BytesIO(raw_bytes))
        reply = model.generate_content([evaluation_prompt, picture])
        return reply.text
    except Exception as e:
        # Best effort: report the failure and let the caller carry on.
        print(f"Error analyzing {image_path}: {e}")
        return None

# Evaluate every image in image_folder and report the ones the model rejects.
# File extensions (with the leading dot, so e.g. "foopng" is not matched).
image_extensions = (".png", ".jpg", ".jpeg", ".bmp", ".gif")

results = []  # (filename, evaluation) for images the model flagged 'not ok'
failed = []   # filenames whose analysis errored (analyze_image returned None)
checked = 0   # number of image files sent for analysis

# Sort so the run order and the report are reproducible.
for filename in sorted(os.listdir(image_folder)):
    if not filename.lower().endswith(image_extensions):
        continue

    image_path = os.path.join(image_folder, filename)
    print(f"Analyzing {filename}...")
    evaluation = analyze_image(image_path)
    checked += 1

    if evaluation is None:
        # An errored analysis is not a pass; track it separately so it is
        # not silently counted as meeting the criteria.
        failed.append(filename)
    elif "not ok" in evaluation.lower():
        results.append((filename, evaluation))

if results:
    print("The following images do not meet the criteria:")
    for filename, evaluation in results:
        print(f"\nImage: {filename}")
        print(f"Evaluation: {evaluation}")
elif not checked:
    print("No image files found to analyze.")
elif not failed:
    print("All images meet the criteria!")

if failed:
    print(f"\n{len(failed)} image(s) could not be analyzed and need manual review:")
    for filename in failed:
        print(f"  {filename}")
