In [None]:
import pandas as pd
import os
import shutil

from transformers import pipeline

In [None]:
# Load the Flickr30k caption table that the rest of this cell classifies.
captions_csv = '../../input/Flicker30k/captions.csv'
df = pd.read_csv(captions_csv)

# Hugging Face text-classification pipeline applied to each caption below.
# NOTE(review): 'bert-base-uncased' is the name of the base checkpoint, while the
# threshold cell filters on labels such as 'sea' or 'beach' -- confirm this model
# actually emits those labels.
classifier = pipeline('text-classification', model='bert-base-uncased')

def classify_caption(caption):
    """Classify one caption with the module-level ``classifier`` pipeline.

    Args:
        caption: Caption text to classify.

    Returns:
        pd.Series: Two positional values, ``[label, score]``, taken from the
        first result the pipeline returns. If classification fails for any
        reason, the sentinel ``['error', 0]`` is returned instead so that a
        ``DataFrame.apply`` over many captions never aborts midway.
    """
    import warnings  # local import so this cell does not depend on the import cell

    try:
        # The pipeline returns a list of result dicts; only the first is used.
        top_result = classifier(caption, truncation=True)[0]
        return pd.Series([top_result['label'], top_result['score']])
    except Exception as exc:
        # Best-effort by design, but report the cause: otherwise a systemic
        # failure silently produces an all-'error' column with no explanation.
        warnings.warn(f"classify_caption failed for {caption!r}: {exc!r}")
        return pd.Series(['error', 0])

# Classify every caption. classify_caption returns a two-element Series per row,
# so apply() yields a two-column frame that is stored as 'label' and 'score'.
df[['label', 'score']] = df['caption'].apply(classify_caption)

# Persist the complete labelled table.
df.to_csv('../../input/Flicker30k/flick30k_all_result.csv', index=False)

# Print a bounded preview rather than every row: the full table is already on
# disk, and echoing the whole dataset floods the notebook output.
PREVIEW_ROWS = 20
preview = df[['caption', 'label', 'score']].head(PREVIEW_ROWS)
for row in preview.itertuples(index=False):
    print(f"Caption: {row.caption}, Category: {row.label}, Score: {row.score}")
print(f"Showing {len(preview)} of {len(df)} classified captions")


In [None]:
# Reload the classified captions from the CSV of all results.
all_results_csv = '../../input/Flicker30k/flick30k_all_result.csv'
df = pd.read_csv(all_results_csv)

# Minimum value a caption must reach to be kept, keyed by its predicted label.
# Labels that are not listed here are rejected by filter_row.
thresholds = dict(
    sea=0.7,
    beach=0.5,
    desert=0.8,
    forest=0.7,
    glacier=0.2,
    mountain=0.3,
    snow=0.5,
    sand=0.4,
    lake=0.4,
)

def filter_row(row):
    """Decide whether one classified caption row passes its label threshold.

    Args:
        row: Mapping-like row (e.g. a ``pd.Series``) with a ``'label'`` entry
            and a ``'score'`` entry. A ``'confidence'`` entry is accepted as a
            fallback when ``'score'`` is absent.

    Returns:
        Truthy when the label is a key of the module-level ``thresholds`` dict
        and the row's score is at least that label's threshold; falsy otherwise.
    """
    label = row['label']
    if label not in thresholds:
        return False
    # The classification cell stores the value under 'score'; reading
    # 'confidence' unconditionally raised KeyError on that file.
    score = row['score'] if 'score' in row else row['confidence']
    return score >= thresholds[label]

# Row-wise boolean mask: True where filter_row accepts the caption.
keep_mask = df.apply(filter_row, axis=1)
filtered_result_df = df[keep_mask]

# Save only the accepted captions.
filtered_csv = '../../input/Flicker30k/flick30k_filtered_result.csv'
filtered_result_df.to_csv(filtered_csv, index=False)

qualifying_total = len(filtered_result_df)
print(f"Total number of qualifying captions: {qualifying_total}")

In [None]:
# Count how many distinct image filenames appear in the filtered captions CSV.
filtered_csv = '../../input/Flicker30k/flick30k_filtered_result.csv'
df = pd.read_csv(filtered_csv)

image_filename_column = df['image_filename']
unique_image_filenames_count = image_filename_column.nunique()

print(f"Number of unique image filenames: {unique_image_filenames_count}")

In [None]:
def extract_images(csv_file, directories, output_folder):
    """Copy every image listed in a CSV into ``output_folder``.

    Each distinct value of the CSV's ``image_filename`` column is looked up in
    ``directories`` in order; the first directory that contains the file wins.

    Args:
        csv_file: Path to a CSV file with an ``image_filename`` column.
        directories: Sequence of directories to search, in priority order.
        output_folder: Destination directory; created if it does not exist.

    Returns:
        list: Filenames that were not found in any of ``directories``.
    """
    # exist_ok replaces the separate exists() check, which could race with
    # another process creating the folder in between.
    os.makedirs(output_folder, exist_ok=True)

    df = pd.read_csv(csv_file)
    # Blank cells are read as NaN (a float), which os.path.join rejects with
    # TypeError, so drop them before building paths.
    image_filenames = df['image_filename'].dropna().unique()

    not_found_images = []

    for filename in image_filenames:
        # Named source_dir rather than `dir` to avoid shadowing the builtin.
        for source_dir in directories:
            source_path = os.path.join(source_dir, filename)
            if os.path.exists(source_path):
                shutil.copy2(source_path, os.path.join(output_folder, filename))
                print(f"Copied {filename} to {output_folder}")
                break
        else:
            # The loop ended without a break: no directory had this file.
            not_found_images.append(filename)

    if not_found_images:
        print("Images not found in any of the provided directories:")
        for img in not_found_images:
            print(img)

    return not_found_images

In [None]:
# Inputs: the filtered caption CSV and the directories to search for its images.
csv_file = '../../input/Flicker30k/flick30k_filtered_result.csv'
flicker_image_files = ['../../input/Flicker30k/flickr30k_images/']

# Output folder lives under the same 'Flicker30k' dataset directory as every
# other path in this notebook; a lowercase 'flicker30k' would be a different
# directory on case-sensitive filesystems.
flicker_filtered_image = '../../input/Flicker30k/flicker30k_output_images/'
extract_images(csv_file, flicker_image_files, flicker_filtered_image)

In [None]:
# Disabled cell: everything below is wrapped in a triple-quoted string literal,
# so none of it executes.
# NOTE(review): the body reads row['confidence'] and uses bare filenames
# ('flick30k_all_result.csv', 'flick30k_filtered_result.csv'), whereas the
# classification cell stores the value in a 'score' column and writes its CSV
# under '../../input/Flicker30k/' -- reconcile both before re-enabling.
'''
# Looking through all captions of each image, choose those image with at least 3 captions that meets the threshold of the label

df = pd.read_csv('flick30k_all_result.csv')

thresholds = {
    'sea': 0.7,
    'beach': 0.5,
    'desert': 0.8,
    'forest': 0.7,
    'glacier': 0.2,
    'mountain': 0.5,
    'snow': 0.5,
    'sand': 0.5,
    'lake': 0.5
}

qualify_images = []

grouped = df.groupby('image_filename')
for name, group in grouped:
    label_counts = {label: 0 for label in thresholds.keys()} 
    for _, row in group.iterrows():
        if row['label'] in thresholds and row['confidence'] >= thresholds[row['label']]:
            label_counts[row['label']] += 1
    
    if sum(count >= 3 for count in label_counts.values()) >= 1:
        qualify_images.append(name)

filtered_result_df = df[df['image_filename'].isin(qualify_images)]

filtered_result_df.to_csv('flick30k_filtered_result.csv', index=False)

print(f"Total number of qualifying images: {len(qualify_images)}")
'''

In [None]:
# Disabled cell: everything below is wrapped in a triple-quoted string literal,
# so none of it executes.
# NOTE(review): the body carries its own numpy/pandas/sklearn imports (sklearn is
# not imported in the notebook's import cell) and opens
# 'flick30k_filtered_result.csv' by bare filename, whereas the filtering cell
# writes that CSV under '../../input/Flicker30k/' -- reconcile before re-enabling.
'''
# Caption Similarity Check with Kosomos and Blip Caption

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def compute_tfidf(ref_file1, ref_file2, new_file):
    ref_captions1 = pd.read_csv(ref_file1)['image_caption'].tolist()
    ref_captions2 = pd.read_csv(ref_file2)['image_caption'].tolist()
    ref_captions = ref_captions1 + ref_captions2
    
    new_captions_df = pd.read_csv(new_file)
    new_captions = new_captions_df['caption'].tolist()

    all_captions = ref_captions + new_captions
    vectorizer = TfidfVectorizer()
    all_captions_vectors = vectorizer.fit_transform(all_captions)

    ref_vectors = all_captions_vectors[:len(ref_captions)]
    new_vectors = all_captions_vectors[len(ref_captions):]

    return ref_vectors, new_vectors, new_captions_df

def filter_flicker(ref_vectors, new_vectors, new_captions_df, threshold, output_file):
    similarities = cosine_similarity(new_vectors, ref_vectors)
    max_similarities = np.max(similarities, axis=1)
    new_captions_df['similarity_score'] = max_similarities

    qualifying_images = set()

    for filename in new_captions_df['image_filename'].unique():
        image_data = new_captions_df[new_captions_df['image_filename'] == filename]
        qualifying_captions = image_data[image_data['similarity_score'] >= threshold]

        if len(qualifying_captions) >= 2:
            qualifying_images.add(filename)

    filtered_df = new_captions_df[new_captions_df['image_filename'].isin(qualifying_images)]

    filtered_df.to_csv(output_file, index=False)
    print(f"Filtered captions saved to {output_file}. Total qualifying images: {len(qualifying_images)}")

blip_label = '../../input/Landscape/Label/Blip_Label.csv'
kosomos_label = '../../input/Landscape/Label/Kosmos_Label.csv'
filtered_flicker30k = 'flick30k_filtered_result.csv'

ref_vectors, new_vectors, new_captions_df = compute_tfidf(blip_label, kosomos_label, filtered_flicker30k)

filter_flicker(ref_vectors, new_vectors, new_captions_df, 0.4, 'flicker30k_output_file.csv')

'''

In [None]:
# Disabled cell: everything below is wrapped in a triple-quoted string literal,
# so none of it executes.
# NOTE(review) before re-enabling:
#   - CLIPModel, CLIPProcessor, Image and UnidentifiedImageError are used in the
#     body but are not imported in the notebook's import cell.
#   - is_landscape accepts `threshold` but never reads it; only classify_images
#     compares against it.
#   - output_folder points under '../../input/Flicker8k/' although folder_path is
#     the Flickr30k output folder -- confirm the intended location.
'''
# Using CLIP Model to filter the landscape image

model_name = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(model_name)
processor = CLIPProcessor.from_pretrained(model_name)

def is_landscape(image_path, positive_categories, negative_categories, threshold=0.5):
    try:
        image = Image.open(image_path)
    except UnidentifiedImageError:
        print(f"File is not an image or cannot be opened: {image_path}")
        return None, None
    
    inputs = processor(text=positive_categories + negative_categories, images=image, return_tensors="pt", padding=True)
    outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image
    probs = logits_per_image.softmax(dim=1)

    positive_probs = probs[0][:len(positive_categories)]
    negative_probs = probs[0][len(positive_categories):]

    max_positive_prob = max(positive_probs).item()
    max_negative_prob = max(negative_probs).item()

    print(f"Image: {image_path}, Positive Probability: {max_positive_prob}, Negative Probability: {max_negative_prob}")

    return max_positive_prob, max_negative_prob

def classify_images(folder_path, output_folder, positive_categories, negative_categories, threshold=0.5):
    similar_folder = os.path.join(output_folder, 'similar')
    landscape_folder = os.path.join(output_folder, 'landscape')
    
    if not os.path.exists(similar_folder):
        os.makedirs(similar_folder)
    if not os.path.exists(landscape_folder):
        os.makedirs(landscape_folder)
    
    image_files = [f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))]
    total_images = len(image_files)
    similar_count, landscape_count = 0, 0
    
    for image_file in image_files:
        image_path = os.path.join(folder_path, image_file)
        positive_prob, negative_prob = is_landscape(image_path, positive_categories, negative_categories, threshold)
        
        if positive_prob is None or negative_prob is None:
            continue
        
        if negative_prob > positive_prob:
            continue

        destination_folder = None
        if abs(positive_prob - negative_prob) <= 0.1:
            destination_folder = similar_folder
            similar_count += 1
        elif positive_prob > threshold:
            destination_folder = landscape_folder
            landscape_count += 1
        
        if destination_folder:
            shutil.copy2(image_path, os.path.join(destination_folder, image_file))

    print(f"Total number of images: {total_images}")
    print(f"Number of images in 'similar' folder: {similar_count}")
    print(f"Number of images in 'landscape' folder: {landscape_count}")

positive_categories = ["mountain", "desert", "snow", "sea", "glacier", "beach"]
negative_categories = ["water", "city", "indoor", "parks", "grass", "urban", "pool", "stadium", "lake", "building", "street", "transport", "house", "shop", "garden", "traffic"]
folder_path = flicker_filtered_image
output_folder = '../../input/Flicker8k/Output/Zero_Shot'
classify_images(folder_path, output_folder, positive_categories, negative_categories)

'''