In [1]:
#NLP CONNECT MODEL 
#https://huggingface.co/nlpconnect/vit-gpt2-image-captioning

In [2]:
!pip install yake

Collecting yake
  Downloading yake-0.4.8-py2.py3-none-any.whl (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.2/60.2 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Collecting segtok (from yake)
  Downloading segtok-1.5.11-py3-none-any.whl (24 kB)
Collecting jellyfish (from yake)
  Downloading jellyfish-1.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m19.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: segtok, jellyfish, yake
Successfully installed jellyfish-1.0.1 segtok-1.5.11 yake-0.4.8


In [3]:
import pandas as pd
import os
import yake
import torch
from PIL import Image
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer



In [4]:
def caption_gen(img) :
    model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
    feature_extractor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
    tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    max_length = 16
    num_beams = 4
    gen_kwargs = {"max_length": max_length, "num_beams": num_beams}
    def predict_step(image_paths):
      images = []
      for image_path in image_paths:
        i_image = Image.open(img)
        if i_image.mode != "RGB":
          i_image = i_image.convert(mode="RGB")

        images.append(i_image)

      pixel_values = feature_extractor(images=images, return_tensors="pt").pixel_values
      pixel_values = pixel_values.to(device)

      output_ids = model.generate(pixel_values, **gen_kwargs)

      preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
      preds = [pred.strip() for pred in preds]
      return preds


    caption = predict_step(img)
    return caption

In [5]:
def meta_data(img) :
    image = Image.open(img)
    file_name = os.path.basename(img)
    # Get image metadata
    file_size = image.info.get('filesize')
    file_type = image.format
    image_size = image.size
    megapixels = (image_size[0] * image_size[1]) / 1000000 
    image_width = image_size[0]
    image_height = image_size[1]
    return file_name,file_size,file_type,image_size,megapixels,image_width,image_height


In [6]:
def img_info(file_name,caption) :

    # Load the model and tokenizer
    tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
    model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large", device_map="auto")

    # Define your three prompts
    prompt_image_description = "Extract a brief description of the image from the following filename : "  + file_name
    prompt_daytime = "Extract the day duration from the following string(It may be daytime, rush hour, night time). If the day duration is not explicitly or implicitly mentioned in the string, assume the default to be 'daytime': "  + file_name + ","  + "caption"
    prompt_weather = "Extract the weather conditions from the following string(It may by sunny, winter, rain, rainy, mist, foggy). If the weather condition is not explicitly or implicitly mentioned in the string,  assume the default to be 'sunny' for that image"  + file_name + ","  + "caption"

    # Process each prompt and generate the corresponding output
    output_image_description = model.generate(tokenizer(prompt_image_description, return_tensors="pt").input_ids.to("cuda"))
    output_daytime = model.generate(tokenizer(prompt_daytime, return_tensors="pt").input_ids.to("cuda"))
    output_weather = model.generate(tokenizer(prompt_weather, return_tensors="pt").input_ids.to("cuda"))

    # Decode and clean the generated outputs
    description = tokenizer.decode(output_image_description[0], skip_special_tokens=True).strip()
    daytime = tokenizer.decode(output_daytime[0], skip_special_tokens=True).strip()
    weather = tokenizer.decode(output_weather[0], skip_special_tokens=True).strip()

    return description, daytime, weather


In [7]:
def keyword(imginfom) :
    kw_extractor = yake.KeywordExtractor()

    deduplication_threshold = 0.3
    deduplication_algo = 'seqm'
    windowSize = 1
    numOfKeywords = 7

    kw_extractor = yake.KeywordExtractor(
        dedupLim=deduplication_threshold,
        dedupFunc=deduplication_algo,
        windowsSize=windowSize,
        top=numOfKeywords
    )

#     # Join the file name and caption into a single string
#     input_text = file_name + " " + str(caption)

    # Extract keywords
    keywords = kw_extractor.extract_keywords(imginfom)

    # Create a list of just the keywords
    keyword_list = [keyword[0] for keyword in keywords]
    
    return keyword_list

In [8]:
df = pd.DataFrame(columns=[
    'Image Caption', 'Image Description', 'Image DayTime', 'Image Weather', 'KeyWords',
    'File Type', 'Image Size', 'Megapixels', 'Image Width', 'Image Height'
])


In [9]:
df.head()

Unnamed: 0,Image Caption,Image Description,Image DayTime,Image Weather,KeyWords,File Type,Image Size,Megapixels,Image Width,Image Height


In [10]:
import os
import shutil

image_folder = '/kaggle/input/hacktonauts-final'
output_folder = '/kaggle/working/renamed_image'  # Create a directory where you have write permissions
os.makedirs(output_folder, exist_ok=True)

image_info_list = []

# Create a list of new names for images with consecutive dots replaced by a single dot
for file in os.listdir(image_folder):
    new_file_name = '.'.join(file.split('.'))
    image_info_list.append(new_file_name)

# Copy and rename the files to the output directory
for i, old_file_name in enumerate(os.listdir(image_folder)):
    new_file_name = image_info_list[i]
    old_file_path = os.path.join(image_folder, old_file_name)
    new_file_path = os.path.join(output_folder, new_file_name)

    shutil.copy(old_file_path, new_file_path)

# List the renamed files in the output directory
for renamed_file in os.listdir(output_folder):
    print(renamed_file)

print("Files renamed successfully.")


2refined.png
generate a photo of an alley road in India with a small group of people walking there.png
generate a camera shot Image of an intersection in India with various types of vehicles including auto-rickshaws motorcycles and bicycles.png
Generate an image taken from an low point of view camera driving through a busy Indian road at early rainy morning high quality2.png
empty road at night2.png
2generate a camera shot image of an intersection of a road in India with various types of vehicles including autorickshaws cars motorcycles trucks and buses.png
3Generate a nighttime rainy season camera photo of a few vehicles with headlights on in an Indian road from a lower point of view..png
generate a camera shot quality image that shows a busy indian road with autos cars and scooties on the road. This is a zebracrossing with _red light signal_ and people crossing the road . It is a sunny d.png
2Generate a nighttime camera photo of a traffic policeman managing the flow of vehicles with 

In [11]:
import os
import pandas as pd

image_folder = '/kaggle/working/renamed_image'

for file in os.listdir(image_folder):
    file_path = os.path.join(image_folder, file)
    caption = caption_gen(file_path)
    file_name, file_size, file_type, image_size, megapixels, image_width, image_height = meta_data(file_path)
    description, daytime, weather = img_info(file_name,caption)
    # Input for keyword function
    input_text = file_name + " " + str(caption)
    keyword_list = keyword(input_text)
    image_info = {
        'Image Caption': [caption[0]],
        'Image Description': [description],
        'Image DayTime': [daytime],
        'Image Weather': [weather],
        'KeyWords': [keyword_list],
        'File Type': [file_type],
        'Image Size': [f"{image_size[0]}x{image_size[1]}"],
        'Megapixels': [f"{megapixels:.2f}"],
        'Image Width': [image_width],
        'Image Height': [image_height],
    }
    new_row = pd.DataFrame(image_info)
    data  = [df,new_row]
    df = pd.concat(data)


Downloading (…)lve/main/config.json:   0%|          | 0.00/4.61k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/982M [00:00<?, ?B/s]

Downloading (…)rocessor_config.json:   0%|          | 0.00/228 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/241 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/120 [00:00<?, ?B/s]

We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


Downloading spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. If you see this, DO NOT PANIC! This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Downloading (…)lve/main/config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]



In [12]:
df.head()

Unnamed: 0,Image Caption,Image Description,Image DayTime,Image Weather,KeyWords,File Type,Image Size,Megapixels,Image Width,Image Height
0,a city street with cars parked on the side of ...,2 refined.png,daytime,sunny,"[city street, street with cars, road, cars, side]",PNG,512x512,0.26,512,512
0,a motorcycle is parked on the side of a road,a group of people walking down an alley in india,daytime,sunny,"[motorcycle is parked, side, people walking th...",PNG,1024x1024,1.05,1024,1024
0,a city street filled with lots of traffic,a camera shot of an intersection in india with...,daytime,sunny,"[city street filled, lots of traffic, filled w...",PNG,1024x1024,1.05,1024,1024
0,a car is driving down a flooded street,a low point of view camera driving through a b...,daytime,rainy,"[flooded street, car is driving, car, busy Ind...",PNG,1024x1024,1.05,1024,1024
0,a blurry photo of a street at night,empty road at night2.png,night,sunny,"[street at night, blurry photo, empty road]",PNG,1024x1024,1.05,1024,1024


In [13]:
df.index = range(1, len(df) + 1)


In [14]:
df.head()

Unnamed: 0,Image Caption,Image Description,Image DayTime,Image Weather,KeyWords,File Type,Image Size,Megapixels,Image Width,Image Height
1,a city street with cars parked on the side of ...,2 refined.png,daytime,sunny,"[city street, street with cars, road, cars, side]",PNG,512x512,0.26,512,512
2,a motorcycle is parked on the side of a road,a group of people walking down an alley in india,daytime,sunny,"[motorcycle is parked, side, people walking th...",PNG,1024x1024,1.05,1024,1024
3,a city street filled with lots of traffic,a camera shot of an intersection in india with...,daytime,sunny,"[city street filled, lots of traffic, filled w...",PNG,1024x1024,1.05,1024,1024
4,a car is driving down a flooded street,a low point of view camera driving through a b...,daytime,rainy,"[flooded street, car is driving, car, busy Ind...",PNG,1024x1024,1.05,1024,1024
5,a blurry photo of a street at night,empty road at night2.png,night,sunny,"[street at night, blurry photo, empty road]",PNG,1024x1024,1.05,1024,1024


In [15]:
df['Image DayTime'].unique()

array(['daytime', 'night', 'nighttime', 'night time'], dtype=object)

In [16]:
df['Image DayTime'] = df['Image DayTime'].apply(lambda x: 'nighttime' if x not in ['daytime', 'rush hour'] else x)
df['Image DayTime'].unique()

array(['daytime', 'nighttime'], dtype=object)

In [17]:
valid_weather_conditions = ['sunny', 'foggy', 'winter', 'rainy','rain','mist']

In [18]:
df['Image Weather'] = df['Image Weather'].apply(lambda x: 'rain' if x not in ['sunny', 'foggy', 'winter', 'rainy','rain'] else x)
df['Image Weather'].unique()


array(['sunny', 'rainy', 'rain'], dtype=object)

In [19]:
df['Image Weather'].unique()

array(['sunny', 'rainy', 'rain'], dtype=object)

In [20]:
df.to_csv('Dataset Characteristics.csv')