In [None]:
!pip install torch torchvision torchaudio transformers diffusers accelerate salesforce-lavis

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import torch
from PIL import Image

from lavis.models import load_model_and_preprocess

In [None]:
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")


In [None]:
# raw_image = Image.open("example.jpg").convert("RGB")


In [None]:
# Load the BLIP captioning checkpoint in eval mode on the selected device.
# The third element returned by the loader is not used in this notebook and
# is discarded.
model, vis_processors, _ = load_model_and_preprocess(
    name="blip_caption",
    model_type="large_coco",
    is_eval=True,
    device=device,
)

# Show which image-processor variants the loader returned.
vis_processors.keys()

dict_keys(['train', 'eval'])

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def cosine_sim(str1, str2):
    """Return the cosine similarity of two strings under a TF-IDF model.

    A fresh ``TfidfVectorizer`` is fitted on just the two inputs, so the
    vocabulary and weights come from this pair alone.

    Args:
        str1: First text.
        str2: Second text.

    Returns:
        The single similarity value between the two TF-IDF row vectors.
    """
    vectorizer = TfidfVectorizer()
    # Row 0 holds the vector for str1, row 1 the vector for str2.
    vectors = vectorizer.fit_transform([str1, str2])
    first_vector = vectors[0]
    second_vector = vectors[1]
    similarity = cosine_similarity(first_vector, second_vector)
    # cosine_similarity returns a 1x1 matrix here; unwrap the only entry.
    return similarity[0][0]


In [None]:
# Room / scene labels, written with '_' and '-' as word separators.
rooms_string = '2D_floor_plan, 3D_floor_plan, balcony, bathroom, cellar, details, dining_room, documents, empty_room, energy_certificate, garden, gym, hall-corridor, kitchen, laundry_room, living-dining_room, living_room, map_location, mountain_view, non_related, office, outdoor_building, outdoor_house, parking, pool, reception-lobby, room-bedroom, stairs, storage_pantry, terrace, walk_in_closet, water_view'
room_list_under = rooms_string.split(', ')

# Same labels with both separators turned into spaces, so they can be matched
# against free text.
_separators_to_space = str.maketrans('_-', '  ')
rooms_list = [raw_label.translate(_separators_to_space) for raw_label in room_list_under]


In [None]:
def union_strings(str1, str2):
    """Merge the words of two strings, keeping each word once.

    Words are split on whitespace. The result lists every distinct word in
    order of first appearance, scanning ``str1`` and then ``str2``.

    Args:
        str1: First string; its words come first in the result.
        str2: Second string; only words not already seen are appended.

    Returns:
        The distinct words joined by single spaces ("" when both inputs
        contain no words).
    """
    # dict keys keep insertion order, which gives an order-preserving
    # de-duplication without rescanning a list for every word.
    return " ".join(dict.fromkeys(str1.split() + str2.split()))


In [None]:
import pandas as pd
import requests
from transformers import pipeline
import difflib
import re
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import json

# GPT-2 model and tokenizer used for the final pass over each fused caption.
model2 = GPT2LMHeadModel.from_pretrained('gpt2')
tokenizer2 = GPT2Tokenizer.from_pretrained('gpt2')

# Window of rows to process: `finish` rows beginning at positional offset `start`.
start = 7000
finish = 1000

df = pd.read_csv("out.csv")
df["blip"] = ""
# Copy the window so the per-row writes to "blip" go to an independent frame
# instead of a slice of the full table.
df = df.iloc[start:start + finish].copy()

# Records for the JSON / JSONL exports, keyed by image file stem.
data = {}

# One alternation over every known room label; identical for every row.
pattern = '|'.join(rooms_list)

for index, row in df.iterrows():
    if index == 0:
        continue

    # Bound before the try so the failure message below can always format it.
    url = None
    try:
        # Column 0 holds the image URL, column 1 the existing caption.
        url = row.iloc[0]
        # The fixed offsets assume the URL prefix seen in the run log
        # (".../real_estate_dataset/images/<name>.jpg"): offset 61 keeps
        # "images/<name>.jpg", and the next two slices keep "<name>".
        image_path = url[61:]
        filename = image_path[7:][:-4]
        conditioning_image_path = "processed_" + image_path

        print(index)
        # Close the connection once the image is decoded, and fail early on
        # HTTP errors instead of handing an error page to PIL.
        with requests.get(url, stream=True, timeout=5) as response:
            response.raise_for_status()
            raw_image = Image.open(response.raw).convert('RGB')

        image = vis_processors["eval"](raw_image).unsqueeze(0).to(device)
        description = str(model.generate({"image": image})[0])

        caption = row.iloc[1]
        description = description.replace('-', ' ')
        caption = caption.replace('-', ' ')

        description_matches = re.findall(pattern, description)
        caption_matches = re.findall(pattern, caption)

        # Room names are pulled out of both texts; they are re-attached once,
        # at the front, through room_fusion.
        description = re.sub(pattern, '', description)
        caption = re.sub(pattern, '', caption)

        # NOTE(review): drops the first five characters of the remaining
        # caption and prepends "and" -- presumably the captions share a fixed
        # lead-in after the room name; confirm against out.csv.
        caption = "and" + caption[5:]

        if description_matches:
            # Removing the room leaves a dangling article ("a  with ...").
            # Strip the article plus its trailing whitespace rather than a
            # fixed number of characters, which ate a letter whenever the
            # room did not directly follow the article.
            description = re.sub(r'^an?\s+', '', description)

        # Recomputed for every row from whichever side named a room, so a row
        # never inherits the previous row's room and a missing match on one
        # side no longer aborts the row.
        room_fusion = union_strings(
            description_matches[0] if description_matches else "",
            caption_matches[0] if caption_matches else "",
        )

        input_text = room_fusion + " " + description + " " + caption
        input_ids = tokenizer2.encode(input_text, return_tensors='pt')

        # NOTE(review): max_length equals the prompt length, so GPT-2 has no
        # budget to continue the text; the logged captions look like the
        # prompt plus at most one sampled token. Confirm this is intended.
        output = model2.generate(input_ids=input_ids, max_length=input_ids.shape[1], do_sample=True)

        new_description = tokenizer2.decode(output[0], skip_special_tokens=True).strip()

        print(new_description)

        df.loc[index, "blip"] = new_description

        data[filename] = {
            "image": image_path,
            "conditioning_image": conditioning_image_path,
            "caption": new_description
        }

    except Exception as e:
        # Best effort: report the failing row and carry on with the next one.
        print(f"Failed to process image{url}: {str(e)}")


# Keyed JSON export.
with open('file.json', 'w') as f:
    json.dump(data, f, indent=2)

# Same records, one JSON object per line; written straight from memory, so
# there is no need to re-read file.json.
with open('new_file.jsonl', 'w') as f:
    for record in data.values():
        f.write(json.dumps(record) + '\n')

df.to_csv("new_out.csv", index=False)


7000
bathroom with a sink and a shower and sink and shower for
7001
bathroom a patio with a table and chairs and a potted plant and hardwood flooring
7002
living room dining filled with furniture and a flat screen tv and hardwood floor, tv and radiator rack
7003
living room dining a hallway with a clock on the wall and natural light and carpeting
7004
living room filled with furniture and a flat screen tv and natural light, wall mounted ac, tv and carpet,
7005
living room empty filled with furniture and a painting on the wall and natural light, tile floor and crown molding all
7006
living room empty a hallway with a black and white checkered floor and natural light, tile floor and crown molding on
7007
living room with a large entertainment center and tile floor and tv,
7008
living room filled with furniture and a flat screen tv and natural light, tile floor, vaulted ceiling and tv in
7009
living room filled with furniture and a flat screen tv and tv out
7010
living room filled with fu

In [None]:
def train_model(start, finish=2000):
    """Caption one window of rows from out.csv and export the results.

    Despite its name, this function trains nothing. For each row it downloads
    the image, generates a BLIP description, fuses it with the row's existing
    caption, passes the text through GPT-2 and stores the result.

    It relies on names created by earlier notebook cells: ``model``,
    ``vis_processors``, ``device``, ``model2``, ``tokenizer2``, ``rooms_list``
    and ``union_strings``.

    Args:
        start: Positional offset of the first row to process; also used as the
            suffix of the output file names.
        finish: Number of rows to process from ``start`` (default 2000).

    Side effects:
        Writes ``file<start>.json``, ``new_file<start>.jsonl`` and
        ``new_out<start>.csv`` in the working directory, and prints progress.
        Rows that fail are reported and skipped.
    """
    df = pd.read_csv("out.csv")
    df["blip"] = ""
    # Copy the window so the per-row writes to "blip" go to an independent
    # frame instead of a slice of the full table.
    df = df.iloc[start:start + finish].copy()

    # Records for the JSON / JSONL exports, keyed by image file stem.
    data = {}

    # One alternation over every known room label; identical for every row.
    pattern = '|'.join(rooms_list)

    for index, row in df.iterrows():
        if index == 0:
            continue

        # Bound before the try so the failure message can always format it.
        url = None
        try:
            # Column 0 holds the image URL, column 1 the existing caption.
            url = row.iloc[0]
            # The fixed offsets assume the URL prefix seen in the run log
            # (".../real_estate_dataset/images/<name>.jpg"): offset 61 keeps
            # "images/<name>.jpg", and the next two slices keep "<name>".
            image_path = url[61:]
            filename = image_path[7:][:-4]
            conditioning_image_path = "processed_" + image_path

            print(index)
            # Close the connection once the image is decoded, and fail early
            # on HTTP errors instead of handing an error page to PIL.
            with requests.get(url, stream=True, timeout=5) as response:
                response.raise_for_status()
                raw_image = Image.open(response.raw).convert('RGB')

            image = vis_processors["eval"](raw_image).unsqueeze(0).to(device)
            description = str(model.generate({"image": image})[0])

            caption = row.iloc[1]
            description = description.replace('-', ' ')
            caption = caption.replace('-', ' ')

            description_matches = re.findall(pattern, description)
            caption_matches = re.findall(pattern, caption)

            # Room names are pulled out of both texts; they are re-attached
            # once, at the front, through room_fusion.
            description = re.sub(pattern, '', description)
            caption = re.sub(pattern, '', caption)

            # NOTE(review): drops the first five characters of the remaining
            # caption and prepends "and" -- presumably the captions share a
            # fixed lead-in after the room name; confirm against out.csv.
            caption = "and" + caption[5:]

            if description_matches:
                # Removing the room leaves a dangling article ("a  with ...").
                # Strip the article plus its trailing whitespace rather than a
                # fixed number of characters, which ate a letter whenever the
                # room did not directly follow the article.
                description = re.sub(r'^an?\s+', '', description)

            # Recomputed for every row from whichever side named a room, so a
            # row never inherits the previous row's room and a missing match
            # on one side no longer aborts the row.
            room_fusion = union_strings(
                description_matches[0] if description_matches else "",
                caption_matches[0] if caption_matches else "",
            )

            input_text = room_fusion + " " + description + " " + caption
            input_ids = tokenizer2.encode(input_text, return_tensors='pt')

            # NOTE(review): max_length equals the prompt length, so GPT-2 has
            # no budget to continue the text; the logged captions look like
            # the prompt plus at most one sampled token. Confirm intended.
            output = model2.generate(input_ids=input_ids, max_length=input_ids.shape[1], do_sample=True)

            new_description = tokenizer2.decode(output[0], skip_special_tokens=True).strip()

            print(new_description)

            df.loc[index, "blip"] = new_description

            data[filename] = {
                "image": image_path,
                "conditioning_image": conditioning_image_path,
                "caption": new_description
            }

        except Exception as e:
            # Best effort: report the failing row and carry on with the next.
            print(f"Failed to process image{url}: {str(e)}")

    # Keyed JSON export.
    with open('file'+str(start)+'.json', 'w') as f:
        json.dump(data, f, indent=2)

    # Same records, one JSON object per line; written straight from memory,
    # so there is no need to re-read the JSON file.
    with open('new_file'+str(start)+'.jsonl', 'w') as f:
        for record in data.values():
            f.write(json.dumps(record) + '\n')

    df.to_csv("new_out"+str(start)+".csv", index=False)


In [None]:
from multiprocessing import Process

processes = []
starts = [3000,5000,7000,9000]

if device.type == "cuda":
    # Forked workers cannot use the CUDA context created in this process: the
    # captured log shows every image failing with "Cannot re-initialize CUDA
    # in forked subprocess". On a GPU the chunks are therefore processed one
    # after another in this process, where the models are already loaded.
    for i, chunk_start in enumerate(starts):
        print(i)
        train_model(chunk_start)
else:
    # On CPU, keep one worker process per chunk.
    for i, chunk_start in enumerate(starts):
        print(i)
        p = Process(target=train_model, args=(chunk_start,))
        processes.append(p)
        p.start()

    # Wait for every worker before the cell finishes.
    for p in processes:
        p.join()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
3188
Failed to process imagehttps://restb-hackathon.s3.amazonaws.com/real_estate_dataset/images/21680__002.jpg: Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method
8182
Failed to process imagehttps://restb-hackathon.s3.amazonaws.com/real_estate_dataset/images/13342__000.jpg: Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start methodFailed to process imagehttps://restb-hackathon.s3.amazonaws.com/real_estate_dataset/images/15479__000.jpg: Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method
6186

5188
Failed to process imagehttps://restb-hackathon.s3.amazonaws.com/real_estate_dataset/images/7017__000.jpg: Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method
3189
Fai

Process Process-5:
Process Process-4:
Process Process-7:
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/usr/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
Process Process-6:
  File "/usr/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "<ipython-input-12-421a7ebf2fad>", line 25, in train_model
    raw_image = Image.open(requests.get(url, stream=True, timeout=5).raw).convert('RGB')
  File "/usr/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/requests/api.py", line 75, in get
    return request('get', url, params=params, **kwargs)
  File "<ipython-input-12-421a7ebf2fad>", line 25, in train_model
    raw_image = Image.open(requests.get(url, stream=True, timeo