In [1]:
# First, define the goals and test cases, since test-driven development (TDD) is good practice:
#   1. Return the best-fitting images for the given search keys.
#   2. Find the tags of an image given the search keys.

# We have pictures with alt text provided. Azure has a great API for generating alt text.
# I used the Azure Computer Vision examples as my database, but their API could be called directly instead.


#Sample cases:
#Image1: People waiting for train in subway.
#Tags: train, platform, station, subway, people
    

In [100]:
import os
import glob
from PIL import Image
import pickle as pk

In [127]:
class SearchEngine:
    """Tag-based image search over a folder of PNG files.

    Attributes:
        img_path: directory that holds the images.
        images: paths of the ``*.png`` files found in ``img_path``.
        tags: object unpickled from ``tag_path``. ``search_text`` treats it as a
            mapping of image file name -> collection of tag names (a dict of
            tag -> confidence is assumed here -- TODO confirm against the
            contents of the tag file).
    """

    def __init__(self, img_path, tag_path):
        """Index the PNG files in ``img_path`` and load the tags from ``tag_path``.

        args:
            img_path: str, directory containing the ``.png`` images
            tag_path: str, path of the pickle file holding the tags
        """
        self.img_path = img_path
        # os.path.join works whether or not img_path ends with a path separator.
        self.images = glob.glob(os.path.join(img_path, '*.png'))
        # NOTE: unpickling can execute arbitrary code; only load tag files you trust.
        with open(tag_path, "rb") as inp:
            self.tags = pk.load(inp)

    def print_tags(self):
        """Print the loaded tags."""
        print(self.tags)

    def search_text(self, key, op="and", suppress_images=False):
        """Search the loaded tags for the keys and optionally display the matching images.

        args:
            key: list of strings, searched keys (a single string is treated as one key)
            op: "and" or "or", indicates logical operation of keys. "or" matches images
                tagged with at least one key; any other value behaves like "and" and
                matches images tagged with every key.
            suppress_images: bool, True - do not display images, False - display images.
                Default False.

        returns:
            set of image names (keys of ``self.tags``) that match. Empty when no keys
            are given.
        """
        # A bare string would otherwise be iterated character by character.
        keys = [key] if isinstance(key, str) else list(key)
        if not keys:
            # all() over no keys is vacuously true and would match (and open) every image.
            return set()

        # Search the tags loaded by __init__ rather than a notebook-level global, so the
        # engine works on a fresh kernel and honours the tag file it was given.
        match = any if op == "or" else all
        found_img = {
            name
            for name, image_tags in self.tags.items()
            if match(k in image_tags for k in keys)
        }

        if not suppress_images:
            self.open_image(found_img)
        return found_img

    def open_image(self, images):
        """Display each of the given images with PIL's ``Image.show()``.

        args:
            images: iterable of image paths or file names; a name that does not exist
                as given is looked up inside ``img_path``
        """
        for img in images:
            # Keep paths that already resolve; otherwise look inside the image folder.
            path = img if os.path.exists(img) else os.path.join(self.img_path, img)
            # The context manager closes the underlying file after the image is shown.
            with Image.open(path) as im:
                im.show()
            
    
    
#search_text("person")

In [15]:
# Raw Azure API responses: a list of JSON strings, each encoding a list of
# {"name": <tag>, "confidence": <score>} objects.
# The strings are kept verbatim and are not parsed in this cell.
tags = ['[ { "name": "train", "confidence": 0.9974923 }, { "name": "platform", "confidence": 0.9955776 }, { "name": "station", "confidence": 0.979665935 }, { "name": "indoor", "confidence": 0.9272351 }, { "name": "subway", "confidence": 0.83886826 }, { "name": "clothing", "confidence": 0.5561282 }, { "name": "person", "confidence": 0.505803 }, { "name": "pulling", "confidence": 0.431911618 } ]', 
        '[ { "name": "person", "confidence": 0.9945455 }, { "name": "laptop", "confidence": 0.9635144 }, { "name": "computer", "confidence": 0.9579097 }, { "name": "clothing", "confidence": 0.9370073 }, { "name": "indoor", "confidence": 0.9179773 }, { "name": "furniture", "confidence": 0.906451 }, { "name": "table", "confidence": 0.742828846 }, { "name": "people", "confidence": 0.720105648 }, { "name": "chair", "confidence": 0.6349643 }, { "name": "woman", "confidence": 0.527442455 } ]', 
        '[ { "name": "skating", "confidence": 0.999951363 }, { "name": "snowboarding", "confidence": 0.9893889 }, { "name": "sports equipment", "confidence": 0.9722208 }, { "name": "person", "confidence": 0.959769964 }, { "name": "roller skating", "confidence": 0.946092963 }, { "name": "skiing", "confidence": 0.92313683 }, { "name": "man", "confidence": 0.9193816 }, { "name": "outdoor", "confidence": 0.9109124 }, { "name": "boardsport", "confidence": 0.907244742 }, { "name": "riding", "confidence": 0.8984571 }, { "name": "sport", "confidence": 0.871290743 }, { "name": "footwear", "confidence": 0.862546742 }, { "name": "snowboard", "confidence": 0.8349905 }, { "name": "skate", "confidence": 0.801233232 }, { "name": "skateboarder", "confidence": 0.792592764 }, { "name": "individual sports", "confidence": 0.779822469 }, { "name": "skateboarding equipment", "confidence": 0.777853966 }, { "name": "skateboard", "confidence": 0.746669054 }, { "name": "skateboarding", "confidence": 0.7466688 }, { "name": "ski", "confidence": 0.6588002 }, { "name": "jumping", "confidence": 0.645534158 }, { "name": "extreme sport", "confidence": 0.5737016 }, { "name": "kickflip", "confidence": 0.501751363 }, { "name": "male", "confidence": 0.15158996 } ]', 
        '[ { "name": "text", "confidence": 0.9999137 }, { "name": "grass", "confidence": 0.999893069 }, { "name": "outdoor", "confidence": 0.9880197 }, { "name": "bicycle", "confidence": 0.9697462 }, { "name": "bicycle wheel", "confidence": 0.897627 }, { "name": "sign", "confidence": 0.8423048 }, { "name": "bike", "confidence": 0.7690854 }, { "name": "wheel", "confidence": 0.7583429 }, { "name": "land vehicle", "confidence": 0.6875147 }, { "name": "vehicle", "confidence": 0.575855851 } ]']

In [98]:
# Image file name -> {tag name: confidence score} for the four sample pictures.
tags_simplified = {
    "subway-people-wait.png": {
        "train": 0.99,
        "platform": 0.99,
        "station": 0.98,
        "subway": 0.83886826,
        "clothing": 0.5561282,
        "person": 0.505803,
        "pulling": 0.43191161,
    },
    "person-skateboard.png": {
        "skating": 0.999951363,
        "snowboarding": 0.9893889,
        "sports equipment": 0.9722208,
        "person": 0.959769964,
        "roller skating": 0.946092963,
        "skiing": 0.92313683,
        "man": 0.9193816,
        "outdoor": 0.9109124,
        "boardsport": 0.907244742,
        "riding": 0.8984571,
        "sport": 0.871290743,
    },
    "People-talkin-sitting.png": {
        "person": 0.9945455,
        "laptop": 0.9635144,
        "computer": 0.9579097,
        "clothing": 0.9370073,
        "indoor": 0.9179773,
        "furniture": 0.906451,
        "table": 0.742828846,
        "people": 0.720105648,
        "chair": 0.6349643,
        "woman": 0.527442455,
    },
    "bike-on-tree.png": {
        "text": 0.9999137,
        "grass": 0.999893069,
        "outdoor": 0.9880197,
        "bicycle": 0.9697462,
        "bicycle wheel": 0.897627,
        "sign": 0.8423048,
        "bike": 0.7690854,
        "wheel": 0.7583429,
        "land vehicle": 0.6875147,
        "vehicle": 0.575855851,
    },
}
                  

## Test Cases


In [129]:
import unittest
  
class EngineTests(unittest.TestCase):
    """Checks SearchEngine.search_text for "and", "or" and single-keyword queries."""

    @classmethod
    def setUpClass(cls):
        # Build the shared engine when the tests run, not while the class body is being
        # executed, so a missing image folder or tag file is reported as a test error
        # instead of aborting the cell that defines the class.
        cls.engine = SearchEngine('./', "tags.pk")

    def test_and(self):
        """Only the image tagged with every key is returned."""
        self.assertEqual(
            self.engine.search_text(["person", "train"], suppress_images=True),
            {"subway-people-wait.png"},
            "And search was not successful.",
        )

    def test_or(self):
        """Every image tagged with at least one key is returned."""
        self.assertEqual(
            self.engine.search_text(["person", "train"], "or", suppress_images=True),
            {'subway-people-wait.png', 'People-talkin-sitting.png', 'person-skateboard.png'},
            "OR search was not successful.",
        )

    def test_one_word(self):
        """A single keyword uses the default "and" operation."""
        self.assertEqual(
            self.engine.search_text(["bicycle"], suppress_images=True),
            {"bike-on-tree.png"},
            "One keyword search was not successful.",
        )
        

# Run the test suite only when this code is executed as the main program.
if __name__ == '__main__':
    # argv is passed explicitly so unittest does not parse the host process's own
    # command-line arguments; exit=False stops unittest.main from calling sys.exit()
    # once the run finishes.
    unittest.main(argv=['first-arg-is-ignored'], exit=False)

...
----------------------------------------------------------------------
Ran 3 tests in 0.054s

OK


## Playground
You can play with the search engine here.

In [None]:
# Replace the placeholder with the tags to search for, e.g. ["person", "train"].
# Note: this must be an assignment (=); a comparison (==) raises NameError because
# KEYS is not defined yet.
KEYS = ["your keywords here"]
engine = SearchEngine('./', "tags.pk")
engine.search_text(KEYS)