### Importing Libraries

In [53]:
import pandas as pd
from tqdm import tqdm
import os
from shutil import copyfile as copy
import shutil

### Reading Image Dataset

In [26]:
# Load the image metadata and discard the columns that are not used below
# (link, id, likes and comments).
df = pd.read_csv("Imgs.csv").drop(
    columns=['img_link', 'img_id', 'likes', 'comments']
)
df.head()

Unnamed: 0,tags,img_path
0,"Clouds, Sky, Atmosphere, Blue Sky",Imgs/A00001.jpg
1,"Bird, Ornithology, Hummingbird",Imgs/A00002.jpg
2,"Sea, Rainbow, Rainfall, Subtropical",Imgs/A00003.jpg
3,"Cherry Blossoms, Road, Japan, Sakura",Imgs/A00004.jpg
4,"Cape Marguerite, Flower, Plant",Imgs/A00005.jpg


### Finding Unique Tags

In [27]:
# Flatten every comma-separated tag string into one list of trimmed tags.
t = [piece.strip() for row in df['tags'] for piece in row.split(',')]

# Deduplicate; the order of the resulting list is arbitrary.
tags = list(set(t))
print(len(tags))

8015


### Creating Folder for each Tag

In [28]:
# Create one sub-folder of Dataset/ per unique tag.
# The root is created first; without it every os.mkdir below would fail.
os.makedirs('Dataset', exist_ok=True)

failed_tags = []  # (tag, exception) for every folder that could not be created

for tag in tqdm(tags):
    try:
        os.mkdir('Dataset/' + tag)
    except FileExistsError:
        # Folder is already there (e.g. the cell was re-run) -- nothing to do.
        pass
    except OSError as exc:
        # Presumably a name the filesystem rejects (illegal characters,
        # over-long name, missing intermediate directory); keep it for review
        # instead of hiding it.
        failed_tags.append((tag, exc))

if failed_tags:
    print(f"Could not create {len(failed_tags)} of {len(tags)} tag folders")

100%|███████████████████████████████████████████████████████████████████████████| 8015/8015 [00:00<00:00, 90786.78it/s]


### Saving Images in Specific Folders

In [31]:
# Copy every image into the folder of each of its tags.
# `error` counts the copies that failed (missing source file, missing or
# invalid destination folder, ...).
error = 0

# Columns are addressed by name rather than by position, and the loop uses
# its own variable names so the global `tags` list of unique tags built
# earlier is not overwritten.
for tag_string, src in tqdm(zip(df['tags'], df['img_path']), total=len(df)):
    file_name = src.split('/')[-1]

    for tag in tag_string.split(','):
        dst = 'Dataset/' + tag.strip() + '/' + file_name
        try:
            copy(src, dst)
        except OSError:
            # Only filesystem errors are counted; anything else
            # (including KeyboardInterrupt) is allowed to propagate.
            error += 1

print(f"{error} copies failed")

100%|█████████████████████████████████████████████████████████████████████████████| 9104/9104 [00:18<00:00, 491.97it/s]


### Checking Folder Details

In [32]:
# Names of all entries directly under Dataset/.
folders = os.listdir("Dataset")
folder_count = len(folders)
print(folder_count)

7968


### Checking number of images in each folder

In [33]:
# Parallel lists: folder_[k] is a folder name, freq[k] the number of entries in it.
folder_ = []
freq    = []

for folder in tqdm(folders):
    try:
        n_entries = len(os.listdir('Dataset/' + folder))
    except OSError:
        # Entry cannot be listed (e.g. it is a file, not a directory): skip it.
        continue
    # Append to both lists only after the listing succeeded so they stay aligned.
    folder_.append(folder)
    freq.append(n_entries)

100%|███████████████████████████████████████████████████████████████████████████| 7968/7968 [00:00<00:00, 32699.87it/s]


### Top 10 Folders with most number of images

In [38]:
# Pair each folder name with its entry count in a single frame.
df_ = pd.DataFrame({"folder": folder_, "freq": freq})

# Ten folders with the highest counts.
df_.sort_values('freq', ascending=False).head(10)

Unnamed: 0,folder,freq
4745,Nature,840
225,Animal,609
2743,Flower,549
726,Bird,519
2759,Flowers,506
6095,Sea,306
5347,Plant,285
6776,Sunset,275
2810,Forest,261
7225,Trees,242


### Top 10 Folders with least number of Images

In [39]:
# Ten folders with the lowest counts (ascending order is the default).
df_.sort_values('freq').head(10)

Unnamed: 0,folder,freq
2386,emergiendo,0
2085,die hinauf führt,0
2281,Each candle is a soul,0
2047,Der Sonne entgegen,0
2558,Fantasmagòrica vista de los Reyes Magos de Ori...,0
2003,Das versteckte Haus,0
3655,In einem kleinen Städtchen,0
3392,Heavy,1
6169,Sewing,1
3393,Heavy Machine,1


### Folders with more than 20 images

In [49]:
# Folders whose count is strictly greater than 20.
df_.loc[df_['freq'].gt(20)]

Unnamed: 0,folder,freq
44,Abstract,38
112,Agriculture,24
169,Alps,30
217,Angel,22
225,Animal,609
...,...,...
7881,Yellow,21
7890,Yellow Flower,37
7891,Yellow Flowers,21
7918,Young,23


### Folders with 20 or fewer images

In [50]:
# Folders whose count is 20 or less.
df_.loc[df_['freq'].le(20)]

Unnamed: 0,folder,freq
0,&quot;All we are saying is give peace a chance...,1
1,&quot;Gray wagtail&quot; found in rivers and m...,1
2,1891. In 1906 she was en route from Peru to Ge...,1
3,1925,1
4,1950S,1
...,...,...
7960,Église Dol de Bretagne,1
7961,сакура,1
7962,„Ich schau dir in die Augen,1
7963,奈良公園の鹿です、人間と共存しています。,1


### Removing Folders with 20 or fewer images

In [54]:
# Delete every tag folder that holds 20 entries or fewer.
# NOTE: shutil.rmtree removes the folder and everything in it.
for folder in df_[df_['freq'] <= 20]["folder"]:
    path = "Dataset/" + folder

    if os.path.exists(path):
        shutil.rmtree(path)
        print(f"Directory '{path}' has been removed.")
    else:
        # Reached when the folder is already gone, e.g. on a re-run of this cell.
        print(f"Directory '{path}' does not exist.")


Directory 'Dataset/&quot;All we are saying is give peace a chance.&quot; (John Lennon)' has been removed.
Directory 'Dataset/&quot;Gray wagtail&quot; found in rivers and mountain streams in the subalpine zone' has been removed.
Directory 'Dataset/1891. In 1906 she was en route from Peru to Geelong' has been removed.
Directory 'Dataset/1925' has been removed.
Directory 'Dataset/1950S' has been removed.
Directory 'Dataset/2020' has been removed.
Directory 'Dataset/2021' has been removed.
Directory 'Dataset/2022' has been removed.
Directory 'Dataset/3D' has been removed.
Directory 'Dataset/3D Animation' has been removed.
Directory 'Dataset/3D Render' has been removed.
Directory 'Dataset/4' has been removed.
Directory 'Dataset/4 spreads and back cover of the magazine. Name of the editorial &quot;The Monochrome Chapter&quot;' has been removed.
Directory 'Dataset/4 Weihnachtskerzen' has been removed.
Directory 'Dataset/4 X 4' has been removed.
Directory 'Dataset/4-Cyl Boxer' has been removed

Directory 'Dataset/Berries' has been removed.
Directory 'Dataset/Berry' has been removed.
Directory 'Dataset/Berry College' has been removed.
Directory 'Dataset/Berry Jam' has been removed.
Directory 'Dataset/Bethlehem' has been removed.
Directory 'Dataset/Beverage' has been removed.
Directory 'Dataset/Beverages' has been removed.
Directory 'Dataset/Beyond' has been removed.
Directory 'Dataset/Bible' has been removed.
Directory 'Dataset/Bible Cover' has been removed.
Directory 'Dataset/Biceps' has been removed.
Directory 'Dataset/Bichon' has been removed.
Directory 'Dataset/Bicolor Leaves' has been removed.
Directory 'Dataset/Bicycle' has been removed.
Directory 'Dataset/Bicycle Path' has been removed.
Directory 'Dataset/Bicycle Sign' has been removed.
Directory 'Dataset/Bicycles' has been removed.
Directory 'Dataset/bienenkisten' has been removed.
Directory 'Dataset/Big' has been removed.
Directory 'Dataset/Big Ben' has been removed.
Directory 'Dataset/Big Cat' has been removed.
Direc

Directory 'Dataset/Chrysanthemum' has been removed.
Directory 'Dataset/Chrysanthemums' has been removed.
Directory 'Dataset/Chrysler' has been removed.
Directory 'Dataset/Chuka Wakame' has been removed.
Directory 'Dataset/Church Organ' has been removed.
Directory 'Dataset/Church Tower' has been removed.
Directory 'Dataset/Church Window' has been removed.
Directory 'Dataset/Ciconia Ciconia' has been removed.
Directory 'Dataset/Cigarette' has been removed.
Directory 'Dataset/Cilantro' has been removed.
Directory 'Dataset/Cinderella' has been removed.
Directory 'Dataset/Cinema' has been removed.
Directory 'Dataset/Cinnamon' has been removed.
Directory 'Dataset/Cinnamon Sticks' has been removed.
Directory 'Dataset/Circle' has been removed.
Directory 'Dataset/Circles' has been removed.
Directory 'Dataset/Circuit' has been removed.
Directory 'Dataset/Circus' has been removed.
Directory 'Dataset/Citadel' has been removed.
Directory 'Dataset/Cities' has been removed.
Directory 'Dataset/Citrus'

Directory 'Dataset/Cotton' has been removed.
Directory 'Dataset/Cottontail Rabbit' has been removed.
Directory 'Dataset/Couch' has been removed.
Directory 'Dataset/Cough' has been removed.
Directory 'Dataset/Countries' has been removed.
Directory 'Dataset/Country' has been removed.
Directory 'Dataset/Country House' has been removed.
Directory 'Dataset/County Fair' has been removed.
Directory 'Dataset/Couples' has been removed.
Directory 'Dataset/Coupon' has been removed.
Directory 'Dataset/Courses' has been removed.
Directory 'Dataset/Court' has been removed.
Directory 'Dataset/Courtyard' has been removed.
Directory 'Dataset/Cover' has been removed.
Directory 'Dataset/Covered' has been removed.
Directory 'Dataset/Covered Market' has been removed.
Directory 'Dataset/Covid' has been removed.
Directory 'Dataset/Covid-19' has been removed.
Directory 'Dataset/Covid19' has been removed.
Directory 'Dataset/Cow' has been removed.
Directory 'Dataset/Cowboy' has been removed.
Directory 'Dataset/

Directory 'Dataset/Farewell' has been removed.
Directory 'Dataset/Farm Animal' has been removed.
Directory 'Dataset/Farm Animals' has been removed.
Directory 'Dataset/Farm Yard' has been removed.
Directory 'Dataset/Farmer' has been removed.
Directory 'Dataset/Farmers' has been removed.
Directory 'Dataset/Farmhouse' has been removed.
Directory 'Dataset/Farming' has been removed.
Directory 'Dataset/Farming Vehicle' has been removed.
Directory 'Dataset/Farmland' has been removed.
Directory 'Dataset/Farmstead' has been removed.
Directory 'Dataset/Faro de Fisterra' has been removed.
Directory 'Dataset/Fashion Girl' has been removed.
Directory 'Dataset/Fast' has been removed.
Directory 'Dataset/Fast Food' has been removed.
Directory 'Dataset/Father' has been removed.
Directory 'Dataset/Father And Son' has been removed.
Directory 'Dataset/Faucet' has been removed.
Directory 'Dataset/Fawn' has been removed.
Directory 'Dataset/Fdp' has been removed.
Directory 'Dataset/Feahters' has been removed

Directory 'Dataset/Hacking' has been removed.
Directory 'Dataset/Hafez' has been removed.
Directory 'Dataset/Hair' has been removed.
Directory 'Dataset/Hair Cut' has been removed.
Directory 'Dataset/Hair Salon' has been removed.
Directory 'Dataset/Hair Stylist' has been removed.
Directory 'Dataset/Haircut' has been removed.
Directory 'Dataset/Hairdresser' has been removed.
Directory 'Dataset/Hairpin Bend' has been removed.
Directory 'Dataset/Half' has been removed.
Directory 'Dataset/Half Closed' has been removed.
Directory 'Dataset/Half Moon' has been removed.
Directory 'Dataset/Half-Timbered Houses' has been removed.
Directory 'Dataset/Hall' has been removed.
Directory 'Dataset/Halloween' has been removed.
Directory 'Dataset/Hallstatt' has been removed.
Directory 'Dataset/Hallway' has been removed.
Directory 'Dataset/Halm' has been removed.
Directory 'Dataset/Halogen' has been removed.
Directory 'Dataset/Halves' has been removed.
Directory 'Dataset/Ham' has been removed.
Directory 'D

Directory 'Dataset/Honey Bee' has been removed.
Directory 'Dataset/Honey Bees' has been removed.
Directory 'Dataset/Honey Candy' has been removed.
Directory 'Dataset/Honey Factory' has been removed.
Directory 'Dataset/Honeybee' has been removed.
Directory 'Dataset/Hong Kong' has been removed.
Directory 'Dataset/Hongyadong' has been removed.
Directory 'Dataset/Honolulu' has been removed.
Directory 'Dataset/Honor Award' has been removed.
Directory 'Dataset/Hood' has been removed.
Directory 'Dataset/Hood Ornament' has been removed.
Directory 'Dataset/Hooded' has been removed.
Directory 'Dataset/Hooded Crow' has been removed.
Directory 'Dataset/Hoodie' has been removed.
Directory 'Dataset/Hooray' has been removed.
Directory 'Dataset/Hop' has been removed.
Directory 'Dataset/Hope' has been removed.
Directory 'Dataset/Horizon' has been removed.
Directory 'Dataset/Horn' has been removed.
Directory 'Dataset/Horned' has been removed.
Directory 'Dataset/Hornet' has been removed.
Directory 'Datas

Directory 'Dataset/Lunar' has been removed.
Directory 'Dataset/Lunar New Year' has been removed.
Directory 'Dataset/Lunar Surface' has been removed.
Directory 'Dataset/Lunch' has been removed.
Directory 'Dataset/Lupins' has been removed.
Directory 'Dataset/Lurk' has been removed.
Directory 'Dataset/Lushan' has been removed.
Directory 'Dataset/Lute' has been removed.
Directory 'Dataset/Luxury' has been removed.
Directory 'Dataset/Luxury Bathroom' has been removed.
Directory 'Dataset/Luxury Car' has been removed.
Directory 'Dataset/Luxury Villa' has been removed.
Directory 'Dataset/Lying' has been removed.
Directory 'Dataset/Lying Down' has been removed.
Directory 'Dataset/Lynx' has been removed.
Directory 'Dataset/Lyon' has been removed.
Directory 'Dataset/M31' has been removed.
Directory 'Dataset/Macaron' has been removed.
Directory 'Dataset/Macarons' has been removed.
Directory 'Dataset/Macaroon' has been removed.
Directory 'Dataset/Macaw' has been removed.
Directory 'Dataset/Macaws' 

Directory 'Dataset/Oak Kitten' has been removed.
Directory 'Dataset/Oakley' has been removed.
Directory 'Dataset/Oatmeal' has been removed.
Directory 'Dataset/Oatmeal Cookies' has been removed.
Directory 'Dataset/Oats' has been removed.
Directory 'Dataset/Obesity' has been removed.
Directory 'Dataset/Object' has been removed.
Directory 'Dataset/Objects' has been removed.
Directory 'Dataset/Observation' has been removed.
Directory 'Dataset/Observation Hut' has been removed.
Directory 'Dataset/Occult' has been removed.
Directory 'Dataset/Ocean Liner' has been removed.
Directory 'Dataset/Ocean Waves' has been removed.
Directory 'Dataset/Ocelot' has been removed.
Directory 'Dataset/Ochna Serrulata' has been removed.
Directory 'Dataset/Odessa' has been removed.
Directory 'Dataset/Of Nature' has been removed.
Directory 'Dataset/Off' has been removed.
Directory 'Dataset/Office Building' has been removed.
Directory 'Dataset/Office Buildings' has been removed.
Directory 'Dataset/Office Desk' ha

Directory 'Dataset/Raspberries' has been removed.
Directory 'Dataset/Raspberries And Blackberries' has been removed.
Directory 'Dataset/Raspberry' has been removed.
Directory 'Dataset/Raspberry Cake' has been removed.
Directory 'Dataset/Raspberry Pie' has been removed.
Directory 'Dataset/Rat' has been removed.
Directory 'Dataset/Rate' has been removed.
Directory 'Dataset/Raven' has been removed.
Directory 'Dataset/Ravenia' has been removed.
Directory 'Dataset/Ravens' has been removed.
Directory 'Dataset/Raw' has been removed.
Directory 'Dataset/Rays' has been removed.
Directory 'Dataset/Reach' has been removed.
Directory 'Dataset/Read' has been removed.
Directory 'Dataset/Reading' has been removed.
Directory 'Dataset/Real Estate' has been removed.
Directory 'Dataset/Rebel' has been removed.
Directory 'Dataset/Receive' has been removed.
Directory 'Dataset/Recipe' has been removed.
Directory 'Dataset/Reciting' has been removed.
Directory 'Dataset/Recreation' has been removed.
Directory '

Directory 'Dataset/Solar System' has been removed.
Directory 'Dataset/Soldier' has been removed.
Directory 'Dataset/Soldiers' has been removed.
Directory 'Dataset/Solid' has been removed.
Directory 'Dataset/Solidago' has been removed.
Directory 'Dataset/Solidago Canadensis' has been removed.
Directory 'Dataset/Solidarity' has been removed.
Directory 'Dataset/Solitude' has been removed.
Directory 'Dataset/Solo' has been removed.
Directory 'Dataset/Solstice' has been removed.
Directory 'Dataset/Soluble' has been removed.
Directory 'Dataset/Son' has been removed.
Directory 'Dataset/Song' has been removed.
Directory 'Dataset/Songbird' has been removed.
Directory 'Dataset/Sonnenaufgang am Mittelmeer… der Himmel steht in Flammen…' has been removed.
Directory 'Dataset/Sonnenkopf' has been removed.
Directory 'Dataset/Sopron' has been removed.
Directory 'Dataset/Sorry' has been removed.
Directory 'Dataset/Sossusvlei' has been removed.
Directory 'Dataset/Soumaya Museum' has been removed.
Directo

Directory 'Dataset/Tree Frog' has been removed.
Directory 'Dataset/Tree Hollow' has been removed.
Directory 'Dataset/Tree House' has been removed.
Directory 'Dataset/Tree Lined' has been removed.
Directory 'Dataset/Tree Lined Path' has been removed.
Directory 'Dataset/Tree Pruning' has been removed.
Directory 'Dataset/Tree Stump' has been removed.
Directory 'Dataset/Tree Trunk' has been removed.
Directory 'Dataset/Trekking' has been removed.
Directory 'Dataset/trendy retro pattern' has been removed.
Directory 'Dataset/Triangle' has been removed.
Directory 'Dataset/Tribe' has been removed.
Directory 'Dataset/Trickle' has been removed.
Directory 'Dataset/Tricolor' has been removed.
Directory 'Dataset/Tricolored Heron' has been removed.
Directory 'Dataset/Trinity College' has been removed.
Directory 'Dataset/Trinket' has been removed.
Directory 'Dataset/Trip' has been removed.
Directory 'Dataset/Triumphal Arch' has been removed.
Directory 'Dataset/Trolltunga' has been removed.
Directory '