In [1]:
from tqdm.notebook import tqdm, trange

In [2]:
tqdm.pandas()

In [3]:
import sys

In [4]:
import glob
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet

In [5]:
import os

# General packages
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.express as px
import PIL.Image

from IPython.display import Image, display
import warnings
warnings.filterwarnings("ignore")

In [6]:
nltk.download('stopwords')
nltk.download('punkt')

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/barzamini/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /home/barzamini/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [7]:
def find_gpus(nums=6):
    os.system('nvidia-smi -q -d Memory |grep -A4 GPU|grep Free >tmp_free_gpus')
    with open('tmp_free_gpus', 'r') as lines_txt:
        frees = lines_txt.readlines()
        idx_freeMemory_pair = [ (idx,int(x.split()[2]))
                              for idx,x in enumerate(frees) ]
    idx_freeMemory_pair.sort(key=lambda my_tuple:my_tuple[1],reverse=True)
    usingGPUs = [str(idx_memory_pair[0])
                    for idx_memory_pair in idx_freeMemory_pair[:nums] ]
    usingGPUs =  ','.join(usingGPUs)
    print('using GPU idx: #', usingGPUs)
    return usingGPUs

In [8]:
os.environ['CUDA_VISIBLE_DEVICES'] = find_gpus(nums=2)

using GPU idx: # 0,1


In [9]:
from nltk.corpus import stopwords
print(stopwords.words('english'))

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', '

In [10]:
lemmatizer = WordNetLemmatizer()

In [11]:
os.listdir('/raid/AISSEL/Hamed/datasets/wit/')

['wit_v1.train.all-00007-of-00010_context_caption_en_sbert_c.tsv',
 'wit_v1.train.all-00009-of-00010_context_caption_en_sbert_c.tsv',
 'wit_v1.train.all-1percent_sample.tsv',
 'wit_v1.train.all-00000-of-00010_en_csim.tsv',
 'images',
 'selected_url.tsv',
 'wit0',
 'wit1',
 'wit2.zip',
 'wit3.zip',
 'wit4.zip',
 'wit5.zip',
 'wit6.zip',
 'wit7.zip',
 'wit8.zip',
 'wit9.zip',
 'wit_v1.train.all-00000-of-00010_context_caption_en_sbert_c.tsv',
 'wit_v1.train.all-00001-of-00010_context_caption_en_sbert_c.tsv',
 'wit_v1.train.all-00002-of-00010_context_caption_en_sbert_c.tsv',
 'wit_v1.train.all-00003-of-00010_context_caption_en_sbert_c.tsv',
 'wit_v1.train.all-00004-of-00010_context_caption_en_sbert_c.tsv',
 'wit_v1.train.all-00005-of-00010_context_caption_en_sbert_c.tsv',
 'wit_v1.train.all-00006-of-00010_context_caption_en_sbert_c.tsv',
 'wit_v1.train.all-00008-of-00010_context_caption_en_sbert_c.tsv']

In [12]:
root_path = '/raid/AISSEL/Hamed/datasets/wit'

In [13]:
f_names =[el for el in os.listdir('/raid/AISSEL/Hamed/datasets/wit/') if el.endswith('_en_sbert_c.tsv')]
f_names

['wit_v1.train.all-00007-of-00010_context_caption_en_sbert_c.tsv',
 'wit_v1.train.all-00009-of-00010_context_caption_en_sbert_c.tsv',
 'wit_v1.train.all-00000-of-00010_context_caption_en_sbert_c.tsv',
 'wit_v1.train.all-00001-of-00010_context_caption_en_sbert_c.tsv',
 'wit_v1.train.all-00002-of-00010_context_caption_en_sbert_c.tsv',
 'wit_v1.train.all-00003-of-00010_context_caption_en_sbert_c.tsv',
 'wit_v1.train.all-00004-of-00010_context_caption_en_sbert_c.tsv',
 'wit_v1.train.all-00005-of-00010_context_caption_en_sbert_c.tsv',
 'wit_v1.train.all-00006-of-00010_context_caption_en_sbert_c.tsv',
 'wit_v1.train.all-00008-of-00010_context_caption_en_sbert_c.tsv']

In [14]:
df = pd.read_csv(f'{root_path}/{f_names[0]}', sep='\t')
df = df.drop('Unnamed: 0', 1)
df = df[df["language"]=='en']
df

Unnamed: 0,index,language,page_url,image_url,page_title,section_title,hierarchical_section_title,caption_reference_description,caption_attribution_description,caption_alt_text_description,...,192_context_score,20_context_score,296_context_score,144_context_score,12_context_score,224_context_score,250_context_score,376_context_score,concept_caption_score,concept_context_score
0,8,en,https://en.wikipedia.org/wiki/Bianca_Atzei,https://upload.wikimedia.org/wikipedia/commons...,Bianca Atzei,Sanremo Music Festival 2015 and Bianco e nero,Bianca Atzei / Career / Sanremo Music Festival...,Bianca Atzei's signature,Italiano: Firma di Bianca Atzei,,...,-0.038851,0.089934,-0.015047,0.043425,0.027713,-0.098772,0.032884,-0.122536,0.090904,0.040136
1,16,en,https://en.wikipedia.org/wiki/International_Co...,https://upload.wikimedia.org/wikipedia/commons...,International Committee of the Red Cross archives,World War II,International Committee of the Red Cross archi...,The Agency archives at Plainpalais,Nederlands: Collectie / Archief&#160;: Fotocol...,,...,-0.091849,0.002981,0.042102,0.053072,-0.039682,-0.103131,0.016510,-0.044889,-0.008080,-0.010542
2,20,en,https://en.wikipedia.org/wiki/Flight_helmet,https://upload.wikimedia.org/wikipedia/commons...,Flight helmet,History of flight helmets,Flight helmet / History of flight helmets,,English: A-9 oxygen mask with B-6 winter helme...,,...,0.016546,-0.028779,-0.004041,0.086849,-0.038401,0.074521,0.059790,0.012366,0.118914,0.065914
3,22,en,https://en.wikipedia.org/wiki/List_of_burials_...,https://upload.wikimedia.org/wikipedia/commons...,List of burials at Serafimovskoe Cemetery,Sport,List of burials at Serafimovskoe Cemetery / In...,,English: Russian coach Vladimir Kazachyonok Ру...,,...,-0.081875,-0.031366,0.061323,-0.060698,-0.051773,0.031197,0.036632,-0.057523,0.027825,0.002939
4,29,en,https://en.wikipedia.org/wiki/List_of_Historic...,https://upload.wikimedia.org/wikipedia/commons...,List of Historic Sites of Japan (Okayama),Prefectural Historic Sites,List of Historic Sites of Japan (Okayama) / Pr...,,"English: Top of kouen part and Stone coffin, T...",,...,-0.066597,-0.022394,0.130063,-0.007258,-0.076814,0.081103,-0.060480,0.085220,-0.033427,-0.017023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
541723,3703714,en,https://simple.wikipedia.org/wiki/Vibraye,http://upload.wikimedia.org/wikipedia/commons/...,Vibraye,,Vibraye,coat of arms,,,...,-0.011430,0.004210,0.059705,-0.041704,0.002616,0.007550,-0.172665,-0.067527,0.230597,-0.065141
541724,3703716,en,https://en.wikipedia.org/wiki/Wilhelm_Schmiede...,https://upload.wikimedia.org/wikipedia/commons...,Wilhelm Schmiedeberg,,Wilhelm Schmiedeberg,Wilhelm Schmiedeberg \n Self portrait of a mir...,Deutsch: seitenverkehrtes Selbstbildnis von Wi...,,...,0.036386,0.020026,-0.041863,0.040273,-0.018524,-0.067019,0.127569,0.030676,0.163752,0.059812
541725,3703719,en,https://en.wikipedia.org/wiki/Political_system...,https://upload.wikimedia.org/wikipedia/commons...,Political systems of Imperial China,Three Lords and Nine Ministers system,Political systems of Imperial China / Central ...,,A pottery model of a palace from the Chinese H...,,...,0.065037,0.004745,-0.082471,-0.012358,-0.086395,0.022315,-0.032922,0.011763,0.008349,-0.030208
541726,3703724,en,https://en.wikipedia.org/wiki/Rehau,https://upload.wikimedia.org/wikipedia/commons...,Rehau,,Rehau,Rehau,Deutsch: Ansicht der Stadt Rehau vom Kornberg,Rehau,...,-0.024338,-0.063756,0.006761,-0.049635,-0.086546,-0.120204,0.054417,-0.033827,0.192601,-0.026689


In [15]:
caption_score_list = [el for el in df.columns if 'caption_score' in el]
print(caption_score_list)

['196_caption_score', '412_caption_score', '172_caption_score', '363_caption_score', '198_caption_score', '114_caption_score', '419_caption_score', '294_caption_score', '388_caption_score', '314_caption_score', '98_caption_score', '80_caption_score', '134_caption_score', '432_caption_score', '394_caption_score', '53_caption_score', '56_caption_score', '150_caption_score', '278_caption_score', '-1_caption_score', '43_caption_score', '265_caption_score', '345_caption_score', '378_caption_score', '11_caption_score', '331_caption_score', '202_caption_score', '169_caption_score', '327_caption_score', '309_caption_score', '68_caption_score', '216_caption_score', '40_caption_score', '99_caption_score', '426_caption_score', '344_caption_score', '47_caption_score', '8_caption_score', '416_caption_score', '112_caption_score', '137_caption_score', '441_caption_score', '192_caption_score', '20_caption_score', '296_caption_score', '144_caption_score', '12_caption_score', '224_caption_score', '250_c

In [16]:
caption_score_list = ['196_caption_score', '412_caption_score', '172_caption_score', '363_caption_score', 
                      '198_caption_score', '114_caption_score', '419_caption_score', '294_caption_score', 
                      '388_caption_score', '314_caption_score', '98_caption_score', '80_caption_score', 
                      '134_caption_score', '432_caption_score', '394_caption_score', '53_caption_score', 
                      '56_caption_score', '150_caption_score', '278_caption_score', '-1_caption_score', 
                      '43_caption_score', '265_caption_score', '345_caption_score', '378_caption_score', 
                      '11_caption_score', '331_caption_score', '202_caption_score', '169_caption_score', 
                      '327_caption_score', '309_caption_score', '68_caption_score', '216_caption_score', 
                      '40_caption_score', '99_caption_score', '426_caption_score', '344_caption_score', 
                      '47_caption_score', '8_caption_score', '416_caption_score', '112_caption_score', 
                      '137_caption_score', '441_caption_score', '192_caption_score', '20_caption_score', 
                      '296_caption_score', '144_caption_score', '12_caption_score', '224_caption_score', 
                      '250_caption_score', '376_caption_score']

In [17]:
context_score_list = [el for el in df.columns if 'context_score' in el]
context_score_list = context_score_list[:-1]
print(context_score_list)

['196_context_score', '412_context_score', '172_context_score', '363_context_score', '198_context_score', '114_context_score', '419_context_score', '294_context_score', '388_context_score', '314_context_score', '98_context_score', '80_context_score', '134_context_score', '432_context_score', '394_context_score', '53_context_score', '56_context_score', '150_context_score', '278_context_score', '-1_context_score', '43_context_score', '265_context_score', '345_context_score', '378_context_score', '11_context_score', '331_context_score', '202_context_score', '169_context_score', '327_context_score', '309_context_score', '68_context_score', '216_context_score', '40_context_score', '99_context_score', '426_context_score', '344_context_score', '47_context_score', '8_context_score', '416_context_score', '112_context_score', '137_context_score', '441_context_score', '192_context_score', '20_context_score', '296_context_score', '144_context_score', '12_context_score', '224_context_score', '250_c

In [18]:
im_c = ['image_url', 'caption']
cap_s = ['concept_caption_score']
con_s = ['concept_context_score']
col_list = caption_score_list + context_score_list + im_c + cap_s + con_s

In [19]:

col_list_1 = context_score_list + im_c + con_s

In [20]:
csim_df = pd.DataFrame(columns = col_list)
for idx, fn in tqdm(enumerate(f_names)):
#     if idx == 2:
#         break
    df = pd.read_csv(f'{root_path}/{fn}', sep='\t')
    df = df.drop('Unnamed: 0', 1)
    df = df[df["language"]=='en']
    df = df[col_list]
#     df_1 = df_1.sort_values(by=['196_score'], ascending=False)
    df = df.reset_index(drop=True)
    csim_df = pd.concat([csim_df, df], ignore_index=True)
    

0it [00:00, ?it/s]

In [21]:
# csim_df.sort_values(by=['196_score'], ascending=False)
csim_df

Unnamed: 0,196_caption_score,412_caption_score,172_caption_score,363_caption_score,198_caption_score,114_caption_score,419_caption_score,294_caption_score,388_caption_score,314_caption_score,...,296_context_score,144_context_score,12_context_score,224_context_score,250_context_score,376_context_score,image_url,caption,concept_caption_score,concept_context_score
0,0.014023,0.048338,0.072041,0.035916,-0.046189,0.007926,0.033050,0.015909,0.051816,0.022079,...,-0.015047,0.043425,0.027713,-0.098772,0.032884,-0.122536,https://upload.wikimedia.org/wikipedia/commons...,Bianca Atzei's signature Italiano: Firma di Bi...,0.090904,0.040136
1,-0.087317,-0.021710,-0.051332,-0.061621,0.014076,0.049088,-0.088426,-0.019032,-0.007908,-0.003852,...,0.042102,0.053072,-0.039682,-0.103131,0.016510,-0.044889,https://upload.wikimedia.org/wikipedia/commons...,The Agency archives at Plainpalais Nederlands:...,-0.008080,-0.010542
2,-0.023712,0.000274,0.003510,-0.040632,-0.034856,0.131114,0.052585,0.048979,-0.074227,0.089140,...,-0.004041,0.086849,-0.038401,0.074521,0.059790,0.012366,https://upload.wikimedia.org/wikipedia/commons...,English: A-9 oxygen mask with B-6 winter helm...,0.118914,0.065914
3,-0.059316,-0.073104,-0.019552,-0.094266,0.022857,0.071975,-0.046809,-0.035883,0.035720,-0.060381,...,0.061323,-0.060698,-0.051773,0.031197,0.036632,-0.057523,https://upload.wikimedia.org/wikipedia/commons...,English: Russian coach Vladimir Kazachyonok Р...,0.027825,0.002939
4,-0.100309,-0.125802,-0.069639,-0.151786,-0.100653,-0.070315,-0.109904,-0.068974,-0.122775,-0.067142,...,0.130063,-0.007258,-0.076814,0.081103,-0.060480,0.085220,https://upload.wikimedia.org/wikipedia/commons...,"English: Top of kouen part and Stone coffin, ...",-0.033427,-0.017023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5411973,0.010525,0.086484,0.072844,0.019646,0.141648,0.080192,0.064250,0.023301,0.082798,0.125770,...,0.062797,0.007082,0.031349,0.137345,0.030877,0.095259,https://upload.wikimedia.org/wikipedia/commons...,"Shek Pik Reservoir, with Shek Pik Prison visib...",0.121024,-0.009627
5411974,0.064163,0.140261,0.167117,0.088087,0.101651,0.059989,0.258787,0.109619,0.139930,0.124345,...,0.103477,0.035289,0.014564,0.147341,0.006568,-0.036180,https://upload.wikimedia.org/wikipedia/commons...,"English: University Avenue, University Of Gla...",0.180178,0.049142
5411975,0.000244,-0.043786,0.052866,-0.048434,-0.027164,-0.027894,-0.043085,0.009353,-0.053874,0.003057,...,-0.027486,0.058317,-0.107188,-0.113409,-0.003040,-0.091142,https://upload.wikimedia.org/wikipedia/commons...,English: Anonymous 16th century Portuguese il...,0.054884,-0.064157
5411976,0.081584,0.030935,0.149223,0.040963,0.050116,0.186897,0.079819,0.104817,0.024297,0.135771,...,-0.010144,0.087509,0.023603,0.009708,0.147209,-0.045105,https://upload.wikimedia.org/wikipedia/commons...,Quagliotto at the 2018 European Road Cycling C...,0.127953,0.110020


In [26]:
csim_df.describe()

Unnamed: 0,196_caption_score,412_caption_score,172_caption_score,363_caption_score,198_caption_score,114_caption_score,419_caption_score,294_caption_score,388_caption_score,314_caption_score,...,192_context_score,20_context_score,296_context_score,144_context_score,12_context_score,224_context_score,250_context_score,376_context_score,concept_caption_score,concept_context_score
count,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,...,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0
mean,0.03316916,0.02898722,0.04938595,0.009565755,0.03607464,0.04620839,0.05406117,0.03718228,0.01685648,0.05392719,...,0.003846679,0.006175797,0.03927394,0.000752269,0.01043332,0.01692591,0.01978357,0.007244231,0.08732864,0.02756854
std,0.06863361,0.06478608,0.06895159,0.06071932,0.07447689,0.06934013,0.07638213,0.06107245,0.06753839,0.07228514,...,0.061788,0.06737285,0.0873531,0.06067205,0.07671057,0.08884871,0.06969397,0.07776987,0.06997118,0.06684423
min,-0.2543995,-0.2410043,-0.2478864,-0.2473019,-0.2808687,-0.2716275,-0.2415859,-0.2307905,-0.2826918,-0.2457126,...,-0.2946304,-0.2907839,-0.2930294,-0.2660354,-0.3073893,-0.328301,-0.2828747,-0.384506,-0.2394902,-0.2688895
25%,-0.01334092,-0.01534086,0.003201978,-0.03199529,-0.01532192,-0.001223383,0.002334806,-0.003728528,-0.02948788,0.004858578,...,-0.03841725,-0.03938309,-0.02109603,-0.04113004,-0.04181451,-0.04457409,-0.02812582,-0.04579195,0.03980459,-0.01782694
50%,0.02696826,0.02532269,0.04495488,0.007484896,0.03195039,0.04330221,0.04744296,0.03457904,0.01583168,0.0494673,...,0.002965325,0.00436971,0.0324168,-0.001201854,0.006485071,0.01087677,0.01813048,0.004907532,0.086847,0.02613318
75%,0.0721949,0.06901627,0.08990105,0.04876487,0.08285133,0.09029126,0.09732897,0.07442215,0.06187467,0.09739184,...,0.0449262,0.04921226,0.0911472,0.04067654,0.05814612,0.07209412,0.06618579,0.05781632,0.1341138,0.07054175
max,0.7550066,0.6103925,0.6149387,0.5102714,0.61045,0.7142168,0.6140311,0.6116345,0.5457093,0.6769031,...,0.5750273,0.5424188,0.5708867,0.4586499,0.6041094,0.589149,0.4600561,0.5546166,0.7286295,0.6892114


In [27]:
# df_p = csim_df[csim_df['concept_caption_score'] >= csim_df['concept_caption_score'].mean()]
df_p = csim_df[csim_df['concept_caption_score'] >= 0.134]
df_p = df_p.reset_index(drop=True)
df_p

Unnamed: 0,196_caption_score,412_caption_score,172_caption_score,363_caption_score,198_caption_score,114_caption_score,419_caption_score,294_caption_score,388_caption_score,314_caption_score,...,296_context_score,144_context_score,12_context_score,224_context_score,250_context_score,376_context_score,image_url,caption,concept_caption_score,concept_context_score
0,-0.059051,0.034478,0.017459,0.072293,0.042578,-0.037894,0.066543,0.081917,0.112965,-0.000315,...,0.007106,0.075357,-0.002062,0.038552,0.175929,0.096650,https://upload.wikimedia.org/wikipedia/commons...,English: A headshot of Peter Weinstein,0.151517,0.155621
1,0.113252,0.179848,0.162650,0.118061,0.312920,0.144430,0.225542,0.120647,0.122567,0.213548,...,0.226437,0.026096,0.060794,0.090376,0.071820,0.106896,https://upload.wikimedia.org/wikipedia/commons...,Exterior of Lone Star Park English: Lone Star ...,0.224927,0.040659
2,0.129829,0.031143,0.013498,-0.028030,0.086818,0.067837,0.088030,-0.006901,0.007030,0.077067,...,0.095041,0.038772,0.083501,0.006546,0.013055,-0.022351,https://upload.wikimedia.org/wikipedia/commons...,English: Robert Hooks,0.182111,0.036132
3,0.137582,0.177523,0.207521,0.109279,0.183949,0.168617,0.218117,0.136698,0.086362,0.251034,...,0.130158,0.030982,0.069321,0.251760,0.020972,0.044483,https://upload.wikimedia.org/wikipedia/commons...,English: Side of the Obelisk facing Hyde Park...,0.218246,0.139382
4,0.049927,0.130767,0.158559,0.042664,0.065716,0.142656,0.184495,0.086053,0.024383,0.139034,...,0.011488,0.097181,0.135434,-0.053385,-0.041917,-0.033439,https://upload.wikimedia.org/wikipedia/commons...,Coppola at the 2001 Cannes Film Festival Coppo...,0.166897,0.043755
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1355833,0.088775,0.078180,0.104002,0.070440,0.024088,0.068000,0.119280,0.069382,0.015567,0.134327,...,0.087099,0.026237,-0.036236,0.057496,-0.021941,0.005434,https://upload.wikimedia.org/wikipedia/commons...,"en:Manchester Town Hall (en:Manchester, en:En...",0.196253,0.078518
1355834,0.147421,0.137543,0.220968,0.192709,0.167448,0.231680,0.181678,0.163100,0.070264,0.176476,...,0.149290,0.013793,0.051057,-0.111905,0.054401,0.076998,https://upload.wikimedia.org/wikipedia/commons...,Deborah Goldberg conducting fieldwork English:...,0.215896,0.049033
1355835,0.125749,0.205701,0.218751,0.052688,0.198685,0.148288,0.185024,0.143636,0.006773,0.173097,...,0.130153,0.059396,0.105359,0.296614,0.048320,0.058189,https://upload.wikimedia.org/wikipedia/commons...,A Nice tramway car at Place Massena Français&#...,0.194443,0.115030
1355836,0.064163,0.140261,0.167117,0.088087,0.101651,0.059989,0.258787,0.109619,0.139930,0.124345,...,0.103477,0.035289,0.014564,0.147341,0.006568,-0.036180,https://upload.wikimedia.org/wikipedia/commons...,"English: University Avenue, University Of Gla...",0.180178,0.049142


In [28]:
df_p.describe()

Unnamed: 0,196_caption_score,412_caption_score,172_caption_score,363_caption_score,198_caption_score,114_caption_score,419_caption_score,294_caption_score,388_caption_score,314_caption_score,...,192_context_score,20_context_score,296_context_score,144_context_score,12_context_score,224_context_score,250_context_score,376_context_score,concept_caption_score,concept_context_score
count,1355838.0,1355838.0,1355838.0,1355838.0,1355838.0,1355838.0,1355838.0,1355838.0,1355838.0,1355838.0,...,1355838.0,1355838.0,1355838.0,1355838.0,1355838.0,1355838.0,1355838.0,1355838.0,1355838.0,1355838.0
mean,0.08559561,0.08686949,0.1113165,0.05740445,0.09280093,0.1027839,0.1135433,0.09396847,0.06029807,0.1049492,...,0.01923718,0.02708928,0.05571533,0.02264773,0.03161171,0.04298287,0.03614905,0.02557106,0.1764577,0.06276443
std,0.07012287,0.05866437,0.06498859,0.05323604,0.07046865,0.06140767,0.07860132,0.05407793,0.06309682,0.07308457,...,0.06044843,0.06729007,0.08930775,0.05915104,0.07877738,0.09391404,0.06996516,0.07838058,0.03635343,0.06590349
min,-0.1492556,-0.1255416,-0.1148668,-0.1664211,-0.1644311,-0.1255432,-0.1382757,-0.1218244,-0.1932573,-0.1752686,...,-0.2700528,-0.2722588,-0.2784553,-0.2538928,-0.265859,-0.3016184,-0.2675191,-0.2956381,0.134,-0.2344599
25%,0.03577911,0.04545975,0.06611484,0.02048744,0.04381821,0.06141226,0.05866365,0.05715708,0.01744766,0.05435307,...,-0.02188648,-0.01804999,-0.005748722,-0.01798432,-0.0225525,-0.02260899,-0.01217066,-0.02787367,0.148891,0.01922829
50%,0.07626972,0.08135188,0.1036905,0.05396095,0.08852602,0.09911543,0.1033881,0.08842104,0.0598905,0.09831265,...,0.01854405,0.02545261,0.04851583,0.02138509,0.02751547,0.03533991,0.03457581,0.02304182,0.1672995,0.06164784
75%,0.1254801,0.122635,0.1480672,0.09003481,0.1365627,0.1395021,0.1575852,0.1243169,0.1022901,0.1486356,...,0.05941956,0.06989236,0.1075833,0.06125178,0.08111304,0.1001635,0.08310743,0.07678358,0.194579,0.1050258
max,0.7550066,0.6103925,0.6149387,0.5102714,0.61045,0.7142168,0.6140311,0.6116345,0.5387621,0.6769031,...,0.5363311,0.5145766,0.5608774,0.4586499,0.6041094,0.589149,0.4173216,0.5398021,0.7286295,0.6892114


In [29]:
df_dict = dict()
for col in caption_score_list:
    num, _ = col.split('_', 1)
    c_list = [col] + [f'{num}_context_score'] + im_c + cap_s + con_s
    df = df_p[c_list]
    df = df[df[col] > df[col].quantile(0.9999)]
#     df = df[df[col] > 0.55]
    df = df.sort_values(by=[col], ascending=False)
    df_dict[col] = df

In [31]:
from IPython.display import display

In [32]:
for el in df_dict:
    t,_ = el.split('_', 1)
#     print(t)
    df_dict[el].columns = [f'{t}_caption_sim', f'{t}_context_sim', 'image_url',
                          'caption', f'ped_caption_sim', f'ped_context_sim']
    display(df_dict[el].describe().round(2))
    print('\n')

Unnamed: 0,196_caption_sim,196_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.56,0.3,0.44,0.23
std,0.05,0.24,0.11,0.19
min,0.51,-0.14,0.24,-0.06
25%,0.51,0.11,0.35,0.08
50%,0.54,0.25,0.43,0.16
75%,0.57,0.46,0.52,0.36
max,0.76,0.75,0.73,0.69






Unnamed: 0,412_caption_sim,412_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.46,0.19,0.52,0.19
std,0.04,0.16,0.08,0.17
min,0.43,-0.09,0.2,-0.07
25%,0.44,0.06,0.48,0.07
50%,0.45,0.17,0.53,0.13
75%,0.47,0.3,0.56,0.26
max,0.61,0.65,0.73,0.69






Unnamed: 0,172_caption_sim,172_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.5,0.18,0.5,0.17
std,0.03,0.16,0.09,0.17
min,0.46,-0.07,0.25,-0.08
25%,0.47,0.05,0.44,0.06
50%,0.49,0.14,0.52,0.11
75%,0.51,0.3,0.55,0.25
max,0.61,0.56,0.73,0.69






Unnamed: 0,363_caption_sim,363_context_sim,ped_caption_sim,ped_context_sim
count,135.0,135.0,135.0,135.0
mean,0.37,0.18,0.37,0.17
std,0.02,0.12,0.14,0.13
min,0.35,-0.06,0.14,-0.06
25%,0.35,0.09,0.28,0.09
50%,0.37,0.18,0.32,0.15
75%,0.38,0.28,0.48,0.21
max,0.51,0.5,0.73,0.68






Unnamed: 0,198_caption_sim,198_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.49,0.21,0.36,0.14
std,0.03,0.15,0.12,0.1
min,0.45,-0.14,0.15,-0.04
25%,0.46,0.11,0.27,0.08
50%,0.48,0.18,0.33,0.13
75%,0.5,0.31,0.4,0.21
max,0.61,0.61,0.73,0.5






Unnamed: 0,114_caption_sim,114_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.62,0.47,0.29,0.16
std,0.03,0.09,0.04,0.07
min,0.59,0.06,0.17,-0.08
25%,0.6,0.42,0.27,0.11
50%,0.61,0.48,0.29,0.17
75%,0.62,0.52,0.33,0.2
max,0.71,0.63,0.38,0.27






Unnamed: 0,419_caption_sim,419_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.52,0.27,0.33,0.15
std,0.02,0.15,0.13,0.12
min,0.5,-0.06,0.14,-0.07
25%,0.5,0.16,0.24,0.08
50%,0.51,0.27,0.28,0.13
75%,0.53,0.39,0.4,0.21
max,0.61,0.55,0.73,0.69






Unnamed: 0,294_caption_sim,294_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.46,0.21,0.45,0.19
std,0.04,0.16,0.15,0.15
min,0.42,-0.09,0.14,-0.06
25%,0.43,0.08,0.31,0.09
50%,0.44,0.2,0.5,0.15
75%,0.47,0.36,0.55,0.24
max,0.61,0.61,0.73,0.69






Unnamed: 0,388_caption_sim,388_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.39,0.17,0.33,0.11
std,0.04,0.14,0.15,0.11
min,0.34,-0.14,0.14,-0.14
25%,0.35,0.06,0.22,0.05
50%,0.37,0.15,0.29,0.11
75%,0.4,0.26,0.42,0.16
max,0.54,0.5,0.73,0.5






Unnamed: 0,314_caption_sim,314_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.53,0.23,0.33,0.15
std,0.03,0.17,0.13,0.13
min,0.49,-0.03,0.14,-0.08
25%,0.5,0.11,0.23,0.07
50%,0.52,0.2,0.29,0.13
75%,0.54,0.32,0.36,0.19
max,0.68,0.67,0.73,0.69






Unnamed: 0,98_caption_sim,98_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.58,0.29,0.37,0.17
std,0.03,0.17,0.11,0.12
min,0.55,-0.05,0.21,-0.06
25%,0.56,0.16,0.3,0.1
50%,0.57,0.24,0.34,0.13
75%,0.6,0.45,0.41,0.21
max,0.73,0.63,0.73,0.68






Unnamed: 0,80_caption_sim,80_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.42,0.24,0.31,0.17
std,0.03,0.15,0.14,0.11
min,0.38,-0.1,0.13,-0.05
25%,0.39,0.13,0.21,0.08
50%,0.41,0.26,0.26,0.16
75%,0.43,0.35,0.34,0.23
max,0.51,0.59,0.73,0.5






Unnamed: 0,134_caption_sim,134_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.57,0.38,0.26,0.11
std,0.02,0.13,0.1,0.07
min,0.54,0.0,0.13,-0.02
25%,0.55,0.31,0.2,0.08
50%,0.56,0.4,0.23,0.1
75%,0.58,0.48,0.29,0.15
max,0.66,0.62,0.67,0.46






Unnamed: 0,432_caption_sim,432_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.45,0.2,0.22,0.08
std,0.02,0.14,0.12,0.08
min,0.42,-0.07,0.13,-0.07
25%,0.43,0.09,0.16,0.02
50%,0.44,0.21,0.18,0.07
75%,0.46,0.31,0.23,0.14
max,0.53,0.61,0.73,0.45






Unnamed: 0,394_caption_sim,394_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.52,0.22,0.44,0.13
std,0.03,0.14,0.13,0.11
min,0.5,-0.07,0.17,-0.07
25%,0.5,0.12,0.37,0.06
50%,0.51,0.21,0.46,0.11
75%,0.53,0.31,0.53,0.17
max,0.68,0.59,0.73,0.57






Unnamed: 0,53_caption_sim,53_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.45,0.22,0.38,0.13
std,0.03,0.15,0.16,0.13
min,0.41,-0.1,0.13,-0.09
25%,0.42,0.09,0.24,0.05
50%,0.43,0.19,0.36,0.1
75%,0.47,0.35,0.53,0.18
max,0.57,0.54,0.73,0.52






Unnamed: 0,56_caption_sim,56_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.56,0.32,0.29,0.14
std,0.02,0.13,0.09,0.06
min,0.53,-0.06,0.14,-0.05
25%,0.54,0.23,0.23,0.1
50%,0.55,0.34,0.27,0.14
75%,0.56,0.41,0.32,0.17
max,0.67,0.58,0.7,0.3






Unnamed: 0,150_caption_sim,150_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.51,0.27,0.35,0.17
std,0.03,0.14,0.14,0.13
min,0.48,-0.13,0.14,-0.06
25%,0.49,0.18,0.23,0.09
50%,0.5,0.26,0.32,0.15
75%,0.53,0.38,0.46,0.22
max,0.64,0.53,0.73,0.69






Unnamed: 0,278_caption_sim,278_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.57,0.19,0.38,0.1
std,0.03,0.14,0.15,0.11
min,0.54,-0.08,0.16,-0.09
25%,0.55,0.09,0.25,0.04
50%,0.56,0.17,0.34,0.09
75%,0.59,0.24,0.53,0.13
max,0.71,0.59,0.73,0.57






Unnamed: 0,-1_caption_sim,-1_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.46,0.14,0.31,0.07
std,0.02,0.09,0.15,0.08
min,0.45,-0.08,0.14,-0.07
25%,0.45,0.08,0.19,0.03
50%,0.46,0.13,0.25,0.07
75%,0.47,0.2,0.4,0.11
max,0.57,0.39,0.73,0.5






Unnamed: 0,43_caption_sim,43_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.51,0.34,0.28,0.18
std,0.03,0.14,0.08,0.09
min,0.48,-0.11,0.13,-0.03
25%,0.49,0.27,0.25,0.12
50%,0.51,0.35,0.27,0.19
75%,0.53,0.46,0.31,0.26
max,0.64,0.58,0.73,0.33






Unnamed: 0,265_caption_sim,265_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.43,0.22,0.32,0.12
std,0.04,0.16,0.16,0.12
min,0.39,-0.05,0.14,-0.08
25%,0.4,0.1,0.19,0.05
50%,0.41,0.2,0.25,0.1
75%,0.43,0.33,0.47,0.16
max,0.56,0.57,0.73,0.68






Unnamed: 0,345_caption_sim,345_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.47,0.24,0.39,0.17
std,0.04,0.17,0.15,0.13
min,0.43,-0.12,0.14,-0.06
25%,0.44,0.13,0.27,0.08
50%,0.46,0.22,0.36,0.13
75%,0.49,0.35,0.53,0.24
max,0.67,0.66,0.73,0.69






Unnamed: 0,378_caption_sim,378_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.53,0.33,0.29,0.13
std,0.02,0.12,0.11,0.09
min,0.51,0.01,0.14,-0.07
25%,0.52,0.27,0.26,0.08
50%,0.52,0.34,0.26,0.11
75%,0.53,0.41,0.26,0.15
max,0.63,0.54,0.67,0.45






Unnamed: 0,11_caption_sim,11_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.54,0.32,0.24,0.12
std,0.02,0.13,0.11,0.08
min,0.52,0.01,0.14,-0.02
25%,0.52,0.22,0.18,0.07
50%,0.53,0.34,0.21,0.1
75%,0.54,0.42,0.26,0.15
max,0.61,0.54,0.73,0.45






Unnamed: 0,331_caption_sim,331_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.5,0.23,0.35,0.1
std,0.02,0.13,0.15,0.08
min,0.47,-0.05,0.14,-0.07
25%,0.48,0.14,0.21,0.06
50%,0.49,0.23,0.32,0.1
75%,0.51,0.34,0.48,0.14
max,0.61,0.55,0.73,0.34






Unnamed: 0,202_caption_sim,202_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.45,0.22,0.38,0.17
std,0.03,0.15,0.15,0.13
min,0.42,-0.08,0.14,-0.05
25%,0.42,0.11,0.25,0.09
50%,0.44,0.21,0.37,0.15
75%,0.46,0.31,0.52,0.22
max,0.62,0.57,0.73,0.69






Unnamed: 0,169_caption_sim,169_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.48,0.28,0.35,0.14
std,0.03,0.19,0.15,0.12
min,0.45,-0.02,0.14,-0.07
25%,0.45,0.11,0.23,0.07
50%,0.47,0.26,0.3,0.14
75%,0.49,0.45,0.51,0.19
max,0.62,0.62,0.73,0.68






Unnamed: 0,327_caption_sim,327_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.54,0.27,0.24,0.1
std,0.02,0.15,0.04,0.07
min,0.52,-0.04,0.13,-0.07
25%,0.53,0.15,0.22,0.05
50%,0.54,0.28,0.24,0.11
75%,0.55,0.39,0.26,0.16
max,0.63,0.58,0.39,0.28






Unnamed: 0,309_caption_sim,309_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.52,0.36,0.24,0.1
std,0.02,0.12,0.12,0.06
min,0.49,-0.06,0.14,-0.04
25%,0.5,0.28,0.17,0.06
50%,0.51,0.38,0.21,0.1
75%,0.53,0.44,0.26,0.15
max,0.6,0.61,0.73,0.33






Unnamed: 0,68_caption_sim,68_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.47,0.26,0.25,0.11
std,0.02,0.13,0.1,0.08
min,0.44,-0.08,0.14,-0.07
25%,0.45,0.18,0.19,0.06
50%,0.46,0.28,0.23,0.11
75%,0.48,0.35,0.28,0.15
max,0.57,0.49,0.7,0.48






Unnamed: 0,216_caption_sim,216_context_sim,ped_caption_sim,ped_context_sim
count,135.0,135.0,135.0,135.0
mean,0.53,0.32,0.23,0.11
std,0.03,0.15,0.09,0.08
min,0.49,-0.04,0.13,-0.1
25%,0.51,0.2,0.17,0.05
50%,0.52,0.35,0.2,0.11
75%,0.54,0.45,0.26,0.15
max,0.63,0.63,0.67,0.41






Unnamed: 0,40_caption_sim,40_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.54,0.36,0.29,0.11
std,0.02,0.11,0.1,0.07
min,0.52,-0.03,0.16,-0.03
25%,0.53,0.29,0.24,0.06
50%,0.54,0.38,0.27,0.1
75%,0.55,0.43,0.31,0.15
max,0.61,0.53,0.73,0.41






Unnamed: 0,99_caption_sim,99_context_sim,ped_caption_sim,ped_context_sim
count,135.0,135.0,135.0,135.0
mean,0.42,0.17,0.36,0.12
std,0.04,0.14,0.16,0.12
min,0.39,-0.1,0.14,-0.14
25%,0.4,0.07,0.21,0.06
50%,0.41,0.15,0.33,0.09
75%,0.43,0.28,0.51,0.16
max,0.57,0.49,0.73,0.5






Unnamed: 0,426_caption_sim,426_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.37,0.17,0.32,0.11
std,0.02,0.11,0.15,0.1
min,0.35,-0.04,0.14,-0.08
25%,0.35,0.08,0.2,0.05
50%,0.36,0.17,0.25,0.09
75%,0.38,0.25,0.43,0.15
max,0.48,0.52,0.73,0.54






Unnamed: 0,344_caption_sim,344_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.48,0.23,0.33,0.09
std,0.03,0.12,0.16,0.11
min,0.45,-0.04,0.14,-0.11
25%,0.46,0.15,0.19,0.02
50%,0.47,0.25,0.27,0.07
75%,0.48,0.3,0.46,0.14
max,0.58,0.52,0.73,0.56






Unnamed: 0,47_caption_sim,47_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.6,0.54,0.22,0.17
std,0.03,0.12,0.06,0.05
min,0.57,0.13,0.13,-0.01
25%,0.58,0.46,0.19,0.14
50%,0.59,0.58,0.22,0.17
75%,0.62,0.63,0.25,0.2
max,0.74,0.74,0.44,0.3






Unnamed: 0,8_caption_sim,8_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.47,0.27,0.31,0.16
std,0.02,0.14,0.08,0.08
min,0.44,-0.09,0.15,-0.02
25%,0.45,0.16,0.27,0.1
50%,0.45,0.3,0.27,0.16
75%,0.48,0.38,0.34,0.2
max,0.58,0.59,0.7,0.42






Unnamed: 0,416_caption_sim,416_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.36,0.18,0.34,0.14
std,0.05,0.15,0.16,0.14
min,0.33,-0.06,0.13,-0.07
25%,0.33,0.06,0.2,0.05
50%,0.35,0.17,0.27,0.11
75%,0.36,0.29,0.49,0.19
max,0.66,0.55,0.73,0.68






Unnamed: 0,112_caption_sim,112_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.51,0.25,0.37,0.13
std,0.03,0.13,0.14,0.09
min,0.48,-0.08,0.14,-0.04
25%,0.49,0.17,0.25,0.09
50%,0.5,0.27,0.32,0.12
75%,0.53,0.34,0.48,0.16
max,0.62,0.55,0.73,0.69






Unnamed: 0,137_caption_sim,137_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.52,0.23,0.4,0.14
std,0.02,0.13,0.14,0.1
min,0.5,-0.09,0.19,-0.03
25%,0.51,0.13,0.27,0.08
50%,0.52,0.24,0.39,0.12
75%,0.54,0.33,0.52,0.16
max,0.61,0.53,0.73,0.69






Unnamed: 0,441_caption_sim,441_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.43,0.19,0.21,0.06
std,0.04,0.16,0.12,0.09
min,0.39,-0.07,0.13,-0.11
25%,0.4,0.06,0.15,0.02
50%,0.41,0.17,0.17,0.06
75%,0.45,0.3,0.2,0.1
max,0.57,0.56,0.73,0.69






Unnamed: 0,192_caption_sim,192_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.37,0.22,0.23,0.12
std,0.05,0.13,0.09,0.1
min,0.31,-0.05,0.14,-0.09
25%,0.32,0.13,0.18,0.05
50%,0.35,0.22,0.22,0.1
75%,0.43,0.33,0.25,0.19
max,0.48,0.54,0.62,0.54






Unnamed: 0,20_caption_sim,20_context_sim,ped_caption_sim,ped_context_sim
count,135.0,135.0,135.0,135.0
mean,0.45,0.24,0.25,0.1
std,0.03,0.15,0.11,0.09
min,0.42,-0.06,0.14,-0.06
25%,0.43,0.11,0.18,0.05
50%,0.44,0.26,0.22,0.08
75%,0.47,0.36,0.26,0.14
max,0.62,0.48,0.7,0.48






Unnamed: 0,296_caption_sim,296_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.55,0.34,0.2,0.08
std,0.03,0.14,0.07,0.06
min,0.52,-0.03,0.14,-0.09
25%,0.53,0.26,0.16,0.03
50%,0.54,0.38,0.18,0.08
75%,0.56,0.45,0.22,0.12
max,0.67,0.55,0.7,0.32






Unnamed: 0,144_caption_sim,144_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.37,0.07,0.32,0.09
std,0.02,0.09,0.14,0.1
min,0.35,-0.12,0.15,-0.23
25%,0.35,0.02,0.22,0.04
50%,0.36,0.06,0.28,0.07
75%,0.38,0.12,0.41,0.13
max,0.44,0.46,0.73,0.54






Unnamed: 0,12_caption_sim,12_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.47,0.23,0.3,0.08
std,0.04,0.13,0.14,0.09
min,0.43,-0.07,0.14,-0.06
25%,0.44,0.12,0.2,0.02
50%,0.46,0.23,0.26,0.07
75%,0.48,0.33,0.35,0.12
max,0.63,0.49,0.73,0.46






Unnamed: 0,224_caption_sim,224_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.5,0.32,0.27,0.12
std,0.03,0.12,0.11,0.07
min,0.48,0.02,0.14,-0.02
25%,0.49,0.26,0.19,0.07
50%,0.49,0.34,0.22,0.12
75%,0.51,0.4,0.29,0.16
max,0.64,0.59,0.67,0.37






Unnamed: 0,250_caption_sim,250_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.34,0.16,0.23,0.07
std,0.02,0.09,0.12,0.09
min,0.31,-0.08,0.13,-0.08
25%,0.32,0.1,0.16,0.02
50%,0.33,0.16,0.19,0.06
75%,0.34,0.21,0.24,0.11
max,0.45,0.37,0.65,0.5






Unnamed: 0,376_caption_sim,376_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.47,0.3,0.19,0.1
std,0.03,0.15,0.04,0.07
min,0.44,-0.12,0.14,-0.15
25%,0.45,0.19,0.16,0.05
50%,0.46,0.37,0.18,0.1
75%,0.48,0.41,0.21,0.16
max,0.57,0.54,0.3,0.27






In [33]:
df_dict[el]

Unnamed: 0,376_caption_sim,376_context_sim,image_url,caption,ped_caption_sim,ped_context_sim
870052,0.567895,0.407775,https://upload.wikimedia.org/wikipedia/commons...,English: Recreation of Minnie Mouse's signatu...,0.189813,0.156888
788926,0.567709,0.407775,https://upload.wikimedia.org/wikipedia/commons...,English: Recreation of Mickey Mouse's signatu...,0.180582,0.156888
841923,0.543113,0.423695,https://upload.wikimedia.org/wikipedia/commons...,Cars 623 and 717 passing on the Red Car Troll...,0.200099,0.132485
667765,0.537630,0.350208,https://upload.wikimedia.org/wikipedia/commons...,Disney villains at Disneyland's Mickey's Hallo...,0.199137,0.043375
1268785,0.533986,0.539802,https://upload.wikimedia.org/wikipedia/commons...,Theatrical release poster English: Poster for ...,0.172679,0.117095
...,...,...,...,...,...,...
597662,0.441235,0.146075,https://upload.wikimedia.org/wikipedia/commons...,English: Iconic Toy Train Station,0.241279,0.132612
1305427,0.440948,0.407775,https://upload.wikimedia.org/wikipedia/commons...,English: Recreation of Tweedledee's signature...,0.151732,0.156888
1104752,0.440487,0.061867,https://upload.wikimedia.org/wikipedia/commons...,"English: A Disney bus in Walt Disney World, F...",0.176181,0.056294
937790,0.439831,-0.123846,https://upload.wikimedia.org/wikipedia/en/9/93...,"The official school mascot, Willy. He is the o...",0.211186,-0.042735


In [34]:
df_dict['412_caption_score'].describe().round(2)

Unnamed: 0,412_caption_sim,412_context_sim,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.46,0.19,0.52,0.19
std,0.04,0.16,0.08,0.17
min,0.43,-0.09,0.2,-0.07
25%,0.44,0.06,0.48,0.07
50%,0.45,0.17,0.53,0.13
75%,0.47,0.3,0.56,0.26
max,0.61,0.65,0.73,0.69


In [35]:
df_dict['412_caption_score']

Unnamed: 0,412_caption_sim,412_context_sim,image_url,caption,ped_caption_sim,ped_context_sim
874893,0.610392,0.199226,https://upload.wikimedia.org/wikipedia/commons...,Pedestrian trail English: Pedestrian trail,0.704843,0.105616
479525,0.586082,0.008303,https://upload.wikimedia.org/wikipedia/commons...,Pedestrian walk in Ferizaj English: Ferizaj City,0.595274,0.053994
1244500,0.573594,0.481619,https://upload.wikimedia.org/wikipedia/commons...,Prohibition of pedestrians (includes any kind ...,0.548525,0.483317
496374,0.565820,0.190722,https://upload.wikimedia.org/wikipedia/commons...,English: Pedestrian crossing,0.728629,0.157267
812924,0.565820,0.160237,https://upload.wikimedia.org/wikipedia/commons...,English: Pedestrian crossing,0.728629,0.152377
...,...,...,...,...,...,...
335906,0.426847,0.367209,https://upload.wikimedia.org/wikipedia/commons...,Construction of the new Covered Pedestrian Br...,0.552095,0.452756
1084636,0.426847,0.367209,https://upload.wikimedia.org/wikipedia/commons...,Construction of the new Covered Pedestrian Br...,0.552095,0.452756
276333,0.426847,0.367209,https://upload.wikimedia.org/wikipedia/commons...,Construction of the new Covered Pedestrian Br...,0.552095,0.452756
814205,0.426273,-0.059679,https://upload.wikimedia.org/wikipedia/commons...,Pedestrian route through the external rotunda,0.578300,-0.016740


In [43]:
pg_dict = dict()
pg_dict['a'] = [68, 112, 56, 224]
pg_dict['b'] = [309, 344, 47, -1, 53]
pg_dict['c'] = [314, 331, 419, 394]
pg_dict['d'] = [98, 196, 202, 294]
pg_dict['e'] = [172, 137, 11, 134, 150, 216]
pg_dict['f'] = [363, 40, 43]
pg_dict['g'] = [441, 376]
pg_dict['h'] = [144, 416, 327]
pg_dict['i'] = [192, 20, 114]
pg_dict['j'] = [345]
pg_dict['k'] = [8, 265]
pg_dict['l'] = [250, 80, 426]
pg_dict['m'] = [169, 99, 378]
pg_dict['n'] = [432, 388, 412]
pg_dict['o'] = [12, 278, 198, 296] 

In [44]:
# k_name = ['image_url', 'caption', 'concept_caption_score']
for el in pg_dict:
    pg_dict[el] = [f'{i}_caption_score' for i in pg_dict[el]]
    pg_dict[el] = [df_dict[i] for i in pg_dict[el]]
    pg_dict[el] = [df_.reset_index(drop=True) for df_ in pg_dict[el]]
    

In [45]:
pg_dict[el][0]

Unnamed: 0,12_caption_sim,12_context_sim,image_url,caption,ped_caption_sim,ped_context_sim
0,0.628946,0.295938,https://upload.wikimedia.org/wikipedia/commons...,Pedestrian trail English: Pedestrian trail,0.704843,0.105616
1,0.591550,0.071280,https://upload.wikimedia.org/wikipedia/commons...,Walking Trail,0.422199,0.072473
2,0.591550,0.071280,https://upload.wikimedia.org/wikipedia/commons...,Walking Trail,0.422199,0.072473
3,0.591550,0.071280,https://upload.wikimedia.org/wikipedia/commons...,Walking Trail,0.422199,0.072473
4,0.589132,0.471493,https://upload.wikimedia.org/wikipedia/commons...,Hikers awaiting southbound train English: Look...,0.300308,0.143705
...,...,...,...,...,...,...
131,0.431405,0.316774,https://upload.wikimedia.org/wikipedia/commons...,Fall Creek Boardwalk English: Board walk on pe...,0.262093,0.107849
132,0.431401,0.350863,https://upload.wikimedia.org/wikipedia/commons...,Hikers at summit Hikers on the summit of Algon...,0.150562,0.062848
133,0.431197,0.102290,https://upload.wikimedia.org/wikipedia/commons...,English: Hikers from the Santiago expedition ...,0.207058,0.090179
134,0.431039,0.220727,https://upload.wikimedia.org/wikipedia/commons...,Pedestrian grade crossing of the former Dedham...,0.436264,0.114665


In [48]:
k_name = ['image_url', 'caption', 'ped_caption_sim', 'ped_context_sim']

for el in pg_dict:
    t_df = pd.DataFrame(columns = [f'{el}_topic_score'] + [f'{el}_context_score'] + k_name)
    for df_ in pg_dict[el]:
        df_.columns = [f'{el}_topic_score'] + [f'{el}_context_score'] + k_name
        t_df = pd.concat([t_df, df_], ignore_index=True)
    pg_dict[el] = t_df

In [49]:
for el in pg_dict:
#     print(el)
    display(pg_dict[el].describe().round(2))
    print('\n')

Unnamed: 0,a_topic_score,a_context_score,ped_caption_sim,ped_context_sim
count,544.0,544.0,544.0,544.0
mean,0.51,0.29,0.29,0.12
std,0.04,0.13,0.12,0.07
min,0.44,-0.08,0.14,-0.07
25%,0.48,0.21,0.21,0.08
50%,0.5,0.31,0.26,0.12
75%,0.54,0.38,0.32,0.16
max,0.67,0.59,0.73,0.69






Unnamed: 0,b_topic_score,b_context_score,ped_caption_sim,ped_context_sim
count,680.0,680.0,680.0,680.0
mean,0.5,0.3,0.3,0.11
std,0.06,0.19,0.14,0.1
min,0.41,-0.1,0.13,-0.11
25%,0.46,0.14,0.19,0.05
50%,0.48,0.29,0.24,0.1
75%,0.54,0.43,0.37,0.17
max,0.74,0.74,0.73,0.56






Unnamed: 0,c_topic_score,c_context_score,ped_caption_sim,ped_context_sim
count,544.0,544.0,544.0,544.0
mean,0.52,0.24,0.36,0.13
std,0.03,0.15,0.14,0.11
min,0.47,-0.07,0.14,-0.08
25%,0.5,0.13,0.24,0.07
50%,0.51,0.23,0.32,0.11
75%,0.53,0.35,0.48,0.18
max,0.68,0.67,0.73,0.69






Unnamed: 0,d_topic_score,d_context_score,ped_caption_sim,ped_context_sim
count,544.0,544.0,544.0,544.0
mean,0.51,0.26,0.41,0.19
std,0.07,0.19,0.13,0.15
min,0.42,-0.14,0.14,-0.06
25%,0.44,0.11,0.29,0.09
50%,0.51,0.22,0.41,0.15
75%,0.56,0.38,0.53,0.24
max,0.76,0.75,0.73,0.69






Unnamed: 0,e_topic_score,e_context_score,ped_caption_sim,ped_context_sim
count,815.0,815.0,815.0,815.0
mean,0.53,0.28,0.33,0.14
std,0.04,0.15,0.15,0.11
min,0.46,-0.13,0.13,-0.1
25%,0.5,0.16,0.21,0.07
50%,0.52,0.29,0.27,0.11
75%,0.55,0.41,0.46,0.17
max,0.66,0.63,0.73,0.69






Unnamed: 0,f_topic_score,f_context_score,ped_caption_sim,ped_context_sim
count,407.0,407.0,407.0,407.0
mean,0.48,0.29,0.31,0.16
std,0.08,0.15,0.11,0.1
min,0.35,-0.11,0.13,-0.06
25%,0.38,0.18,0.25,0.08
50%,0.51,0.31,0.28,0.14
75%,0.53,0.41,0.33,0.21
max,0.64,0.58,0.73,0.68






Unnamed: 0,g_topic_score,g_context_score,ped_caption_sim,ped_context_sim
count,272.0,272.0,272.0,272.0
mean,0.45,0.24,0.2,0.08
std,0.04,0.16,0.09,0.08
min,0.39,-0.12,0.13,-0.15
25%,0.41,0.11,0.16,0.03
50%,0.45,0.26,0.18,0.08
75%,0.48,0.41,0.21,0.13
max,0.57,0.56,0.73,0.69






Unnamed: 0,h_topic_score,h_context_score,ped_caption_sim,ped_context_sim
count,408.0,408.0,408.0,408.0
mean,0.42,0.18,0.3,0.11
std,0.09,0.16,0.13,0.11
min,0.33,-0.12,0.13,-0.23
25%,0.35,0.05,0.21,0.04
50%,0.37,0.14,0.26,0.1
75%,0.53,0.29,0.34,0.16
max,0.66,0.58,0.73,0.68






Unnamed: 0,i_topic_score,i_context_score,ped_caption_sim,ped_context_sim
count,407.0,407.0,407.0,407.0
mean,0.48,0.31,0.26,0.13
std,0.11,0.16,0.09,0.09
min,0.31,-0.06,0.14,-0.09
25%,0.42,0.18,0.2,0.06
50%,0.45,0.33,0.25,0.12
75%,0.6,0.45,0.3,0.19
max,0.71,0.63,0.7,0.54






Unnamed: 0,j_topic_score,j_context_score,ped_caption_sim,ped_context_sim
count,136.0,136.0,136.0,136.0
mean,0.47,0.24,0.39,0.17
std,0.04,0.17,0.15,0.13
min,0.43,-0.12,0.14,-0.06
25%,0.44,0.13,0.27,0.08
50%,0.46,0.22,0.36,0.13
75%,0.49,0.35,0.53,0.24
max,0.67,0.66,0.73,0.69






Unnamed: 0,k_topic_score,k_context_score,ped_caption_sim,ped_context_sim
count,272.0,272.0,272.0,272.0
mean,0.45,0.25,0.31,0.14
std,0.04,0.16,0.13,0.1
min,0.39,-0.09,0.14,-0.08
25%,0.41,0.12,0.23,0.08
50%,0.45,0.26,0.27,0.12
75%,0.46,0.36,0.36,0.19
max,0.58,0.59,0.73,0.68






Unnamed: 0,l_topic_score,l_context_score,ped_caption_sim,ped_context_sim
count,408.0,408.0,408.0,408.0
mean,0.37,0.19,0.29,0.12
std,0.04,0.13,0.14,0.11
min,0.31,-0.1,0.13,-0.08
25%,0.34,0.1,0.19,0.04
50%,0.36,0.18,0.23,0.09
75%,0.4,0.27,0.33,0.17
max,0.51,0.59,0.73,0.54






Unnamed: 0,m_topic_score,m_context_score,ped_caption_sim,ped_context_sim
count,407.0,407.0,407.0,407.0
mean,0.48,0.26,0.34,0.13
std,0.05,0.17,0.15,0.11
min,0.39,-0.1,0.14,-0.14
25%,0.43,0.12,0.23,0.07
50%,0.47,0.27,0.26,0.11
75%,0.52,0.39,0.46,0.17
max,0.63,0.62,0.73,0.68






Unnamed: 0,n_topic_score,n_context_score,ped_caption_sim,ped_context_sim
count,408.0,408.0,408.0,408.0
mean,0.43,0.19,0.36,0.13
std,0.05,0.15,0.17,0.13
min,0.34,-0.14,0.13,-0.14
25%,0.4,0.07,0.2,0.05
50%,0.44,0.17,0.33,0.1
75%,0.46,0.29,0.52,0.17
max,0.61,0.65,0.73,0.69






Unnamed: 0,o_topic_score,o_context_score,ped_caption_sim,ped_context_sim
count,544.0,544.0,544.0,544.0
mean,0.52,0.24,0.31,0.1
std,0.05,0.15,0.14,0.1
min,0.43,-0.14,0.14,-0.09
25%,0.47,0.12,0.2,0.04
50%,0.52,0.23,0.27,0.09
75%,0.56,0.37,0.39,0.14
max,0.71,0.61,0.73,0.57






In [33]:
pd.set_option('display.max_rows', None, 'display.max_columns', None)
df_ = df_dict[el][df_dict[el]['376_score'] >= 0.5]
df_['caption']

4160119    A Peanuts character at the Playhouse Theatre i...
2391380     English: Recreation of Minnie Mouse's signatu...
2065916     English: Recreation of Mickey Mouse's signatu...
4228353    Some popular Disney characters (from left to r...
2278672     Cars 623 and 717 passing on the Red Car Troll...
1581985    Disney villains at Disneyland's Mickey's Hallo...
4525356    Theatrical release poster English: Poster for ...
1207889    Disney with Mickey Mouse English: Walt Disney ...
3253565    The Walt Disney World Railroad The Roy O.Disne...
2202810     English: Recreation of Goofy's signature from...
4204084     English: Recreation of Donald Duck's signatur...
3067569     English: Mickey and Minnie's Runaway Railway ...
699304           The Golden Mickeys at Hong Kong Disneyland 
4173529    Peanuts creator Charles M. Schulz advised Jim ...
3656634    Mickey's Toontown in Disneyland w:Toontown w:D...
1197681    The loading area in Disneyland, California. En...
4448351     English: Rec

In [34]:
pd.reset_option('max_columns')
pd.reset_option('display.max_rows')

In [18]:
csim_des = csim_df.describe().round(2)

In [19]:
pd.set_option('display.max_columns', None)
csim_des


Unnamed: 0,196_score,412_score,172_score,363_score,198_score,114_score,419_score,294_score,388_score,314_score,98_score,80_score,134_score,432_score,394_score,53_score,56_score,150_score,278_score,-1_score,43_score,265_score,345_score,378_score,11_score,331_score,202_score,169_score,327_score,309_score,68_score,216_score,40_score,99_score,426_score,344_score,47_score,8_score,416_score,112_score,137_score,441_score,192_score,20_score,296_score,144_score,12_score,224_score,250_score,376_score
count,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0,5411978.0
mean,0.03,0.03,0.05,0.01,0.04,0.05,0.05,0.04,0.02,0.05,0.06,0.01,0.07,0.05,0.07,0.03,0.05,0.04,0.09,0.08,0.05,0.03,0.02,0.06,0.05,0.07,0.01,0.04,0.09,0.04,0.04,0.06,0.07,0.03,0.02,0.08,0.08,0.0,0.05,0.03,0.07,0.04,0.02,0.04,0.07,0.05,0.02,0.06,0.03,0.06
std,0.07,0.06,0.07,0.06,0.07,0.07,0.08,0.06,0.07,0.07,0.07,0.07,0.08,0.07,0.08,0.07,0.08,0.07,0.08,0.08,0.07,0.06,0.07,0.09,0.08,0.09,0.06,0.07,0.08,0.08,0.07,0.08,0.07,0.07,0.06,0.08,0.09,0.07,0.06,0.07,0.07,0.08,0.06,0.06,0.09,0.07,0.07,0.08,0.07,0.08
min,-0.25,-0.24,-0.25,-0.25,-0.28,-0.27,-0.24,-0.23,-0.28,-0.25,-0.24,-0.28,-0.28,-0.26,-0.32,-0.25,-0.28,-0.25,-0.27,-0.26,-0.26,-0.25,-0.3,-0.28,-0.26,-0.27,-0.25,-0.27,-0.26,-0.27,-0.28,-0.27,-0.26,-0.26,-0.25,-0.28,-0.27,-0.29,-0.27,-0.26,-0.28,-0.29,-0.27,-0.33,-0.26,-0.25,-0.3,-0.28,-0.24,-0.28
25%,-0.01,-0.02,0.0,-0.03,-0.02,-0.0,0.0,-0.0,-0.03,0.0,0.01,-0.04,0.02,-0.0,0.01,-0.02,-0.0,-0.0,0.03,0.02,0.0,-0.01,-0.02,0.0,-0.01,0.01,-0.03,-0.01,0.04,-0.02,-0.01,0.0,0.02,-0.01,-0.02,0.03,0.02,-0.04,0.01,-0.02,0.02,-0.01,-0.02,-0.01,0.01,0.0,-0.03,-0.0,-0.01,0.01
50%,0.03,0.03,0.04,0.01,0.03,0.04,0.05,0.03,0.02,0.05,0.05,0.01,0.07,0.05,0.06,0.02,0.05,0.04,0.08,0.08,0.05,0.03,0.02,0.05,0.04,0.07,0.01,0.04,0.09,0.03,0.03,0.05,0.06,0.03,0.02,0.08,0.07,0.0,0.05,0.02,0.06,0.04,0.02,0.04,0.07,0.05,0.02,0.05,0.03,0.06
75%,0.07,0.07,0.09,0.05,0.08,0.09,0.1,0.07,0.06,0.1,0.1,0.05,0.12,0.09,0.12,0.07,0.1,0.08,0.14,0.13,0.09,0.07,0.07,0.11,0.09,0.13,0.05,0.09,0.14,0.08,0.08,0.11,0.11,0.07,0.06,0.13,0.13,0.05,0.09,0.07,0.11,0.09,0.06,0.08,0.12,0.09,0.07,0.11,0.08,0.11
max,0.76,0.61,0.61,0.51,0.61,0.71,0.61,0.61,0.55,0.68,0.73,0.51,0.66,0.53,0.68,0.57,0.67,0.64,0.71,0.57,0.64,0.56,0.67,0.63,0.61,0.61,0.62,0.62,0.63,0.6,0.57,0.63,0.61,0.57,0.48,0.58,0.74,0.58,0.66,0.62,0.61,0.57,0.53,0.62,0.67,0.44,0.63,0.64,0.45,0.58


In [20]:
pd.reset_option('max_columns')

In [17]:
col_list = [el for el in df.columns if 'score' in el]
print(col_list)

['196_score', '412_score', '172_score', '363_score', '198_score', '114_score', '419_score', '294_score', '388_score', '314_score', '98_score', '80_score', '134_score', '432_score', '394_score', '53_score', '56_score', '150_score', '278_score', '-1_score', '43_score', '265_score', '345_score', '378_score', '11_score', '331_score', '202_score', '169_score', '327_score', '309_score', '68_score', '216_score', '40_score', '99_score', '426_score', '344_score', '47_score', '8_score', '416_score', '112_score', '137_score', '441_score', '192_score', '20_score', '296_score', '144_score', '12_score', '224_score', '250_score', '376_score']


In [18]:
df.columns

Index(['index', 'language', 'page_url', 'image_url', 'page_title',
       'section_title', 'hierarchical_section_title',
       'caption_reference_description', 'caption_attribution_description',
       'caption_alt_text_description', 'mime_type', 'original_height',
       'original_width', 'is_main_image', 'attribution_passes_lang_id',
       'page_changed_recently', 'context_page_description',
       'context_section_description', 'caption', 'context', '196_score',
       '412_score', '172_score', '363_score', '198_score', '114_score',
       '419_score', '294_score', '388_score', '314_score', '98_score',
       '80_score', '134_score', '432_score', '394_score', '53_score',
       '56_score', '150_score', '278_score', '-1_score', '43_score',
       '265_score', '345_score', '378_score', '11_score', '331_score',
       '202_score', '169_score', '327_score', '309_score', '68_score',
       '216_score', '40_score', '99_score', '426_score', '344_score',
       '47_score', '8_score', '416

In [19]:
df_1 = df[['196_score', 'image_url', 'caption']]
df_1 = df_1.sort_values(by=['196_score'], ascending=False)
df_1 = df_1.reset_index(drop=True)
df_1

Unnamed: 0,196_score,image_url,caption
0,0.627881,https://upload.wikimedia.org/wikipedia/commons...,"New pedestrian crossing, Barnards Green"
1,0.614582,https://upload.wikimedia.org/wikipedia/commons...,Crosswalk in San Francisco English: Pedestrian...
2,0.566653,https://upload.wikimedia.org/wikipedia/commons...,"A typical ""L-shape"" of tactile paving at a con..."
3,0.561883,https://upload.wikimedia.org/wikipedia/commons...,Pedestrian crossing view of Puente de la Unida...
4,0.560429,https://upload.wikimedia.org/wikipedia/commons...,"""A Pedestrian Was Killed Crossing Here"" sign o..."
...,...,...,...
542588,-0.219343,https://upload.wikimedia.org/wikipedia/commons...,Emperor Hanazono after taking the tonsure. Eng...
542589,-0.221817,https://upload.wikimedia.org/wikipedia/commons...,Arabia has been gradually drying out since the...
542590,-0.223173,https://upload.wikimedia.org/wikipedia/commons...,English: The steamer H.D. Coffinberry before ...
542591,-0.225270,https://upload.wikimedia.org/wikipedia/en/a/a8...,Bucket detail on a small turbine.


In [23]:
import urllib
import base64
import copy
from os.path import exists
from time import sleep
img_root = '/raid/AISSEL/Hamed/datasets/wit/images/'

In [42]:
def get_image(link):
    URL = copy.copy(link)
    link = link.encode("utf-8")
    b64 = base64.b64encode(link)
    s_64 = str(b64)
    s_64 = s_64[2:-1]
    im_path = img_root + s_64 + '.jpg'
    if not exists(im_path):
        try:
            with urllib.request.urlopen(URL) as url:
                with open(im_path, 'wb') as f:
                    f.write(url.read())
#             print(f'we download {str(base64.b64decode(s_64))[2:-1]}')
        except Exception as e:
            print(e)
            print(f'I could not download {str(base64.b64decode(s_64))[2:-1]}')
            sleep(10)
    else:
#         print(f'{im_path}\nalready downloaded please check it')
        pass
        
#     print(f'we decode it back from {str(base64.b64decode(s_64))}')
#     return str(base64.b64decode(s_64))[2:-1]

In [27]:
ll = 'https://upload.wikimedia.org/wikipedia/commons/8/8c/Peter_Weinstein_New.jpg'
get_image(ll)


/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy84LzhjL1BldGVyX1dlaW5zdGVpbl9OZXcuanBn.jpg
already downloaded please check it


In [29]:
df_p["image_url"].to_csv('example.tsv', sep="\t")

0          https://upload.wikimedia.org/wikipedia/commons...
1          https://upload.wikimedia.org/wikipedia/commons...
2          https://upload.wikimedia.org/wikipedia/commons...
3          https://upload.wikimedia.org/wikipedia/commons...
4          https://upload.wikimedia.org/wikipedia/commons...
                                 ...                        
1355833    https://upload.wikimedia.org/wikipedia/commons...
1355834    https://upload.wikimedia.org/wikipedia/commons...
1355835    https://upload.wikimedia.org/wikipedia/commons...
1355836    https://upload.wikimedia.org/wikipedia/commons...
1355837    https://upload.wikimedia.org/wikipedia/commons...
Name: image_url, Length: 1355838, dtype: object

In [30]:
df_p["image_url"].to_csv(f'{root_path}/selected_url.tsv', sep="\t")

In [33]:
df_1 = df_p.iloc[:650000,:]
df_2 = df_p.iloc[650000:,:]

In [36]:
df_2["image_url"].to_csv(f'{root_path}/selected_url.tsv', sep="\t")

In [None]:
for index, row in tqdm(df_1.iterrows(), total=df_1.shape[0]):
    get_image(row["image_url"])

  0%|          | 0/650000 [00:00<?, ?it/s]

[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy8wLzBjLyVFMSU4MyU5OSVFMSU4MyU5MCVFMSU4MyU5NSVFMSU4MyU5OSVFMSU4MyU5MCVFMSU4MyVBMSVFMSU4MyU5OCVFMSU4MyVBMyVFMSU4MyVBMCVFMSU4MyU5OF8lRTElODMlQTElRTElODMlOTAlRTElODMlOUElRTElODMlOTAlRTElODMlOUIlRTElODMlOTAlRTElODMlOUMlRTElODMlOTMlRTElODMlOTYlRTElODMlQTAlRTElODMlOTAuanBn.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/0/0c/%E1%83%99%E1%83%90%E1%83%95%E1%83%99%E1%83%90%E1%83%A1%E1%83%98%E1%83%A3%E1%83%A0%E1%83%98_%E1%83%A1%E1%83%90%E1%83%9A%E1%83%90%E1%83%9B%E1%83%90%E1%83%9C%E1%83%93%E1%83%96%E1%83%A0%E1%83%90.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/2/29/Kentucky_License_Plate_%22In_God_We_Trust%22.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/d/dc/Nutcracker_Coin.png
HTTP Error 404: Not Found
I could not download https://uploa

HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/9/91/Coat_of_arms_of_Beverly_Hills%2C_California.png
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy85LzkyL0FiLWVfR2FybV9UdW1iJTJDX0FiLWVfZ2FybV9iYWxhJTJDX0xhcmlqYW4lMkNfRGFtYXZhbmQlMkNfTWF6YW5kYXJhbl9wcm92aW5jZV8lRDglQTclRDklODUlRDglQTclRDklODUlRDglQjIlRDglQTclRDglQUYlRDklODdfJUQ4JUEyJUQ4JUE4JURBJUFGJUQ4JUIxJUQ5JTg1XyVEOCVBOCVEOCVBNyVEOSU4NCVEOCVBNyVEOCU4Q18lRDklODQlRDglQTclRDglQjElREIlOEMlRDglQUMlRDglQTclRDklODYlRDglOENfJUQ5JTg1JUQ4JUE3JUQ4JUIyJUQ5JTg2JUQ4JUFGJUQ4JUIxJUQ4JUE3JUQ5JTg2Xy1fcGFub3JhbWlvLmpwZw==.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/9/92/Ab-e_Garm_Tumb%2C_Ab-e_garm_bala%2C_Larijan%2C_Damavand%2C_Mazandaran_province_%D8%A7%D9%85%D8%A7%D9%85%D8%B2%D8%A7%D8%AF%D9%87_%D8%A2%D8%A8%DA%AF%D8%B1%D9%85_%D8%A8%D8%A7%D9%84%D8%A7%D8%8C_%D9%84%D8%A7%D8%B1%DB%8C%D8%AC%D8%A7%

[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy82LzYwLyVENyU5MSVENyU5OSVENyVBQV8lRDclOTQlRDclOUIlRDclQTAlRDclQTElRDclQUFfJUQ3JTk0JUQ3JTk3JUQ3JTk1JUQ3JUE4JUQ3JTkxJUQ3JTk0XyVENyU5MSVENyVBMiVENyU5OSVENyVBOF8lRDclOTQlRDclQTIlRDclQUElRDclOTklRDclQTclRDclOTQuSlBH.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/6/60/%D7%91%D7%99%D7%AA_%D7%94%D7%9B%D7%A0%D7%A1%D7%AA_%D7%94%D7%97%D7%95%D7%A8%D7%91%D7%94_%D7%91%D7%A2%D7%99%D7%A8_%D7%94%D7%A2%D7%AA%D7%99%D7%A7%D7%94.JPG
<urlopen error [Errno -3] Temporary failure in name resolution>
I could not download https://upload.wikimedia.org/wikipedia/commons/2/2b/Camus_bahts.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy9mL2Y3LyVFMCVCOCU5RSVFMCVCOCVBMyVFMCVCOCVCMCVFMCVCOCVBRCVFMCVCOCVCOCVFMCVCOSU4MiVFMCVCOCU5QSVFMCVCOCVBQSVFMCVCOCU5Nl8lRTAlQjglQTclRTAlQjglQ

[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy9iL2I5LyVFMCVBNiU5QSVFMCVBNiU5NSVFMCVBNyU4RCVFMCVBNyVCMCVFMCVBNyU4NyVFMCVBNiVCNiVFMCVBNyU4RCVFMCVBNiVBQyVFMCVBNyVCMF8lRTAlQTYlQTglRTAlQTYlQkUlRTAlQTYlQUUlRTAlQTclODclRTAlQTclQjAlRTAlQTclODdfJUUwJUE2JTk2JUUwJUE3JThEJUUwJUE2JUFGJUUwJUE2JUJFJUUwJUE2JUE0XyVFMCVBNiU4OSVFMCVBNiVBRSVFMCVBNiVCRSVFMCVBNiU5RiVFMCVBNyU4MSVFMCVBNiVBRSVFMCVBNiVBOCVFMCVBNyU4MCVFMCVBNyVCMF8lRTAlQTYlQUUlRTAlQTYlQkUlRTAlQTYlOUMlRTAlQTYlQTRfJUUwJUE2JTg1JUUwJUE3JUIxJUUwJUE2JUI4JUUwJUE3JThEJUUwJUE2JUE1JUUwJUE2JUJGJUUwJUE2JUE0XyVFMCVBNiU5QSVFMCVBNiU5NSVFMCVBNyU4RCVFMCVBNyVCMCVFMCVBNyU4NyVFMCVBNiVCNiVFMCVBNyU4RCVFMCVBNiVBQyVFMCVBNyVCMF8lRTAlQTYlQjYlRTAlQTYlQkYlRTAlQTYlQjIlRTAlQTYlQkUlRTAlQTUlQTQuSlBH.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/b/b9/%E0%A6%9A%E0%A6%95%E0%A7%8D%E0%A7%B0%E0%A7%87%E0%A6%B6%E0%A7%8D%E0%A6%AC%E0%A7%B0_%E0%A6%A8%E0%A6%BE%E0%A6%AE%E0%A7%87

[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy81LzUxLyVEMCU5RiVEMCVCMCVEMSU4MCVEMCVCMCVEMCVCNF8lRDAlOUYlRDAlQkUlRDAlQjElRDAlQjUlRDAlQjQlRDElOEJfJUQwJUJEJUQwJUIwXyVEMCU5QSVEMSU4MCVEMCVCMCVEMSU4MSVEMCVCRCVEMCVCRSVEMCVCOV8lRDAlQkYlRDAlQkIlRDAlQkUlRDElODklRDAlQjAlRDAlQjQlRDAlQjhfMjRfJUQwJUI4JUQxJThFJUQwJUJEJUQxJThGXzE5NDVfJUQwJUIzLl8lMjgxNyUyOS5qcGc=.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/5/51/%D0%9F%D0%B0%D1%80%D0%B0%D0%B4_%D0%9F%D0%BE%D0%B1%D0%B5%D0%B4%D1%8B_%D0%BD%D0%B0_%D0%9A%D1%80%D0%B0%D1%81%D0%BD%D0%BE%D0%B9_%D0%BF%D0%BB%D0%BE%D1%89%D0%B0%D0%B4%D0%B8_24_%D0%B8%D1%8E%D0%BD%D1%8F_1945_%D0%B3._%2817%29.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy82LzZlLyVEMCU5RiVEMSU4MCVEMCVCRSVEMSU4MiVEMCVCNSVEMSU4MSVEMSU4Ml8lRDElODElRDElODAlRDAlQjUlRDElODklRDElODNfJUQwJTk4JUQwJUIyJUQwJUIwJU

HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/2/26/Antonis_Michaloglou_2016.png
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy80LzQyL1BvcnRyYWl0X29mX01hZGFtZV9KZWFuX1Ryb25jaGluXyUyOG4lQzMlQTllX0FubmVfTW9sJUMzJUE4bmVzJTI5JTJDX0plYW4tJUMzJTg5dGllbm5lX0xpb3RhcmQlMkNfMTc1OCUyQ19wYXN0ZWxfb25fdmVsbHVtJTJDXzYzLjZfYnlfNTAuNl9jbSUyQ19Mb3V2cmUlMkNfUGFyaXMuanBn.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/4/42/Portrait_of_Madame_Jean_Tronchin_%28n%C3%A9e_Anne_Mol%C3%A8nes%29%2C_Jean-%C3%89tienne_Liotard%2C_1758%2C_pastel_on_vellum%2C_63.6_by_50.6_cm%2C_Louvre%2C_Paris.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy9hL2FiL0NhcnRlX3Bvc3RhbGVfbW9udHJhbnRfbGFfbW9zcXUlQzMlQTllX2RlX1NpZGlfTWFuc291cl8lRDglQUMlRDglQTclRDklODUlRDglQjlfJUQ4JUIzJUQ5JThBJUQ4JU

[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy8xLzFhLyVEMCU5RiVEMCVCMCVEMSU4MCVEMCVCQV8lRDAlOTQlRDAlQjclRDAlQjUlRDElODAlRDAlQjYlRDAlQjglRDAlQkQlRDElODElRDAlQkElRDAlQkUlRDAlQjMlRDAlQkUlMkNfJUQwJTlEJUQwJUJFJUQwJUIyJUQwJUJFJUQxJTgxJUQwJUI4JUQwJUIxJUQwJUI4JUQxJTgwJUQxJTgxJUQwJUJBXzAyLmpwZw==.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/1/1a/%D0%9F%D0%B0%D1%80%D0%BA_%D0%94%D0%B7%D0%B5%D1%80%D0%B6%D0%B8%D0%BD%D1%81%D0%BA%D0%BE%D0%B3%D0%BE%2C_%D0%9D%D0%BE%D0%B2%D0%BE%D1%81%D0%B8%D0%B1%D0%B8%D1%80%D1%81%D0%BA_02.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy81LzUzLyVEMCU5RiVEMCVCMCVEMCVCQyVEMSU4RiVEMSU4MiVEMCVCRCVEMCVCOCVEMCVCQV8lRDAlQjQlRDAlQjUlRDElODAlRDAlQjUlRDAlQjIlRDElOEYlRDAlQkQlRDAlQkQlRDAlQkUlRDAlQjMlRDAlQkVfJUQwJUI3JUQwJUJFJUQwJUI0JUQxJTg3JUQwJUI1JUQxJTgxJUQxJTgyJUQwJUIyJUQwJUIwXyVE

[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy82LzZiLyVEOCVBNyVEOSU4NCVEOSU4NSVEOCVBQyVEOCVBNyVEOCVCMiVEOCVCMV8lRDglQTclRDklODQlRDklODIlRDglQUYlRDklOEElRDklODUlRDglQTlfJUQ4JUE3JUQ5JTg0JUQ4JUFGJUQ4JUE3JUQ4JUIxXyVEOCVBNyVEOSU4NCVEOCVBOCVEOSU4QSVEOCVCNiVEOCVBNyVEOCVBMV8xMV81NF8xOV85MzMwMDAuanBlZw==.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/6/6b/%D8%A7%D9%84%D9%85%D8%AC%D8%A7%D8%B2%D8%B1_%D8%A7%D9%84%D9%82%D8%AF%D9%8A%D9%85%D8%A9_%D8%A7%D9%84%D8%AF%D8%A7%D8%B1_%D8%A7%D9%84%D8%A8%D9%8A%D8%B6%D8%A7%D8%A1_11_54_19_933000.jpeg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy83LzdlL0RlYWZfb3JfSG9IX3BlcnNvbl9hdF9oaXNfd29ya3BsYWNlX3VzaW5nX2FfVmlkZW9fUmVsYXlfU2VydmljZV90b19jb21tdW5pY2F0ZV93aXRoX2FfaGVhcmluZ19wZXJzb25fdmlhX2FfVmlkZW9fSW50ZXJwcmV0ZXJfYW5kX3NpZ25fbGFuZ3VhZ2VfU1ZDQ18yMDA3X0JyaWdpdHRlX1

HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/7/75/2017_Hermann_Sch%C3%BCtzenh%C3%B6fer_%2836231845573%29.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/e/e5/Camila_2.png
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/2/2e/Christel-DeHaan.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy8wLzBkLyVEMCU5RiVEMCVCMCVEMCVCQyVEMSU4RiVEMSU4MiVEMCVCRCVEMCVCOCVEMCVCQV8lRDAlOUElRDAlQkUlRDAlQjElRDAlQjclRDAlQkUlRDAlQkQlRDElODNfJUQwJTk4Ll8lRDAlOTQuXyVEMSU4MyVEMCVCQi5fJUQwJUE3JUQwJUI1JUQwJUJCJUQxJThFJUQxJTgxJUQwJUJBJUQwJUI4JUQwJUJEJUQxJTg2JUQwJUI1JUQwJUIyXzE4OS4uanBn.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/0/0d/%D0%9F%D0%B0%D0%BC%D1%8F%D1%82%D0%BD%D0%B8%D0%BA_%D0%9A%D0%BE%D0%B1%D0%B7%D0%BE%D0%BD%D1%83_%D0%98._%D0%94._%D1%83%D0%BB._%D0%A7%

[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy9hL2E3L1NQXzMyMDNfU0RQNDVfd2l0aF9UcmFpbl81MSUyQ190aGVfU2FuX0pvYXF1aW5fRGF5bGlnaHRfYXJyaXZpbmdfYXRfT2FrbGFuZCUyQ19DQV9vbl9NYXlfMjIlMkNfMTk3MF8tLV8xX29mXzJfUGhvdG9zXyUyODMyODU4MDc3NzUxJTI5LmpwZw==.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/a/a7/SP_3203_SDP45_with_Train_51%2C_the_San_Joaquin_Daylight_arriving_at_Oakland%2C_CA_on_May_22%2C_1970_--_1_of_2_Photos_%2832858077751%29.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/a/ab/2020_Basketball_Hall_of_Fame_half_dollar_line_art_reverse.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/6/69/Dil_Jan_Khan.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/f/ff/Chris_Mansell.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/w

HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/en/d/dd/Parker_Frontier_Ball-point_Pen.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/8/8e/Stamps_of_Ecuador%2C_2006-102.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/a/a9/O.J.-Simpson-Crime-Scene-Photos-17a.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/2/23/Petr_Taticek_ERC.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy9iL2I3LyVFMCVCOCVBMiVFMCVCOCVCMSVFMCVCOCU4MSVFMCVCOCVBOSVFMCVCOSU4QyVFMCVCOCU5NyVFMCVCOCVBNyVFMCVCOCVCMiVFMCVCOCVBMyVFMCVCOCU5QSVFMCVCOCVCMiVFMCVCOCVBNV9EZW1vbl9HdWFyZGlhbl8lMjgxMSUyOS5qcGc=.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/b/b7/%E0%B8%A2%E0%B8%B1%E0%B8%81%E0%B8%A9%E0%B9%8C%E0%B8%97%E0%B8%A7%E0%B8%B2%E0%B

HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/8/8c/Bells_at_Temple_Square.png
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/9/98/Road_transport.JPG
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/en/e/ee/British_Army_Pathfinder_Platoon_Logo.svg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/en/3/34/Flag_of_Commander-in-Chief.svg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/f/fc/Kerber_Leonid.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy9mL2ZiL0lyYXFpX1QtNzJfdGFua3NfYW5kX2FuX00xMTNfQVBDX2Zyb21fdGhlX0lyYXFpX0FybXlfOXRoX01lY2hhbml6ZWRfRGl2aXNpb25fcGFzc190aHJvdWdoX2FfaGlnaHdheV9jaGVja3BvaW50X2luX011c2hhaGFkYSUyQ19JcmFxLmpwZw==.jpg'
I could not download https://upload.wikimedia.org/

[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy9kL2Q0LzIwMTIxMTE1X0IyX3BsYXRmb3JtX2luX0d1dGluZ19TdGF0aW9uJTJDX1RhaXBlaV9NZXRyb18lRTUlOEYlQjAlRTUlOEMlOTclRTYlOEQlQjclRTklODElOEIlRTUlOEYlQTQlRTQlQkElQUQlRTclQUIlOTlCMiVFNiU5QyU4OCVFNSU4RiVCMC5qcGc=.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/d/d4/20121115_B2_platform_in_Guting_Station%2C_Taipei_Metro_%E5%8F%B0%E5%8C%97%E6%8D%B7%E9%81%8B%E5%8F%A4%E4%BA%AD%E7%AB%99B2%E6%9C%88%E5%8F%B0.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/8/80/TP_Pakuwon_Tower_April_2018.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/8/83/Albert_Divo_at_the_1929_Targa_Florio.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/2/28/516sfs-emblem.jpg
HTTP Error 404: Not Found
I could not download https://uplo

HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/en/5/54/Bill_Collier_St_George.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/4/49/Prodi27marzo2007.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/8/84/Mohammad-reza-shah.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/en/f/f9/Ns_stanton_station.png
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/3/31/Sophie_Gr%C3%A9goire-Trudeau.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/3/36/Beloy.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy8yLzJhLzEzMTIwNl8lRUMlQTclOTElRUMlOUMlQkMlRUIlQTElOUNfJUVBJUIwJTgwJUVCJThBJTk0XyVFQSVCOCVCOF92aXAlRUMlOEIlOUMlRUMlODIlQUMlRUQlOUElOENfJUVBJUI5JTgw

HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/f/f0/Honda_Crosstour_facelift_01_China_2015-04-13.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy8yLzIyLyVEMCVBMSVEMCVCQSVEMCVCRSVEMCVCQiVEMSU5NiVEMCVCMiVEMSU4MSVEMSU4QyVEMCVCQSVEMSU5Nl8lRDAlOTElRDAlQjUlRDElODElRDAlQkElRDAlQjglRDAlQjQlRDAlQjguXyVEMCU5QyVEMSU5NiVEMSU4MSVEMSU4NiVEMCVCNV8lRDAlQjIlRDElOTYlRDAlQjQlRDAlQkYlRDAlQkUlRDElODclRDAlQjglRDAlQkQlRDAlQkElRDElODNfJUQwJUJGJUQwJUJFJUQxJTgwJUQxJTgzJUQxJTg3XyVEMCVCMCVEMCVCMiVEMSU4MiVEMCVCRSVEMSU4OCVEMCVCQiVEMSU4RiVEMSU4NSVEMSU4M18lRDAlOUNfMDZfJTI4JUQwJTlBJUQwJUI4JUQxJTk3JUQwJUIyLSVEMCVBNyVEMCVCRSVEMCVCRiUyOS4uSlBH.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/2/22/%D0%A1%D0%BA%D0%BE%D0%BB%D1%96%D0%B2%D1%81%D1%8C%D0%BA%D1%96_%D0%91%D0%B5%D1%81%D0%BA%D0%B8%D0%B4%D0%B8._%D0%9C%D1%96%D1%81%D1%86%D0%B5_%D0%B2%D1%96%D0%B4%D0%BF%D0%BE%D1%

HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/d/d2/Argenis.png
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/7/73/Wmignolo.JPG
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/f/f6/Parco_Sempione_-Milano.bmp.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/en/e/e9/Wimbit-NYC-1731.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/e/e1/FreedomParkwayFormerInterstate485.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/0/0f/Maria_Zacharchenko.png
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy8xLzE5LyUyMiVEMCVBRSVEMCU5QSVEMCU5RSVEMCVBMSUyMiUyQ18lRDAlQkYlRDAlQkUlRDElODFfJUQwJTlEJUQwJUJFJUQwJUIyJUQwJUJFJUQxJTgxJUQwJUJGJUQwJUIwJUQxJTg

[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy82LzZjL1JheW1vbmRfRHVjaGFtcC1WaWxsb24lMkNfMTkxMiUyQ19MYV9NYWlzb25fQ3ViaXN0ZV8lMjhDdWJpc3RfSG91c2UlMjlfYXRfdGhlX1NhbG9uX2QlMjdBdXRvbW5lJTJDXzE5MTIlMkNfZGV0YWlsX29mX3RoZV9lbnRyYW5jZS5fUGhvdG9ncmFwaF9ieV9EdWNoYW1wLVZpbGxvbi5qcGc=.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/6/6c/Raymond_Duchamp-Villon%2C_1912%2C_La_Maison_Cubiste_%28Cubist_House%29_at_the_Salon_d%27Automne%2C_1912%2C_detail_of_the_entrance._Photograph_by_Duchamp-Villon.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/8/86/21_FS.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/b/bb/MC-Galaxy.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy9hL2EwLyVEMCU5RSVEMCVCMyVEMCVCQiVEMSU4RiVEM

HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/b/be/Guido_Marzulli_-_Autoritratto_1969.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy85LzlmLyVEMCU5NCVEMCVCNSVEMCVCQiVEMCVCRSVEMCVCMiVEMCVCRSVEMCVCOV8lRDAlQjQlRDAlQkUlRDAlQkMlMkNfJUQwJTlEJUQwJUJFJUQwJUIyJUQwJUJFJUQxJTgxJUQwJUI4JUQwJUIxJUQwJUI4JUQxJTgwJUQxJTgxJUQwJUJBXzIuanBn.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/9/9f/%D0%94%D0%B5%D0%BB%D0%BE%D0%B2%D0%BE%D0%B9_%D0%B4%D0%BE%D0%BC%2C_%D0%9D%D0%BE%D0%B2%D0%BE%D1%81%D0%B8%D0%B1%D0%B8%D1%80%D1%81%D0%BA_2.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/8/85/Mannerheimin_aukio.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/7/79/Ktmb_class_91_and_class_92_ipoh_station.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datase

HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/0/00/Manastyrsky_Antin_Autoportrait_1919.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy8zLzM4LzE2X1RoZV9zaGlwX2NhbWVfbmVhcmVyX2FuZF9uZWFyZXItSWxsdXN0cmF0aW9uX2J5X1BhdWxfSGFyZHlfZm9yX1JvZ3Vlc19vZl90aGVfRmllcnlfQ3Jvc3NfYnlfU2FtdWVsX1dhbGtleS1Db3VydGVzeV9vZl9Ccml0aXNoX0xpYnJhcnkuanBn.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/3/38/16_The_ship_came_nearer_and_nearer-Illustration_by_Paul_Hardy_for_Rogues_of_the_Fiery_Cross_by_Samuel_Walkey-Courtesy_of_British_Library.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy83LzdmL0luc2NyaWJlZF9QZWRlc3RhbF9vZl9CdWRkaGFfSW1hZ2VfSW5zdGFsbGVkX2luX3RoZV9SZWlnbl9vZl9CdWRoYV9HdXB0YV8tX0NpcmNhXzQ4MF9DRV8tX0dvdmluZF9OYWdhcl8tX0FDQ05fODItMjQwXy1fR292ZXJubWVudF9

HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/4/44/AndersononSTS-107.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/b/b5/Josef_Nocar_%2810_December_1886-%29.png
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy83LzcyL1plYnJhX2FuZF9TbmFrZSUyQ19TdGFmZm9yZHNoaXJlJTJDX0VuZ2xhbmQlMkNfMTg1MC0xODcwJTJDX2dsYXplZF9lYXJ0aGVud2FyZV8tX1BvcnRsYW5kX011c2V1bV9vZl9BcnRfLV9Qb3J0bGFuZCUyQ19NYWluZV8tX0RTQzA0MzU3LmpwZw==.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/7/72/Zebra_and_Snake%2C_Staffordshire%2C_England%2C_1850-1870%2C_glazed_earthenware_-_Portland_Museum_of_Art_-_Portland%2C_Maine_-_DSC04357.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/b/b5/Ceiling_of_Nature_Church.JPG
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/ima

[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy82LzY0L0lzbGVfb2ZfTWFuc19maW5hbnNtaW5pc3RyYXJfQWxhbl9CZWxsX3Nrcml2ZXJfdW5kZXJfYXZ0YWxfdGlsbHNhbW1hbnNfbWVkX3NpbmFfbm9yZGlza2Ffa29sbGVnb3JfdmlkX05vcmRpc2thX1JhZGV0c19zZXNzaW9uX2lfT3Nsby5fMjAwNy0xMC0zMC5fRm90by1fTWFnbnVzX0Zyb2RlcmJlcmcuanBn.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/6/64/Isle_of_Mans_finansministrar_Alan_Bell_skriver_under_avtal_tillsammans_med_sina_nordiska_kollegor_vid_Nordiska_Radets_session_i_Oslo._2007-10-30._Foto-_Magnus_Froderberg.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/7/70/Ravindra_Vijay.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy8yLzJlL1BlbC1BaXJfQXZpYXRpb24lMkNfY29udHJhY3RlZF9mb3JfQW1idWxhbmNlX1ZpY3RvcmlhJTJDXyUyOFZILVZBRSUyOV9SYXl0aGVvbl9CZWVjaF9TdXB

HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/7/78/Fes.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/f/fc/%E0%B8%A5%E0%B8%B2%E0%B8%A2%E0%B9%80%E0%B8%8B%E0%B9%87%E0%B8%99_%E0%B8%9B%E0%B8%A3%E0%B8%B0%E0%B8%88%E0%B8%B4%E0%B8%99_%E0%B8%88%E0%B8%B1%E0%B9%88%E0%B8%99%E0%B8%95%E0%B8%AD%E0%B8%87.png
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/6/6d/Alliance_av.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy8yLzI1LyVEMCVBMSVEMCVCQSVEMSU4MyVEMCVCQiVEMSU4QyVEMCVCRiVEMSU4MiVEMSU4MyVEMSU4MCVEMCVCMF8lRDAlOTUlRDAlQkElRDAlQjAlRDElODIlRDAlQjUlRDElODAlRDAlQjglRDAlQkQlRDElOEJfSUlfJUQwJUIyXyUyMiVEMCVBMCVEMSU4MyVEMSU4MSVEMSU4MSVEMCVCQSVEMCVCRSVEMCVCQ18lRDAlOUMlRDElODMlRDAlQjclRDAlQjUlRDAlQjUlMjIlMkNfJUQwJUIzLiVEMCVBMSVEMCVCMCVEMCVCRCVEMCVCQSVEMSU4Mi0lRDAlOUYlRDAlQjUlRDElO

HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/e/e6/Innenansicht_nach_der_Sanierung._Foto_Uwe_Dettmar_%282013%29.JPG
HTTP Error 404: Not Found
I could not download http://upload.wikimedia.org/wikipedia/commons/4/42/Wappen_Bettringen.png
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/5/5d/Ines_maricic_victoria.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy8wLzA4L0FsYmVydF9DYW11cyUyQ19nYWduYW50X2RlX3ByaXhfTm9iZWwlMkNfcG9ydHJhaXRfZW5fYnVzdGUlMkNfcG9zJUMzJUE5X2F1X2J1cmVhdSUyQ19mYWlzYW50X2ZhY2VfJUMzJUEwX2dhdWNoZSUyQ19jaWdhcmV0dGVfZGVfdGFiYWdpc21lLmpwZw==.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/0/08/Albert_Camus%2C_gagnant_de_prix_Nobel%2C_portrait_en_buste%2C_pos%C3%A9_au_bureau%2C_faisant_face_%C3%A0_gauche%2C_cigarette_de_tabagisme.jpg
HTTP Error 404: Not Found
I could not d

[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy8xLzFmLyVEMCU5RiVEMSU5NiVEMCVCMiVEMCVCNCVEMCVCNSVEMCVCRCVEMCVCRCVEMCVCOCVEMCVCOV8lRDElODQlRDAlQjAlRDElODElRDAlQjAlRDAlQjRfJUQwJUJDJUQwJUIwJUQxJTk0JUQxJTgyJUQwJUJBJUQxJTgzXyVEMCVCMl8lRDAlOUMlRDAlQjAlRDAlQkIlRDElOTYlRDElOTclRDAlQjIlRDElODYlRDElOEYlRDElODUuSlBH.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/1/1f/%D0%9F%D1%96%D0%B2%D0%B4%D0%B5%D0%BD%D0%BD%D0%B8%D0%B9_%D1%84%D0%B0%D1%81%D0%B0%D0%B4_%D0%BC%D0%B0%D1%94%D1%82%D0%BA%D1%83_%D0%B2_%D0%9C%D0%B0%D0%BB%D1%96%D1%97%D0%B2%D1%86%D1%8F%D1%85.JPG
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/1/18/Nang_Khin_Zay_Yar.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/4/49/A_Roma.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/1/19/Harem-M%C3%

HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/c/c2/Seoul_City_Hall.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy81LzU4L0FybW9yeV9TaG93JTJDXzE5MTMlMkNfdGhlX0N1YmlzdF9yb29tJTJDX1JheW1vbmRfRHVjaGFtcC1WaWxsb24lMkNfQWxiZXJ0X0dsZWl6ZXMlMkNfTWFyY2VsX0R1Y2hhbXAlMkNfQWxleGFuZGVyX0FyY2hpcGVua28lMkNfTmV3X1lvcmtfVHJpYnVuZSUyQ18xN19GZWJydWFyeV8xOTEzJTJDX3AuXzcuanBn.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/5/58/Armory_Show%2C_1913%2C_the_Cubist_room%2C_Raymond_Duchamp-Villon%2C_Albert_Gleizes%2C_Marcel_Duchamp%2C_Alexander_Archipenko%2C_New_York_Tribune%2C_17_February_1913%2C_p._7.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/2/22/Photo_by_Troy_Conrad.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/e/ea/First_National_SME_Conferen

[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy8yLzIyL1BsYXphX2RlX01heW9fLV9TaWx2aW9fUm9kciVDMyVBRGd1ZXpfeV9WJUMzJUFEY3Rvcl9IZXJlZCVDMyVBRGFfZHVyYW50ZV9lbF9GZXN0aXZhbF9Ub2Rvc19Tb21vc19BcmdlbnRpbm9zLTI1LW1heS0wNi1wcmVzaWRlbmNpYS1nb3Zhci5qcGc=.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/2/22/Plaza_de_Mayo_-_Silvio_Rodr%C3%ADguez_y_V%C3%ADctor_Hered%C3%ADa_durante_el_Festival_Todos_Somos_Argentinos-25-may-06-presidencia-govar.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/2/28/Asteras2018.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/9/99/Ryan_T._Murphy.png
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy9hL2E2L1N0YW5kaW5nX0dvZGRlc3NfU2FzaHRpX0JldHdlZW5fVHdvX1dhcnJpb3JzX1NrYW5kYV9hbmRfVmlzYW

HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/e/e6/Paul_De_Grauwe.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/0/05/Ren%C3%A9_Weiler.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/0/0e/Lady_Tremaine.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/3/3b/%D0%9E%D1%82%D0%B5%D0%BB%D1%8C_%D0%A3%D0%BA%D1%80%D0%B0%D0%B8%D0%BD%D0%B0.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy8yLzIwLyVENSU5MCVENSVBMSVENiU4NiVENiU4NiVENSVBQiVENSVBQl8lRDUlQjAlRDUlQjglRDYlODIlRDUlQjclRDUlQTElRDYlODAlRDUlQjElRDUlQTElRDUlQjYlRDUlQThfJUQ1JUIwJUQ1JUExJUQ1JUI0JUQ1JUExJUQ1JUI2JUQ1JUI4JUQ2JTgyJUQ1JUI2XyVENSVBNCVENSVCQSVENiU4MCVENSVCOCVENiU4MSVENSVBQl8lRDUlQTQlRDUlQUIlRDUlQjQlRDUlQTElRDYlODFfMDMuSlBH.jpg'
I could not download https:

HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/en/2/21/A_breakdancer_performing_in_Cologne%2C_2017_%282_of_2%29.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/0/09/100_taka_1972_2.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy9kL2RmL0JyaWplc2hfUGF0aGFrJTJDX01pbmlzdGVyX29mX0xhd18lMjZfSnVzdGljZV9hbmRfQWRkaXRpb25hbF9FbmVyZ3lfUmVzb3VyY2VzX2luX1V0dGFyX1ByYWRlc2glMkNfaW5hdWd1cmF0aW5nX3RoZV9iYWRhX21hbmdhbF9mZXN0aXZpdGllc19hdF9VUE5FREFfb2ZmaWNlX2luX1ZpYmh1dGlfS2hhbmRfJTI4TWF5XzIwMTclMjkuanBn.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/d/df/Brijesh_Pathak%2C_Minister_of_Law_%26_Justice_and_Additional_Energy_Resources_in_Uttar_Pradesh%2C_inaugurating_the_bada_mangal_festivities_at_UPNEDA_office_in_Vibhuti_Khand_%28May_2017%29.jpg
HTTP Error 404: Not Found
I could not download https://uploa

HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/a/af/David_Duke_em_1978.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy85LzkzLyVEOCVBNiVEQiU4OCVEOCVCMSVEQiU4OCVEOSU4NSVEQSU4NiVEOSU4OV8lRDglQUUlREIlOTUlRDklODQlRDklODIlRDglQTYlRDglQTclRDglQjElRDglQTdfJUQ4JUE2JUQ4JUE3JUQ5JThBJUQ4JUIxJUQ5JTg4JUQ5JUJFJUQ5JTg4JUQ4JUIxJUQ4JUFBJUQ5JTg5Xy1fJUQwJTkwJUQxJThEJUQxJTgwJUQwJUJFJUQwJUJGJUQwJUJFJUQxJTgwJUQxJTgyXyVEMCVBMyVEMSU4MCVEMSU4MyVEMCVCQyVEMSU4NyVEMCVCOF8lRDAlQkElRDAlQkUlRDElODAlRDAlQjglRDAlQjQlRDAlQkUlRDElODAlRDAlQjBfLV8lQzMlOUNyJUMzJUJDbXFpX0FpcnBvcnRfQ29ycmlkb3JfLV9wYW5vcmFtaW8uanBn.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/9/93/%D8%A6%DB%88%D8%B1%DB%88%D9%85%DA%86%D9%89_%D8%AE%DB%95%D9%84%D9%82%D8%A6%D8%A7%D8%B1%D8%A7_%D8%A6%D8%A7%D9%8A%D8%B1%D9%88%D9%BE%D9%88%D8%B1%D8%AA%D9%89_-_%D0%90%D1%8D%D1%80%D0%BE%D0%BF%D0%BE%D1%80%D1%82_

HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/4/4c/Maureen_O%27Connor.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/en/d/df/51_East_Main_Street%2C_Yarmouth.jpeg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy84Lzg5LzAyX09uZV9jYXVnaHRfbWVfcm91Z2hseV9ieV90aGVfdGhyb2F0LUlsbHVzdHJhdGlvbl9ieV9QYXVsX0hhcmR5X2Zvcl9Sb2d1ZXNfb2ZfdGhlX0ZpZXJ5X0Nyb3NzX2J5X1NhbXVlbF9XYWxrZXktQ291cnRlc3lfb2ZfQnJpdGlzaF9MaWJyYXJ5LmpwZw==.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/8/89/02_One_caught_me_roughly_by_the_throat-Illustration_by_Paul_Hardy_for_Rogues_of_the_Fiery_Cross_by_Samuel_Walkey-Courtesy_of_British_Library.jpg
HTTP Error 404: Not Found
I could not download http://upload

HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/en/3/30/Kelley_Stand_Road.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/8/8b/Elena_Murariu_Portrait.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/3/35/Unrooted_Armenian_gravestones_in_a_church_yard_in_Velistsikhe%2C_Georgia.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy9iL2I1L0FydG90eXBlc19vZl90aGVfcmVtb3ZhbF90cmFuc3BvcnRfYW5kX2VyZWN0aW9uX29mX0NsZW9wYXRyYXNfTmVlZGxlX2Zyb21fRWd5cHRfdG9fdGhlX01ldHJvcG9saXRhbl9NdXNldW1faW5fTmV3X1lvcmtfaW5fRWd5cHRpYW5fb2JlbGlza3NfUGxhdGVfbm9fSS5qcGc=.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/b/b5/Artotypes_of_the_removal_transport_and_erection_of_Cleopatras_Needle_from_Egypt_to_the_Metropolitan_Museum_in_New_York_in_Egyptian_obelisks_Plate_no_I.jpg
[

[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy82LzY1L1RoZV9NYWtzeW1vdnljaF9TY2llbnRpZmljX0xpYnJhcnlfJTI4JUQwJTlEJUQwJUIwJUQxJTgzJUQwJUJBJUQwJUJFJUQwJUIyJUQwJUIwXyVEMCVCMSVEMSU5NiVEMCVCMSVEMCVCQiVEMSU5NiVEMCVCRSVEMSU4MiVEMCVCNSVEMCVCQSVEMCVCMF8lRDElOTYlRDAlQkMlRDAlQjUlRDAlQkQlRDElOTZfJUQwJTlDLl8lRDAlOUMlRDAlQjAlRDAlQkElRDElODElRDAlQjglRDAlQkMlRDAlQkUlRDAlQjIlRDAlQjglRDElODclRDAlQjAlMjkuanBn.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/6/65/The_Maksymovych_Scientific_Library_%28%D0%9D%D0%B0%D1%83%D0%BA%D0%BE%D0%B2%D0%B0_%D0%B1%D1%96%D0%B1%D0%BB%D1%96%D0%BE%D1%82%D0%B5%D0%BA%D0%B0_%D1%96%D0%BC%D0%B5%D0%BD%D1%96_%D0%9C._%D0%9C%D0%B0%D0%BA%D1%81%D0%B8%D0%BC%D0%BE%D0%B2%D0%B8%D1%87%D0%B0%29.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/d/d6/Chincoteague_National_Wildlife_Refuge_-_A_-_Stierch.JPG
HTTP Error 404: Not Found
I coul

HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/9/9f/Moucharabier_-_panoramio.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/9/95/%D0%91%D1%83%D0%B4%D0%B8%D0%BD%D0%BE%D0%BA_%D0%92%D0%B5%D1%80%D1%85%D0%BE%D0%B2%D0%BD%D0%BE%D1%97_%D0%A0%D0%B0%D0%B4%D0%B8_%D0%A3%D0%A0%D0%A1%D0%A0_%D0%9A%D0%B8%D1%97%D0%B2_%D0%93%D1%80%D1%83%D1%88%D0%B5%D0%B2%D1%81%D1%8C%D0%BA%D0%BE%D0%B3%D0%BE_%D0%9C%D0%B8%D1%85%D0%B0%D0%B9%D0%BB%D0%B0_%D0%B2%D1%83%D0%BB.%2C_5.JPG
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/e/ec/Proposed_Cork_light_rail_map_from_CMATS.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy8yLzI0L1kuTS5DLkEuX2VtYmxlbV9mb3JtZWRfYnlfb2ZmaWNlc

HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/b/b6/DC_Mathew_with_Bestuurscollege_c._1956.png
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/e/e7/R_veltze_presidente.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/8/86/Mohammad_Rasoulof-pic00001.1.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy85LzlmLzEwMzktJUQwJUI5XyVEMCVCNyVEMCVCNSVEMCVCRCVEMSU5NiVEMSU4MiVEMCVCRCVEMCVCOCVEMCVCOV8lRDElODAlRDAlQjAlRDAlQkElRDAlQjUlRDElODIlRDAlQkQlRDAlQjglRDAlQjlfJUQwJUJGJUQwJUJFJUQwJUJCJUQwJUJBLnBuZw==.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/9/9f/1039-%D0%B9_%D0%B7%D0%B5%D0%BD%D1%96%D1%82%D0%BD%D0%B8%D0%B9_%D1%80%D0%B0%D0%BA%D0%B5%D1%82%D0%BD%D0%B8%D0%B9_%D0%BF%D0%BE%D0%BB%D0%BA.png
HTTP Error 404: Not Found
I could not download https://upl

[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy8zLzMxL0NoYXVyYXNpX0d1bWJhZF8lMjg4NF9kb21lcyUyOV9pc19hX3NxdWFyZV9uaW5lX2RvbWVkX3N0cnVjdHVyZV9pbl9hX3dhbGxlZF9jb3VydHlhcmRfd2l0aF90d29fZ3JhdmVzX3VuZGVyX2NlbnRyYWxfZG9tZS5fVGhlX3Byb2JhYmxlX2RhdGVfYXNzaWduZWRfdG9fdGhpc19ndW1iYWRfbGF0ZV8xNXRoX29yX2Vhcmx5XzE2dGhfY2VudHVyeS4uSlBH.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/3/31/Chaurasi_Gumbad_%2884_domes%29_is_a_square_nine_domed_structure_in_a_walled_courtyard_with_two_graves_under_central_dome._The_probable_date_assigned_to_this_gumbad_late_15th_or_early_16th_century..JPG
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/1/15/Structures_%26_Culture_Moveable_Museum.JPG
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy80LzRiLyVFMCVBRSVBNCVFMCVBRSU5RSVFMCVB

HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/2/2f/Spoorlijn_winterswijk_zevenaar.png
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy8xLzE2L0ltYWdlc19mcm9tX3RoZV9saWZlX29mX0NocmlzdF8tX1RoZV9iYXB0aXNtX29mX0NocmlzdF9ieV9TdF9Kb2huX3RoZV9CYXB0aXN0JTJDX2FuX2FuZ2VsX2hvbGRzX0NocmlzdCUyN3Nfcm9iZV8tX1BzYWx0ZXJfb2ZfRWxlYW5vcl9vZl9BcXVpdGFpbmVfJTI4Y2EuXzExODUlMjlfLV9LQl83Nl9GXzEzJTJDX2ZvbGl1bV8wMTlyLmpwZw==.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/1/16/Images_from_the_life_of_Christ_-_The_baptism_of_Christ_by_St_John_the_Baptist%2C_an_angel_holds_Christ%27s_robe_-_Psalter_of_Eleanor_of_Aquitaine_%28ca._1185%29_-_KB_76_F_13%2C_folium_019r.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/4/44/Flag_of_Mongmong-Toto-Maite%2C_Guam.png
HTTP Error 404: Not Found
I could not download https://u

HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/7/70/The_Facade_Of_The_31_Building.JPG
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/3/34/Jugal_Kishore_Choudhury.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/4/45/Organik_and_drake.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/a/af/%E5%8C%97%E4%BA%AC%E5%9C%B0%E9%93%816%E5%8F%B7%E7%BA%BF%E6%9C%9D%E9%98%B3%E9%97%A8%E7%AB%99%E7%AB%99%E5%8E%85%E5%A3%81%E7%94%BB.JPG
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cDovL3VwbG9hZC53aWtpbWVkaWEub3JnL3dpa2lwZWRpYS9jb21tb25zLzMvM2YvJUQwJTkyXyVEMSU4NiVEMCVCNSVEMCVCQiVEMCVCRSVEMCVCQ18lRDAlQjMlRDAlQkUlRDElODAlRDAlQkUlRDAlQjRfJUQwJUJGJUQxJTgwJUQwJUI4JUQwJUI3JUQwJUI1JUQwJUJDJUQwJUI4JUQxJTgxJUQxJTgyXyVEMCVCOF8lRDElODMlRDElOEUlRDElODIlRDAlQjUlRDAlQkQuanBn.jpg'
I could not download ht

HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/c/c8/Fairchild_Metroliner_%28BinAir%29_%284666533560%29_%282%29.jpg


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/b/bb/Pravik-volodimir.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/e/e2/New_Taipei_City_2018.png
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/5/5e/Westfield_Valencia_indoor_fountain.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/b/b4/William_Frank_1906.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://

HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/7/7e/Alice_Sheppard_and_Laurel_Lawson_perform_%22Excerpt_from_Snapshot_%28Minsky%27s_Burlesque%2C_New_Jersey%2C_ca._1954%29%22_-_2.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/d/d2/CHENNITHALA_2012DSC_0062.JPG
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/1/15/Alexandru_G._Golescu1.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wik

HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/en/9/93/Nick_Nanton_Picture_with_Family.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/d/d4/B%C5%99ehy%2C_municipal_office.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/b/b3/Official_portrait_of_Paul_Howell_MP_crop_2.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/en/1/1d/Showfield_Okehampton_RFC.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikiped

HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/3/3d/Zastava_Florida_In.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/e/ef/JT_Wyman_2012-02-25_2.JPG
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/9/94/YB_Alan_Ling_Sie_Kiong.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/2/21/Lindenshade_circ

HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download http://upload.wikimedia.org/wikipedia/commons/9/96/Tuggeranong_Hyperdome.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download http://upload.wikimedia.org/wikipedia/commons/8/8d/110_Livingston_J2_jeh.JPG
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/6/61/2017_American_Liberty_225th_Anniversary_Union_Obverse.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/b/bd/Porsche_917_-_H_Kelleners_1970-05-31.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent po

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/3/3c/Civil_Service_Development_Inst._stop_board_20060729.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/8/8f/Mehrafza_Mirzazad_Barijugh_in_Chapel_Hill%2C_NC%2C_2013.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/0/0b/ABT_e-Transporter%2C_GIMS_2019%2C_Le_Grand-Saconnex_%28GIMS9978%29.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/w

HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/f/fa/Map_%D0%96%D0%B5%D0%BB%D0%B5%D0%B7%D0%BD%D0%BE%D0%B4%D0%BE%D1%80%D0%BE%D0%B6%D0%BD%D0%B0%D1%8F_%D0%BB%D0%B8%D0%BD%D0%B8%D1%8F_%D0%9A%D0%BE%D0%BC%D1%81%D0%BE%D0%BC%D0%BE%D0%BB%D1%8C%D1%81%D0%BA-%D0%BD%D0%B0-%D0%90%D0%BC%D1%83%D1%80%D0%B5_%E2%80%94_%D0%94%D0%B5%D0%B6%D0%BD%D1%91%D0%B2%D0%BA%D0%B0_2020-04-07.svg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/4/46/Will_Redmond_20151010.png
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikip

HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/c/c3/Saegmuller.jpeg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/b/ba/Joseph_Roche_-_Germany.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/3/37/Hattem_voorkant.JPG
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/9/97/D%C5%99%C3%ADnov_KL_flag.jpg


HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/b/ba/Big_L_and_Rakim.png
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy83Lzc4L0F0YXQlQzMlQkNya19Cb3VsZXZhcmQlMkNfRmFjdWx0eV9vZl9MYW5ndWFnZSUyQ19IaXN0b3J5X2FuZF9HZW9ncmFwaHklMkNfJUM0JUIwc21ldF9QYXNoYV9HaXJscyUyN19JbnN0aXR1dGUlMkNfMTk0MHNfJTI4MTY4NTI0NjEyMjUlMjkuanBn.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/7/78/Atat%C3%BCrk_Boulevard%2C_Faculty_of_Language%2C_History_and_Geography%2C_%C4%B0smet_Pasha_Girls%27_Institute%2C_1940s_%2816852461225%29.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/f/fe/Transport_3.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy8zLzNmLyVEMCVBNiVEMCVCNSVEMSU4MCVEMCVCQSVEMCVCMiVEMCVCMF8lRDAlOUQlRDAlQjUlRDAlQkYlRDAl

HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/f/fa/Kazan-railstation-n.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/6/68/%D8%B5%D9%81%D9%88%D8%AA_%D8%AD%D8%AC%D8%A7%D8%B2%D9%8A_%D9%81%D9%8A_%D9%85%D9%8A%D8%AF%D8%A7%D9%86_%D8%A7%D9%84%D8%AA%D8%AD%D8%B1%D9%8A%D8%B1.png
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/7/75/Greece_Kazakhstan_Locator.png
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/5/59/Young_Leslie_Goonew

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/5/56/Municipalities_of_Serbia_Trgovi%C5%A1te.png
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/6/65/Huxley_crater_AS17-M-2904.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/7/78/Ved_Stranden_%281908%29.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/com

HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/8/8e/Dominello_with_coalition_leadership.JPG
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/e/e4/Dantes_View_1.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/6/62/Porshnev.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/9/9b/VMM-268_insignia.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.o

HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download http://upload.wikimedia.org/wikipedia/commons/5/5a/Warrington%2C_Andy.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/8/8c/Charles_Frederick_Chubb%2C_solicitor_and_mayor_of_Ipswich%2C_Queensland%2C_Australia.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/e/eb/Divisional_Public_School_Faisalabad_-_School_mainground.png
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_p

[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy8zLzM3LyVEMCVBOCVEMCVCMCVEMCVCQiVEMCVCMiVEMCVCMF8lRDAlOUElRDAlQjglRDAlQkElRDAlQkUlRDAlQjQlRDAlQjclRDAlQjVfJUQwJTkwJUQwJUIyJUQxJTgyJUQwJUJFJUQwJUJGJUQwJUJFJUQxJTgwJUQxJTgyJUQxJTgwJUQwJUI1JUQxJTgyLl8xOTIwLmpwZw==.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/3/37/%D0%A8%D0%B0%D0%BB%D0%B2%D0%B0_%D0%9A%D0%B8%D0%BA%D0%BE%D0%B4%D0%B7%D0%B5_%D0%90%D0%B2%D1%82%D0%BE%D0%BF%D0%BE%D1%80%D1%82%D1%80%D0%B5%D1%82._1920.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/0/00/Henry_S._Aurand.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https

HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/d/da/2020-07-02_15_36_26_will_rename_and_categorize_soon_50.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download http://upload.wikimedia.org/wikipedia/commons/f/f7/I-12_eastbound_ramp_at_LA_59_Clarification_for_I-59.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/4/46/Hugh_Patrick_McKenna_FRCN.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/4/4e/Ul._Nad_%C5%81yn%C4%85_25%2C_Bart%C4%85g.JPG
[Errno 36] File name too long: 

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/b/b9/Volkswagen_Passat_sedan_--_08-12-2010.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/4/4a/P1210032_%D0%B2%D1%83%D0%BB._%D0%9F%D0%BE%D0%B4%D1%96%D0%BB%D1%8C%D1%81%D1%8C%D0%BA%D0%B0%2C_39_%D0%9F%D0%BE%D0%B6%D0%B5%D0%B6%D0%BD%D0%B5_%D0%B4%D0%B5%D0%BF%D0%BE.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/f/f6/Vladimir_Suleimanov_2011.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to g

HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/b/b7/Mohammad_Bolboli_-_24_August_2019.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/1/17/Petlyakov_Pe-2_at_Poltava%2C_Russia.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/b/b0/India_-_Kolkata_street_beggar_-_3246.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.

HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/f/fc/Rose_bud_%286027707280%29.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/4/42/Rembrandt_Harmensz._van_Rijn_095.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/4/40/Khodadoust_%2816%29.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/e/e6/T

HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/d/d2/Simajigawa-2086-r1.JPG
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/1/19/Songbin_Road_Station.JPG
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy8yLzI2L0FSWkFLQU5fTkVHSFVUU0lfVkFOUV9NT05BU1RFUllfJUQ1JThFJUQ1JUExJUQ1JUI2JUQ1JUExJUQ1JUFGJUQ1JUExJUQ1JUI2XyVENSVCMCVENSVBMSVENSVCNCVENSVBMSVENSVBQyVENSVBQiVENiU4MF8lRDUlODYlRDUlQTUlRDUlQjIlRDUlQjglRDYlODIlRDYlODElRDUlQUJfJUQ1JUJFJUQ1JUExJUQ1JUI2JUQ2JTg0XyUyOCVENCVCMSVENiU4MCVENSVBNiVENSVBMSVENSVBRiVENSVBMSVENSVCNiVENSVBQl8lRDUlQkUlRDUlQTElRDUlQjYlRDYlODQlMkNfJUQ

HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/1/15/Argylebuilding107.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download http://upload.wikimedia.org/wikipedia/commons/4/4a/Croatia_Slovenia_Locator.png
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/3/37/388SM_City_Taytay_05.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/en/e/e5/Badagada_High-School_1988_Gradutes_Group-Picture_1987.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy8yLzJkLyVENSU4RSVENSVBMSVENSVBNiVENSVBMyVENSVBNSVENSVCNl8lRDUlOEQlRDUlQTElRDYlODAlRDUlQ

HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/e/ec/Norcatur%2C_Kansas_1937_city_hall_from_NW_1.JPG
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/5/57/Paulwaggoner2010.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy84LzhmL1VTX05hdnlfMDMwNzA4LU4tNTg2MkQtMTI3X01lbWJlcnNfb2ZfdGhlX1UuUy5fTmF2eV9DZXJlbW9uaWFsX0d1YXJkX3N0YW5kX2luX2Zvcm1hdGlvbl9uZXh0X3RvX3RoZV9Mb25lX1NhaWxvcl9zdGF0dWVfYXRfdGhlX1UuUy5fTmF2eV9NZW1vcmlhbC5qcGc=.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/8/8f/US_Navy_030708-N-5862D-127_Members_of_the_U.S._Navy_Ceremon

HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/b/ba/Hyun_Hong-choo_speaking_to_the_Carnegie_Endowment_for_International_Peace_on_9_September_2015.png
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/6/61/Saint_Paul_Union_Depot_-_waiting_room_-_2015-07-26.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/f/fe/Flag_of_the_Italian_Arberesh.svg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/3/3b/PAL-V_ONE_landing.jpg
HTTP Error 4

HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/2/2c/Ashley_Sutton_-_2017_BTCC_Knockhill_%28Sunday%2C_R2_podium%29.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download http://upload.wikimedia.org/wikipedia/commons/7/7c/Gipsyhill1.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy8yLzIyLyVENSU4QyVENSVBNSVENSVBMyVENSVBQiVENSVCNiVENSVBMV8lRDUlODIlRDUlQTElRDUlQTYlRDUlQTElRDYlODAlRDUlQjUlRDUlQTElRDUlQjYlRDUlQUJfJUQ1JUIwJUQ1JUI4JUQ2JTgyJUQ1JUI3JUQ1JUExJUQ1JUJGJUQ1JUExJUQ1JUFEJUQ1JUJGJUQ1JUExJUQ1JUFGJUQ1JUE4LmpwZw==.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/2/22/%D5%8C%D5%A5%D5%A3%D

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy9jL2M1LyVEMCU5RiVEMCVCMCVEMCVCQiVEMCVCMCVEMSU4Nl8lRDAlQkElRDElODMlRDAlQkIlRDElOEMlRDElODIlRDElODMlRDElODAlRDAlQjhfJUQxJTgyJUQwJUIwXyVEMSU4MiVEMCVCNSVEMSU4NSVEMCVCRCVEMSU5NiVEMCVCQSVEMCVCOCUyQ18lRDAlOUElRDElODAlRDAlQjAlRDAlQkMlRDAlQjAlRDElODIlRDAlQkUlRDElODAlRDElODElRDElOEMlRDAlQkFfREpJXzAwMDIuanBn.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/c/c5/%D0%9F%D0%B0%D0%BB%D0%B0%D1%86_%D0%BA%D1%83%D0%BB%D1%8C%D1%82%D1%83%D1%80%D0%B8_%D1%82%D0%B0_%D1%82%D0%B5%D1%85%D0%BD%D1%96%D0%BA%D0%B8%2C_%D0%9A%D1%80%D0%B0%D0%BC%D0%B0%D1%82%D0%BE%D1%80%D1%81%D1%8C%D0%BA_DJI_0002.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/0/0d/PosterA%26E_02.png
HTTP Error 429: Too many 

[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy8yLzI4LzIwMTQtMDYtMTJfMTBfMzZfMDNfVmlld19ub3J0aF9hbG9uZ19VLlMuX1JvdXRlXzk1XyUyOFdlc3RfV2lubmVtdWNjYV9Cb3VsZXZhcmQlMjlfbmVhcl9NZWxhcmtleV9TdHJlZXRfYW5kX05ldmFkYV9TdGF0ZV9Sb3V0ZV8yODlfJTI4V2lubmVtdWNjYV9Cb3VsZXZhcmQlMjlfaW5fV2lubmVtdWNjYSUyQ19OZXZhZGEuSlBH.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/2/28/2014-06-12_10_36_03_View_north_along_U.S._Route_95_%28West_Winnemucca_Boulevard%29_near_Melarkey_Street_and_Nevada_State_Route_289_%28Winnemucca_Boulevard%29_in_Winnemucca%2C_Nevada.JPG
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/6/6e/Parkside_Mental_Hospital%28GN05223%29.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to 

HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/2/26/WLP14-ri-0633-_Ulrich_Kelber_%28SPD%29.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/9/9c/David_Game_College_Logo.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy9jL2NlLyVEMCU5MiVEMSU4MyVEMCVCQi5fJUQwJTkyJUQwJUI1JUQwJUJCJUQwJUI4JUQwJUJBJUQwJUIwXyVEMCVBMSVEMCVCMCVEMCVCNCVEMCVCRSVEMCVCMiVEMCVCMF8tXyVEMCVCRiVEMSU4MCVEMCVCRSVEMCVCMi5fJUQwJUE1JUQwJUIwJUQwJUJCJUQxJTgyJUQxJTgzJUQxJTgwJUQwJUI4JUQwJUJEJUQxJTgxJUQxJThDJUQwJUJBJUQwJUI4JUQwJUI5LkpQRw==.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/c/ce/%D0%92%D1%83%D0%BB._%D0%92%D0%B5%D0%BB%D0%B8%D0%BA%D0%B0_%D0%A1%D0%B0%D0%B4%D0%BE%D0%B2%D0%B0_-_%D0%BF%D1%80%

HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/4/48/Mrs._William_Robinson_House_Dec10_02.jpg
HTTP Error 404: Not Found
I could not download https://upload.wikimedia.org/wikipedia/commons/e/eb/2020-07-09_08_34_48_will_rename_and_categorize_soon_10.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download http://upload.wikimedia.org/wikipedia/commons/0/00/Defective_Apple_MagSafe_01.JPG
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/d/d5/1985_Vladimir_Tomilovsky_1.jpeg
HTTP Error 429: Too many requests. Please comply with the 

HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/7/7e/Matija_Cop.jpg
[Errno 36] File name too long: '/raid/AISSEL/Hamed/datasets/wit/images/aHR0cHM6Ly91cGxvYWQud2lraW1lZGlhLm9yZy93aWtpcGVkaWEvY29tbW9ucy9jL2M4LyVEMCU5QSVEMCVCRSVEMCVCQyVEMCVCMCVEMCVCRCVEMCVCNCVEMCVCMF8lRDAlQTQlRDAlOUFfJUMyJUFCJUQwJTlCJUQxJTk2JUQwJUIyJUQwJUI1JUQxJTgwJUQwJUJGJUQxJTgzJUQwJUJCJUQxJThDJUMyJUJCXyVEMCVCRiVEMSU4MCVEMCVCOCVEMCVCMSVEMSU4MyVEMCVCQiVEMCVCMF8lRDAlQjQlRDAlQkVfJUQwJTlBJUQwJUI4JUQxJTk0JUQwJUIyJUQwJUIwJTJDXzIwMTglMkNfNV8lMjhjcm9wcGVkJTI5LmpwZw==.jpg'
I could not download https://upload.wikimedia.org/wikipedia/commons/c/c8/%D0%9A%D0%BE%D0%BC%D0%B0%D0%BD%D0%B4%D0%B0_%D0%A4%D0%9A_%C2%AB%D0%9B%D1%96%D0%B2%D0%B5%D1%80%D0%BF%D1%83%D0%BB%D1%8C%C2%BB_%D0%BF%D1%80%D0%B8%D0%B1%D1%83%D0%BB%D0%B0_%D0%B4%D0%BE_%D0%9A%D0%B8%D1%94%D0%B2%D0%B0%2C_201

HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download http://upload.wikimedia.org/wikipedia/commons/d/d0/Tuzi_pueblo01.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/2/2e/Hama%2C_Syria.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimedia.org/wikipedia/commons/0/0e/Red_Road_Flats%2C_Balornock_%28from_Petershill_Road%29_-_geograph.org.uk_-_1224863.jpg
HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy
I could not download https://upload.wikimed