In [1]:
from datasets import load_dataset
from tqdm.notebook import tqdm
from pathlib import Path
import polars as pl

# Polars display options: string-cell length and table width are both set to 1000
# (presumably so long captions and URLs are not truncated in rendered tables).
pl.Config(fmt_str_lengths=1000, tbl_width_chars=1000)

In [2]:
# Load the dataset through `datasets` with no specific split requested (split=None).
# NOTE(review): in the visible part of the notebook `ds` is only referenced by
# commented-out inspection code; the analysis itself reads the parquet files with polars.
ds = load_dataset("yuvalkirstain/laion-hd-subset", split=None)

In [None]:
# count = 0
# for item in ds['train']:
#     print(item.values())
#     count += 1
#     if count ==20:
#         print(item.keys())
#         break

In [122]:
import re
import nltk
# Fetch the NLTK data packages this notebook relies on (tokenizer models,
# stop-word lists, WordNet); each call is a no-op when the package is up to date.
for _nltk_resource in ('punkt', 'stopwords', 'wordnet'):
    nltk.download(_nltk_resource)
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Lazily-built NLTK resources shared by every clean_text() call.
_CLEAN_TEXT_CACHE = {}


def _clean_text_resources():
    """Return ``(stop_words, lemmatizer)``, building them on first use only."""
    if not _CLEAN_TEXT_CACHE:
        # Build both objects before storing them so a failure (e.g. missing
        # NLTK data) never leaves a half-filled cache behind.
        stop_words = frozenset(stopwords.words('english'))
        lemmatizer = WordNetLemmatizer()
        _CLEAN_TEXT_CACHE['stop_words'] = stop_words
        _CLEAN_TEXT_CACHE['lemmatizer'] = lemmatizer
    return _CLEAN_TEXT_CACHE['stop_words'], _CLEAN_TEXT_CACHE['lemmatizer']


def clean_text(text):
    """Normalize a caption into a space-joined string of lemmatized tokens.

    Steps, in order: lowercase, delete punctuation, drop non-ASCII characters,
    delete digits, tokenize with ``nltk.word_tokenize``, remove English stop
    words, lemmatize each token with ``WordNetLemmatizer``, re-join with
    single spaces.

    Args:
        text: The raw caption string.

    Returns:
        The cleaned caption as a single string (possibly empty).

    The stop-word set and the lemmatizer are created once and reused across
    calls instead of being rebuilt for every caption.
    """
    # 1) Lowercase
    text = text.lower()

    # 2) Remove punctuation.
    # NOTE(review): punctuation is deleted, not replaced by a space, so
    # "glow-in-the-dark" collapses to "glowinthedark". `\w` also keeps "_".
    text = re.sub(r'[^\w\s]', '', text)

    # 2.5) Remove non-ascii characters (accented letters are dropped, not transliterated)
    text = text.encode("ascii", "ignore").decode()

    # 3) Remove numbers
    text = re.sub(r'\d+', '', text)

    # 4) Tokenize
    tokens = nltk.word_tokenize(text)

    # 5) Remove stopwords and 6) lemmatize, using the cached resources
    stop_words, lemmatizer = _clean_text_resources()
    tokens = [lemmatizer.lemmatize(w) for w in tokens if w not in stop_words]

    # 7) Re-join with single spaces (also collapses any extra whitespace)
    return " ".join(tokens)

# # Example usage
# raw_text = "Hello there!!! This is a sample text, containing numbers like 123 and punctuation."
# cleaned = clean_text(raw_text)
# print(cleaned)
# # Output might be: "hello sample text containing number like punctuation"

[nltk_data] Downloading package punkt to /home/fbernardi/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/fbernardi/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /home/fbernardi/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [119]:
def filter_captions(caption, min_words=2, min_chars=5):
    """Tell whether a caption is long enough to keep.

    The caption is stripped of surrounding whitespace first. It passes only
    when it has strictly more than ``min_words`` whitespace-separated words
    AND strictly more than ``min_chars`` characters.

    Args:
        caption: The caption string to check.
        min_words: Exclusive lower bound on the word count.
        min_chars: Exclusive lower bound on the stripped character count.

    Returns:
        True when both thresholds are exceeded, otherwise False.
    """
    stripped = caption.strip()

    # Guard clause: too few words fails regardless of length.
    if len(stripped.split()) <= min_words:
        return False

    return len(stripped) > min_chars

In [118]:
# Peek at the raw captions.
# NOTE(review): in file order this cell sits above the cell that assigns
# `df_train`, so it fails on a fresh top-to-bottom run.
df_train['caption']

caption
str
"""Odisha - 2 : Simlipal"""
"""Page 15"""
"""18th century man silhouettes"""
"""¡Ánimo juventud! (2020) 1080p WEB-DL Latino"""
"""""Landing Page Template namens """"Kids Tree Preschool"""" #65801"""""
…
"""SOFABAB"""
"""From Dusk Till Dawn - The Trilogy (Blu-Ray) - 4"""
"""Vintage Champion Grant Hill Jersey Medium"""
"""SOFABAB"""


In [3]:
import fasttext
import os
import re

def fasttext_load_model(model_name: str, dir_name: str, cwd: str):
    """Locate ``<dir>/<dir_name>/<model_name>`` in ``cwd`` or an ancestor and load it.

    The search starts at ``cwd`` and moves up one directory at a time until
    the model file is found or the filesystem root has been checked.

    Args:
        model_name: File name of the fastText model (e.g. ``'lid.176.bin'``).
        dir_name: Name of the directory expected to contain the model.
        cwd: Directory where the upward search starts.

    Returns:
        The object returned by ``fasttext.load_model`` for the first match.

    Raises:
        FileNotFoundError: If no ancestor of ``cwd`` contains the model file.
            (Previously the search recursed forever at the root and every
            other error was silently swallowed by a bare ``except``.)
    """
    current = os.path.abspath(cwd)
    while True:
        model_path = os.path.join(current, dir_name, model_name)
        if os.path.isfile(model_path):
            # Errors while loading an existing file are real problems: let them surface.
            return fasttext.load_model(model_path)

        parent = os.path.dirname(current)
        if parent == current:
            # dirname() of the root is the root itself: nothing left to search.
            raise FileNotFoundError(
                f"Could not find {os.path.join(dir_name, model_name)!r} "
                f"in {os.path.abspath(cwd)!r} or any of its parent directories"
            )
        current = parent
    
def extract_str_from_pattern(s: str, regex: str) -> tuple:
    """Match ``regex`` at the start of ``s`` and return its capture groups.

    Args:
        s: The string to match.
        regex: A regular expression with zero or more capture groups.

    Returns:
        A tuple with one entry per capture group, in pattern order. Groups
        that did not take part in the match are ``None`` (previously they
        were filled with the integer group count).

    Raises:
        ValueError: If ``regex`` does not match ``s`` (previously this
            surfaced as an ``AttributeError`` on ``None``).
    """
    match = re.match(regex, s)
    if match is None:
        raise ValueError(f"{s!r} does not match pattern {regex!r}")
    return match.groups()

def is_english_sentence(sentence: str):
    """Run language identification on one sentence with the global ``model``.

    The sentence is stripped and its newlines are replaced by spaces before
    it is passed to ``model.predict``.

    Args:
        sentence: The text to classify.

    Returns:
        A dict with ``is_english`` (whether the first predicted label is
        ``'__label__en'``), ``lang_detected`` (the first predicted label) and
        ``score`` (the first value of the second item returned by
        ``model.predict``).
    """
    single_line = sentence.strip().replace('\n', ' ')
    labels, probabilities = model.predict(single_line)
    top_label = labels[0]
    return {
        "is_english": top_label == '__label__en',
        "lang_detected": top_label,
        "score": probabilities[0],
    }

# Smoke test of the language detector on a text file of captions, one per line.
# The guard is the standard script idiom; this cell is also what defines `model`.
if __name__ == "__main__":
    # `model` is assigned at module level on purpose: is_english_sentence()
    # reads it as a global.
    model = fasttext_load_model('lid.176.bin', 'models', os.getcwd()) 
    # NOTE(review): absolute, machine-specific path — the cell only runs where this file exists.
    input_file = "/home/fbernardi/Documents/fair_spoke_8/train_cap.txt"
    with open(input_file, 'r', encoding='utf-8') as f:
        for line in f:
            result = is_english_sentence(line)
            # Strip the "__label__" prefix to get the bare language code.
            detected_lang = extract_str_from_pattern(result['lang_detected'], r'^__label__(.+)$')
            print(detected_lang[0], result)
        

fr {'is_english': False, 'lang_detected': '__label__fr', 'score': 0.26760274171829224}
en {'is_english': True, 'lang_detected': '__label__en', 'score': 0.3941895663738251}
en {'is_english': True, 'lang_detected': '__label__en', 'score': 0.7033547759056091}
es {'is_english': False, 'lang_detected': '__label__es', 'score': 0.5783684253692627}
en {'is_english': True, 'lang_detected': '__label__en', 'score': 0.6295984387397766}
en {'is_english': True, 'lang_detected': '__label__en', 'score': 0.8507919311523438}
en {'is_english': True, 'lang_detected': '__label__en', 'score': 0.15321119129657745}
de {'is_english': False, 'lang_detected': '__label__de', 'score': 0.9842501282691956}
ko {'is_english': False, 'lang_detected': '__label__ko', 'score': 0.7064388394355774}
en {'is_english': True, 'lang_detected': '__label__en', 'score': 0.36353638768196106}
en {'is_english': True, 'lang_detected': '__label__en', 'score': 0.7940080761909485}
en {'is_english': True, 'lang_detected': '__label__en', 's

In [4]:
# Columns kept from the raw parquet files.
subset = [
    'similarity', 'hash', 'punsafe', 'pwatermark',
    'LANGUAGE', 'caption', 'url', 'key',
    'width', 'height', 'original_width', 'original_height',
]

# Parquet locations of each split, relative to the dataset repository root.
splits = {
    'train': 'data/train-*-of-*.parquet',
    'test': 'data/test-00000-of-00001-f5aa494af1d25f74.parquet',
}

# Read each split over the hf:// filesystem and keep only the columns listed above.
df_train = pl.read_parquet(
    f"hf://datasets/yuvalkirstain/laion-hd-subset/{splits['train']}"
).select(subset)
df_test = pl.read_parquet(
    f"hf://datasets/yuvalkirstain/laion-hd-subset/{splits['test']}"
).select(subset)

In [5]:
# Struct dtype of the dict returned by is_english_sentence(); used as the
# `return_dtype` when mapping it over the caption column.
# The fields are given as a list (not a set literal, as before) so that their
# order is deterministic and matches the order of the returned dict's keys.
map_data_types = pl.Struct([
    pl.Field("is_english", pl.Boolean),
    pl.Field("lang_detected", pl.String),
    pl.Field("score", pl.Float64),
])

In [62]:
# NOTE(review): the saved output of this cell already contains is_english /
# lang_detected / score, which are only added by cells further down in file
# order — the notebook was executed out of order.
df_train

similarity,hash,punsafe,pwatermark,LANGUAGE,caption,url,key,width,height,original_width,original_height,is_english,lang_detected,score
f64,i64,f64,f64,str,str,str,str,i64,i64,i64,i64,bool,str,f64
0.264381,8441708930180998904,0.000175,0.009912,"""sw""","""Odisha - 2 : Simlipal""","""https://img.over-blog-kiwi.com/0/99/16/03/20190110/ob_987ce2_dsc03556.jpg""","""000004145""",1024,1024,1600,1600,false,"""fr""",0.267603
0.284093,1252282617735001209,0.000183,,"""en""","""Page 15""","""http://p.calameoassets.com/161128121756-0befcc30be0f41b9dbb533a9ab79af9d/p15.jpg""","""000011979""",1024,1447,1190,1682,true,"""en""",0.39419
0.303465,4131493839794366439,0.000483,0.478444,"""en""","""18th century man silhouettes""","""https://www.freelogovectors.net/wp-content/uploads/2013/12/18th-century-man-silhouettes.jpg""","""000010373""",1663,1024,2396,1475,true,"""en""",0.703355
0.266388,-5846660546473290352,0.384948,0.002599,"""lt""","""¡Ánimo juventud! (2020) 1080p WEB-DL Latino""","""https://1.bp.blogspot.com/-Y5-QEi-CWiw/YCF3S1WubqI/AAAAAAAAZpU/rkUm6DXFeEY_CfNFpGLeg7t5xN-y37tQwCLcBGAsYHQ/s0/%25C2%25A1%25C3%2581nimo%2Bjuventud%2521%2B%25282020%2529%2B1080p%2BWEB-DL%2BLatino%2B-%2BDescaragtepelis.com.mkv_snapshot_00.21.17.276.jpg""","""000007873""",1820,1024,1920,1080,false,"""es""",0.578368
0.310429,6939400869967208633,0.000073,0.295235,"""nolang""","""""Landing Page Template namens """"Kids Tree Preschool"""" #65801""""","""https://s.tmimgcdn.com/scr/65800/landing-page-template-namens-kids-tree-preschool-65801_65801-original.jpg""","""000010685""",1024,1250,1024,1250,true,"""en""",0.629598
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
0.288857,-3754978981588788335,0.000742,0.402525,"""ja""","""SOFABAB""","""https://albatrossguitar.hipcast.com/albumart/1000_1627922587.jpg""","""000003300""",1024,1024,1683,1683,false,"""es""",0.202791
0.32071,8101346899319823492,0.002733,0.29081,"""en""","""From Dusk Till Dawn - The Trilogy (Blu-Ray) - 4""","""https://cdn.ozone.bg/media/catalog/product/cache/1/image/a4e40ebdc3e371adff845072e1c73f37/f/r/e93b322c98386df3534fa3306b3a200a/from-dusk-till-dawn---the-trilogy-blu-ray-36.jpg""","""000011392""",1024,1323,1213,1567,true,"""en""",0.437248
0.330339,-230864369060936050,0.000091,0.327558,"""en""","""Vintage Champion Grant Hill Jersey Medium""","""https://cdn.shopify.com/s/files/1/2974/2654/products/hill_jersey_blue.jpg?v=1611068917""","""000015248""",1024,1024,2048,2048,true,"""en""",0.543158
0.296867,5094972792430996628,0.000742,0.402556,"""ja""","""SOFABAB""","""https://albatrossguitar.hipcast.com/albumart/1000_1624136470.jpg""","""000010705""",1024,1024,1683,1683,false,"""es""",0.202791


In [63]:
# Sanity check: does the de-duplicated `hash` column have as many entries as
# the frame has rows, i.e. can `hash` serve as a row identifier?
df_train['hash'].unique().count() == df_train.shape[0]

True

In [41]:
# Number of rows in the training split.
df_train.shape[0]

13451

In [64]:
# Distinct values of the `key` column.
df_train['key'].unique()

key
str
"""000011949"""
"""000013817"""
"""000010617"""
"""000005204"""
"""000000700"""
…
"""000012328"""
"""000002313"""
"""000013787"""
"""000003701"""


In [65]:
# Distribution of the dataset-provided LANGUAGE tag, most frequent first.
df_train['LANGUAGE'].value_counts().sort('count', descending=True)

LANGUAGE,count
str,u32
"""en""",5911
"""nolang""",3219
"""fr""",379
"""de""",354
"""ru""",299
…,…
"""tg""",2
"""mi""",2
"""bn""",1
"""ka""",1


In [9]:
# Run the language detector on every non-null caption and spread the returned
# struct into the columns is_english / lang_detected / score.
# NOTE(review): df_train is overwritten in place, so re-running this cell on
# the already-augmented frame would presumably clash with the existing columns.
df_train = df_train.with_columns(
    pl.col('caption')
    .map_elements(is_english_sentence, return_dtype=map_data_types, skip_nulls=True)
    .alias('lang_detection')
).unnest('lang_detection')

In [10]:
# Inspect the frame after adding the detection columns
# (lang_detected still carries the "__label__" prefix at this point).
df_train

similarity,hash,punsafe,pwatermark,LANGUAGE,caption,url,key,width,height,original_width,original_height,is_english,lang_detected,score
f64,i64,f64,f64,str,str,str,str,i64,i64,i64,i64,bool,str,f64
0.264381,8441708930180998904,0.000175,0.009912,"""sw""","""Odisha - 2 : Simlipal""","""https://img.over-blog-kiwi.com…","""000004145""",1024,1024,1600,1600,false,"""__label__fr""",0.267603
0.284093,1252282617735001209,0.000183,,"""en""","""Page 15""","""http://p.calameoassets.com/161…","""000011979""",1024,1447,1190,1682,true,"""__label__en""",0.39419
0.303465,4131493839794366439,0.000483,0.478444,"""en""","""18th century man silhouettes""","""https://www.freelogovectors.ne…","""000010373""",1663,1024,2396,1475,true,"""__label__en""",0.703355
0.266388,-5846660546473290352,0.384948,0.002599,"""lt""","""¡Ánimo juventud! (2020) 1080p …","""https://1.bp.blogspot.com/-Y5-…","""000007873""",1820,1024,1920,1080,false,"""__label__es""",0.578368
0.310429,6939400869967208633,0.000073,0.295235,"""nolang""","""""Landing Page Template namens …","""https://s.tmimgcdn.com/scr/658…","""000010685""",1024,1250,1024,1250,true,"""__label__en""",0.629598
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
0.288857,-3754978981588788335,0.000742,0.402525,"""ja""","""SOFABAB""","""https://albatrossguitar.hipcas…","""000003300""",1024,1024,1683,1683,false,"""__label__es""",0.202791
0.32071,8101346899319823492,0.002733,0.29081,"""en""","""From Dusk Till Dawn - The Tril…","""https://cdn.ozone.bg/media/cat…","""000011392""",1024,1323,1213,1567,true,"""__label__en""",0.437248
0.330339,-230864369060936050,0.000091,0.327558,"""en""","""Vintage Champion Grant Hill Je…","""https://cdn.shopify.com/s/file…","""000015248""",1024,1024,2048,2048,true,"""__label__en""",0.543158
0.296867,5094972792430996628,0.000742,0.402556,"""ja""","""SOFABAB""","""https://albatrossguitar.hipcas…","""000010705""",1024,1024,1683,1683,false,"""__label__es""",0.202791


In [11]:
# Strip the fastText "__label__" prefix so lang_detected holds the bare
# language code (comparable with the LANGUAGE column).
# A native string expression replaces the per-row Python callback: it is
# vectorized, leaves nulls as nulls, and is idempotent — re-running the cell on
# already-stripped values is a no-op instead of an error.
df_train = df_train.with_columns(
    pl.col('lang_detected').str.replace(r'^__label__', '')
)

In [12]:
# Inspect the frame again: lang_detected now holds bare language codes.
df_train

similarity,hash,punsafe,pwatermark,LANGUAGE,caption,url,key,width,height,original_width,original_height,is_english,lang_detected,score
f64,i64,f64,f64,str,str,str,str,i64,i64,i64,i64,bool,str,f64
0.264381,8441708930180998904,0.000175,0.009912,"""sw""","""Odisha - 2 : Simlipal""","""https://img.over-blog-kiwi.com…","""000004145""",1024,1024,1600,1600,false,"""fr""",0.267603
0.284093,1252282617735001209,0.000183,,"""en""","""Page 15""","""http://p.calameoassets.com/161…","""000011979""",1024,1447,1190,1682,true,"""en""",0.39419
0.303465,4131493839794366439,0.000483,0.478444,"""en""","""18th century man silhouettes""","""https://www.freelogovectors.ne…","""000010373""",1663,1024,2396,1475,true,"""en""",0.703355
0.266388,-5846660546473290352,0.384948,0.002599,"""lt""","""¡Ánimo juventud! (2020) 1080p …","""https://1.bp.blogspot.com/-Y5-…","""000007873""",1820,1024,1920,1080,false,"""es""",0.578368
0.310429,6939400869967208633,0.000073,0.295235,"""nolang""","""""Landing Page Template namens …","""https://s.tmimgcdn.com/scr/658…","""000010685""",1024,1250,1024,1250,true,"""en""",0.629598
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
0.288857,-3754978981588788335,0.000742,0.402525,"""ja""","""SOFABAB""","""https://albatrossguitar.hipcas…","""000003300""",1024,1024,1683,1683,false,"""es""",0.202791
0.32071,8101346899319823492,0.002733,0.29081,"""en""","""From Dusk Till Dawn - The Tril…","""https://cdn.ozone.bg/media/cat…","""000011392""",1024,1323,1213,1567,true,"""en""",0.437248
0.330339,-230864369060936050,0.000091,0.327558,"""en""","""Vintage Champion Grant Hill Je…","""https://cdn.shopify.com/s/file…","""000015248""",1024,1024,2048,2048,true,"""en""",0.543158
0.296867,5094972792430996628,0.000742,0.402556,"""ja""","""SOFABAB""","""https://albatrossguitar.hipcas…","""000010705""",1024,1024,1683,1683,false,"""es""",0.202791


In [143]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

def _language_count_bar(column):
    """Build a horizontal bar trace of the value counts of ``df_train[column]``.

    The counts are computed once and sorted ascending by count; bar lengths
    come from the ``count`` column and bar labels from ``column``.
    """
    counts = df_train[column].value_counts().sort('count', descending=False)
    return go.Bar(
        x=counts['count'].to_list(),
        y=counts[column].to_list(),
        orientation='h',
    )


# Two stacked panels sharing the x axis: dataset-provided language tag on top,
# detected language below.
fig = make_subplots(
    rows=2, cols=1,
    shared_xaxes=True,
    vertical_spacing=0.03,
    subplot_titles=("Dataset Language", "Language Detected"),
)

fig.add_trace(_language_count_bar('LANGUAGE'), row=1, col=1)
fig.add_trace(_language_count_bar('lang_detected'), row=2, col=1)

fig.update_layout(height=800, width=1000, title_text="Side By Side Subplots")
fig.show()

In [87]:
# Rows where the dataset's LANGUAGE tag and the detected language agree.

df_train.filter(
    pl.col('LANGUAGE').eq(pl.col('lang_detected'))
).select('LANGUAGE', 'lang_detected', 'caption')

LANGUAGE,lang_detected,caption
str,str,str
"""en""","""en""","""Page 15"""
"""en""","""en""","""18th century man silhouettes"""
"""en""","""en""","""Men's Fanatics Branded Orange Florida Gators Personalized Basketball Pullover Hoodie 3"""
"""en""","""en""","""Happee Birthdae 3x4 Stamp Set"""
"""de""","""de""","""""""""ND Filter - auch """"""""Sonnenbrille für das Objektiv"""""""" genannt"""""""""
…,…,…
"""en""","""en""","""Mini Skirts for Fall Under $50 & On Mondays We Link Up by popular Chicago fashion blog, Glass of Glam: image of a woman wearing a fall mini skirt."""
"""es""","""es""","""Villa a la venta en la Cala de la Granadella con vistas al mar - Javea - Costa Blanca"""
"""en""","""en""","""From Dusk Till Dawn - The Trilogy (Blu-Ray) - 4"""
"""en""","""en""","""Vintage Champion Grant Hill Jersey Medium"""


In [84]:
# Rows the dataset tags as "nolang": the detected language is the only label available.

is_untagged = pl.col('LANGUAGE') == "nolang"
df_train.filter(is_untagged).select('LANGUAGE', 'lang_detected', 'caption')

LANGUAGE,lang_detected,caption
str,str,str
"""nolang""","""en""","""""Landing Page Template namens """"Kids Tree Preschool"""" #65801"""""
"""nolang""","""en""","""Ottawa Karaoke | Karaoke Wednesday, Friday, Sunday"""
"""nolang""","""en""","""""Untitled, 2013, wood, paint, nails, 12-3/4 x 3 x 20"""""""""
"""nolang""","""zh""","""Re:[原创]【KMG】长水机场一组 1600大图 BOEING 737-700 B-2503 中国昆明长水国际机场"""
"""nolang""","""en""","""NUNA, 2019 MIXX<sup>™</sup> Stroller & PIPA<sup>™</sup> Lite LX Infant Car Seat Set Travel System, Alternate thumbnail 4, color, VERONA CAVIAR"""
…,…,…
"""nolang""","""en""","""[MyGirl] Vol.012 Yu Zhu 23P, Lovely, Pure, Yu Zhu"""
"""nolang""","""en""","""Marvel Secret Warriors Rising Daisy Johnson MARVEL'S QUAKE Identity Doll Gift"""
"""nolang""","""en""","""Attorney Desk"""
"""nolang""","""en""","""Heart Health"""


In [89]:
# Rows that do carry a dataset language tag, but one that disagrees with the detector.

df_train.filter(
    pl.col('LANGUAGE').ne(pl.col('lang_detected'))
    & (pl.col('LANGUAGE') != "nolang")
).select('LANGUAGE', 'lang_detected', 'caption')

LANGUAGE,lang_detected,caption
str,str,str
"""sw""","""fr""","""Odisha - 2 : Simlipal"""
"""lt""","""es""","""¡Ánimo juventud! (2020) 1080p WEB-DL Latino"""
"""fil""","""en""","""Espinosa Crema No. 4 Robusto Cigars - 5.5 x 52 (Pack of 5)"""
"""lb""","""de""","""3D Stadt New York 433 Rutschfest Büro Schreibtisch Schreibtisch Schreibtisch Mauspad Tastatur Spiel e136a0"""
"""ca""","""zh""","""T-SHIRT - 300 REC"""
…,…,…
"""bg""","""en""","""Audi RS 3"""
"""en""","""es""","""Infactory Blacklight-esmalte de uñas con Glow-in-the-dark-efecto: Amazon.es: Belleza"""
"""it""","""en""","""Lamborghini Gallardo by Cosa Design"""
"""ja""","""es""","""SOFABAB"""


In [90]:
# How many captions the detector labelled as English vs. not English.
df_train['is_english'].value_counts()   

is_english,count
bool,u32
False,3993
True,9458


In [94]:
# Captions that both the dataset tag and the detector label as English.

df_train.filter(
    (pl.col('lang_detected') == 'en')
    & (pl.col('LANGUAGE') == 'en')
).select(['LANGUAGE', 'lang_detected', 'caption'])

LANGUAGE,lang_detected,caption
str,str,str
"""en""","""en""","""Page 15"""
"""en""","""en""","""18th century man silhouettes"""
"""en""","""en""","""Men's Fanatics Branded Orange Florida Gators Personalized Basketball Pullover Hoodie 3"""
"""en""","""en""","""Happee Birthdae 3x4 Stamp Set"""
"""en""","""en""","""308/4 Honeysuckle Drive, Newcastle"""
…,…,…
"""en""","""en""","""Nfl Atlanta Falcons Nike Champ Drive Sideline Full Zip Jacket Black Zip Jackets Jackets Stylish Jackets"""
"""en""","""en""","""Mini Skirts for Fall Under $50 & On Mondays We Link Up by popular Chicago fashion blog, Glass of Glam: image of a woman wearing a fall mini skirt."""
"""en""","""en""","""From Dusk Till Dawn - The Trilogy (Blu-Ray) - 4"""
"""en""","""en""","""Vintage Champion Grant Hill Jersey Medium"""


In [95]:
# Captions detected as English although the dataset tag is something else
# (including "nolang").

df_train.filter(
    (pl.col('lang_detected') == 'en')
    & (pl.col('LANGUAGE') != 'en')
).select(['LANGUAGE', 'lang_detected', 'caption'])

LANGUAGE,lang_detected,caption
str,str,str
"""nolang""","""en""","""""Landing Page Template namens """"Kids Tree Preschool"""" #65801"""""
"""nolang""","""en""","""Ottawa Karaoke | Karaoke Wednesday, Friday, Sunday"""
"""nolang""","""en""","""""Untitled, 2013, wood, paint, nails, 12-3/4 x 3 x 20"""""""""
"""fil""","""en""","""Espinosa Crema No. 4 Robusto Cigars - 5.5 x 52 (Pack of 5)"""
"""nolang""","""en""","""NUNA, 2019 MIXX<sup>™</sup> Stroller & PIPA<sup>™</sup> Lite LX Infant Car Seat Set Travel System, Alternate thumbnail 4, color, VERONA CAVIAR"""
…,…,…
"""nolang""","""en""","""[MyGirl] Vol.012 Yu Zhu 23P, Lovely, Pure, Yu Zhu"""
"""nolang""","""en""","""Marvel Secret Warriors Rising Daisy Johnson MARVEL'S QUAKE Identity Doll Gift"""
"""nolang""","""en""","""Attorney Desk"""
"""nolang""","""en""","""Heart Health"""


In [108]:
# Captions the dataset tags as English but the detector assigns to another language.
# (Append .sample(10) to spot-check a random handful.)

df_train.filter(
    (pl.col('lang_detected') != 'en')
    & (pl.col('LANGUAGE') == 'en')
).select(['LANGUAGE', 'lang_detected', 'caption'])

LANGUAGE,lang_detected,caption
str,str,str
"""en""","""fr""","""fruits-oranges-tangerines"""
"""en""","""es""","""FACTO / 2501 CHAIR"""
"""en""","""ja""","""QUIRKY DAYTIME"""
"""en""","""es""","""AXEL DENIM JACKET."""
"""en""","""fr""","""Used 2017 McLaren 570S Coupe | Greenwich, CT"""
…,…,…
"""en""","""es""","""Fondos de pantalla Dragon Age Inquisition"""
"""en""","""fr""","""Mini 04-07-18 © Florian Léger - SHARE & DARE-199.jpg"""
"""en""","""ja""","""NOCONA BLACK OSTRICH LEATHER WESTERN COWBOY ROCKABILLY DANCE BOOTS SIZE 4 B"""
"""en""","""fr""","""Red Dead Online semaine chasseur primes"""


In [109]:
# Every caption the detector assigns to a language other than English.
# (Append .sample(10) to spot-check a random handful.)

detected_non_english = pl.col('lang_detected') != 'en'
df_train.filter(detected_non_english).select(['LANGUAGE', 'lang_detected', 'caption'])

LANGUAGE,lang_detected,caption
str,str,str
"""sw""","""fr""","""Odisha - 2 : Simlipal"""
"""lt""","""es""","""¡Ánimo juventud! (2020) 1080p WEB-DL Latino"""
"""de""","""de""","""""""""ND Filter - auch """"""""Sonnenbrille für das Objektiv"""""""" genannt"""""""""
"""ko""","""ko""","""이용매뉴얼_Page_16.png"""
"""zh""","""zh""","""麗寶-天空之夢 - 臺中市 - 旅遊美食討論區 - Mobile01"""
…,…,…
"""es""","""es""","""Villa a la venta en la Cala de la Granadella con vistas al mar - Javea - Costa Blanca"""
"""en""","""es""","""Infactory Blacklight-esmalte de uñas con Glow-in-the-dark-efecto: Amazon.es: Belleza"""
"""nolang""","""nl""","""weddingblogmannring-151.jpg"""
"""ja""","""es""","""SOFABAB"""
