1st attempt - with brandname
Product attribute	Example value
articleType	shirts
baseColour	navy blue
gender	men
masterCategory	apparel
season	fall
subCategory	topwear
usage	casual
brandName	turtle
ageGroup	adults-men
body or Garment Size	garment Measurements in
collar	spread collar
fabric	cotton
fit	slim fit
pattern	checked
sleeve length	long sleeves
description	Dark brown and indigo blue plaid check shirt with white accents

In [None]:
import pandas as pd
import os
import json
import re
import numpy as np
import pickle
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

# Settings: dataset location and the input/output paths derived from it
DATASET_PATH = r"C:\Users\wrobl\Downloads\archive\fashion-dataset"
CSV_PATH = os.path.join(DATASET_PATH, 'styles.csv')
JSON_DIR = os.path.join(DATASET_PATH, 'styles')  # read_json looks for <id>.json files here
OUTPUT_DIR = os.path.join(DATASET_PATH, 'vectors_output')
os.makedirs(OUTPUT_DIR, exist_ok=True)  # create the output folder if it does not exist yet

# Load the sentence-transformers model
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Helper functions
def clean_html(raw_html):
    """Strip HTML tags from ``raw_html`` and flatten it to a single line.

    Args:
        raw_html: Text that may contain ``<...>`` tags. Anything that is not
            a string (for example ``None`` from a null JSON field) is treated
            as "no text".

    Returns:
        The text with every ``<...>`` span removed, newlines replaced by
        spaces and surrounding whitespace trimmed; ``""`` for non-string input.
    """
    # Guard against null / non-text description values instead of raising.
    if not isinstance(raw_html, str):
        return ""
    cleanr = re.compile('<.*?>')
    return re.sub(cleanr, '', raw_html).replace('\n', ' ').strip()

def read_json(product_id, json_dir=None):
    """Load the ``data`` payload from one product's JSON file.

    Args:
        product_id: Product identifier; the file read is ``<product_id>.json``.
        json_dir: Directory holding the JSON files. Defaults to the
            module-level ``JSON_DIR``.

    Returns:
        The dict stored under the top-level ``"data"`` key. An empty dict is
        returned when the file is missing, cannot be read or parsed, or when
        the payload is absent / not a dict (e.g. ``"data": null``).
    """
    base_dir = JSON_DIR if json_dir is None else json_dir
    json_path = os.path.join(base_dir, f"{product_id}.json")
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # A product without a JSON file is expected; stay silent.
        return {}
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Błąd wczytywania {json_path}: {e}")
        return {}
    payload = data.get('data') if isinstance(data, dict) else None
    # Callers immediately call .get() on the result, so always hand back a dict.
    return payload if isinstance(payload, dict) else {}

# Builds the natural-language text for one product
def build_natural_text(info, description):
    """Compose the sentence that is fed to the embedding model.

    Args:
        info: Mapping of attribute name to value; empty or missing values are
            skipped.
        description: Cleaned product description, appended when non-empty.

    Returns:
        ``"This is a <Season> <traits> by <brand>, Size: ..., for <usage>,
        category: ..., subcategory: ...."`` optionally followed by
        ``" Description: <description>"``.
    """
    trait_keys = (
        'gender', 'ageGroup', 'baseColour', 'pattern', 'fit',
        'fabric', 'sleeve length', 'collar', 'articleType',
    )
    traits = ', '.join(info[key].lower() for key in trait_keys if info.get(key))

    # Space-separated opening: season, traits, brand.
    lead = ["This is a"]
    if info.get('season'):
        lead.append(info['season'].capitalize())
    if traits:
        lead.append(traits)
    if info.get('brandName'):
        lead.append(f"by {info['brandName']}")

    # Comma-separated trailing clauses.
    clauses = [' '.join(lead)]
    if info.get('body or Garment Size'):
        clauses.append(f"Size: {info['body or Garment Size']}")
    if info.get('usage'):
        clauses.append(f"for {info['usage'].lower()}")
    if info.get('masterCategory'):
        clauses.append(f"category: {info['masterCategory'].lower()}")
    if info.get('subCategory'):
        clauses.append(f"subcategory: {info['subCategory'].lower()}")

    sentence = ', '.join(clauses).strip(", ") + "."
    if description:
        sentence += f" Description: {description}"
    return sentence

# Configuration: which fields are used to build the product text
selected_csv_features = [
    'articleType', 'baseColour', 'gender', 'masterCategory',
    'season', 'subCategory', 'usage'
]
selected_json_features = ['brandName', 'ageGroup']
# Output attribute name -> key spellings tried, in order, inside "articleAttributes"
selected_json_attributes = {
    'body or Garment Size': ['body or Garment Size', 'Body or Garment Size'],
    'collar': ['collar', 'Collar'],
    'fabric': ['fabric', 'Fabric'],
    'fit': ['fit', 'Fit'],
    'pattern': ['pattern', 'Pattern'],
    'sleeve length': ['sleeve length', 'Sleeve Length']
}

# Load the data (malformed CSV rows are skipped)
df = pd.read_csv(CSV_PATH, on_bad_lines='skip')
df = df.set_index('id')
product_ids = df.index

# Containers for the results
vectors = {}  # product id -> embedding
batch_texts = []
batch_ids = []

BATCH_SIZE = 128  # For efficient processing

for pid in tqdm(product_ids, desc="Processing products"):
    # Missing CSV values become '' so the text builder skips them
    csv_row = df.loc[pid].fillna('').to_dict()
    json_data = read_json(pid)

    info = {f: csv_row.get(f, '') for f in selected_csv_features}
    for f in selected_json_features:
        info[f] = json_data.get(f, '')

    # Nested attributes: the first matching key spelling wins, '' when none is present
    attrs = json_data.get("articleAttributes", {})
    for attr, keys in selected_json_attributes.items():
        found = ''
        if isinstance(attrs, dict):
            for k in keys:
                if k in attrs:
                    found = attrs[k]
                    break
        info[attr] = found

    # Description text lives at productDescriptors -> description -> value
    prod_descr = json_data.get("productDescriptors", {})
    desc_obj = prod_descr.get("description", {}) if isinstance(prod_descr, dict) else {}
    raw_desc = desc_obj.get("value", '') if isinstance(desc_obj, dict) else ''
    description = clean_html(raw_desc)

    input_string_natural = build_natural_text(info, description)

    batch_texts.append(input_string_natural)
    batch_ids.append(pid)

    # Batch processing for efficiency: encode once a full batch has been collected
    if len(batch_texts) >= BATCH_SIZE:
        embeddings = model.encode(batch_texts, show_progress_bar=False, batch_size=BATCH_SIZE)
        for idx, vector in zip(batch_ids, embeddings):
            vectors[idx] = vector
        batch_texts = []
        batch_ids = []

# Process the last (partial) batch
if batch_texts:
    embeddings = model.encode(batch_texts, show_progress_bar=False, batch_size=BATCH_SIZE)
    for idx, vector in zip(batch_ids, embeddings):
        vectors[idx] = vector

# Save the vectors
vectors_path = os.path.join(OUTPUT_DIR, 'product_vectors.pkl')
with open(vectors_path, 'wb') as f:
    pickle.dump(vectors, f)

print(f"Wektory zostały zapisane do: {vectors_path}")

# Additional CSV with the vectors (handy for quick inspection)
csv_output = pd.DataFrame({
    'id': list(vectors.keys()),
    'vector': [vec.tolist() for vec in vectors.values()]
})
csv_output.to_csv(os.path.join(OUTPUT_DIR, 'product_vectors.csv'), index=False)

print("Gotowe!")


2nd attempt - without brandname
Product attribute	Example value
articleType	shirts
baseColour	navy blue
gender	men
masterCategory	apparel
season	fall
subCategory	topwear
usage	casual
ageGroup	adults-men
body or Garment Size	garment Measurements in
collar	spread collar
fabric	cotton
fit	slim fit
pattern	checked
sleeve length	long sleeves
description	Dark brown and indigo blue plaid check shirt with white accents

In [None]:
import pandas as pd
import os
import json
import re
import numpy as np
import pickle
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

# Settings: dataset location and the input/output paths derived from it
DATASET_PATH = r"C:\Users\wrobl\Downloads\archive\fashion-dataset"
CSV_PATH = os.path.join(DATASET_PATH, 'styles.csv')
JSON_DIR = os.path.join(DATASET_PATH, 'styles')  # read_json looks for <id>.json files here
OUTPUT_DIR = os.path.join(DATASET_PATH, 'vectors_output')
os.makedirs(OUTPUT_DIR, exist_ok=True)  # create the output folder if it does not exist yet

# Load the sentence-transformers model
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Helper functions
def clean_html(raw_html):
    """Remove HTML tags from ``raw_html`` and collapse it onto one line.

    Args:
        raw_html: Text that may contain ``<...>`` tags. Non-string values
            (such as ``None`` from a null JSON field) count as empty text.

    Returns:
        The input without ``<...>`` spans, with newlines turned into spaces
        and outer whitespace stripped; ``""`` when the input is not a string.
    """
    # A null description must not crash the whole run.
    if not isinstance(raw_html, str):
        return ""
    cleanr = re.compile('<.*?>')
    return re.sub(cleanr, '', raw_html).replace('\n', ' ').strip()

def read_json(product_id, json_dir=None):
    """Load the ``data`` payload from one product's JSON file.

    Args:
        product_id: Product identifier; the file read is ``<product_id>.json``.
        json_dir: Directory holding the JSON files. Defaults to the
            module-level ``JSON_DIR``.

    Returns:
        The dict stored under the top-level ``"data"`` key. An empty dict is
        returned when the file is missing, cannot be read or parsed, or when
        the payload is absent / not a dict (e.g. ``"data": null``).
    """
    base_dir = JSON_DIR if json_dir is None else json_dir
    json_path = os.path.join(base_dir, f"{product_id}.json")
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # A product without a JSON file is expected; stay silent.
        return {}
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Błąd wczytywania {json_path}: {e}")
        return {}
    payload = data.get('data') if isinstance(data, dict) else None
    # Callers immediately call .get() on the result, so always hand back a dict.
    return payload if isinstance(payload, dict) else {}

# Builds the natural-language text for one product (brand name is not used)
def build_natural_text(info, description):
    """Compose the sentence that is fed to the embedding model.

    Args:
        info: Mapping of attribute name to value; empty or missing values are
            skipped. ``brandName`` is ignored by this variant.
        description: Cleaned product description, appended when non-empty.

    Returns:
        ``"This is a <Season> <traits>, Size: ..., for <usage>,
        category: ..., subcategory: ...."`` optionally followed by
        ``" Description: <description>"``.
    """
    trait_keys = (
        'gender', 'ageGroup', 'baseColour', 'pattern', 'fit',
        'fabric', 'sleeve length', 'collar', 'articleType',
    )
    traits = ', '.join(info[key].lower() for key in trait_keys if info.get(key))

    # Space-separated opening: season, then traits.
    lead = ["This is a"]
    if info.get('season'):
        lead.append(info['season'].capitalize())
    if traits:
        lead.append(traits)

    # Comma-separated trailing clauses.
    clauses = [' '.join(lead)]
    if info.get('body or Garment Size'):
        clauses.append(f"Size: {info['body or Garment Size']}")
    if info.get('usage'):
        clauses.append(f"for {info['usage'].lower()}")
    if info.get('masterCategory'):
        clauses.append(f"category: {info['masterCategory'].lower()}")
    if info.get('subCategory'):
        clauses.append(f"subcategory: {info['subCategory'].lower()}")

    sentence = ', '.join(clauses).strip(", ") + "."
    if description:
        sentence += f" Description: {description}"
    return sentence

# Configuration: which fields are used to build the product text (no brandName here)
selected_csv_features = [
    'articleType', 'baseColour', 'gender', 'masterCategory',
    'season', 'subCategory', 'usage'
]
selected_json_features = ['ageGroup']
# Output attribute name -> key spellings tried, in order, inside "articleAttributes"
selected_json_attributes = {
    'body or Garment Size': ['body or Garment Size', 'Body or Garment Size'],
    'collar': ['collar', 'Collar'],
    'fabric': ['fabric', 'Fabric'],
    'fit': ['fit', 'Fit'],
    'pattern': ['pattern', 'Pattern'],
    'sleeve length': ['sleeve length', 'Sleeve Length']
}

# Load the data (malformed CSV rows are skipped)
df = pd.read_csv(CSV_PATH, on_bad_lines='skip')
df = df.set_index('id')
product_ids = df.index

# Containers for the results
vectors = {}  # product id -> embedding
batch_texts = []
batch_ids = []

BATCH_SIZE = 128  # For efficient processing

for pid in tqdm(product_ids, desc="Processing products"):
    # Missing CSV values become '' so the text builder skips them
    csv_row = df.loc[pid].fillna('').to_dict()
    json_data = read_json(pid)

    info = {f: csv_row.get(f, '') for f in selected_csv_features}
    for f in selected_json_features:
        info[f] = json_data.get(f, '')

    # Nested attributes: the first matching key spelling wins, '' when none is present
    attrs = json_data.get("articleAttributes", {})
    for attr, keys in selected_json_attributes.items():
        found = ''
        if isinstance(attrs, dict):
            for k in keys:
                if k in attrs:
                    found = attrs[k]
                    break
        info[attr] = found

    # Description text lives at productDescriptors -> description -> value
    prod_descr = json_data.get("productDescriptors", {})
    desc_obj = prod_descr.get("description", {}) if isinstance(prod_descr, dict) else {}
    raw_desc = desc_obj.get("value", '') if isinstance(desc_obj, dict) else ''
    description = clean_html(raw_desc)

    input_string_natural = build_natural_text(info, description)

    batch_texts.append(input_string_natural)
    batch_ids.append(pid)

    # Batch processing for efficiency: encode once a full batch has been collected
    if len(batch_texts) >= BATCH_SIZE:
        embeddings = model.encode(batch_texts, show_progress_bar=False, batch_size=BATCH_SIZE)
        for idx, vector in zip(batch_ids, embeddings):
            vectors[idx] = vector
        batch_texts = []
        batch_ids = []

# Process the last (partial) batch
if batch_texts:
    embeddings = model.encode(batch_texts, show_progress_bar=False, batch_size=BATCH_SIZE)
    for idx, vector in zip(batch_ids, embeddings):
        vectors[idx] = vector

# Save the vectors
vectors_path = os.path.join(OUTPUT_DIR, 'product_vectors_no_brand.pkl')
with open(vectors_path, 'wb') as f:
    pickle.dump(vectors, f)

print(f"Wektory zostały zapisane do: {vectors_path}")

# Additional CSV with the vectors (handy for quick inspection)
csv_output = pd.DataFrame({
    'id': list(vectors.keys()),
    'vector': [vec.tolist() for vec in vectors.values()]
})
csv_output.to_csv(os.path.join(OUTPUT_DIR, 'product_vectors_no_brand.csv'), index=False)

print("Gotowe!")


3rd attempt - without the brand name and attributes that can be inferred from the description
Product attribute	Example value
articleType	shirts
baseColour	navy blue
gender	men
masterCategory	apparel
season	fall
subCategory	topwear
usage	casual
ageGroup	adults-men
description	Dark brown and indigo blue plaid check shirt with white accents

In [None]:
import pandas as pd
import os
import json
import re
import numpy as np
import pickle
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

# Settings: dataset location and the input/output paths derived from it
DATASET_PATH = r"C:\Users\wrobl\Downloads\archive\fashion-dataset"
CSV_PATH = os.path.join(DATASET_PATH, 'styles.csv')
JSON_DIR = os.path.join(DATASET_PATH, 'styles')  # read_json looks for <id>.json files here
OUTPUT_DIR = os.path.join(DATASET_PATH, 'vectors_output')
os.makedirs(OUTPUT_DIR, exist_ok=True)  # create the output folder if it does not exist yet

# Load the sentence-transformers model
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Helper functions
def clean_html(raw_html):
    """Drop HTML tags from ``raw_html`` and return it as one trimmed line.

    Args:
        raw_html: Text that may contain ``<...>`` tags. Values that are not
            strings (e.g. ``None`` for a null JSON field) are treated as empty.

    Returns:
        The text with ``<...>`` spans removed, newlines replaced by spaces and
        leading/trailing whitespace stripped; ``""`` for non-string input.
    """
    # Tolerate null / non-text values rather than raising inside re.sub.
    if not isinstance(raw_html, str):
        return ""
    cleanr = re.compile('<.*?>')
    return re.sub(cleanr, '', raw_html).replace('\n', ' ').strip()

def read_json(product_id, json_dir=None):
    """Load the ``data`` payload from one product's JSON file.

    Args:
        product_id: Product identifier; the file read is ``<product_id>.json``.
        json_dir: Directory holding the JSON files. Defaults to the
            module-level ``JSON_DIR``.

    Returns:
        The dict stored under the top-level ``"data"`` key. An empty dict is
        returned when the file is missing, cannot be read or parsed, or when
        the payload is absent / not a dict (e.g. ``"data": null``).
    """
    base_dir = JSON_DIR if json_dir is None else json_dir
    json_path = os.path.join(base_dir, f"{product_id}.json")
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # A product without a JSON file is expected; stay silent.
        return {}
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Błąd wczytywania {json_path}: {e}")
        return {}
    payload = data.get('data') if isinstance(data, dict) else None
    # Callers immediately call .get() on the result, so always hand back a dict.
    return payload if isinstance(payload, dict) else {}

# Builds the natural-language text for one product (no brand, no detailed attributes)
def build_natural_text(info, description):
    """Compose the sentence that is fed to the embedding model.

    Only gender, age group, base colour and article type are listed as
    traits. The size clause is still emitted when ``info`` carries a
    ``'body or Garment Size'`` value.

    Args:
        info: Mapping of attribute name to value; empty or missing values are
            skipped.
        description: Cleaned product description, appended when non-empty.

    Returns:
        The assembled sentence, optionally followed by
        ``" Description: <description>"``.
    """
    trait_keys = ('gender', 'ageGroup', 'baseColour', 'articleType')
    traits = ', '.join(info[key].lower() for key in trait_keys if info.get(key))

    # Space-separated opening: season, then traits.
    lead = ["This is a"]
    if info.get('season'):
        lead.append(info['season'].capitalize())
    if traits:
        lead.append(traits)

    # Comma-separated trailing clauses.
    clauses = [' '.join(lead)]
    if info.get('body or Garment Size'):
        clauses.append(f"Size: {info['body or Garment Size']}")
    if info.get('usage'):
        clauses.append(f"for {info['usage'].lower()}")
    if info.get('masterCategory'):
        clauses.append(f"category: {info['masterCategory'].lower()}")
    if info.get('subCategory'):
        clauses.append(f"subcategory: {info['subCategory'].lower()}")

    sentence = ', '.join(clauses).strip(", ") + "."
    if description:
        sentence += f" Description: {description}"
    return sentence

# Configuration: which fields are used to build the product text
selected_csv_features = [
    'articleType', 'baseColour', 'gender', 'masterCategory',
    'season', 'subCategory', 'usage'
]
selected_json_features = ['ageGroup']

# Load the data (malformed CSV rows are skipped)
df = pd.read_csv(CSV_PATH, on_bad_lines='skip')
df = df.set_index('id')
product_ids = df.index

# Containers for the results
vectors = {}  # product id -> embedding
batch_texts = []
batch_ids = []

BATCH_SIZE = 128  # For efficient processing

for pid in tqdm(product_ids, desc="Processing products"):
    # Missing CSV values become '' so the text builder skips them
    csv_row = df.loc[pid].fillna('').to_dict()
    json_data = read_json(pid)

    info = {f: csv_row.get(f, '') for f in selected_csv_features}
    for f in selected_json_features:
        info[f] = json_data.get(f, '')

    # Description text lives at productDescriptors -> description -> value
    prod_descr = json_data.get("productDescriptors", {})
    desc_obj = prod_descr.get("description", {}) if isinstance(prod_descr, dict) else {}
    raw_desc = desc_obj.get("value", '') if isinstance(desc_obj, dict) else ''
    description = clean_html(raw_desc)

    input_string_natural = build_natural_text(info, description)

    batch_texts.append(input_string_natural)
    batch_ids.append(pid)

    # Batch processing for efficiency: encode once a full batch has been collected
    if len(batch_texts) >= BATCH_SIZE:
        embeddings = model.encode(batch_texts, show_progress_bar=False, batch_size=BATCH_SIZE)
        for idx, vector in zip(batch_ids, embeddings):
            vectors[idx] = vector
        batch_texts = []
        batch_ids = []


# Process the last (partial) batch
if batch_texts:
    embeddings = model.encode(batch_texts, show_progress_bar=False, batch_size=BATCH_SIZE)
    for idx, vector in zip(batch_ids, embeddings):
        vectors[idx] = vector

# Save the vectors
vectors_path = os.path.join(OUTPUT_DIR, 'product_vectors_no_brand_and_attributes.pkl')
with open(vectors_path, 'wb') as f:
    pickle.dump(vectors, f)

print(f"Wektory zostały zapisane do: {vectors_path}")

# Additional CSV with the vectors (handy for quick inspection)
csv_output = pd.DataFrame({
    'id': list(vectors.keys()),
    'vector': [vec.tolist() for vec in vectors.values()]
})
csv_output.to_csv(os.path.join(OUTPUT_DIR, 'product_vectors_no_brand_and_attributes.csv'), index=False)

print("Gotowe!")


4th attempt - only description
Product attribute	Example value
description	Dark brown and indigo blue plaid check shirt with white accents

In [None]:
import pandas as pd
import os
import json
import re
import pickle
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

# Settings: dataset location and the input/output paths derived from it
DATASET_PATH = r"C:\Users\wrobl\Downloads\archive\fashion-dataset"
CSV_PATH = os.path.join(DATASET_PATH, 'styles.csv')
JSON_DIR = os.path.join(DATASET_PATH, 'styles')  # read_json looks for <id>.json files here
OUTPUT_DIR = os.path.join(DATASET_PATH, 'vectors_output')
os.makedirs(OUTPUT_DIR, exist_ok=True)  # create the output folder if it does not exist yet

# Load the sentence-transformers model
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Helper functions
def clean_html(raw_html):
    """Strip HTML tags from ``raw_html`` and flatten it to a single line.

    Args:
        raw_html: Text that may contain ``<...>`` tags. Anything that is not
            a string (for example ``None`` from a null JSON field) is treated
            as "no text".

    Returns:
        The text with every ``<...>`` span removed, newlines replaced by
        spaces and surrounding whitespace trimmed; ``""`` for non-string input.
    """
    # The description is the only model input in this cell; a null value
    # should fall through to the "no description" text, not raise.
    if not isinstance(raw_html, str):
        return ""
    cleanr = re.compile('<.*?>')
    return re.sub(cleanr, '', raw_html).replace('\n', ' ').strip()

def read_json(product_id, json_dir=None):
    """Load the ``data`` payload from one product's JSON file.

    Args:
        product_id: Product identifier; the file read is ``<product_id>.json``.
        json_dir: Directory holding the JSON files. Defaults to the
            module-level ``JSON_DIR``.

    Returns:
        The dict stored under the top-level ``"data"`` key. An empty dict is
        returned when the file is missing, cannot be read or parsed, or when
        the payload is absent / not a dict (e.g. ``"data": null``).
    """
    base_dir = JSON_DIR if json_dir is None else json_dir
    json_path = os.path.join(base_dir, f"{product_id}.json")
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # A product without a JSON file is expected; stay silent.
        return {}
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Błąd wczytywania {json_path}: {e}")
        return {}
    payload = data.get('data') if isinstance(data, dict) else None
    # Callers immediately call .get() on the result, so always hand back a dict.
    return payload if isinstance(payload, dict) else {}

# Builds the natural-language text (description only)
def build_natural_text(description):
    """Wrap the product description in a short sentence for the embedding model.

    Args:
        description: Cleaned description text; may be empty.

    Returns:
        ``"This is a <description>"``, or a fixed placeholder sentence when
        the description is empty.
    """
    if not description:
        return "This is a product without description."
    return f"This is a {description}"

# Load the data (malformed CSV rows are skipped); only the ids are used below
df = pd.read_csv(CSV_PATH, on_bad_lines='skip')
df = df.set_index('id')
product_ids = df.index

# Containers for the results
vectors = {}  # product id -> embedding
batch_texts = []
batch_ids = []

BATCH_SIZE = 128  # For efficient processing

for pid in tqdm(product_ids, desc="Processing products"):
    json_data = read_json(pid)

    # Description text lives at productDescriptors -> description -> value
    prod_descr = json_data.get("productDescriptors", {})
    desc_obj = prod_descr.get("description", {}) if isinstance(prod_descr, dict) else {}
    raw_desc = desc_obj.get("value", '') if isinstance(desc_obj, dict) else ''
    description = clean_html(raw_desc)

    input_string_natural = build_natural_text(description)

    batch_texts.append(input_string_natural)
    batch_ids.append(pid)

    # Batch processing for efficiency: encode once a full batch has been collected
    if len(batch_texts) >= BATCH_SIZE:
        embeddings = model.encode(batch_texts, show_progress_bar=False, batch_size=BATCH_SIZE)
        for idx, vector in zip(batch_ids, embeddings):
            vectors[idx] = vector
        batch_texts = []
        batch_ids = []

# Process the last (partial) batch
if batch_texts:
    embeddings = model.encode(batch_texts, show_progress_bar=False, batch_size=BATCH_SIZE)
    for idx, vector in zip(batch_ids, embeddings):
        vectors[idx] = vector

# Save the vectors
vectors_path = os.path.join(OUTPUT_DIR, 'product_vectors_only_description.pkl')
with open(vectors_path, 'wb') as f:
    pickle.dump(vectors, f)

print(f"Wektory zostały zapisane do: {vectors_path}")

# Additional CSV with the vectors
csv_output = pd.DataFrame({
    'id': list(vectors.keys()),
    'vector': [vec.tolist() for vec in vectors.values()]
})
csv_output.to_csv(os.path.join(OUTPUT_DIR, 'product_vectors_only_description.csv'), index=False)

print("Gotowe!")

In [None]:
import pandas as pd
import os
import json
import re
import pickle
import torch  # ### <--- 1. IMPORTUJEMY TORCH
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

# Settings: dataset location and the input/output paths derived from it
DATASET_PATH = r"C:\Users\wrobl\Downloads\archive\fashion-dataset"
CSV_PATH = os.path.join(DATASET_PATH, 'styles.csv')
JSON_DIR = os.path.join(DATASET_PATH, 'styles')  # read_json looks for <id>.json files here
OUTPUT_DIR = os.path.join(DATASET_PATH, 'vectors_output')

os.makedirs(OUTPUT_DIR, exist_ok=True)  # create the output folder if it does not exist yet

# ### <--- 2. CHECK WHETHER A GPU IS AVAILABLE AND SET THE DEVICE
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"--------------------------------------------------")
print(f"HARDWARE CHECK: Używam urządzenia: {device.upper()}")
if device == "cuda":
    # Report the name and total memory of CUDA device 0
    print(f"Karta graficzna: {torch.cuda.get_device_name(0)}")
    print(f"Dostępna pamięć VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
else:
    # Warn that the run falls back to the CPU
    print("UWAGA: Kod nadal leci na CPU! Sprawdź instalację PyTorch CUDA.")
print(f"--------------------------------------------------")

# ### <--- 3. LOAD THE MODEL ONTO THE SELECTED DEVICE
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2', device=device)

# Helper functions
def clean_html(raw_html):
    """Remove HTML tags from ``raw_html`` and collapse it onto one line.

    Args:
        raw_html: Text that may contain ``<...>`` tags. Non-string values
            (such as ``None`` from a null JSON field) count as empty text.

    Returns:
        The input without ``<...>`` spans, with newlines turned into spaces
        and outer whitespace stripped; ``""`` when the input is not a string.
    """
    # A null description should yield the placeholder text downstream,
    # not a TypeError from re.sub.
    if not isinstance(raw_html, str):
        return ""
    cleanr = re.compile('<.*?>')
    return re.sub(cleanr, '', raw_html).replace('\n', ' ').strip()

def read_json(product_id, json_dir=None):
    """Load the ``data`` payload from one product's JSON file.

    Args:
        product_id: Product identifier; the file read is ``<product_id>.json``.
        json_dir: Directory holding the JSON files. Defaults to the
            module-level ``JSON_DIR``.

    Returns:
        The dict stored under the top-level ``"data"`` key. An empty dict is
        returned when the file is missing, cannot be read or parsed, or when
        the payload is absent / not a dict (e.g. ``"data": null``).
    """
    base_dir = JSON_DIR if json_dir is None else json_dir
    json_path = os.path.join(base_dir, f"{product_id}.json")
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # A product without a JSON file is expected; stay silent.
        return {}
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Błąd wczytywania {json_path}: {e}")
        return {}
    payload = data.get('data') if isinstance(data, dict) else None
    # Callers immediately call .get() on the result, so always hand back a dict.
    return payload if isinstance(payload, dict) else {}

def build_natural_text(description):
    """Return the model input sentence built from the description alone.

    Args:
        description: Cleaned description text; may be empty.

    Returns:
        ``"This is a <description>"`` for a non-empty description, otherwise
        a fixed placeholder sentence.
    """
    return (
        f"This is a {description}"
        if description
        else "This is a product without description."
    )

# Load the data (malformed CSV rows are skipped); only the ids are used below
df = pd.read_csv(CSV_PATH, on_bad_lines='skip')
df = df.set_index('id')
product_ids = df.index

# Containers for the results
vectors = {}  # product id -> embedding
batch_texts = []
batch_ids = []

BATCH_SIZE = 256

for pid in tqdm(product_ids, desc="Processing products"):
    json_data = read_json(pid)
    # Description text lives at productDescriptors -> description -> value
    prod_descr = json_data.get("productDescriptors", {})
    desc_obj = prod_descr.get("description", {}) if isinstance(prod_descr, dict) else {}
    raw_desc = desc_obj.get("value", '') if isinstance(desc_obj, dict) else ''
    description = clean_html(raw_desc)

    input_string_natural = build_natural_text(description)

    batch_texts.append(input_string_natural)
    batch_ids.append(pid)

    # Batch processing: encode once a full batch has been collected
    if len(batch_texts) >= BATCH_SIZE:
        embeddings = model.encode(batch_texts, show_progress_bar=False, batch_size=BATCH_SIZE)
        for idx, vector in zip(batch_ids, embeddings):
            vectors[idx] = vector
        batch_texts = []
        batch_ids = []

# Process the last (partial) batch
if batch_texts:
    embeddings = model.encode(batch_texts, show_progress_bar=False, batch_size=BATCH_SIZE)
    for idx, vector in zip(batch_ids, embeddings):
        vectors[idx] = vector

# Save the vectors
vectors_path = os.path.join(OUTPUT_DIR, 'product_vectors_only_description_gpu.pkl')
with open(vectors_path, 'wb') as f:
    pickle.dump(vectors, f)

print(f"Wektory zostały zapisane do: {vectors_path}")

# Additional CSV with the vectors
csv_output = pd.DataFrame({
    'id': list(vectors.keys()),
    'vector': [vec.tolist() for vec in vectors.values()]
})
csv_output.to_csv(os.path.join(OUTPUT_DIR, 'product_vectors_only_description_gpu.csv'), index=False)

print("Gotowe!")

Teraz podchodzimy do nowego modelu (Qwen/Qwen3-Embedding-4B) <3


1st attempt - with brandname
Product attribute	Example value
articleType	shirts
baseColour	navy blue
gender	men
masterCategory	apparel
season	fall
subCategory	topwear
usage	casual
brandName	turtle
ageGroup	adults-men
body or Garment Size	garment Measurements in
collar	spread collar
fabric	cotton
fit	slim fit
pattern	checked
sleeve length	long sleeves
description	Dark brown and indigo blue plaid check shirt with white accents

In [None]:
# ==========================================
# 1. KONFIGURACJA ŚRODOWISKA
# ==========================================
# Instalacja wymaganych bibliotek (uruchomi się cicho dzięki -q)
!pip install -q sentence-transformers accelerate

import os
import json
import re
import pickle
import zipfile
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer
from tqdm import tqdm
from google.colab import drive

# Mount Google Drive (skipped when /content/drive already exists)
if not os.path.exists('/content/drive'):
    drive.mount('/content/drive')

# ==========================================
# 2. PATH DEFINITIONS (adjust the folder!)
# ==========================================
# Google Drive folder that holds styles.zip and styles.csv
DRIVE_FOLDER = "/content/drive/MyDrive/mamamagisterka"

# Input paths
ZIP_PATH = os.path.join(DRIVE_FOLDER, 'styles.zip')
CSV_PATH = os.path.join(DRIVE_FOLDER, 'styles.csv')

# Working directory on the Colab machine (temporary disk)
WORK_DIR = "/content/data"
JSON_DIR = os.path.join(WORK_DIR, 'styles')
os.makedirs(WORK_DIR, exist_ok=True)

# Output directory (results are written to Google Drive)
OUTPUT_DIR = DRIVE_FOLDER

# ==========================================
# 3. DATA PREPARATION
# ==========================================
# Extract the JSON files from the ZIP archive (only if not extracted yet)
if not os.path.exists(JSON_DIR):
    print(f"Rozpakowywanie {ZIP_PATH} do {WORK_DIR}...")
    try:
        with zipfile.ZipFile(ZIP_PATH, 'r') as zip_ref:
            zip_ref.extractall(WORK_DIR)
    except FileNotFoundError:
        print("BŁĄD: Nie znaleziono pliku styles.zip na Dysku Google! Sprawdź ścieżkę.")
        # Stop here: continuing without the JSON files would silently build
        # every product text from the CSV columns alone.
        raise
    else:
        print("Rozpakowywanie zakończone pomyślnie.")
else:
    print("Dane są już rozpakowane, pomijam ten krok.")

# ==========================================
# 4. MODEL LOADING
# ==========================================
# Qwen3-Embedding-4B, loaded in half precision below to reduce GPU memory use
MODEL_NAME = "Qwen/Qwen3-Embedding-4B"

print(f"\nŁadowanie modelu: {MODEL_NAME}...")
try:
    # fp16 weights so the model has a chance of fitting in VRAM
    model = SentenceTransformer(
        MODEL_NAME,
        trust_remote_code=True,
        model_kwargs={"torch_dtype": torch.float16}
    )
    model.max_seq_length = 4096
    print("Model załadowany. Trzymaj kciuki za VRAM.")
except Exception as e:
    print(f"\n❌ BŁĄD ŁADOWANIA MODELU: {e}")
    # Bare raise keeps the original traceback intact
    raise
# ==========================================
# 5. HELPER FUNCTIONS (ETL)
# ==========================================

def clean_html(raw_html):
    """Remove HTML tags and surrounding whitespace from the text.

    Non-string input yields an empty string. Newlines are replaced by spaces.
    """
    if isinstance(raw_html, str):
        without_tags = re.sub('<.*?>', '', raw_html)
        return without_tags.replace('\n', ' ').strip()
    return ""

def read_json_data(product_id, json_dir=None):
    """Load the ``data`` payload from one product's JSON file.

    Args:
        product_id: Product identifier; the file read is ``<product_id>.json``.
        json_dir: Directory holding the JSON files. Defaults to the
            module-level ``JSON_DIR``.

    Returns:
        The dict stored under the top-level ``"data"`` key. An empty dict is
        returned when the file is missing, cannot be read or parsed, or when
        the payload is absent / not a dict (e.g. ``"data": null``).
    """
    base_dir = JSON_DIR if json_dir is None else json_dir
    json_path = os.path.join(base_dir, f"{product_id}.json")
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # A product without a JSON file is expected; stay silent.
        return {}
    except (OSError, ValueError) as e:
        # Report unreadable / corrupt files instead of hiding them.
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Błąd wczytywania {json_path}: {e}")
        return {}
    payload = data.get('data') if isinstance(data, dict) else None
    # Callers immediately call .get() on the result, so always hand back a dict.
    return payload if isinstance(payload, dict) else {}

def build_complex_text(info, description):
    """Create a natural-language product description for the embedding model.

    Combines the basic fields (gender, age group, colour), the detailed
    product attributes (pattern, fit, fabric, sleeve length, collar, article
    type) and extra metadata (season, brand, size, usage, categories).

    Args:
        info: Mapping of attribute name to value; empty or missing values are
            skipped.
        description: Cleaned product description, appended when non-empty.

    Returns:
        The assembled sentence, optionally followed by
        ``" Description: <description>"``.
    """
    # 1-2. Basic features followed by the detailed attributes, lower-cased
    trait_keys = (
        'gender', 'ageGroup', 'baseColour',
        'pattern', 'fit', 'fabric', 'sleeve length', 'collar', 'articleType',
    )
    traits = ', '.join(info[key].lower() for key in trait_keys if info.get(key))

    # 3-4. Space-separated opening: season, traits, brand
    lead = ["This is a"]
    if info.get('season'):
        lead.append(info.get('season').capitalize())
    if traits:
        lead.append(traits)
    if info.get('brandName'):
        lead.append(f"by {info.get('brandName')}")

    # Comma-separated trailing clauses: size, usage, category, subcategory
    clauses = [' '.join(lead)]
    if info.get('body or Garment Size'):
        clauses.append(f"Size: {info.get('body or Garment Size')}")
    if info.get('usage'):
        clauses.append(f"for {info.get('usage').lower()}")
    if info.get('masterCategory'):
        clauses.append(f"category: {info.get('masterCategory').lower()}")
    if info.get('subCategory'):
        clauses.append(f"subcategory: {info.get('subCategory').lower()}")

    sentence = ', '.join(clauses).strip(", ") + "."

    # 5. Append the original textual description
    if description:
        sentence += f" Description: {description}"

    return sentence

# ==========================================
# 6. MAIN PROCESSING LOOP
# ==========================================

# Fields to extract
selected_csv_features = ['articleType', 'baseColour', 'gender', 'masterCategory', 'season', 'subCategory', 'usage']
selected_json_features = ['brandName', 'ageGroup']
# Output attribute name -> key spellings tried, in order, inside "articleAttributes"
selected_json_attributes = {
    'body or Garment Size': ['body or Garment Size', 'Body or Garment Size'],
    'collar': ['collar', 'Collar'],
    'fabric': ['fabric', 'Fabric'],
    'fit': ['fit', 'Fit'],
    'pattern': ['pattern', 'Pattern'],
    'sleeve length': ['sleeve length', 'Sleeve Length']
}

print("\nWczytywanie pliku CSV...")
try:
    df = pd.read_csv(CSV_PATH, on_bad_lines='skip')
    df = df.set_index('id')
    product_ids = df.index
    print(f"Znaleziono {len(product_ids)} produktów.")
except FileNotFoundError:
    print("BŁĄD: Nie znaleziono pliku styles.csv!")
    # Nothing to process; the save step below is skipped for an empty result
    product_ids = []

# Data buffers
vectors = {}  # product id -> embedding
batch_texts = []
batch_ids = []

# Batch size handed to model.encode; kept small to limit GPU memory use
BATCH_SIZE = 4

print(f"\nRozpoczynam generowanie embeddingów dla {len(product_ids)} produktów...")

for pid in tqdm(product_ids, desc="Postęp"):
    # A. Fetch the raw data (missing CSV values become '')
    csv_row = df.loc[pid].fillna('').to_dict()
    json_data = read_json_data(pid)

    # B. Aggregate the information into the 'info' dict
    info = {f: csv_row.get(f, '') for f in selected_csv_features}
    for f in selected_json_features:
        info[f] = json_data.get(f, '')

    # C. Pull the nested attributes (articleAttributes); first matching key wins
    attrs = json_data.get("articleAttributes", {})
    if isinstance(attrs, dict):
        for attr_name, keys in selected_json_attributes.items():
            found_val = ''
            for k in keys:
                if k in attrs:
                    found_val = attrs[k]
                    break
            info[attr_name] = found_val
    else:
        for attr_name in selected_json_attributes:
            info[attr_name] = ''

    # D. Clean the HTML description (productDescriptors -> description -> value)
    prod_descr = json_data.get("productDescriptors", {})
    desc_obj = prod_descr.get("description", {}) if isinstance(prod_descr, dict) else {}
    raw_desc = desc_obj.get("value", '') if isinstance(desc_obj, dict) else ''
    description = clean_html(raw_desc)

    # E. Build the input text for the model
    final_text = build_complex_text(info, description)

    batch_texts.append(final_text)
    batch_ids.append(pid)

    # F. Generate the vectors once a full batch has been collected
    if len(batch_texts) >= BATCH_SIZE:
        embeddings = model.encode(batch_texts, show_progress_bar=False, batch_size=BATCH_SIZE)
        for idx, vector in zip(batch_ids, embeddings):
            vectors[idx] = vector
        batch_texts = []
        batch_ids = []

# G. Process the remaining products (last, partial batch)
if batch_texts:
    embeddings = model.encode(batch_texts, show_progress_bar=False, batch_size=BATCH_SIZE)
    for idx, vector in zip(batch_ids, embeddings):
        vectors[idx] = vector

# ==========================================
# 7. SAVING THE RESULTS
# ==========================================
print(f"\nPrzetwarzanie zakończone. Wygenerowano {len(vectors)} wektorów.")

if not vectors:
    # Do not overwrite earlier results with empty files or report success
    print("BŁĄD: Nie wygenerowano żadnych wektorów - pomijam zapis plików.")
else:
    # NOTE: the file names say "7b" although MODEL-wise this cell loads a 4B
    # model; the names are kept so existing downstream paths keep working.
    # Write the .pkl file (dict {id: vector})
    pkl_output_path = os.path.join(OUTPUT_DIR, 'product_vectors_qwen_7b.pkl')
    print(f"Zapisywanie pickle do: {pkl_output_path}...")
    with open(pkl_output_path, 'wb') as f:
        pickle.dump(vectors, f)

    # Write the .csv file (id, vector) for easy inspection or database import
    csv_output_path = os.path.join(OUTPUT_DIR, 'product_vectors_qwen_7b.csv')
    print(f"Zapisywanie CSV do: {csv_output_path}...")

    # Convert each vector to a plain list before writing it to CSV
    csv_data = [{'id': k, 'vector': v.tolist()} for k, v in vectors.items()]
    pd.DataFrame(csv_data).to_csv(csv_output_path, index=False)

    print("\n--- PROCES ZAKOŃCZONY SUKCESEM ---")

2nd attempt

In [None]:
# ==========================================
# 1. KONFIGURACJA ŚRODOWISKA
# ==========================================
# Instalacja wymaganych bibliotek (uruchomi się cicho dzięki -q)
!pip install -q sentence-transformers accelerate

import os
import json
import re
import pickle
import zipfile
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer
from tqdm import tqdm
from google.colab import drive

# Mount Google Drive (skipped when /content/drive already exists)
if not os.path.exists('/content/drive'):
    drive.mount('/content/drive')

# ==========================================
# 2. PATH DEFINITIONS (adjust the folder!)
# ==========================================
# Folder on your Google Drive that holds styles.zip and styles.csv
DRIVE_FOLDER = "/content/drive/MyDrive/mamamagisterka"

# Input paths
ZIP_PATH = os.path.join(DRIVE_FOLDER, 'styles.zip')
CSV_PATH = os.path.join(DRIVE_FOLDER, 'styles.csv')

# Working folder inside Colab (fast temporary disk); created if missing
WORK_DIR = "/content/data"
JSON_DIR = os.path.join(WORK_DIR, 'styles')
os.makedirs(WORK_DIR, exist_ok=True)

# Output folder (results are written to Google Drive)
OUTPUT_DIR = DRIVE_FOLDER

# ==========================================
# 3. DATA PREPARATION
# ==========================================
# Extract the JSON files from the ZIP archive (only when JSON_DIR does not exist yet)
# NOTE(review): only the existence of JSON_DIR is checked, so a previously
# interrupted extraction would also be treated as "already extracted".
if not os.path.exists(JSON_DIR):
    print(f"Rozpakowywanie {ZIP_PATH} do {WORK_DIR}...")
    try:
        with zipfile.ZipFile(ZIP_PATH, 'r') as zip_ref:
            zip_ref.extractall(WORK_DIR)
        print("Rozpakowywanie zakończone pomyślnie.")
    except FileNotFoundError:
        # NOTE(review): the error is only printed; the cell keeps running
        # without the extracted JSON files.
        print("BŁĄD: Nie znaleziono pliku styles.zip na Dysku Google! Sprawdź ścieżkę.")
else:
    print("Dane są już rozpakowane, pomijam ten krok.")

# ==========================================
# 4. MODEL LOADING
# ==========================================
# NOTE(review): the previous comment here described a 7B-parameter model that
# fits on a Colab T4 GPU, but the identifier below names a 4B model - confirm
# which one is intended.
MODEL_NAME = "Qwen/Qwen3-Embedding-4B"

print(f"\nŁadowanie modelu: {MODEL_NAME}...")
try:
    # Load the weights in fp16 (author's note: otherwise memory is too tight)
    model = SentenceTransformer(
        MODEL_NAME,
        trust_remote_code=True,
        model_kwargs={"torch_dtype": torch.float16}
    )
    # Sets the model's max_seq_length attribute to 4096
    model.max_seq_length = 4096
    print("Model załadowany. Trzymaj kciuki za VRAM.")
except Exception as e:
    # Report the failure, then re-raise so the cell stops here
    print(f"\n❌ BŁĄD ŁADOWANIA MODELU: {e}")
    raise e
# ==========================================
# 5. FUNKCJE POMOCNICZE (ETL)
# ==========================================

def clean_html(raw_html):
    """Strip HTML markup from *raw_html* and return single-spaced plain text.

    Args:
        raw_html: Raw description string, possibly containing HTML tags and
            character entities. Any non-string value yields "".

    Returns:
        The text with tags removed, entities decoded and every run of
        whitespace collapsed to one space; leading/trailing whitespace is
        trimmed.
    """
    import html  # local import keeps this block self-contained

    if not isinstance(raw_html, str):
        return ""
    # Replace each tag with a space (not ""), otherwise the words on both
    # sides of e.g. "</p><p>" or "<br/>" get glued together. DOTALL lets a
    # tag that wraps across lines be matched as well.
    without_tags = re.sub(r"<.*?>", " ", raw_html, flags=re.DOTALL)
    # Decode entities such as "&amp;" / "&nbsp;" only after the tags are
    # gone, so an escaped "&lt;" survives as literal text.
    decoded = html.unescape(without_tags)
    # str.split() without arguments splits on any whitespace (incl. \xa0).
    return " ".join(decoded.split())

def read_json_data(product_id):
    """Load the ``data`` section of the JSON file for *product_id*.

    Args:
        product_id: Product identifier; the file read is
            ``<JSON_DIR>/<product_id>.json``.

    Returns:
        The dict stored under the top-level ``"data"`` key, or an empty dict
        when the file is missing or unreadable, is not valid JSON, or does
        not hold a dict under ``"data"``.
    """
    json_path = os.path.join(JSON_DIR, f"{product_id}.json")
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            payload = json.load(f)
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError: malformed JSON
        # (json.JSONDecodeError) or bytes that are not valid UTF-8
        # (UnicodeDecodeError). Anything else is a real bug and propagates.
        return {}
    # Callers chain .get() on the result, so never hand back None or a list.
    data = payload.get('data') if isinstance(payload, dict) else None
    return data if isinstance(data, dict) else {}

def build_complex_text(info: dict, description: str | None) -> str:
    """
    Tworzy bogaty semantycznie opis produktu w języku naturalnym.
    Łączy dane z CSV (kategorie, płeć) oraz JSON (marka, materiał, krój).
    """
    cechy = []

    # 1. Podstawowe cechy (z CSV i JSON)
    if info.get('gender'): cechy.append(info['gender'].lower())
    if info.get('ageGroup'): cechy.append(info['ageGroup'].lower())
    if info.get('baseColour'): cechy.append(info['baseColour'].lower())

    # 2. Atrybuty szczegółowe produktu
    if info.get('pattern'): cechy.append(info['pattern'].lower())
    if info.get('fit'): cechy.append(info['fit'].lower())
    if info.get('fabric'): cechy.append(info['fabric'].lower())
    if info.get('sleeve length'): cechy.append(info['sleeve length'].lower())
    if info.get('collar'): cechy.append(info['collar'].lower())
    if info.get('articleType'): cechy.append(info['articleType'].lower())

    cechy_str = ', '.join([c for c in cechy if c])

    # 3. Dodatkowe metadane
    size_str = f"Size: {info.get('body or Garment Size')}" if info.get('body or Garment Size') else ""
    brand_str = f"by {info.get('brandName')}" if info.get('brandName') else ""
    usage_str = f"for {info.get('usage').lower()}" if info.get('usage') else ""
    season_str = f"{info.get('season').capitalize()}" if info.get('season') else ""
    cat_str = f"category: {info.get('masterCategory').lower()}" if info.get('masterCategory') else ""
    # Dodane subcategory zgodnie z wersją RTX
    subcat_str = f"subcategory: {info.get('subCategory').lower()}" if info.get('subCategory') else ""

    # 4. Konstrukcja zdania
    opis = "This is a"
    if season_str: opis += f" {season_str}"
    if cechy_str: opis += f" {cechy_str}"
    if brand_str: opis += f" {brand_str}"
    if size_str: opis += f", {size_str}"
    if usage_str: opis += f", {usage_str}"
    if cat_str: opis += f", {cat_str}"
    if subcat_str: opis += f", {subcat_str}"

    opis = opis.strip(", ") + "."

    # 5. Doklejenie oryginalnego opisu tekstowego
    if description:
        opis += f" Description: {description}"

    return opis

# ==========================================
# 6. MAIN PROCESSING LOOP
# ==========================================

# Fields to extract: CSV columns, top-level JSON fields, and nested JSON
# attributes (canonical name -> key spellings to try, in order)
selected_csv_features = ['articleType', 'baseColour', 'gender', 'masterCategory', 'season', 'subCategory', 'usage']
selected_json_features = ['brandName', 'ageGroup']
selected_json_attributes = {
    'body or Garment Size': ['body or Garment Size', 'Body or Garment Size'],
    'collar': ['collar', 'Collar'],
    'fabric': ['fabric', 'Fabric'],
    'fit': ['fit', 'Fit'],
    'pattern': ['pattern', 'Pattern'],
    'sleeve length': ['sleeve length', 'Sleeve Length']
}

print("\nWczytywanie pliku CSV...")
try:
    # Malformed CSV lines are skipped; rows are then indexed by the 'id' column
    df = pd.read_csv(CSV_PATH, on_bad_lines='skip')
    df = df.set_index('id')
    product_ids = df.index
    print(f"Znaleziono {len(product_ids)} produktów.")
except FileNotFoundError:
    # NOTE(review): the error is only printed; product_ids becomes empty, so
    # the loop below does nothing and the cell keeps running.
    print("BŁĄD: Nie znaleziono pliku styles.csv!")
    product_ids = []

# Data buffers: finished vectors by product id, plus the batch being collected
vectors = {}
batch_texts = []
batch_ids = []

# Batch size.
# NOTE(review): the previous comment said 16 is safe for a 7B model on 16 GB
# of VRAM, but the value set here is 4 - confirm which is intended.
BATCH_SIZE = 4

print(f"\nRozpoczynam generowanie embeddingów dla {len(product_ids)} produktów...")

for pid in tqdm(product_ids, desc="Postęp"):
    # A. Fetch the raw data (missing CSV values become '')
    csv_row = df.loc[pid].fillna('').to_dict()
    json_data = read_json_data(pid)

    # B. Aggregate the information into the 'info' dict
    info = {f: csv_row.get(f, '') for f in selected_csv_features}
    for f in selected_json_features:
        info[f] = json_data.get(f, '')

    # C. Extract nested attributes (articleAttributes); for each attribute the
    #    first key spelling present in attrs wins, '' when none is present
    attrs = json_data.get("articleAttributes", {})
    if isinstance(attrs, dict):
        for attr_name, keys in selected_json_attributes.items():
            found_val = ''
            for k in keys:
                if k in attrs:
                    found_val = attrs[k]
                    break
            info[attr_name] = found_val
    else:
        for attr_name in selected_json_attributes:
            info[attr_name] = ''

    # D. Clean the HTML description (productDescriptors -> description -> value)
    prod_descr = json_data.get("productDescriptors", {})
    desc_obj = prod_descr.get("description", {}) if isinstance(prod_descr, dict) else {}
    raw_desc = desc_obj.get("value", '') if isinstance(desc_obj, dict) else ''
    description = clean_html(raw_desc)

    # E. Build the input text for the model
    final_text = build_complex_text(info, description)

    batch_texts.append(final_text)
    batch_ids.append(pid)

    # F. Generate vectors (once a full batch has been collected)
    if len(batch_texts) >= BATCH_SIZE:
        embeddings = model.encode(batch_texts, show_progress_bar=False, batch_size=BATCH_SIZE)
        for idx, vector in zip(batch_ids, embeddings):
            vectors[idx] = vector
        batch_texts = []
        batch_ids = []

# G. Process the remaining products (the last, incomplete batch)
if batch_texts:
    embeddings = model.encode(batch_texts, show_progress_bar=False, batch_size=BATCH_SIZE)
    for idx, vector in zip(batch_ids, embeddings):
        vectors[idx] = vector

# ==========================================
# 7. SAVING THE RESULTS
# ==========================================
# Writes the {id: vector} dict as a pickle and as a CSV into OUTPUT_DIR.
print(f"\nPrzetwarzanie zakończone. Wygenerowano {len(vectors)} wektorów.")

# Both paths are always defined so later cells can still refer to them.
pkl_output_path = os.path.join(OUTPUT_DIR, 'product_vectors_2nd.pkl')
csv_output_path = os.path.join(OUTPUT_DIR, 'product_vectors_2nd.csv')

if vectors:
    # Pickle file: the {id: vector} dict as built above
    print(f"Zapisywanie pickle do: {pkl_output_path}...")
    with open(pkl_output_path, 'wb') as f:
        pickle.dump(vectors, f)

    # CSV file (id, vector) - for easy inspection or import into a database
    print(f"Zapisywanie CSV do: {csv_output_path}...")

    # Each vector is converted to a plain list before it is written to CSV
    csv_data = [{'id': k, 'vector': v.tolist()} for k, v in vectors.items()]
    pd.DataFrame(csv_data).to_csv(csv_output_path, index=False)

    print("\n--- PROCES ZAKOŃCZONY SUKCESEM ---")
else:
    # Nothing was embedded (e.g. styles.csv was not found). Writing now would
    # replace earlier result files with empty ones and still report success,
    # so the files are left untouched.
    print("\nBŁĄD: Brak wektorów do zapisania - pomijam zapis plików.")

3rd attempt - Qwen3-Embedding-4B, without brand name and article attributes

In [None]:
# ==========================================
# 1. KONFIGURACJA ŚRODOWISKA
# ==========================================
# Instalacja wymaganych bibliotek (uruchomi się cicho dzięki -q)
!pip install -q sentence-transformers accelerate

import os
import json
import re
import pickle
import zipfile
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer
from tqdm import tqdm
from google.colab import drive

# Mount Google Drive (skipped when /content/drive already exists)
if not os.path.exists('/content/drive'):
    drive.mount('/content/drive')

# ==========================================
# 2. PATH DEFINITIONS (adjust the folder!)
# ==========================================
# Folder on your Google Drive that holds styles.zip and styles.csv
DRIVE_FOLDER = "/content/drive/MyDrive/mamamagisterka"

# Input paths
ZIP_PATH = os.path.join(DRIVE_FOLDER, 'styles.zip')
CSV_PATH = os.path.join(DRIVE_FOLDER, 'styles.csv')

# Working folder inside Colab (fast temporary disk); created if missing
WORK_DIR = "/content/data"
JSON_DIR = os.path.join(WORK_DIR, 'styles')
os.makedirs(WORK_DIR, exist_ok=True)

# Output folder (results are written to Google Drive)
OUTPUT_DIR = DRIVE_FOLDER

# ==========================================
# 3. DATA PREPARATION
# ==========================================
# Extract the JSON files from the ZIP archive (only when JSON_DIR does not exist yet)
# NOTE(review): only the existence of JSON_DIR is checked, so a previously
# interrupted extraction would also be treated as "already extracted".
if not os.path.exists(JSON_DIR):
    print(f"Rozpakowywanie {ZIP_PATH} do {WORK_DIR}...")
    try:
        with zipfile.ZipFile(ZIP_PATH, 'r') as zip_ref:
            zip_ref.extractall(WORK_DIR)
        print("Rozpakowywanie zakończone pomyślnie.")
    except FileNotFoundError:
        # NOTE(review): the error is only printed; the cell keeps running
        # without the extracted JSON files.
        print("BŁĄD: Nie znaleziono pliku styles.zip na Dysku Google! Sprawdź ścieżkę.")
else:
    print("Dane są już rozpakowane, pomijam ten krok.")

# ==========================================
# 4. MODEL LOADING
# ==========================================
MODEL_NAME = "Qwen/Qwen3-Embedding-4B"

print(f"\nŁadowanie modelu: {MODEL_NAME}...")
try:
    # Load the weights in fp16 (author's note: otherwise memory is too tight)
    model = SentenceTransformer(
        MODEL_NAME,
        trust_remote_code=True,
        model_kwargs={"torch_dtype": torch.float16}
    )
    # Sets the model's max_seq_length attribute to 4096
    model.max_seq_length = 4096
    print("Model załadowany. Trzymaj kciuki za VRAM.")
except Exception as e:
    # Report the failure, then re-raise so the cell stops here
    print(f"\n❌ BŁĄD ŁADOWANIA MODELU: {e}")
    raise e
# ==========================================
# 5. FUNKCJE POMOCNICZE (ETL)
# ==========================================

def clean_html(raw_html):
    """Strip HTML markup from *raw_html* and return single-spaced plain text.

    Args:
        raw_html: Raw description string, possibly containing HTML tags and
            character entities. Any non-string value yields "".

    Returns:
        The text with tags removed, entities decoded and every run of
        whitespace collapsed to one space; leading/trailing whitespace is
        trimmed.
    """
    import html  # local import keeps this block self-contained

    if not isinstance(raw_html, str):
        return ""
    # Replace each tag with a space (not ""), otherwise the words on both
    # sides of e.g. "</p><p>" or "<br/>" get glued together. DOTALL lets a
    # tag that wraps across lines be matched as well.
    without_tags = re.sub(r"<.*?>", " ", raw_html, flags=re.DOTALL)
    # Decode entities such as "&amp;" / "&nbsp;" only after the tags are
    # gone, so an escaped "&lt;" survives as literal text.
    decoded = html.unescape(without_tags)
    # str.split() without arguments splits on any whitespace (incl. \xa0).
    return " ".join(decoded.split())

def read_json_data(product_id):
    """Load the ``data`` section of the JSON file for *product_id*.

    Args:
        product_id: Product identifier; the file read is
            ``<JSON_DIR>/<product_id>.json``.

    Returns:
        The dict stored under the top-level ``"data"`` key, or an empty dict
        when the file is missing or unreadable, is not valid JSON, or does
        not hold a dict under ``"data"``.
    """
    json_path = os.path.join(JSON_DIR, f"{product_id}.json")
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            payload = json.load(f)
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError: malformed JSON
        # (json.JSONDecodeError) or bytes that are not valid UTF-8
        # (UnicodeDecodeError). Anything else is a real bug and propagates.
        return {}
    # Callers chain .get() on the result, so never hand back None or a list.
    data = payload.get('data') if isinstance(payload, dict) else None
    return data if isinstance(data, dict) else {}

def build_complex_text(info, description):
    """
    Render the product attributes in *info* as one English sentence.

    The sentence starts with "This is a", followed (space-separated) by the
    capitalised season and a comma-separated list of lower-cased traits
    (gender, age group, base colour, article type). Size, usage, category
    and subcategory follow as ", "-separated clauses. Missing or empty
    fields are skipped. A non-empty *description* is appended after the
    sentence as " Description: ...".
    """
    # Trait keys in the order they appear in the sentence.
    trait_keys = ('gender', 'ageGroup', 'baseColour', 'articleType')
    traits = [info[key].lower() for key in trait_keys if info.get(key)]
    traits_text = ', '.join(t for t in traits if t)

    size = info.get('body or Garment Size')
    usage = info.get('usage')
    season = info.get('season')
    master_cat = info.get('masterCategory')
    sub_cat = info.get('subCategory')

    # Fragments attached with a single space, in sentence order.
    spaced_parts = [
        season.capitalize() if season else "",
        traits_text,
    ]
    # Fragments attached with ", ", in sentence order.
    comma_parts = [
        f"Size: {size}" if size else "",
        f"for {usage.lower()}" if usage else "",
        f"category: {master_cat.lower()}" if master_cat else "",
        f"subcategory: {sub_cat.lower()}" if sub_cat else "",
    ]

    sentence = "This is a"
    sentence += ''.join(f" {part}" for part in spaced_parts if part)
    sentence += ''.join(f", {part}" for part in comma_parts if part)
    # Trim stray commas/spaces at either end before closing the sentence.
    sentence = sentence.strip(", ") + "."

    if not description:
        return sentence
    return f"{sentence} Description: {description}"

# ==========================================
# 6. MAIN PROCESSING LOOP
# ==========================================

# Fields to extract: CSV columns and top-level JSON fields
selected_csv_features = ['articleType', 'baseColour', 'gender', 'masterCategory', 'season', 'subCategory', 'usage']
selected_json_features = [ 'ageGroup']

print("\nWczytywanie pliku CSV...")
try:
    # Malformed CSV lines are skipped; rows are then indexed by the 'id' column
    df = pd.read_csv(CSV_PATH, on_bad_lines='skip')
    df = df.set_index('id')
    product_ids = df.index
    print(f"Znaleziono {len(product_ids)} produktów.")
except FileNotFoundError:
    # NOTE(review): the error is only printed; product_ids becomes empty, so
    # the loop below does nothing and the cell keeps running.
    print("BŁĄD: Nie znaleziono pliku styles.csv!")
    product_ids = []

# Data buffers: finished vectors by product id, plus the batch being collected
vectors = {}
batch_texts = []
batch_ids = []

# Batch size.
# NOTE(review): the previous comment said 16 is safe for a 7B model on 16 GB
# of VRAM, but the value set here is 4 - confirm which is intended.
BATCH_SIZE = 4

print(f"\nRozpoczynam generowanie embeddingów dla {len(product_ids)} produktów...")

for pid in tqdm(product_ids, desc="Postęp"):
    # A. Fetch the raw data (missing CSV values become '')
    csv_row = df.loc[pid].fillna('').to_dict()
    json_data = read_json_data(pid)

    # B. Aggregate the information into the 'info' dict
    info = {f: csv_row.get(f, '') for f in selected_csv_features}
    for f in selected_json_features:
        info[f] = json_data.get(f, '')

    # D. Clean the HTML description (productDescriptors -> description -> value)
    prod_descr = json_data.get("productDescriptors", {})
    desc_obj = prod_descr.get("description", {}) if isinstance(prod_descr, dict) else {}
    raw_desc = desc_obj.get("value", '') if isinstance(desc_obj, dict) else ''
    description = clean_html(raw_desc)

    # E. Build the input text for the model
    final_text = build_complex_text(info, description)

    batch_texts.append(final_text)
    batch_ids.append(pid)

    # F. Generate vectors (once a full batch has been collected)
    if len(batch_texts) >= BATCH_SIZE:
        embeddings = model.encode(batch_texts, show_progress_bar=False, batch_size=BATCH_SIZE)
        for idx, vector in zip(batch_ids, embeddings):
            vectors[idx] = vector
        batch_texts = []
        batch_ids = []

# G. Process the remaining products (the last, incomplete batch)
if batch_texts:
    embeddings = model.encode(batch_texts, show_progress_bar=False, batch_size=BATCH_SIZE)
    for idx, vector in zip(batch_ids, embeddings):
        vectors[idx] = vector

# ==========================================
# 7. SAVING THE RESULTS
# ==========================================
# Writes the {id: vector} dict as a pickle and as a CSV into OUTPUT_DIR.
print(f"\nPrzetwarzanie zakończone. Wygenerowano {len(vectors)} wektorów.")

# Both paths are always defined so later cells can still refer to them.
pkl_output_path = os.path.join(OUTPUT_DIR, 'product_vectors_3rd.pkl')
csv_output_path = os.path.join(OUTPUT_DIR, 'product_vectors_3rd.csv')

if vectors:
    # Pickle file: the {id: vector} dict as built above
    print(f"Zapisywanie pickle do: {pkl_output_path}...")
    with open(pkl_output_path, 'wb') as f:
        pickle.dump(vectors, f)

    # CSV file (id, vector) - for easy inspection or import into a database
    print(f"Zapisywanie CSV do: {csv_output_path}...")

    # Each vector is converted to a plain list before it is written to CSV
    csv_data = [{'id': k, 'vector': v.tolist()} for k, v in vectors.items()]
    pd.DataFrame(csv_data).to_csv(csv_output_path, index=False)

    print("\n--- PROCES ZAKOŃCZONY SUKCESEM ---")
else:
    # Nothing was embedded (e.g. styles.csv was not found). Writing now would
    # replace earlier result files with empty ones and still report success,
    # so the files are left untouched.
    print("\nBŁĄD: Brak wektorów do zapisania - pomijam zapis plików.")

4th attempt - Qwen3-Embedding-4B, product description only

In [None]:
# ==========================================
# 1. KONFIGURACJA ŚRODOWISKA
# ==========================================
!pip install -q sentence-transformers accelerate

import os
import json
import re
import pickle
import zipfile
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer
from tqdm import tqdm
from google.colab import drive

# Mount Google Drive (skipped when /content/drive already exists)
if not os.path.exists('/content/drive'):
    drive.mount('/content/drive')

# ==========================================
# 2. PATH DEFINITIONS
# ==========================================
# Google Drive folder used for both the inputs and the outputs
DRIVE_FOLDER = "/content/drive/MyDrive/mamamagisterka"

# Input paths
ZIP_PATH = os.path.join(DRIVE_FOLDER, "styles.zip")
CSV_PATH = os.path.join(DRIVE_FOLDER, "styles.csv")

# Working folder for the extracted JSON files; created if missing
WORK_DIR = "/content/data"
JSON_DIR = os.path.join(WORK_DIR, "styles")
os.makedirs(WORK_DIR, exist_ok=True)

# Output folder (same as the Drive folder)
OUTPUT_DIR = DRIVE_FOLDER

# ==========================================
# 3. DATA PREPARATION
# ==========================================
# Extract the ZIP archive into WORK_DIR only when JSON_DIR does not exist yet.
# NOTE(review): only the existence of JSON_DIR is checked, so a previously
# interrupted extraction would also be treated as "already extracted".
if not os.path.exists(JSON_DIR):
    print(f"Rozpakowywanie {ZIP_PATH} do {WORK_DIR}...")
    try:
        with zipfile.ZipFile(ZIP_PATH, "r") as zip_ref:
            zip_ref.extractall(WORK_DIR)
        print("Rozpakowywanie zakończone pomyślnie.")
    except FileNotFoundError:
        # NOTE(review): the error is only printed; the cell keeps running
        # without the extracted JSON files.
        print("BŁĄD: Nie znaleziono pliku styles.zip na Dysku Google! Sprawdź ścieżkę.")
else:
    print("Dane są już rozpakowane, pomijam ten krok.")

# ==========================================
# 4. MODEL LOADING
# ==========================================
MODEL_NAME = "Qwen/Qwen3-Embedding-4B"

print(f"\nŁadowanie modelu: {MODEL_NAME}...")
try:
    # The weights are requested in fp16 via model_kwargs
    model = SentenceTransformer(
        MODEL_NAME,
        trust_remote_code=True,
        model_kwargs={"torch_dtype": torch.float16},
    )
    # Sets the model's max_seq_length attribute to 4096
    model.max_seq_length = 4096
    print("Model załadowany.")
except Exception as e:
    # Report the failure, then re-raise so the cell stops here
    print(f"\n❌ BŁĄD ŁADOWANIA MODELU: {e}")
    raise e

# ==========================================
# 5. FUNKCJE POMOCNICZE (ETL)
# ==========================================

def clean_html(raw_html):
    """Strip HTML markup from *raw_html* and return single-spaced plain text.

    Args:
        raw_html: Raw description string, possibly containing HTML tags and
            character entities. Any non-string value yields "".

    Returns:
        The text with tags removed, entities decoded and every run of
        whitespace collapsed to one space; leading/trailing whitespace is
        trimmed.
    """
    import html  # local import keeps this block self-contained

    if not isinstance(raw_html, str):
        return ""
    # Replace each tag with a space (not ""), otherwise the words on both
    # sides of e.g. "</p><p>" or "<br/>" get glued together. DOTALL lets a
    # tag that wraps across lines be matched as well.
    without_tags = re.sub(r"<.*?>", " ", raw_html, flags=re.DOTALL)
    # Decode entities such as "&amp;" / "&nbsp;" only after the tags are
    # gone, so an escaped "&lt;" survives as literal text.
    decoded = html.unescape(without_tags)
    # str.split() without arguments splits on any whitespace (incl. \xa0).
    return " ".join(decoded.split())


def read_json_data(product_id):
    """Load the ``data`` section of the JSON file for *product_id*.

    Args:
        product_id: Product identifier; the file read is
            ``<JSON_DIR>/<product_id>.json``.

    Returns:
        The dict stored under the top-level ``"data"`` key, or an empty dict
        when the file is missing or unreadable, is not valid JSON, or does
        not hold a dict under ``"data"``.
    """
    json_path = os.path.join(JSON_DIR, f"{product_id}.json")
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            payload = json.load(f)
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError: malformed JSON
        # (json.JSONDecodeError) or bytes that are not valid UTF-8
        # (UnicodeDecodeError). Anything else is a real bug and propagates.
        return {}
    # Callers chain .get() on the result, so never hand back None or a list.
    data = payload.get("data") if isinstance(payload, dict) else None
    return data if isinstance(data, dict) else {}


def build_description_text(description: str) -> str:
    """
    Build the model input from the product description alone.

    Returns "This is a <description>" for a description that is non-empty
    after trimming whitespace, otherwise a fixed placeholder sentence.
    """
    trimmed = description.strip() if description else ""
    if not trimmed:
        return "This is a product without description."
    return f"This is a {trimmed}"

# ==========================================
# 6. MAIN PROCESSING LOOP
# ==========================================
print("\nWczytywanie pliku CSV...")
try:
    # Malformed CSV lines are skipped; rows are then indexed by the "id" column
    df = pd.read_csv(CSV_PATH, on_bad_lines="skip")
    df = df.set_index("id")
    product_ids = df.index
    print(f"Znaleziono {len(product_ids)} produktów.")
except FileNotFoundError:
    # NOTE(review): the error is only printed; product_ids becomes empty, so
    # the loop below does nothing and the cell keeps running.
    print("BŁĄD: Nie znaleziono pliku styles.csv!")
    product_ids = []

# Data buffers: finished vectors by product id, plus the batch being collected
vectors = {}
batch_texts = []
batch_ids = []

# Number of texts passed to model.encode() per call
BATCH_SIZE = 4

print(f"\nRozpoczynam generowanie embeddingów dla {len(product_ids)} produktów...")

for pid in tqdm(product_ids, desc="Postęp"):
    json_data = read_json_data(pid)

    # Pull the HTML description (productDescriptors -> description -> value)
    # and strip its markup; each level falls back to an empty value
    prod_descr = json_data.get("productDescriptors", {})
    desc_obj = prod_descr.get("description", {}) if isinstance(prod_descr, dict) else {}
    raw_desc = desc_obj.get("value", "") if isinstance(desc_obj, dict) else ""
    description = clean_html(raw_desc)

    # The model input is built from the description only
    final_text = build_description_text(description)

    batch_texts.append(final_text)
    batch_ids.append(pid)

    # Encode once a full batch has been collected, then reset the buffers
    if len(batch_texts) >= BATCH_SIZE:
        embeddings = model.encode(
            batch_texts,
            show_progress_bar=False,
            batch_size=BATCH_SIZE,
        )
        for idx, vector in zip(batch_ids, embeddings):
            vectors[idx] = vector
        batch_texts = []
        batch_ids = []

# Encode whatever is left in the last, incomplete batch
if batch_texts:
    embeddings = model.encode(
        batch_texts,
        show_progress_bar=False,
        batch_size=BATCH_SIZE,
    )
    for idx, vector in zip(batch_ids, embeddings):
        vectors[idx] = vector

# ==========================================
# 7. SAVING THE RESULTS
# ==========================================
# Writes the {id: vector} dict as a pickle and as a CSV into OUTPUT_DIR.
print(f"\nPrzetwarzanie zakończone. Wygenerowano {len(vectors)} wektorów.")

# Both paths are always defined so later cells can still refer to them.
pkl_output_path = os.path.join(OUTPUT_DIR, "product_vectors_only_description_qwen.pkl")
csv_output_path = os.path.join(OUTPUT_DIR, "product_vectors_only_description_qwen.csv")

if vectors:
    # Pickle file: the {id: vector} dict as built above
    print(f"Zapisywanie pickle do: {pkl_output_path}...")
    with open(pkl_output_path, "wb") as f:
        pickle.dump(vectors, f)

    # CSV file (id, vector); each vector is converted to a plain list first
    print(f"Zapisywanie CSV do: {csv_output_path}...")

    csv_data = [{"id": k, "vector": v.tolist()} for k, v in vectors.items()]
    pd.DataFrame(csv_data).to_csv(csv_output_path, index=False)

    print("\n--- PROCES ZAKOŃCZONY SUKCESEM ---")
else:
    # Nothing was embedded (e.g. styles.csv was not found). Writing now would
    # replace earlier result files with empty ones and still report success,
    # so the files are left untouched.
    print("\nBŁĄD: Brak wektorów do zapisania - pomijam zapis plików.")


In [3]:
import ast

s = '[-0.0004425048828125, -0.032257080078125, 0.0416259765625, -0.0194091796875, -0.0019931793212890625, 0.07598876953125, 0.0819091796875, 0.0005412101745605469, 0.009552001953125, -0.0011720657348632812, -0.032867431640625, 0.0168914794921875, -0.006206512451171875, -0.01959228515625, 0.04058837890625, -0.0235595703125, 0.034637451171875, -0.06695556640625, 0.00783538818359375, -0.000789642333984375, -0.023895263671875, -0.0180816650390625, 0.01049041748046875, -0.0124969482421875, 0.013153076171875, -0.00368499755859375, -0.0184478759765625, 0.003875732421875, 0.040374755859375, -0.00102996826171875, -0.0019378662109375, -0.0003457069396972656, 0.039337158203125, -0.00904083251953125, 0.001026153564453125, -0.0220184326171875, 0.01326751708984375, -0.004974365234375, 0.0214691162109375, -0.04248046875, 0.0187225341796875, 0.0167388916015625, 0.01422119140625, 0.00847625732421875, 0.021484375, 0.00940704345703125, -0.00908660888671875, 0.032928466796875, -0.005016326904296875, -0.01473236083984375, 0.0022029876708984375, 0.0095062255859375, -0.0106201171875, -0.0264892578125, 0.045562744140625, 0.0011196136474609375, 0.0421142578125, -0.004871368408203125, -0.0009670257568359375, -0.01088714599609375, -0.0032405853271484375, -0.018035888671875, -0.0201568603515625, -0.02374267578125, -0.008575439453125, -0.0293731689453125, -0.00641632080078125, -0.0234222412109375, 0.01375579833984375, -0.0107574462890625, -0.027099609375, 0.04437255859375, -0.00881195068359375, 0.00615692138671875, 0.0224761962890625, -0.03424072265625, -0.016632080078125, 0.025634765625, -0.029388427734375, 0.02520751953125, 0.0102996826171875, -0.020111083984375, 0.003795623779296875, 0.00792694091796875, 0.0229339599609375, 0.015625, 0.0169677734375, 0.0191802978515625, -0.03411865234375, -0.0011110305786132812, 0.01024627685546875, 0.019256591796875, 0.01393890380859375, 0.032989501953125, 0.00600433349609375, 0.0084075927734375, 0.0024738311767578125, -0.022979736328125, 
-0.0025806427001953125, -0.0009641647338867188, -0.00870513916015625, 0.01496124267578125, 0.0006818771362304688, -0.0047454833984375, -0.01224517822265625, -0.022796630859375, -0.03106689453125, 0.0095977783203125, 0.01245880126953125, 0.013427734375, 0.007659912109375, -0.00091552734375, -0.0068511962890625, -0.02587890625, -0.00011432170867919922, -0.00904083251953125, -0.0104827880859375, 0.00919342041015625, -0.00766754150390625, 0.01131439208984375, -0.0038433074951171875, 0.01451873779296875, -0.007648468017578125, -0.00783538818359375, 0.01496124267578125, 0.00994873046875, -0.0112457275390625, 0.00444793701171875, 0.01406097412109375, 0.0203094482421875, 0.01178741455078125, -0.0205230712890625, 0.00760650634765625, -0.0025959014892578125, -0.00960540771484375, -0.018524169921875, -0.0031890869140625, -0.003162384033203125, -0.015045166015625, -0.0006165504455566406, -0.00457763671875, -0.00243377685546875, 0.0029754638671875, 0.031280517578125, -0.01245880126953125, 0.00020766258239746094, -0.00217437744140625, 0.011993408203125, 0.022674560546875, 0.00606536865234375, 0.01061248779296875, 0.005428314208984375, 0.004734039306640625, 0.0260162353515625, -0.0020656585693359375, 0.0117034912109375, -0.001361846923828125, 0.018829345703125, 0.004199981689453125, 0.014801025390625, 0.01267242431640625, 0.0156097412109375, -0.00557708740234375, 0.038421630859375, 0.005840301513671875, 0.0156707763671875, -0.03179931640625, 0.00524139404296875, -0.00853729248046875, -0.003936767578125, -0.00461578369140625, -0.0184478759765625, -0.01239776611328125, 0.0005388259887695312, -0.020233154296875, -0.0279388427734375, -0.03564453125, -0.01457977294921875, -0.006412506103515625, 0.011688232421875, 0.018524169921875, 0.00766754150390625, 0.0203399658203125, -0.003932952880859375, 0.002960205078125, 0.007488250732421875, -0.006435394287109375, 0.0077972412109375, 0.0202178955078125, 0.005077362060546875, -0.00884246826171875, -0.0174102783203125, -0.0159912109375, 
-0.004016876220703125, -0.00982666015625, -0.0229644775390625, -0.0299224853515625, -0.001316070556640625, 0.00991058349609375, 0.0181732177734375, 0.01517486572265625, -0.00039696693420410156, 0.0209808349609375, 0.00634765625, -0.0004630088806152344, -0.0014486312866210938, -0.0006651878356933594, -0.015869140625, 0.0036716461181640625, -0.004039764404296875, -0.0019855499267578125, -0.0123443603515625, -0.00688934326171875, -0.048492431640625, -0.016998291015625, 0.00495147705078125, -0.029144287109375, -0.027740478515625, 0.01995849609375, -0.01313018798828125, 0.00765228271484375, 0.00382232666015625, 0.0006175041198730469, 0.0223236083984375, -0.026397705078125, 0.00768280029296875, 0.004962921142578125, -0.00909423828125, 0.0033092498779296875, 0.00414276123046875, 0.01100921630859375, 0.0181732177734375, -0.002117156982421875, -0.0170745849609375, 0.004985809326171875, -0.00414276123046875, -0.004390716552734375, -0.003021240234375, 0.00873565673828125, 0.00986480712890625, -0.05560302734375, -0.0176849365234375, 0.0066375732421875, -0.00732421875, -0.0026493072509765625, -0.011810302734375, 0.006443023681640625, -0.03704833984375, 0.003765106201171875, -0.0177154541015625, 0.0108795166015625, -0.004611968994140625, -0.00925445556640625, -0.0261688232421875, 0.0163116455078125, -0.00849151611328125, -0.014801025390625, -0.0035762786865234375, 0.00925445556640625, -0.0033359527587890625, -0.0022754669189453125, -0.007080078125, -0.01178741455078125, -0.006587982177734375, -0.016204833984375, -0.0275421142578125, 0.036376953125, 0.0044097900390625, -0.00357818603515625, -0.01018524169921875, 0.04736328125, -0.006290435791015625, 0.005401611328125, 0.01110076904296875, 0.00861358642578125, -0.01468658447265625, 0.00966644287109375, 0.0029163360595703125, -0.0093536376953125, 0.0214996337890625, -0.030792236328125, -0.010467529296875, 0.00826263427734375, 0.0002574920654296875, -0.01128387451171875, 0.00421905517578125, -0.0076904296875, -0.0024566650390625, 
-0.0222320556640625, 0.005260467529296875, 0.01824951171875, -0.01215362548828125, 0.0229339599609375, -0.0015039443969726562, 0.01248931884765625, -0.0056915283203125, -0.0026874542236328125, -0.01479339599609375, -0.00016117095947265625, -0.006805419921875, 0.0043182373046875, 0.002227783203125, 0.005283355712890625, 0.01934814453125, -0.00864410400390625, 0.02032470703125, -0.0020427703857421875, 0.0123748779296875, -0.0030117034912109375, -0.011016845703125, 0.015869140625, -0.0067596435546875, 0.0205535888671875, 0.014007568359375, 0.0116424560546875, -0.0224456787109375, 0.004444122314453125, 0.00167083740234375, -0.033416748046875, 0.01397705078125, 0.013824462890625, -0.032440185546875, -0.02532958984375, -0.01123809814453125, -0.03631591796875, 0.0278778076171875, 0.019775390625, 0.02203369140625, 0.0082244873046875, 0.033599853515625, 0.0160675048828125, 0.0173492431640625, -0.031951904296875, -0.0221405029296875, 0.0013399124145507812, 0.018524169921875, -0.0015363693237304688, 0.03265380859375, 0.005184173583984375, 0.01212310791015625, -0.01143646240234375, 0.002902984619140625, -0.02294921875, 0.012664794921875, -0.038604736328125, -0.00254058837890625, -0.00318145751953125, 0.01354217529296875, 0.0004596710205078125, -0.031524658203125, 0.0111846923828125, -0.003753662109375, -0.00594329833984375, 0.0095367431640625, -0.0131683349609375, 0.0287322998046875, -0.01001739501953125, 0.03387451171875, -0.005706787109375, -0.013519287109375, -0.0289459228515625, 0.00408935546875, -0.004093170166015625, 0.01493072509765625, -0.019561767578125, -0.00888824462890625, -0.010711669921875, 0.027679443359375, -0.005718231201171875, -0.02203369140625, 0.0163116455078125, -0.004291534423828125, 0.040557861328125, 0.0169677734375, 0.01036834716796875, -0.0022640228271484375, -0.0196380615234375, -0.003261566162109375, -0.01611328125, 0.005157470703125, 0.01373291015625, -0.022064208984375, -0.00568389892578125, 0.032684326171875, -0.03125, 0.01407623291015625, 
0.01071929931640625, 0.01049041748046875, -0.042877197265625, 0.01416015625, -0.0157012939453125, 0.01190948486328125, 0.006969451904296875, 0.03582763671875, 0.0160980224609375, 0.0221405029296875, 0.001453399658203125, 0.01126861572265625, 0.0252838134765625, -0.025726318359375, 0.002712249755859375, 0.00571441650390625, -0.01702880859375, -0.0217132568359375, -0.0019435882568359375, -0.013458251953125, -0.014007568359375, 0.0311279296875, -0.00021326541900634766, 0.001956939697265625, -0.0141143798828125, 0.01323699951171875, 0.00791168212890625, -0.00525665283203125, 0.007534027099609375, -0.00452423095703125, 0.00406646728515625, -0.020416259765625, 0.0151519775390625, 0.0027561187744140625, -0.006565093994140625, -0.0136260986328125, -0.0160980224609375, -0.002300262451171875, -0.007495880126953125, 0.0223846435546875, 0.004730224609375, -0.015228271484375, -0.030242919921875, 0.0240936279296875, -0.01287841796875, 0.019073486328125, 0.005641937255859375, 0.01261138916015625, 0.003314971923828125, 0.00390625, 0.00738525390625, 0.01947021484375, -0.0151824951171875, -0.0117034912109375, 0.006572723388671875, 0.01512908935546875, 0.0074005126953125, 0.0200347900390625, -0.0155181884765625, -0.00307464599609375, -0.006198883056640625, -0.0197296142578125, 0.0115966796875, -0.00823211669921875, -0.0159912109375, 0.0155487060546875, 0.006755828857421875, -0.01544189453125, -0.01136016845703125, -0.01041412353515625, 0.004131317138671875, 0.004589080810546875, -0.005054473876953125, 0.0148162841796875, -0.081298828125, -0.0109100341796875, 0.04217529296875, -0.028533935546875, 0.0024509429931640625, -0.0160980224609375, -0.0297088623046875, -0.01499176025390625, -0.0134429931640625, 4.0590763092041016e-05, 0.00513458251953125, -0.02783203125, -0.014556884765625, 0.022857666015625, 0.00923919677734375, 0.0205535888671875, 0.0214691162109375, -8.493661880493164e-05, -0.01274871826171875, 0.039031982421875, -0.021484375, -0.00783538818359375, 0.020538330078125, 
0.017425537109375, -0.02197265625, 0.010528564453125, 0.0079193115234375, 0.0297393798828125, 0.030670166015625, -0.0113372802734375, -0.0066070556640625, 0.009307861328125, 0.0106201171875, -0.027984619140625, -0.01451873779296875, 0.0096435546875, 0.0347900390625, -0.021392822265625, 0.01232147216796875, -0.0258026123046875, -0.014862060546875, 0.0316162109375, 0.0259857177734375, -0.002376556396484375, -0.03106689453125, -0.0224456787109375, 0.0034847259521484375, -0.044464111328125, -0.014923095703125, 0.003940582275390625, 0.01067352294921875, 0.01174163818359375, 0.01690673828125, -0.047576904296875, 0.0140838623046875, 0.0033397674560546875, 0.00939178466796875, -0.0180206298828125, 0.01267242431640625, 0.0186309814453125, -0.005161285400390625, -0.00945281982421875, 0.03851318359375, -0.01445770263671875, -0.005313873291015625, 0.031951904296875, 0.0208282470703125, -0.032623291015625, 0.00641632080078125, 0.026214599609375, -0.032440185546875, -0.004673004150390625, -0.0001665353775024414, 0.0276641845703125, -0.056640625, 0.0066375732421875, 0.0004987716674804688, 0.05438232421875, -0.0177154541015625, 0.00962066650390625, 0.00432586669921875, -0.005474090576171875, 0.0107269287109375, 0.01166534423828125, 0.00666046142578125, -0.02972412109375, 0.006076812744140625, 0.0214691162109375, -0.0153656005859375, 0.018890380859375, 0.0247802734375, 0.00479888916015625, 0.03363037109375, -0.050537109375, -0.034393310546875, 0.037506103515625, 0.024871826171875, -0.0008983612060546875, 0.01325225830078125, 0.0282745361328125, -0.011566162109375, -0.01251983642578125, -0.00860595703125, 0.03839111328125, 0.021697998046875, 0.007427215576171875, -0.044036865234375, 0.001190185546875, -0.031951904296875, 0.04229736328125, 0.0203094482421875, -0.0025691986083984375, 0.030426025390625, -0.005947113037109375, 0.021697998046875, 0.021636962890625, -0.006256103515625, 0.001178741455078125, -0.03363037109375, -0.030517578125, 0.0224761962890625, 0.002918243408203125, 
0.0173187255859375, 0.01100921630859375, 0.0296173095703125, -0.007091522216796875, -0.01454925537109375, 0.011474609375, 0.00811767578125, 0.0179595947265625, -0.006816864013671875, 0.0234527587890625, 0.02264404296875, 0.0001519918441772461, 0.006992340087890625, -0.016754150390625, 0.0012845993041992188, 0.0013589859008789062, -0.01070404052734375, 0.0293121337890625, 0.01995849609375, -0.021331787109375, -0.02435302734375, 0.0205230712890625, 0.01168060302734375, -0.00839996337890625, 0.0014553070068359375, 0.04241943359375, -0.0100250244140625, -0.00736236572265625, 0.0152130126953125, -0.017547607421875, -0.005962371826171875, 0.0261993408203125, 0.00556182861328125, -0.036590576171875, -0.0254669189453125, 0.0005650520324707031, -0.01019287109375, -0.005153656005859375, 0.00865936279296875, -0.006053924560546875, -0.0287933349609375, -0.01922607421875, 0.0136260986328125, -0.05517578125, -0.0093841552734375, -0.011627197265625, -0.01325225830078125, -0.0270233154296875, 0.033538818359375, 0.0205535888671875, 0.03466796875, 0.0189971923828125, 0.033447265625, -0.0090484619140625, -0.052703857421875, 0.012359619140625, 0.0011272430419921875, -0.00321197509765625, 0.033111572265625, 0.031646728515625, -0.00891876220703125, -0.001064300537109375, -0.007228851318359375, -0.02178955078125, -0.040985107421875, 0.0277862548828125, -0.00395965576171875, 0.004947662353515625, -0.00946807861328125, 0.01374053955078125, 0.007205963134765625, -0.004451751708984375, -0.00641632080078125, -0.0143890380859375, 0.00792694091796875, -0.0302734375, 0.00908660888671875, -0.005016326904296875, 0.01015472412109375, -0.01166534423828125, -0.01351165771484375, 0.0038909912109375, -0.0199737548828125, -0.00963592529296875, -0.0267181396484375, -0.0019969940185546875, 0.041961669921875, -0.00188446044921875, -0.0161285400390625, 0.0149078369140625, 0.0012655258178710938, -0.00913238525390625, 0.03619384765625, -0.024932861328125, -0.0166778564453125, 0.019622802734375, 
0.05450439453125, -0.0255279541015625, -0.01543426513671875, 0.0119781494140625, 0.019683837890625, -0.01454925537109375, -0.0198211669921875, -0.007640838623046875, -0.0218658447265625, -0.0017290115356445312, 0.04156494140625, 0.026763916015625, -0.00958251953125, -0.031219482421875, -0.0096435546875, 0.021728515625, 0.037994384765625, 0.0278167724609375, -0.00865936279296875, 0.037078857421875, 0.003154754638671875, -0.00222015380859375, -0.0208740234375, -0.005352020263671875, 0.0185546875, 0.01364898681640625, -0.004364013671875, 0.00531768798828125, 0.029296875, 0.022979736328125, 0.014495849609375, 0.009246826171875, -0.00862884521484375, 0.0233306884765625, -0.022247314453125, 0.01371002197265625, -0.01227569580078125, -0.0277557373046875, -0.05218505859375, -0.03778076171875, -0.00688934326171875, -0.021881103515625, 0.01236724853515625, -0.01261138916015625, -0.041534423828125, 0.0069427490234375, -0.007709503173828125, -0.0280609130859375, -0.0117645263671875, -0.003875732421875, -0.005828857421875, 0.034149169921875, -0.00746917724609375, -0.0094146728515625, -0.0085601806640625, -0.0027103424072265625, -0.04296875, -0.0006337165832519531, 0.009368896484375, -0.001964569091796875, 0.0199737548828125, -0.00658416748046875, 0.010894775390625, -0.00945281982421875, 0.0088348388671875, 0.0039520263671875, -0.0038547515869140625, 0.0360107421875, -0.028411865234375, -0.0241851806640625, 0.01334381103515625, -0.0137176513671875, 0.019866943359375, -0.01038360595703125, 0.01007080078125, 0.01197052001953125, -0.027099609375, 0.0162811279296875, 0.005279541015625, 0.0543212890625, 0.0073394775390625, 0.0174713134765625, -0.0215301513671875, -0.031036376953125, 0.007633209228515625, -0.0166168212890625, -0.0208740234375, -0.027923583984375, 0.0360107421875, -0.00469970703125, -0.00905609130859375, 0.004566192626953125, -0.0350341796875, 0.00325775146484375, 0.00902557373046875, -0.0036182403564453125, 0.032440185546875, -0.0025348663330078125, -0.044677734375, 
-0.043365478515625, 0.050384521484375, -0.039306640625, 0.0226593017578125, -0.03546142578125, -0.0022029876708984375, -0.0117950439453125, -0.027740478515625, -0.0104827880859375, -0.0190887451171875, 0.007022857666015625, 0.0209808349609375, -0.0291290283203125, 0.01486968994140625, 0.0304412841796875, 0.0023250579833984375, -0.0264892578125, -0.0330810546875, 0.006435394287109375, -0.007537841796875, -0.006786346435546875, 0.002483367919921875, -0.00391387939453125, 0.0024738311767578125, -0.00678253173828125, -0.0144195556640625, 0.022796630859375, -0.015106201171875, 0.0083160400390625, -0.0029087066650390625, 0.004230499267578125, -0.0217132568359375, 0.0216064453125, 0.0195465087890625, -0.02484130859375, -0.00732421875, -0.016937255859375, -0.0156402587890625, 0.0170440673828125, 0.029083251953125, 0.004245758056640625, -0.009979248046875, 0.04058837890625, 0.02178955078125, 0.0288543701171875, -0.006992340087890625, 0.002796173095703125, 0.00011938810348510742, -0.00492095947265625, -0.01351165771484375, -0.00789642333984375, 0.0128326416015625, -5.2869319915771484e-05, 0.0096435546875, 0.0005311965942382812, 0.011138916015625, 0.00048065185546875, -0.019195556640625, 0.005229949951171875, 0.020599365234375, -0.0246734619140625, 0.014373779296875, -0.01125335693359375, 0.00363922119140625, 0.03692626953125, 0.026092529296875, -0.01861572265625, 0.002986907958984375, -0.01168060302734375, 0.0152435302734375, 0.0298919677734375, -0.0330810546875, 0.0145416259765625, -0.01715087890625, 0.05133056640625, -0.005756378173828125, -0.0148773193359375, -0.004550933837890625, 0.0162506103515625, 0.04498291015625, -0.01471710205078125, 0.019134521484375, -0.0061492919921875, 0.0184783935546875, -0.030242919921875, -0.04302978515625, -0.006084442138671875, 0.00946044921875, -0.0377197265625, -0.000980377197265625, 0.0024738311767578125, 0.020538330078125, -0.0139312744140625, 0.0240936279296875, 0.007320404052734375, 0.03558349609375, 0.020263671875, 
0.0220489501953125, -0.0243377685546875, 0.02105712890625, 0.004608154296875, -0.0197601318359375, 0.021331787109375, 0.0104217529296875, -0.0013418197631835938, 0.0223388671875, 0.0474853515625, -0.0208587646484375, 0.0330810546875, -0.02935791015625, 0.01519775390625, 0.0017328262329101562, 0.002544403076171875, -0.019561767578125, 0.01502227783203125, -0.0166015625, -0.040374755859375, 0.00434112548828125, 0.0023288726806640625, 0.0015544891357421875, -0.043914794921875, 0.03924560546875, -0.00882720947265625, 0.00019216537475585938, 0.01849365234375, -0.0253143310546875, 0.036285400390625, 0.0040740966796875, 0.01445770263671875, -0.027740478515625, -0.0100555419921875, 0.01093292236328125, 0.00524139404296875, -0.0191192626953125, -0.021240234375, 0.007244110107421875, -0.013153076171875, -0.0048370361328125, 0.0025653839111328125, 0.01007080078125, 0.01212310791015625, -0.029693603515625, -0.03741455078125, 0.034393310546875, 0.0028667449951171875, -0.0171661376953125, -0.010833740234375, 0.056793212890625, -0.0322265625, -0.005527496337890625, 0.006496429443359375, -0.0160369873046875, -0.01081085205078125, -0.015350341796875, 0.015777587890625, -0.00522613525390625, -0.007137298583984375, 0.0018377304077148438, -0.0029811859130859375, -0.0101318359375, -0.01507568359375, 0.0166778564453125, -0.00012493133544921875, -0.020782470703125, 0.00479888916015625, -0.0115203857421875, 0.039825439453125, 0.00580596923828125, -0.007015228271484375, 0.03839111328125, -0.01015472412109375, 0.0146942138671875, 0.0095672607421875, 0.02435302734375, -0.00316619873046875, -0.0244140625, 0.00655364990234375, -0.01061248779296875, -0.029083251953125, -0.0109405517578125, 0.0028057098388671875, -0.0251007080078125, 0.0209503173828125, 0.006023406982421875, 0.0148468017578125, -0.0012750625610351562, -0.00316619873046875, -0.00881195068359375, -0.01360321044921875, 0.02532958984375, 0.010406494140625, -0.005584716796875, -0.006072998046875, -0.00926971435546875, 
0.0379638671875, 0.0045623779296875, -0.017578125, 0.0078887939453125, 0.018463134765625, -0.0184478759765625, 0.031036376953125, -0.001361846923828125, 0.041046142578125, -0.02191162109375, -0.0025177001953125, -0.0006337165832519531, -0.0041351318359375, 0.00067901611328125, 0.01403045654296875, 0.033416748046875, -0.00814056396484375, 0.0017976760864257812, -0.0214996337890625, -0.0184326171875, 0.009033203125, -6.347894668579102e-05, 0.01904296875, 0.00782012939453125, 0.0008406639099121094, -0.006374359130859375, 0.004108428955078125, 0.0321044921875, 0.0072021484375, -0.036590576171875, 0.01513671875, 0.016082763671875, -0.0179290771484375, -0.0028858184814453125, -0.03155517578125, 0.00518035888671875, 0.0012941360473632812, 0.01007080078125, 0.0042724609375, -0.00621795654296875, -0.00482940673828125, 0.00882720947265625, 0.002643585205078125, -0.0040283203125, -0.0108795166015625, 0.01290130615234375, 0.006183624267578125, -0.0094146728515625, -0.0013589859008789062, -0.005718231201171875, -0.00931549072265625, 0.00504302978515625, 0.03619384765625, 0.0222320556640625, -0.028839111328125, 0.018280029296875, -0.011505126953125, -0.00684356689453125, -0.0168914794921875, 0.0111236572265625, 0.0190887451171875, -0.004390716552734375, -0.0021266937255859375, -0.0032558441162109375, -0.01445770263671875, 0.0245361328125, -0.007076263427734375, 0.0068206787109375, 0.0264892578125, 0.00034880638122558594, 0.02337646484375, -0.040283203125, -0.00693511962890625, 0.019744873046875, 0.0289764404296875, 0.02569580078125, -0.0187530517578125, -0.00888824462890625, 0.02130126953125, 0.028961181640625, 0.0021572113037109375, -0.01311492919921875, -0.00548553466796875, -0.0183868408203125, -0.002410888671875, 0.0223236083984375, 0.0148773193359375, 0.002681732177734375, 0.0304718017578125, 0.0053253173828125, -0.005641937255859375, -0.0191650390625, -0.01001739501953125, -0.015899658203125, -0.00804901123046875, -0.00501251220703125, 0.031890869140625, 
-0.005435943603515625, 0.0157318115234375, -0.01122283935546875, 0.02392578125, -0.0272064208984375, 0.0029296875, 0.0028839111328125, -0.005092620849609375, 0.0209808349609375, -0.0223236083984375, -0.040802001953125, -0.01690673828125, -0.05224609375, 0.025146484375, 0.014373779296875, -0.0278167724609375, -0.0151214599609375, -0.0023784637451171875, 0.06549072265625, -0.01398468017578125, -0.018096923828125, -0.02227783203125, 0.03546142578125, 0.003726959228515625, -0.0200347900390625, -0.0101776123046875, 0.038055419921875, -0.00939178466796875, -0.0004074573516845703, -0.004268646240234375, 0.0094757080078125, -0.0305023193359375, -0.00772857666015625, 0.036102294921875, -0.0157012939453125, 0.003692626953125, -0.011871337890625, -0.025543212890625, -0.006465911865234375, 0.03668212890625, -0.0004897117614746094, -0.01529693603515625, -0.0207061767578125, 0.032470703125, 0.004856109619140625, 0.009765625, 0.0389404296875, -0.03271484375, 0.0077362060546875, 0.0119171142578125, 0.0160675048828125, -0.05743408203125, 2.4557113647460938e-05, 0.0082855224609375, -0.0256500244140625, 0.01556396484375, -0.0124053955078125, -0.0171356201171875, 0.035491943359375, -0.008575439453125, -0.0197601318359375, 0.001697540283203125, -0.032928466796875, 0.03216552734375, 0.0093536376953125, -0.0207061767578125, 0.034820556640625, 0.005275726318359375, -0.01629638671875, 0.006130218505859375, 0.0298614501953125, -0.00079345703125, 0.004390716552734375, -0.01812744140625, 0.01517486572265625, -0.00775909423828125, -0.002040863037109375, 0.044525146484375, -0.0292205810546875, 0.0305023193359375, 0.020538330078125, 0.01531219482421875, -0.01360321044921875, 0.0130767822265625, -0.009307861328125, 0.031829833984375, 0.03399658203125, 0.0254669189453125, 0.006320953369140625, 0.0003898143768310547, -0.032928466796875, 0.003955841064453125, -0.0214080810546875, -0.0208587646484375, 0.00379180908203125, -0.0023860931396484375, -0.01140594482421875, 0.0160980224609375, 
-0.0060577392578125, 0.0243988037109375, -0.0303955078125, -0.01256561279296875, 0.007480621337890625, -0.01415252685546875, 0.019683837890625, 0.035919189453125, 0.0035552978515625, -0.00815582275390625, 0.0020084381103515625, 0.0066986083984375, -0.00992584228515625, -0.041900634765625, 0.015625, -0.0072174072265625, 0.0170745849609375, 0.008697509765625, -0.023895263671875, 0.017547607421875, -0.0107269287109375, -0.01641845703125, -0.031890869140625, 0.007396697998046875, 0.038543701171875, 0.007144927978515625, 0.0227813720703125, -0.0018825531005859375, 0.01953125, 0.0085906982421875, -0.004474639892578125, -0.0286407470703125, -0.039276123046875, -0.0316162109375, -0.00823211669921875, -0.026153564453125, 0.00714111328125, -0.0377197265625, 0.0016374588012695312, -0.01367950439453125, -0.008758544921875, 0.0040130615234375, 0.01439666748046875, 0.01024627685546875, 0.0017490386962890625, 0.0016469955444335938, -0.0040130615234375, 0.0238189697265625, -0.0231475830078125, -0.01245880126953125, 0.027313232421875, -0.020660400390625, 0.0293731689453125, -0.0004203319549560547, -0.00859832763671875, 0.0002994537353515625, 0.0225067138671875, -0.0118255615234375, -0.0288848876953125, -0.0183563232421875, -0.0007724761962890625, 0.01934814453125, 0.0014390945434570312, -0.02581787109375, 0.0163116455078125, -0.01096343994140625, 0.057708740234375, 0.00023794174194335938, -0.0253448486328125, 0.0277099609375, 0.002872467041015625, 0.01303863525390625, -0.0001747608184814453, 0.0241546630859375, 0.0119781494140625, 0.01116180419921875, 0.0251617431640625, 0.0134735107421875, 0.049102783203125, 0.0233917236328125, -0.0161285400390625, -0.00226593017578125, 0.014404296875, -0.006465911865234375, -0.0076751708984375, -0.016021728515625, 0.0007472038269042969, -0.03521728515625, -0.01140594482421875, 0.00042510032653808594, -0.0031833648681640625, -0.00021123886108398438, -0.01343536376953125, 0.0184478759765625, -0.02215576171875, 0.00580596923828125, 
-0.0204925537109375, -0.04241943359375, -0.02532958984375, 0.015472412109375, 0.035125732421875, -0.0011577606201171875, 0.01708984375, 0.0140228271484375, 0.0196685791015625, 0.0304107666015625, 0.006290435791015625, 0.0168304443359375, 0.0025177001953125, 0.0301971435546875, -0.037689208984375, -0.0113677978515625, -0.010589599609375, -0.01096343994140625, -0.0210723876953125, 0.037261962890625, -0.0149383544921875, -0.0001519918441772461, -0.0169830322265625, -0.00482940673828125, -0.013580322265625, 0.019012451171875, 0.01468658447265625, -0.026275634765625, 0.00701904296875, 0.029754638671875, -0.0233917236328125, -0.004871368408203125, 0.00437164306640625, -0.03643798828125, 0.01209259033203125, 0.0185699462890625, 0.0230712890625, 0.022216796875, 0.0171356201171875, 0.00995635986328125, 0.01541900634765625, -0.007434844970703125, -0.007701873779296875, -0.0516357421875, 0.00742340087890625, 0.039520263671875, 0.0243072509765625, 0.00970458984375, 0.035675048828125, -0.00499725341796875, -0.0050811767578125, 0.007049560546875, 0.0174102783203125, -0.0008544921875, -0.0172119140625, -0.00939178466796875, -0.0202484130859375, 0.0005183219909667969, -0.028411865234375, 0.0169525146484375, 0.02069091796875, 0.0013713836669921875, 0.0209808349609375, -0.0117340087890625, 0.0212860107421875, -0.0156707763671875, -0.00934600830078125, 0.00428009033203125, 0.0245208740234375, -0.001708984375, 0.0260467529296875, 0.01070404052734375, -0.02410888671875, 0.01465606689453125, 0.00970458984375, 0.0188751220703125, 0.021240234375, -0.0496826171875, -0.0211639404296875, -0.001415252685546875, 0.0108642578125, 0.0108184814453125, 0.034210205078125, 0.0306549072265625, -0.0001876354217529297, 0.01209259033203125, 0.022613525390625, -0.032684326171875, 0.0303192138671875, -0.00543212890625, 0.01910400390625, 0.030242919921875, 0.006526947021484375, -0.0146026611328125, 0.00432586669921875, -0.0203399658203125, -0.0023174285888671875, 0.0034503936767578125, 0.014404296875, 
-0.007293701171875, 0.017578125, -0.0146484375, 0.00875091552734375, 0.0011081695556640625, 0.0294189453125, 0.03131103515625, -0.01849365234375, 0.008819580078125, -0.01158905029296875, 0.00447845458984375, -0.0167694091796875, 0.01198577880859375, 0.0110931396484375, 0.0243072509765625, 0.0166473388671875, -0.01270294189453125, -0.002429962158203125, -0.01611328125, -0.01035308837890625, -0.01364898681640625, 0.00249481201171875, -0.0151824951171875, 0.05218505859375, 0.05462646484375, -0.021575927734375, 0.0030002593994140625, 0.003864288330078125, 0.0419921875, 0.019500732421875, 0.004108428955078125, -0.0214691162109375, 0.0164947509765625, 0.012481689453125, -0.0269317626953125, 0.0102081298828125, 0.0152130126953125, -0.02130126953125, 0.037689208984375, 0.01377105712890625, -0.005985260009765625, -0.027374267578125, 0.00494384765625, 0.01296234130859375, -0.009002685546875, 0.00989532470703125, 0.005680084228515625, -0.013275146484375, -0.01366424560546875, -0.00988006591796875, 0.0136566162109375, -0.011932373046875, -0.01244354248046875, -0.01299285888671875, -0.021484375, -0.0012006759643554688, 0.006740570068359375, 0.0249786376953125, 0.0150299072265625, 0.032684326171875, -0.006000518798828125, -0.017242431640625, 0.0021915435791015625, 0.0106201171875, -0.046661376953125, 0.03759765625, -0.016571044921875, -0.02032470703125, -0.03167724609375, -0.0031337738037109375, -0.0268096923828125, -0.03253173828125, -0.03515625, -0.00548553466796875, 0.0178070068359375, 0.021575927734375, 0.01102447509765625, 0.0025501251220703125, -0.01316070556640625, -0.01117706298828125, -0.0223541259765625, 0.0155792236328125, -0.005420684814453125, -0.004261016845703125, -0.0190887451171875, -0.0012311935424804688, -0.00531005859375, -0.003505706787109375, 0.002803802490234375, -0.0419921875, -0.0087738037109375, 0.00804901123046875, 0.01175689697265625, 0.003509521484375, 0.006992340087890625, -0.01346588134765625, -0.004150390625, 0.0162811279296875, 
0.0030345916748046875, 0.0110321044921875, 0.000518798828125, 0.006038665771484375, 0.0017986297607421875, 0.03839111328125, 0.0007977485656738281, -0.055999755859375, -0.016876220703125, 0.0212249755859375, 0.0008406639099121094, -0.00461578369140625, 0.044158935546875, 0.0166778564453125, 0.007232666015625, -0.0163726806640625, 0.0208892822265625, 0.01131439208984375, -0.01288604736328125, -0.0140838623046875, -0.00388336181640625, -0.01506805419921875, 0.01568603515625, 0.055267333984375, -0.00982666015625, -0.0278167724609375, 0.00760650634765625, -0.004840850830078125, -0.0278778076171875, 0.005123138427734375, 0.0031757354736328125, 0.01233673095703125, -0.01105499267578125, 7.599592208862305e-05, 0.004245758056640625, 0.015869140625, -0.005519866943359375, -0.0183258056640625, -0.027374267578125, -0.02154541015625, -0.029083251953125, -0.031005859375, -0.0294342041015625, 0.0199432373046875, -0.006999969482421875, -0.01515960693359375, -0.0469970703125, 0.035003662109375, -0.01885986328125, 0.01503753662109375, 0.002307891845703125, -0.016204833984375, -0.01309967041015625, 0.0187225341796875, 0.00417327880859375, 0.0221710205078125, 0.002071380615234375, -0.012115478515625, 0.00815582275390625, -0.008087158203125, -0.0025386810302734375, 0.026763916015625, 0.0121612548828125, 0.00618743896484375, -0.0216217041015625, 0.03759765625, -0.0165557861328125, -0.006343841552734375, 0.0163421630859375, 0.000518798828125, 0.012298583984375, -0.00634765625, -0.0002751350402832031, 0.019683837890625, -0.02001953125, -0.008514404296875, 0.0027675628662109375, -0.00406646728515625, -0.00423431396484375, 0.00994873046875, 0.02374267578125, -0.021575927734375, 0.026214599609375, -0.040863037109375, 0.01372528076171875, -0.01517486572265625, 0.01274871826171875, -0.0064239501953125, 0.032135009765625, 0.0167388916015625, -0.01078033447265625, -0.0214996337890625, 0.01800537109375, -0.0003590583801269531, -0.016448974609375, -0.0172119140625, 0.00891876220703125, 
0.00582122802734375, -0.00896453857421875, 0.0002644062042236328, -0.0009889602661132812, 0.0106201171875, 0.03326416015625, -0.03240966796875, 0.033538818359375, -0.0282440185546875, -0.01172637939453125, -0.010528564453125, 0.0116119384765625, -0.005962371826171875, 0.043121337890625, -0.0007829666137695312, 0.00191497802734375, 0.059173583984375, 0.0125579833984375, 0.00614166259765625, 0.023895263671875, 0.004253387451171875, 0.010284423828125, -0.0080718994140625, -0.00901031494140625, 0.00853729248046875, -0.018951416015625, 0.0055999755859375, -0.0235748291015625, 0.004222869873046875, 0.0204315185546875, 0.02130126953125, 0.00846099853515625, 0.011138916015625, -0.0149993896484375, 0.00942230224609375, 0.005283355712890625, -0.0053558349609375, 0.0107574462890625, -0.028106689453125, 0.00983428955078125, 0.0284271240234375, 0.0277252197265625, 0.01107025146484375, 0.00934600830078125, -0.00968170166015625, 0.002490997314453125, 0.01422119140625, 0.0209503173828125, -0.0223541259765625, -0.030609130859375, -0.00386810302734375, 0.00567626953125, -0.0207366943359375, 0.018768310546875, 0.024261474609375, 0.0013723373413085938, 0.02069091796875, 0.01073455810546875, 0.0011138916015625, 0.003208160400390625, -0.0126800537109375, 0.04632568359375, 0.0030384063720703125, -0.03936767578125, 0.040374755859375, 0.01202392578125, -0.04608154296875, -0.0198974609375, 0.0193939208984375, 0.03814697265625, -0.038909912109375, 0.006748199462890625, 0.00508880615234375, -0.004955291748046875, 0.0110626220703125, 0.002407073974609375, 0.0128326416015625, -0.0101776123046875, -0.03826904296875, -0.0030803680419921875, -0.03369140625, 0.0179290771484375, -0.050079345703125, -0.0227508544921875, 0.01032257080078125, -0.020782470703125, -0.00850677490234375, -0.027099609375, 0.004070281982421875, -0.00650787353515625, -0.01079559326171875, 0.0006113052368164062, 0.0256500244140625, -0.034637451171875, 0.037750244140625, -0.0016326904296875, 0.0166778564453125, 
-0.033599853515625, 0.00432586669921875, 0.02801513671875, -0.006511688232421875, 0.0027904510498046875, -0.01299285888671875, 0.01081085205078125, -0.030914306640625, 0.0269012451171875, 0.0185089111328125, 0.008880615234375, -0.0030956268310546875, -0.0002598762512207031, 0.0188751220703125, 0.0197296142578125, -0.033111572265625, -0.0240631103515625, -0.01470184326171875, -0.03863525390625, 0.00666046142578125, 0.00469970703125, -0.00986480712890625, 0.01531982421875, -0.002666473388671875, 0.01294708251953125, -0.0338134765625, -0.003604888916015625, -0.00778961181640625, -0.027801513671875, -0.0211181640625, 0.0086212158203125, -0.0234527587890625, -0.0254058837890625, -0.0252227783203125, 0.009552001953125, -0.013397216796875, 0.0011472702026367188, 0.0028858184814453125, 0.0158233642578125, -0.01415252685546875, 0.0302886962890625, -0.01702880859375, 0.004383087158203125, -0.006214141845703125, -0.00045609474182128906, 0.00949859619140625, 0.00439453125, -0.035247802734375, 0.004390716552734375, -0.007122039794921875, -0.0587158203125, -0.00782012939453125, 0.0162506103515625, -0.03509521484375, -0.02191162109375, -0.01293182373046875, -0.00681304931640625, 0.00916290283203125, 0.034576416015625, 0.02978515625, -0.01390838623046875, 0.0078277587890625, 0.0225830078125, 0.0140228271484375, 0.004016876220703125, 0.0169525146484375, 0.005931854248046875, -0.0196685791015625, 0.00894927978515625, -0.022735595703125, -0.02130126953125, 0.015899658203125, 0.006847381591796875, -0.004825592041015625, 0.0249481201171875, 0.033721923828125, -0.0012025833129882812, 0.027862548828125, 0.03240966796875, 0.01268768310546875, -0.02838134765625, 0.0096435546875, 0.0016851425170898438, 0.0171356201171875, 0.0390625, -0.00572967529296875, 0.002918243408203125, -0.01503753662109375, 0.037750244140625, -0.036163330078125, -0.01404571533203125, -0.01334381103515625, -0.007358551025390625, 0.00689697265625, 0.0048980712890625, -0.0010013580322265625, -0.0006198883056640625, 
0.00672149658203125, 0.0202789306640625, -0.010986328125, -0.02630615234375, -0.011444091796875, 0.01328277587890625, 0.0281829833984375, 0.009490966796875, 0.0301513671875, 0.002483367919921875, 0.0217742919921875, -0.0210418701171875, -0.0022430419921875, -0.01074981689453125, 0.01363372802734375, 0.01316070556640625, -0.032928466796875, 0.01213836669921875, -0.00128936767578125, -0.01258087158203125, -0.0182037353515625, 0.0270233154296875, 0.0241851806640625, 0.02764892578125, 0.00492095947265625, -0.005420684814453125, 0.0176849365234375, 0.02215576171875, -0.0261077880859375, -0.0023193359375, 0.006404876708984375, -0.0146484375, -0.0141448974609375, -0.01036834716796875, 0.034637451171875, 0.010162353515625, 0.0073089599609375, -0.0178070068359375, 0.010894775390625, -0.005809783935546875, -0.034423828125, 0.0230865478515625, 0.004230499267578125, 0.004878997802734375, -0.005889892578125, 0.00933074951171875, -0.0140380859375, 0.0209503173828125, -0.00809478759765625, 0.01131439208984375, -0.00867462158203125, -0.0241241455078125, -0.024322509765625, 0.0021076202392578125, 0.01190185546875, -0.0032520294189453125, -0.00466156005859375, 0.009368896484375, 0.0272674560546875, -0.007007598876953125, -0.006641387939453125, 0.02313232421875, 0.02740478515625, -0.0002484321594238281, 0.0029125213623046875, 0.006206512451171875, 0.026824951171875, 0.0153961181640625, -0.01338958740234375, -0.0113372802734375, -0.019927978515625, -0.0016946792602539062, -0.020355224609375, 0.005401611328125, 0.007068634033203125, -0.0274658203125, -0.01145172119140625, -0.003376007080078125, -0.0214385986328125, 0.01114654541015625, -0.0293731689453125, 0.0207061767578125, -0.0439453125, -0.0307769775390625, 0.0287322998046875, 0.02484130859375, 0.01413726806640625, -0.035614013671875, 0.019439697265625, -0.0270538330078125, 0.00791168212890625, 0.0256500244140625, -0.0192108154296875, 0.06146240234375, -0.00873565673828125, 0.0013723373413085938, 0.0037326812744140625, 
0.017181396484375, 0.024932861328125, 0.0128326416015625, -0.00911712646484375, -0.0199127197265625, 0.0156402587890625, -0.0017004013061523438, -0.023345947265625, 0.0031185150146484375, -0.00827789306640625, -0.00537109375, 0.004016876220703125, 0.006519317626953125, -0.034942626953125, -0.040191650390625, -0.01064300537109375, -0.0018825531005859375, 0.03753662109375, -0.0003268718719482422, -0.029815673828125, 0.0050811767578125, -0.0093536376953125, 0.0026073455810546875, -0.0345458984375, -0.00952911376953125, 0.0269927978515625, -0.01122283935546875, -0.005023956298828125, 0.042510986328125, 0.020477294921875, 0.02325439453125, -0.00702667236328125, -0.01085662841796875, 0.007572174072265625, 0.0262603759765625, 0.0035457611083984375, 0.0211029052734375, 0.028656005859375, -0.028656005859375, 0.016204833984375, 0.025421142578125, 0.0014238357543945312, -0.031036376953125, -0.01364898681640625, 0.0227203369140625, 0.0089111328125, 0.00554656982421875, -0.00435638427734375, -0.006694793701171875, -0.01812744140625, 0.015899658203125, -0.020355224609375, -0.01273345947265625, -0.0347900390625, 0.0010709762573242188, -0.025177001953125, -0.02642822265625, 0.0174560546875, -0.007419586181640625, -0.01444244384765625, 0.01444244384765625, -0.03045654296875, 0.0226593017578125, -0.00778961181640625, -0.005733489990234375, -0.0083465576171875, 0.01528167724609375, -0.0174407958984375, 0.02020263671875, 0.011993408203125, -0.00890350341796875, -0.012451171875, -0.037994384765625, -0.004573822021484375, 0.01309967041015625, 0.0298919677734375, -0.0009241104125976562, 0.035736083984375, 0.0218658447265625, 0.0132293701171875, -0.0174560546875, -0.00414276123046875, -0.01189422607421875, 0.005741119384765625, -0.01126861572265625, 0.015533447265625, -0.0036525726318359375, 0.0030059814453125, 0.0005106925964355469, 0.0017070770263671875, 0.00875091552734375, -0.0178680419921875, 0.0113372802734375, 0.01149749755859375, 0.0350341796875, -0.010589599609375, 
0.005321502685546875, 0.00786590576171875, -0.0278167724609375, 0.0014142990112304688, -0.0012912750244140625, 0.0036334991455078125, -0.0036563873291015625, -0.0245208740234375, -0.004062652587890625, -0.0191192626953125, 0.024078369140625, 0.006763458251953125, 0.02777099609375, -0.01473236083984375, 0.0302276611328125, 0.0180206298828125, -0.02093505859375, 0.00704193115234375, -0.001888275146484375, -0.01428985595703125, -0.0251312255859375, 0.0119476318359375, 0.021636962890625, 0.016021728515625, -0.048736572265625, 0.0183258056640625, -0.02099609375, 0.020050048828125, 0.016815185546875, -0.033447265625, -0.0081787109375, 0.01165008544921875, -0.00980377197265625, 0.0009350776672363281, -0.022491455078125, 0.0038604736328125, -0.03948974609375, 0.021697998046875, 0.004314422607421875, -0.0179443359375, 0.0191192626953125, 0.0011816024780273438, 0.02154541015625, -0.0029888153076171875, -0.0201873779296875, -0.006275177001953125, 0.050872802734375, -0.0136566162109375, -0.008880615234375, -0.0110626220703125, 0.004238128662109375, 0.0195159912109375, 0.01104736328125, 0.02459716796875, 0.0252838134765625, -0.0028705596923828125, 0.00846099853515625, -0.006565093994140625, -0.0069580078125, 0.025360107421875, 0.0013170242309570312, 0.0174560546875, 0.001216888427734375, 0.003978729248046875, -0.01383209228515625, -0.059600830078125, -0.0133056640625, -0.0258941650390625, -0.07550048828125, 0.003894805908203125, -0.0286712646484375, -0.024627685546875, -0.003253936767578125, 0.00803375244140625, 0.01505279541015625, -0.00930023193359375, -0.01021575927734375, -0.010833740234375, -0.0237274169921875, -0.0025634765625, -0.023345947265625, 0.00597381591796875, -0.00849151611328125, 0.0155181884765625, -0.0214080810546875, 0.0283660888671875, 0.004856109619140625, 0.0214996337890625, 0.0305938720703125, -0.004520416259765625, 0.02215576171875, 0.00966644287109375, -0.013092041015625, -0.04913330078125, -0.0243072509765625, 0.0226287841796875, -0.0162811279296875, 
-0.006740570068359375, -0.0134124755859375, 0.0228118896484375, 0.0419921875, 0.005535125732421875, -0.02099609375, 0.00543975830078125, -0.0021915435791015625, -0.02386474609375, -0.0085601806640625, -0.0268402099609375, 0.0296478271484375, -0.012054443359375, 0.0020542144775390625, -0.02960205078125, 0.0164794921875, 0.0137176513671875, -0.01213836669921875, -0.0018110275268554688, 0.00273895263671875, -0.0225677490234375, 0.051116943359375, 0.018798828125, -0.02862548828125, 0.006977081298828125, 0.0156097412109375, -0.00685882568359375, -0.0094757080078125, 0.0230712890625, 0.003955841064453125, -0.041534423828125, 0.00815582275390625, -0.00434112548828125, 0.0011272430419921875, 0.0172271728515625, 0.0081024169921875, -0.0323486328125, 0.0262451171875, -0.01316070556640625, -0.003955841064453125, -0.01250457763671875, 0.0020294189453125, -0.00019419193267822266, 0.018218994140625, -0.0185089111328125, -0.023468017578125, -0.021575927734375, 0.0251922607421875, -0.02374267578125, -0.037017822265625, 0.00811004638671875, 0.0011701583862304688, 0.0100555419921875, -0.017425537109375, 0.01036834716796875, 0.005031585693359375, -0.024932861328125, 0.00974273681640625, -0.00537872314453125, -0.00904083251953125, -0.0003502368927001953, 0.01258087158203125, -0.01287841796875, -0.030609130859375, -0.0155029296875, -0.006175994873046875, 0.04296875, 0.0022907257080078125, 0.011505126953125, 0.0010290145874023438, -0.020782470703125, -0.043487548828125, 0.0264129638671875, -0.0171356201171875, -0.0002980232238769531, 0.01337432861328125, 0.00586700439453125, -0.0303802490234375, 0.0227203369140625, -0.005062103271484375, 0.006397247314453125, 0.005809783935546875, -0.0303802490234375, 0.005584716796875, -0.006710052490234375, -0.0160980224609375, 0.01116180419921875, 0.0284271240234375, -0.0240936279296875, 0.031707763671875, -0.0017251968383789062, 0.0253448486328125, 0.0183563232421875, -0.01308441162109375, 0.03656005859375, -0.03717041015625, 0.042724609375, 
-0.0198974609375, 0.015960693359375, 0.0114288330078125, 0.0341796875, -0.02435302734375, -0.01088714599609375, 0.00818634033203125, 0.0218353271484375, -0.0267486572265625, 0.03460693359375, 0.005535125732421875, 0.0118255615234375, 0.0190887451171875, 0.00902557373046875, 0.0257415771484375, -0.0271453857421875, -0.01641845703125, 0.0137481689453125, -0.006717681884765625, 0.03558349609375, -0.00021016597747802734, -0.035308837890625, -0.01459503173828125, 0.0256500244140625, -0.027923583984375, 0.007358551025390625, -0.0186767578125, 0.0010576248168945312, -0.034759521484375, -0.00433349609375, -0.055633544921875, -0.042938232421875, -0.017486572265625, 0.0009660720825195312, -0.0110321044921875, -0.001537322998046875, -0.007354736328125, 0.0102996826171875, -0.003116607666015625, -0.03173828125, -0.00824737548828125, -0.0109710693359375, 0.006137847900390625, -0.0128326416015625, -0.01244354248046875, -0.0455322265625, -0.003513336181640625, 0.01367950439453125, -0.0235595703125, -0.018157958984375, 0.007106781005859375, 0.040740966796875, 0.04608154296875, 0.02899169921875, -0.0226593017578125, 0.045501708984375, -0.0153045654296875, -0.005542755126953125, -0.01496124267578125, 0.0265350341796875, -0.0175933837890625, -0.005817413330078125, -0.0166778564453125, -0.0032672882080078125, 0.04046630859375, -0.0195159912109375, 0.00919342041015625, 0.0007829666137695312, -0.015411376953125, 0.029022216796875, -0.00949859619140625, 0.01458740234375, -0.0243682861328125, -0.01045989990234375, 0.0215301513671875, -0.032440185546875, 0.042816162109375, -0.00807952880859375, -0.01678466796875, -0.0027523040771484375, -0.01544189453125, -0.007389068603515625, 0.0081787109375, 0.0240936279296875, 0.0271453857421875, 0.0196075439453125, 0.007045745849609375, -0.010833740234375, -0.01136016845703125, 0.034515380859375, 0.016571044921875, 0.0031528472900390625, 0.01154327392578125, 0.01751708984375, -0.03509521484375, 0.01467132568359375, 0.0218963623046875, 
0.03936767578125, 0.01062774658203125, -0.007534027099609375, -0.0022525787353515625, 0.0009412765502929688, 0.004283905029296875, 0.000675201416015625, -0.0008649826049804688, -0.0250701904296875, 0.006561279296875, 0.0038051605224609375, 0.000530242919921875, -0.0029239654541015625, -0.006526947021484375, 0.00775909423828125, 0.033477783203125, -0.0007214546203613281, 0.0078277587890625, -0.021728515625, -0.0010509490966796875, -0.00418853759765625, -0.006847381591796875, -0.0118255615234375, 0.003490447998046875, 0.033050537109375, -0.00814056396484375, -0.0266876220703125, -0.011444091796875, 0.025421142578125, 0.0031604766845703125, 0.0023193359375, -0.016693115234375, -0.0037403106689453125, -0.01611328125, -0.0467529296875, -0.035491943359375, -0.01617431640625, 0.0260009765625, -0.0089263916015625, -0.00390625, 0.006244659423828125, 0.0110626220703125, -0.048980712890625, 0.0016946792602539062, -0.00299835205078125, 0.0010242462158203125, 0.0269317626953125, -0.002330780029296875, 0.0125579833984375, -0.0157012939453125, -0.0180816650390625, -0.04119873046875, -0.0377197265625, 0.0130462646484375, 0.001399993896484375, 0.015167236328125, -0.0201873779296875, 0.006496429443359375, 0.0005202293395996094, 0.04522705078125, 0.01148223876953125, -4.64320182800293e-05, -0.02935791015625, 0.00325775146484375, 0.01227569580078125, 0.04583740234375, 0.0157470703125, 0.0084686279296875, -0.00402069091796875, 0.005550384521484375, -0.004245758056640625, 0.019683837890625, -0.0379638671875, -0.01861572265625, -0.045013427734375, 0.047454833984375, 0.002689361572265625, -0.0244598388671875, -0.00359344482421875, -0.01505279541015625, -0.03021240234375, 0.00957489013671875, -0.0265960693359375, 0.014739990234375, 0.038726806640625, -0.009613037109375, -0.0217437744140625, 0.0239715576171875, 0.004932403564453125, 0.00125885009765625, -0.0023517608642578125, -0.008544921875, -0.0098876953125, -0.0138397216796875, 0.0099639892578125, -0.0071563720703125, 
-0.0094451904296875, -0.0194091796875, -0.025421142578125, -0.003337860107421875, -0.03485107421875, 0.0147552490234375, -0.0130767822265625, 0.0008521080017089844, 0.01238250732421875, 0.01153564453125, -0.0018157958984375, -0.02593994140625, 0.00542449951171875, 0.029754638671875, 0.031585693359375, -0.01067352294921875, -0.0277862548828125, 0.035491943359375, -0.0226898193359375, -0.0008716583251953125, 0.0079498291015625, -0.0133209228515625, 0.0167694091796875, -0.01495361328125, -0.0108489990234375, -0.006015777587890625, 0.006267547607421875, -0.0074005126953125, 0.00177764892578125, 0.00482177734375, -0.029541015625, -0.0054473876953125, -0.0146942138671875, 0.0018625259399414062, -0.022735595703125, -0.001079559326171875, 0.006999969482421875, 0.0203857421875, -0.0242462158203125, -0.01180267333984375, 0.0066986083984375, 0.003009796142578125, -0.029083251953125, 8.481740951538086e-05, -0.00809478759765625, 0.005992889404296875, 0.01849365234375, -0.00945281982421875, 0.00778961181640625, -0.00656890869140625, 0.0125885009765625, 0.0138702392578125, -0.01983642578125, -0.0271148681640625, -0.00910186767578125, -0.0173797607421875, 0.0088043212890625, 0.0035419464111328125, -0.01427459716796875, 0.018768310546875, -0.0006532669067382812, -0.027557373046875, -0.0249176025390625, 0.0285186767578125, 0.0179290771484375, -0.01100921630859375, 0.004062652587890625, -0.0258331298828125, -0.01495361328125, 0.00592803955078125, 0.055633544921875, 0.00730133056640625, -0.00859832763671875, 0.0065765380859375, -0.012969970703125, -0.016876220703125, -0.0090484619140625, -0.0196685791015625, 0.0038280487060546875, 0.001190185546875, 0.00017964839935302734, 0.0190582275390625, -0.0146026611328125, -0.005229949951171875, -0.0182037353515625, -0.0086669921875, 0.01381683349609375, 0.0255279541015625, -0.0112152099609375, -0.002471923828125, -0.0102691650390625, -5.632638931274414e-05, 0.01142120361328125, 0.03253173828125, -0.0190277099609375, -0.0278472900390625, 
-0.0199127197265625, -0.0078582763671875, 0.01116943359375, 0.019134521484375, 0.01496124267578125, -0.008819580078125, 0.0209197998046875, 0.0036258697509765625, 0.019866943359375, -0.0053558349609375, 0.00882720947265625, -0.0125579833984375, 0.014312744140625, 0.005237579345703125, 0.0021495819091796875, 0.0189056396484375, 0.006443023681640625, 0.02679443359375, -0.00439453125, -0.0025806427001953125, -0.0067138671875, -0.01224517822265625, -0.0239410400390625, 0.050262451171875, -0.0011434555053710938, -0.0035610198974609375, -0.0013799667358398438, -0.0159454345703125, 0.00827789306640625, 0.01239776611328125, 0.014373779296875, 0.00804901123046875, 0.00780487060546875, -0.0178680419921875, -0.016326904296875, -0.0252685546875, -0.0146942138671875, 0.0034351348876953125, 0.016571044921875, 0.0151214599609375, 0.0039825439453125, -0.0085601806640625, 0.0010900497436523438, 0.00196075439453125, -0.025543212890625, 0.034912109375, 0.0037288665771484375, 0.022613525390625, -0.018707275390625, -0.01351165771484375, 0.0230560302734375, -0.004116058349609375, -0.00257110595703125, -0.0249176025390625, 0.006763458251953125, -0.0026874542236328125, 0.0084381103515625, 0.007488250732421875, -0.03302001953125, -0.0155029296875, 0.0214691162109375, -0.001739501953125, 0.009735107421875, -0.0236663818359375, -0.020751953125, 0.010040283203125, 0.025054931640625, -0.005573272705078125, 0.04052734375, 0.00020301342010498047, -0.00534820556640625, -0.021942138671875, -0.034210205078125, 0.05010986328125, -0.0226287841796875, -0.01255035400390625, -0.022613525390625, 0.002162933349609375, -0.0195159912109375, 0.014404296875, 0.01024627685546875, 0.0009737014770507812, 0.012176513671875, -0.01052093505859375, 0.007282257080078125, 0.0202789306640625, 0.0295257568359375, 0.005523681640625, -0.0350341796875, -0.0023555755615234375, -0.02032470703125, -0.01215362548828125, -0.021148681640625, -0.0033283233642578125, -0.0232086181640625, 0.0194854736328125, 0.00017654895782470703, 
0.032867431640625, 0.0130767822265625, 0.02740478515625, -0.005657196044921875, -0.004055023193359375, -0.00954437255859375, -0.01152801513671875, 0.004734039306640625, 0.005428314208984375, -0.00847625732421875, 0.004100799560546875, -0.0287322998046875, 0.01004791259765625, -0.02984619140625, -2.8789043426513672e-05, 0.03515625, -0.0175323486328125, -0.0067596435546875, 0.004913330078125, 0.0084075927734375, 0.006175994873046875, -0.0026149749755859375, -0.00972747802734375, -0.010101318359375, 0.00047206878662109375, -0.0109100341796875, 0.005252838134765625, 0.03387451171875, 0.0318603515625, 0.0104522705078125, 0.010711669921875, -0.0051422119140625, 0.01203155517578125, -0.0186004638671875, -0.0299835205078125, 0.0097503662109375, -0.0020904541015625]'  # cały string z CSV
# Parse the text held in `s` (a Python-style list literal of floats) back into
# a real list. ast.literal_eval only evaluates literals, so it is safe to use
# on text read from a file, unlike eval().
v = ast.literal_eval(s)
# Print the number of elements in the parsed vector as a sanity check of its
# dimensionality.
print(len(v))


2560
