# BERT Portuguese STS Embeddings

This notebook generates embeddings using `neuralmind/bert-large-portuguese-cased` model on the `merged_data_lemm.parquet` dataset efficiently and stores the results with row IDs.

In [1]:
import pandas as pd
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModel
from torch.utils.data import Dataset, DataLoader
import os
from tqdm import tqdm
import pickle
import gc
import warnings
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the dataset
data_path = "/home/joaquino/portuguese-llm/data/merged_data_lemm.parquet"
df = pd.read_parquet(data_path)

print(f"Dataset shape: {df.shape}")
print(f"Columns: {list(df.columns)}")
print("\nFirst few rows:")
df.head()

Dataset shape: (24000, 27)
Columns: ['messageId', 'message', 'trait', 'original_prompt', 'model', 'response', 'responseId', 'udpipe_result', 'MLC', 'MLS', 'DCC', 'CPC', 'profundidade_media', 'profundidade_max', 'ttr', 'lexical_density', 'token_quantity', 'adjective_list', 'substantive_list', 'genero', 'raca', 'regiao', 'localidade', 'unused', 'artigo', 'pronome', 'response_lemm']

First few rows:


Unnamed: 0,messageId,message,trait,original_prompt,model,response,responseId,udpipe_result,MLC,MLS,...,adjective_list,substantive_list,genero,raca,regiao,localidade,unused,artigo,pronome,response_lemm
0,14afb0c8-a4b2-52d5-9c35-8dd0e13b97de,role=<MessageRole.User: 'user'> content=[TextC...,"{'genero': 'homem', 'raca': 'preta', 'regiao':...",<user>\n Imagine que você é uma pessoa {{ge...,gpt-4o-mini,"Eu sou um homem negro, nascido e criado no Nor...",407b48b9-b0ed-5658-bba7-4180c43cd30c,"# generator = UDPipe 2, https://lindat.mff.cun...",20.272727,24.777778,...,"[{'id': 5, 'form': 'negro', 'lemma': 'negro', ...","[{'id': 4, 'form': 'homem', 'lemma': 'homem', ...",homem,preta,nortista,brasileira,,o,ele,eu ser um homem negro nascer e criar _ em o No...
1,14afb0c8-a4b2-52d5-9c35-8dd0e13b97de,role=<MessageRole.User: 'user'> content=[TextC...,"{'genero': 'homem', 'raca': 'preta', 'regiao':...",<user>\n Imagine que você é uma pessoa {{ge...,gpt-4o-mini,"Eu sou um homem preto, nascido e criado no Nor...",68156cbb-c93c-5d05-9ba6-cbb2e6d6ee55,"# generator = UDPipe 2, https://lindat.mff.cun...",22.0,26.888889,...,"[{'id': 5, 'form': 'preto', 'lemma': 'preto', ...","[{'id': 4, 'form': 'homem', 'lemma': 'homem', ...",homem,preta,nortista,brasileira,,o,ele,eu ser um homem preto nascer e criar _ em o No...
2,14afb0c8-a4b2-52d5-9c35-8dd0e13b97de,role=<MessageRole.User: 'user'> content=[TextC...,"{'genero': 'homem', 'raca': 'preta', 'regiao':...",<user>\n Imagine que você é uma pessoa {{ge...,gpt-4o-mini,"Eu sou um homem negro, nordestino e carrego em...",da84a465-0723-5ccd-a449-65c89840bc1e,"# generator = UDPipe 2, https://lindat.mff.cun...",18.266667,27.4,...,"[{'id': 5, 'form': 'negro', 'lemma': 'negro', ...","[{'id': 4, 'form': 'homem', 'lemma': 'homem', ...",homem,preta,nortista,brasileira,,o,ele,eu ser um homem negro nordestino e carregar em...
3,14afb0c8-a4b2-52d5-9c35-8dd0e13b97de,role=<MessageRole.User: 'user'> content=[TextC...,"{'genero': 'homem', 'raca': 'preta', 'regiao':...",<user>\n Imagine que você é uma pessoa {{ge...,gpt-4o-mini,"Eu sou um homem negro, natural do Norte do Bra...",91f9690e-b62d-54dc-a365-9803475f3433,"# generator = UDPipe 2, https://lindat.mff.cun...",32.375,21.583333,...,"[{'id': 5, 'form': 'negro', 'lemma': 'negro', ...","[{'id': 4, 'form': 'homem', 'lemma': 'homem', ...",homem,preta,nortista,brasileira,,o,ele,eu ser um homem negro natural _ de o Norte _ d...
4,14afb0c8-a4b2-52d5-9c35-8dd0e13b97de,role=<MessageRole.User: 'user'> content=[TextC...,"{'genero': 'homem', 'raca': 'preta', 'regiao':...",<user>\n Imagine que você é uma pessoa {{ge...,gpt-4o-mini,"Eu sou um homem negro, nascido e criado no Nor...",aa059409-4765-5520-9caf-348a2c2911ee,"# generator = UDPipe 2, https://lindat.mff.cun...",15.722222,25.727273,...,"[{'id': 5, 'form': 'negro', 'lemma': 'negro', ...","[{'id': 4, 'form': 'homem', 'lemma': 'homem', ...",homem,preta,nortista,brasileira,,o,ele,eu ser um homem negro nascer e criar _ em o No...


In [3]:
# Setup device and model configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Model configuration
model_name = "rufimelo/bert-large-portuguese-cased-sts"
max_length = 512
batch_size = 16  # Adjust based on your GPU memory

# Output directory for embeddings
embeddings_dir = "/home/joaquino/portuguese-llm/embeddings/bert_embeddings"
os.makedirs(embeddings_dir, exist_ok=True)

Using device: cuda


In [4]:
# Load BERT model and tokenizer
print(f"Loading BERT model: {model_name}")

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
model = model.to(device)
model.eval()

print("Model loaded successfully!")

Loading BERT model: rufimelo/bert-large-portuguese-cased-sts
Model loaded successfully!
Model loaded successfully!


In [5]:
class TextDataset(Dataset):
    """Efficient dataset for BERT embedding generation"""
    
    def __init__(self, texts, ids, tokenizer, max_length=512):
        self.texts = texts
        self.ids = ids
        self.tokenizer = tokenizer
        self.max_length = max_length
    
    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        text = str(self.texts[idx]) if self.texts[idx] is not None else ""
        text_id = self.ids[idx]
        
        # Tokenize text
        encoding = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'text_id': text_id
        }

In [6]:
def generate_embeddings_batch(model, dataloader, device):
    """Generate BERT embeddings for texts in batches"""
    
    all_embeddings = []
    all_ids = []
    
    model.eval()
    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Generating embeddings"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            text_ids = batch['text_id']
            
            # Get model outputs
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            
            # Use [CLS] token embeddings (first token) as sentence representation
            cls_embeddings = outputs.last_hidden_state[:, 0, :].cpu().numpy()
            
            all_embeddings.append(cls_embeddings)
            
            # Handle text_ids - it could be a tensor or list depending on the dataloader
            if hasattr(text_ids, 'tolist'):
                all_ids.extend(text_ids.tolist())
            else:
                all_ids.extend(text_ids)
            
            # Free GPU memory
            del input_ids, attention_mask, outputs
            torch.cuda.empty_cache() if torch.cuda.is_available() else None
    
    # Concatenate all embeddings
    embeddings = np.vstack(all_embeddings)
    
    return embeddings, all_ids

In [7]:
# Prepare data for embedding generation
# Check what text column to use
text_columns = ['response', 'text', 'content', 'lemmatized_text']
available_columns = [col for col in text_columns if col in df.columns]

print(f"Available text columns: {available_columns}")
print(f"All columns: {list(df.columns)}")

# Use the first available text column or 'response' as default
text_column = available_columns[0] if available_columns else 'response'
print(f"Using text column: '{text_column}'")

# Check if responseId column exists
if 'responseId' not in df.columns:
    raise ValueError("Column 'responseId' not found in dataset. Available columns: " + str(list(df.columns)))

# Clean data - remove empty texts and missing responseIds
df_clean = df[
    df[text_column].notna() & 
    (df[text_column].str.strip() != '') &
    df['responseId'].notna()
].copy()

print(f"Data shape after cleaning: {df_clean.shape}")
print(f"Using responseId as identifier for embeddings")

# Reset index for consistent processing but keep responseId
df_clean = df_clean.reset_index(drop=True)

Available text columns: ['response']
All columns: ['messageId', 'message', 'trait', 'original_prompt', 'model', 'response', 'responseId', 'udpipe_result', 'MLC', 'MLS', 'DCC', 'CPC', 'profundidade_media', 'profundidade_max', 'ttr', 'lexical_density', 'token_quantity', 'adjective_list', 'substantive_list', 'genero', 'raca', 'regiao', 'localidade', 'unused', 'artigo', 'pronome', 'response_lemm']
Using text column: 'response'
Data shape after cleaning: (24000, 27)
Using responseId as identifier for embeddings


In [8]:
# Create dataset and dataloader
texts = df_clean[text_column].tolist()
response_ids = df_clean['responseId'].tolist()

print(f"Processing {len(texts)} texts...")
print(f"Sample text: {texts[0][:200]}...")
print(f"Sample responseId: {response_ids[0]}")

# Create dataset
dataset = TextDataset(texts, response_ids, tokenizer, max_length)

# Create dataloader
dataloader = DataLoader(
    dataset, 
    batch_size=batch_size, 
    shuffle=False, 
    num_workers=2,
    pin_memory=True if torch.cuda.is_available() else False
)

print(f"Dataset created with {len(dataset)} samples")
print(f"Number of batches: {len(dataloader)}")

Processing 24000 texts...
Sample text: Eu sou um homem negro, nascido e criado no Norte do Brasil. Minha pele tem um tom profundo, que carrega histórias e ancestralidade, refletindo a rica cultura da minha região. Cresci rodeado pela natur...
Sample responseId: 407b48b9-b0ed-5658-bba7-4180c43cd30c
Dataset created with 24000 samples
Number of batches: 1500


In [9]:
# Generate embeddings
print("Starting embedding generation...")
print(f"Estimated time: ~{len(dataloader) * 0.5:.1f} minutes (approximate)")

embeddings, embedding_ids = generate_embeddings_batch(model, dataloader, device)

print(f"Generated embeddings shape: {embeddings.shape}")
print(f"Number of embedding IDs: {len(embedding_ids)}")
print(f"Embedding dimension: {embeddings.shape[1]}")

# Verify alignment
assert len(embedding_ids) == embeddings.shape[0], "Mismatch between embeddings and IDs"
print("✓ Embeddings and IDs are properly aligned")

Starting embedding generation...
Estimated time: ~750.0 minutes (approximate)


Generating embeddings:   0%|          | 0/1500 [00:00<?, ?it/s]

Generating embeddings: 100%|██████████| 1500/1500 [14:19<00:00,  1.75it/s]

Generated embeddings shape: (24000, 1024)
Number of embedding IDs: 24000
Embedding dimension: 1024
✓ Embeddings and IDs are properly aligned





In [10]:
# Save embeddings to disk
print("Saving embeddings to disk...")

# 1. Save as numpy arrays (most efficient)
embeddings_file = os.path.join(embeddings_dir, "bert_embeddings.npy")
ids_file = os.path.join(embeddings_dir, "response_ids.npy")

np.save(embeddings_file, embeddings)
np.save(ids_file, np.array(embedding_ids))

print(f"✓ Saved embeddings to: {embeddings_file}")
print(f"✓ Saved responseIds to: {ids_file}")

# 2. Save as pickle for convenience (includes both embeddings and IDs)
pickle_file = os.path.join(embeddings_dir, "bert_embeddings_with_ids.pkl")
with open(pickle_file, 'wb') as f:
    pickle.dump({
        'embeddings': embeddings,
        'response_ids': embedding_ids,
        'model_name': model_name,
        'embedding_dim': embeddings.shape[1],
        'text_column': text_column
    }, f)

print(f"✓ Saved complete embedding data to: {pickle_file}")

# 3. Save as DataFrame for easy analysis
embedding_df = pd.DataFrame(embeddings)
embedding_df['responseId'] = embedding_ids
embedding_df_file = os.path.join(embeddings_dir, "bert_embeddings_df.parquet")
embedding_df.to_parquet(embedding_df_file, index=False)

print(f"✓ Saved DataFrame to: {embedding_df_file}")

# Create metadata file
metadata = {
    'model_name': model_name,
    'text_column': text_column,
    'id_column': 'responseId',
    'embedding_dimension': int(embeddings.shape[1]),
    'num_samples': int(embeddings.shape[0]),
    'max_length': max_length,
    'batch_size': batch_size,
    'files': {
        'embeddings': 'bert_embeddings.npy',
        'response_ids': 'response_ids.npy',
        'pickle': 'bert_embeddings_with_ids.pkl',
        'dataframe': 'bert_embeddings_df.parquet'
    }
}

metadata_file = os.path.join(embeddings_dir, "metadata.json")
import json
with open(metadata_file, 'w') as f:
    json.dump(metadata, f, indent=2)

print(f"✓ Saved metadata to: {metadata_file}")
print("\n🎉 All embeddings saved successfully with responseId as identifier!")

Saving embeddings to disk...
✓ Saved embeddings to: /home/joaquino/portuguese-llm/embeddings/bert_embeddings/bert_embeddings.npy
✓ Saved responseIds to: /home/joaquino/portuguese-llm/embeddings/bert_embeddings/response_ids.npy
✓ Saved complete embedding data to: /home/joaquino/portuguese-llm/embeddings/bert_embeddings/bert_embeddings_with_ids.pkl
✓ Saved complete embedding data to: /home/joaquino/portuguese-llm/embeddings/bert_embeddings/bert_embeddings_with_ids.pkl
✓ Saved DataFrame to: /home/joaquino/portuguese-llm/embeddings/bert_embeddings/bert_embeddings_df.parquet
✓ Saved metadata to: /home/joaquino/portuguese-llm/embeddings/bert_embeddings/metadata.json

🎉 All embeddings saved successfully with responseId as identifier!
✓ Saved DataFrame to: /home/joaquino/portuguese-llm/embeddings/bert_embeddings/bert_embeddings_df.parquet
✓ Saved metadata to: /home/joaquino/portuguese-llm/embeddings/bert_embeddings/metadata.json

🎉 All embeddings saved successfully with responseId as identifie

In [11]:
# Quick verification and example usage
print("=== Verification ===")

# Load and verify one format
loaded_embeddings = np.load(embeddings_file)
loaded_ids = np.load(ids_file)

print(f"Loaded embeddings shape: {loaded_embeddings.shape}")
print(f"Loaded responseIds shape: {loaded_ids.shape}")
print(f"Embeddings match: {np.array_equal(embeddings, loaded_embeddings)}")
print(f"ResponseIds match: {np.array_equal(embedding_ids, loaded_ids)}")

# Show some sample embeddings
print(f"\nSample embedding (first 10 dimensions): {loaded_embeddings[0][:10]}")
print(f"Sample responseId: {loaded_ids[0]}")
print(f"Sample text: {texts[0][:100]}...")

# Memory cleanup
del embeddings, loaded_embeddings
if torch.cuda.is_available():
    torch.cuda.empty_cache()
gc.collect()

print("\n✅ Verification complete!")

=== Verification ===
Loaded embeddings shape: (24000, 1024)
Loaded responseIds shape: (24000,)
Embeddings match: True
ResponseIds match: True

Sample embedding (first 10 dimensions): [ 0.28160176  0.13340287  0.08593579 -0.1962112  -0.46568924  0.68999684
  1.4194202  -0.0229957  -1.4196646   1.8162141 ]
Sample responseId: 407b48b9-b0ed-5658-bba7-4180c43cd30c
Sample text: Eu sou um homem negro, nascido e criado no Norte do Brasil. Minha pele tem um tom profundo, que carr...

✅ Verification complete!

✅ Verification complete!


In [12]:
# Helper functions for loading embeddings in future work
def load_bert_embeddings(embeddings_dir="/home/joaquino/portuguese-llm/embeddings/bert_embeddings"):
    """
    Load BERT embeddings from disk
    
    Returns:
        tuple: (embeddings, response_ids) as numpy arrays
    """
    embeddings_file = os.path.join(embeddings_dir, "bert_embeddings.npy")
    ids_file = os.path.join(embeddings_dir, "response_ids.npy")
    
    embeddings = np.load(embeddings_file)
    response_ids = np.load(ids_file)
    
    return embeddings, response_ids

def load_bert_embeddings_with_metadata(embeddings_dir="/home/joaquino/portuguese-llm/embeddings/bert_embeddings"):
    """
    Load BERT embeddings with full metadata from pickle file
    
    Returns:
        dict: Dictionary containing embeddings, response_ids, and metadata
    """
    pickle_file = os.path.join(embeddings_dir, "bert_embeddings_with_ids.pkl")
    
    with open(pickle_file, 'rb') as f:
        data = pickle.load(f)
    
    return data

def load_bert_embeddings_as_dataframe(embeddings_dir="/home/joaquino/portuguese-llm/embeddings/bert_embeddings"):
    """
    Load BERT embeddings as pandas DataFrame
    
    Returns:
        pd.DataFrame: DataFrame with embeddings and responseId
    """
    df_file = os.path.join(embeddings_dir, "bert_embeddings_df.parquet")
    return pd.read_parquet(df_file)

def get_embedding_by_response_id(response_id, embeddings_dir="/home/joaquino/portuguese-llm/embeddings/bert_embeddings"):
    """
    Get embedding for a specific responseId
    
    Args:
        response_id: The responseId to look for
        embeddings_dir: Directory containing the embeddings
    
    Returns:
        numpy.ndarray: The embedding vector for the given responseId, or None if not found
    """
    embeddings, response_ids = load_bert_embeddings(embeddings_dir)
    
    # Convert to list for index lookup if needed
    if not isinstance(response_ids, list):
        response_ids = response_ids.tolist()
    
    try:
        idx = response_ids.index(response_id)
        return embeddings[idx]
    except ValueError:
        print(f"ResponseId {response_id} not found in embeddings")
        return None

print("Helper functions defined. You can use these to load embeddings in other notebooks:")
print("- load_bert_embeddings() -> returns (embeddings, response_ids)")
print("- load_bert_embeddings_with_metadata() -> returns dict with all metadata")  
print("- load_bert_embeddings_as_dataframe() -> returns DataFrame with responseId column")
print("- get_embedding_by_response_id(response_id) -> returns specific embedding")

Helper functions defined. You can use these to load embeddings in other notebooks:
- load_bert_embeddings() -> returns (embeddings, response_ids)
- load_bert_embeddings_with_metadata() -> returns dict with all metadata
- load_bert_embeddings_as_dataframe() -> returns DataFrame with responseId column
- get_embedding_by_response_id(response_id) -> returns specific embedding


In [13]:
# Improved helper function for loading embeddings as DataFrame with better organization
def load_embeddings_dataframe_with_responseId(embeddings_dir="/home/joaquino/portuguese-llm/embeddings/bert_embeddings", 
                                              responseId_first=True):
    """
    Load BERT embeddings as pandas DataFrame with responseId column
    
    Args:
        embeddings_dir (str): Directory containing the embeddings
        responseId_first (bool): If True, puts responseId as the first column for easier access
    
    Returns:
        pd.DataFrame: DataFrame with responseId and embedding dimensions
                     - If responseId_first=True: columns are ['responseId', '0', '1', ..., 'n']
                     - If responseId_first=False: columns are ['0', '1', ..., 'n', 'responseId']
    """
    # Load the parquet file
    df_file = os.path.join(embeddings_dir, "bert_embeddings_df.parquet")
    
    if not os.path.exists(df_file):
        raise FileNotFoundError(f"Embedding DataFrame file not found: {df_file}")
    
    df = pd.read_parquet(df_file)
    
    if responseId_first:
        # Reorganize columns to put responseId first
        embedding_cols = [col for col in df.columns if col != 'responseId']
        df = df[['responseId'] + embedding_cols]
    
    return df

def get_embeddings_by_responseIds(response_ids_list, embeddings_dir="/home/joaquino/portuguese-llm/embeddings/bert_embeddings"):
    """
    Get embeddings for specific responseIds
    
    Args:
        response_ids_list (list): List of responseIds to retrieve
        embeddings_dir (str): Directory containing the embeddings
    
    Returns:
        pd.DataFrame: DataFrame with embeddings for the specified responseIds
    """
    df = load_embeddings_dataframe_with_responseId(embeddings_dir, responseId_first=True)
    
    # Filter by responseIds
    filtered_df = df[df['responseId'].isin(response_ids_list)]
    
    if filtered_df.empty:
        print(f"No embeddings found for the provided responseIds")
        return pd.DataFrame()
    
    print(f"Found {len(filtered_df)} embeddings out of {len(response_ids_list)} requested responseIds")
    return filtered_df

def get_embedding_array_by_responseId(response_id, embeddings_dir="/home/joaquino/portuguese-llm/embeddings/bert_embeddings"):
    """
    Get just the embedding array for a single responseId
    
    Args:
        response_id (str): The responseId to look for
        embeddings_dir (str): Directory containing the embeddings
    
    Returns:
        numpy.ndarray: The embedding vector, or None if not found
    """
    df = load_embeddings_dataframe_with_responseId(embeddings_dir, responseId_first=True)
    
    # Find the row with the specified responseId
    row = df[df['responseId'] == response_id]
    
    if row.empty:
        print(f"ResponseId {response_id} not found in embeddings")
        return None
    
    # Return just the embedding values (exclude responseId column)
    embedding_cols = [col for col in df.columns if col != 'responseId']
    return row[embedding_cols].values[0]

print("Enhanced helper functions for working with responseId:")
print("- load_embeddings_dataframe_with_responseId(responseId_first=True)")
print("- get_embeddings_by_responseIds(['id1', 'id2', ...])")
print("- get_embedding_array_by_responseId('single_id')")

Enhanced helper functions for working with responseId:
- load_embeddings_dataframe_with_responseId(responseId_first=True)
- get_embeddings_by_responseIds(['id1', 'id2', ...])
- get_embedding_array_by_responseId('single_id')


In [14]:
# Demo: Load DataFrame with responseId as first column
print("=== Loading DataFrame with responseId as first column ===")
df_with_responseId = load_embeddings_dataframe_with_responseId(responseId_first=True)
print(f"DataFrame shape: {df_with_responseId.shape}")
print(f"Columns (first 5): {list(df_with_responseId.columns[:5])}")
print(f"ResponseId column is first: {df_with_responseId.columns[0] == 'responseId'}")
print("\nFirst few rows:")
df_with_responseId.head(3)

=== Loading DataFrame with responseId as first column ===
DataFrame shape: (24000, 1025)
Columns (first 5): ['responseId', '0', '1', '2', '3']
ResponseId column is first: True

First few rows:
DataFrame shape: (24000, 1025)
Columns (first 5): ['responseId', '0', '1', '2', '3']
ResponseId column is first: True

First few rows:


Unnamed: 0,responseId,0,1,2,3,4,5,6,7,8,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,407b48b9-b0ed-5658-bba7-4180c43cd30c,0.281602,0.133403,0.085936,-0.196211,-0.465689,0.689997,1.41942,-0.022996,-1.419665,...,0.914445,-1.052605,0.186389,0.624883,0.774898,0.47864,0.088727,0.623736,-0.429235,1.738742
1,68156cbb-c93c-5d05-9ba6-cbb2e6d6ee55,-0.345559,0.024747,0.020951,-0.021848,-0.736197,0.712839,1.39493,-0.341191,-1.764418,...,1.15034,-1.160325,-0.744703,0.240948,0.705257,0.636925,0.261278,0.685762,-0.413341,1.549394
2,da84a465-0723-5ccd-a449-65c89840bc1e,0.374511,0.278898,0.307217,-0.265869,-0.455923,0.532898,1.256891,-0.050374,-1.349318,...,1.045205,-1.101055,0.355464,1.046495,0.737988,0.647583,0.006477,1.062259,-0.488595,1.866999
