In [2]:
#| default_exp data_processing


In [3]:
#| export
import pandas as pd
import numpy as np
from typing import Dict, Any, List
from snowflake.snowpark import Session
from snowflake.snowpark.dataframe import DataFrame
import yaml


In [5]:
#| export
def parse_reviews(batch: Dict[str, Any]) -> Dict[str, Any]:
    """Split one raw text line into its UUID and review text.

    The expected layout is two double-quoted fields separated by a comma,
    e.g. ``"<uuid>","<review text>"``. The line is split on the first
    ``","`` delimiter, so commas (and even ``","`` sequences) inside the
    review text are preserved.

    Lines without that delimiter are handled leniently instead of raising
    ``IndexError``: the line is split on its first plain comma, and if there
    is no comma at all the whole line is treated as the UUID with an empty
    review text.

    Args:
        batch: Dictionary holding the raw line under the 'text' key. Only
            'text' is read; any other keys are ignored and are not copied
            into the result.

    Returns:
        Dictionary with exactly two keys: 'UUID' (surrounding double quotes
        removed) and 'REVIEW_TEXT' (trailing double quotes removed).

    Raises:
        KeyError: If ``batch`` has no 'text' key.
    """
    line = batch["text"]

    # Preferred form: both fields quoted, so the field boundary is `","`.
    # partition() splits on the first occurrence only and never raises when
    # the delimiter is absent (it returns an empty separator instead).
    raw_uuid, delimiter, raw_review = line.partition('","')

    if delimiter:
        # The opening quote of the review was consumed by the delimiter, so
        # only the closing quote is left to remove. NOTE: rstrip removes
        # *every* trailing double quote, so a review that itself ends with a
        # quote character loses it as well.
        review_text = raw_review.rstrip('"')
    else:
        # Unquoted or partially quoted line: fall back to the first comma.
        # With no comma at all, raw_review is '' and the line is all UUID.
        raw_uuid, _, raw_review = line.partition(',')
        review_text = raw_review.strip('"')

    return {
        'UUID': raw_uuid.strip('"'),
        'REVIEW_TEXT': review_text,
    }


In [6]:
# Smoke test for parse_reviews: a fully quoted `"<uuid>","<review>"` line
# should come back with both fields stripped of their surrounding quotes.
test_batch = dict(text='"uuid123","This is a great product review!"')
result = parse_reviews(test_batch)
assert 'uuid123' == result['UUID']
assert 'This is a great product review!' == result['REVIEW_TEXT']
print("✓ parse_reviews test passed")


✓ parse_reviews test passed
