*This script helps validate the tranche.*

Balance Error means that in a given cell, the brackets or quotes are not properly matched.

Specifically, it checks for:
- Balanced brackets: [], {}, ()
- Even number of unescaped quotes: ' and "

In [14]:
import csv
import ast

def is_balanced(s):
    """Return True when brackets nest correctly and quotes pair up in *s*.

    Two independent checks are performed in a single pass:

    * ``[]``, ``{}`` and ``()`` must open and close in properly nested order.
    * Each of ``'`` and ``"`` must occur an even number of times, ignoring
      escaped occurrences.

    A quote is escaped only when it is preceded by an odd number of
    consecutive backslashes, so ``\\"`` (an escaped backslash followed by a
    quote) counts as a real quote while ``\"`` does not.

    Brackets are counted wherever they appear, including between quotes.

    Args:
        s: The text to inspect.

    Returns:
        True if both checks pass, False otherwise.
    """
    closing_to_opening = {']': '[', '}': '{', ')': '('}
    opening = set(closing_to_opening.values())
    odd_quote = {"'": False, '"': False}
    stack = []
    backslash_run = 0  # consecutive backslashes immediately before this char

    for char in s:
        if char == '\\':
            backslash_run += 1
            continue

        if char in odd_quote:
            # An even-length run means the backslashes escape each other,
            # leaving the quote itself unescaped.
            if backslash_run % 2 == 0:
                odd_quote[char] = not odd_quote[char]
        elif char in opening:
            stack.append(char)
        elif char in closing_to_opening:
            if not stack or stack.pop() != closing_to_opening[char]:
                return False
        backslash_run = 0

    return not stack and not any(odd_quote.values())

def all_items_are_strings(obj):
    """Tell whether *obj* is a string or a (possibly nested) list of strings.

    Lists are walked recursively; any leaf that is not a ``str`` makes the
    result False. An empty list is accepted.
    """
    if isinstance(obj, list):
        for element in obj:
            if not all_items_are_strings(element):
                return False
        return True
    # Anything that is neither a list nor a string is rejected.
    return isinstance(obj, str)

def is_list_string_valid(cell):
    """Validate a cell that is, or textually looks like, a list of strings.

    Args:
        cell: Either an actual list, or text that may hold a list literal.

    Returns:
        A ``(ok, reason)`` tuple. ``reason`` is an empty string when ``ok``
        is True. Text that is not wrapped in ``[...]`` after stripping is
        accepted without further inspection.
    """
    non_string_reason = "List (or nested list) contains non-string item"

    # Already a list: only the element types need checking.
    if isinstance(cell, list):
        if all_items_are_strings(cell):
            return True, ""
        return False, non_string_reason

    text = cell.strip()
    looks_like_list = text.startswith('[') and text.endswith(']')
    if not looks_like_list:
        return True, ""

    try:
        value = ast.literal_eval(text)
        # Text such as "[1], [2]" is bracket-wrapped yet evaluates to a tuple.
        if not isinstance(value, list):
            return False, "Not a valid list"
        if not all_items_are_strings(value):
            return False, non_string_reason
    except Exception as e:
        return False, f"List parsing error: {e}"
    return True, ""

def check_csv(file_path):
    """Scan a CSV file and print the first formatting problem in each data row.

    The first row is treated as a header and skipped. Every remaining cell is
    stripped and checked, left to right, for:

    * a "Balance Error" (``is_balanced`` returns False), and
    * a "List Format Error" for cells wrapped in ``[...]`` that
      ``is_list_string_valid`` rejects.

    Only the first failing cell of a row is reported; the rest of that row is
    skipped. Reported row numbers start at 2 because row 1 is the header.
    Column numbers start at 1.

    Args:
        file_path: Path to a UTF-8 encoded CSV file.

    Returns:
        None. Findings are written to standard output. An empty file produces
        no output.
    """
    with open(file_path, newline='', encoding='utf-8') as f:
        reader = csv.reader(f)
        # Consume the header; the default avoids StopIteration on an empty file.
        if next(reader, None) is None:
            return

        for row_idx, row in enumerate(reader, start=2):
            for col_idx, cell in enumerate(row, start=1):
                cell = cell.strip()

                if not is_balanced(cell):
                    print(f"Row {row_idx}: Balance Error in column {col_idx}")
                    break

                if cell.startswith('[') and cell.endswith(']'):
                    valid_list, reason = is_list_string_valid(cell)
                    if not valid_list:
                        print(f"Row {row_idx}: List Format Error in column {col_idx} ({reason})")
                        break

# --- Entry point: ask for the CSV location, then validate that file ---
if __name__ == "__main__":
    # Surrounding whitespace is trimmed from the typed path before use.
    check_csv(input("Enter the path to your CSV file: ").strip())


Row 22: List Format Error in column 11 (List (or nested list) contains non-string item)
Row 32: List Format Error in column 11 (List parsing error: malformed node or string on line 1: <ast.Subscript object at 0x00000213C3D81270>)
Row 33: List Format Error in column 11 (List parsing error: malformed node or string on line 1: <ast.Subscript object at 0x00000213C3D83100>)
Row 43: List Format Error in column 3 (List (or nested list) contains non-string item)
Row 45: Balance Error in column 11
Row 49: Balance Error in column 11
Row 50: Balance Error in column 13
Row 73: List Format Error in column 5 (List parsing error: malformed node or string on line 1: <ast.Name object at 0x00000213C2FEE8C0>)
Row 87: Balance Error in column 5
Row 94: Balance Error in column 15
Row 95: List Format Error in column 15 (List parsing error: invalid syntax. Perhaps you forgot a comma? (<unknown>, line 1))
Row 109: Balance Error in column 5
Row 110: List Format Error in column 13 (List (or nested list) contains