In [1]:
import re

MULTILINESTRING_REGEX = re.compile(r'^\s*MULTILINESTRING\s*\(\s*(.+?)\s*\)\s*$', re.I | re.S)
PAIR_REGEX = re.compile(r'(-?\d+(?:\.\d+)?)\s+(-?\d+(?:\.\d+)?)')  # (lon lat)

def parse_multilinestring(s: str) -> list[list[tuple[float, float]]]:
    """
    Parse MULTILINESTRING into list of segments.
    Returns [ [(lon,lat), ...], ... ], skipping segments with <2 points.
    Empty list means true parse failure.
    """
    if not isinstance(s, str):
        return []
    m = MULTILINESTRING_REGEX.match(s.strip())
    if not m:
        return []
    inner = m.group(1)
    seg_strs = re.findall(r'\(\s*([^\)]*?)\s*\)', inner)
    lines: list[list[tuple[float, float]]] = []
    for seg in seg_strs:
        pts = [(float(x), float(y)) for (x, y) in PAIR_REGEX.findall(seg)]
        if len(pts) >= 2:
            lines.append(pts)
    return lines

In [2]:
import pandas as pd
df = pd.read_csv("/Users/machaax/Desktop/ROPES/ropes/data/sample.csv")
df.columns

Index(['Unnamed: 0', 'OBJECTID', 'ID', 'SUB_1', 'SUB_2', 'VOLTAGE', 'TYPE',
       'STATUS', 'SHAPE_Length', 'geometry'],
      dtype='object')

In [3]:
df['parsed'] = df['geometry'].apply(parse_multilinestring)

In [4]:
df['geometry'].iloc[10]

'MULTILINESTRING ((-81.8951368 35.3493237, -81.8947923 35.3491632, -81.8951786 35.3486761, -81.896675 35.3467405, -81.8979625 35.345235, -81.8979368 35.3437489, -81.8972616 35.3412667, -81.8970365 35.3390041, -81.896676 35.3369321, -81.8963967 35.3354948, -81.8960788 35.3337498, -81.8953985 35.3300535, -81.8950568 35.3281895, -81.8947122 35.3262456, -81.8944775 35.3250602, -81.8941715 35.3233561, -81.8937823 35.3212399, -81.8934465 35.3193443, -81.8930336 35.3170873, -81.8927602 35.3156449, -81.8923384 35.3134364, -81.8919908 35.3114702, -81.8917353 35.3096398, -81.8914174 35.3082845, -81.8908974 35.3053532, -81.8906598 35.3038063, -81.890431 35.3023588, -81.8902111 35.3009597, -81.8898249 35.2986611, -81.889611 35.2972208, -81.8892604 35.2949826, -81.8888742 35.2927759, -81.8886157 35.2911269, -81.8884776 35.2900644, -81.8882851 35.2897879, -81.8882102 35.289721))'

In [5]:
df['parsed'].iloc[10]

[[(-81.8951368, 35.3493237),
  (-81.8947923, 35.3491632),
  (-81.8951786, 35.3486761),
  (-81.896675, 35.3467405),
  (-81.8979625, 35.345235),
  (-81.8979368, 35.3437489),
  (-81.8972616, 35.3412667),
  (-81.8970365, 35.3390041),
  (-81.896676, 35.3369321),
  (-81.8963967, 35.3354948),
  (-81.8960788, 35.3337498),
  (-81.8953985, 35.3300535),
  (-81.8950568, 35.3281895),
  (-81.8947122, 35.3262456),
  (-81.8944775, 35.3250602),
  (-81.8941715, 35.3233561),
  (-81.8937823, 35.3212399),
  (-81.8934465, 35.3193443),
  (-81.8930336, 35.3170873),
  (-81.8927602, 35.3156449),
  (-81.8923384, 35.3134364),
  (-81.8919908, 35.3114702),
  (-81.8917353, 35.3096398),
  (-81.8914174, 35.3082845),
  (-81.8908974, 35.3053532),
  (-81.8906598, 35.3038063),
  (-81.890431, 35.3023588),
  (-81.8902111, 35.3009597),
  (-81.8898249, 35.2986611),
  (-81.889611, 35.2972208),
  (-81.8892604, 35.2949826),
  (-81.8888742, 35.2927759),
  (-81.8886157, 35.2911269),
  (-81.8884776, 35.2900644),
  (-81.8882851, 35.

In [6]:
df['parsed'].iloc[10][0]

[(-81.8951368, 35.3493237),
 (-81.8947923, 35.3491632),
 (-81.8951786, 35.3486761),
 (-81.896675, 35.3467405),
 (-81.8979625, 35.345235),
 (-81.8979368, 35.3437489),
 (-81.8972616, 35.3412667),
 (-81.8970365, 35.3390041),
 (-81.896676, 35.3369321),
 (-81.8963967, 35.3354948),
 (-81.8960788, 35.3337498),
 (-81.8953985, 35.3300535),
 (-81.8950568, 35.3281895),
 (-81.8947122, 35.3262456),
 (-81.8944775, 35.3250602),
 (-81.8941715, 35.3233561),
 (-81.8937823, 35.3212399),
 (-81.8934465, 35.3193443),
 (-81.8930336, 35.3170873),
 (-81.8927602, 35.3156449),
 (-81.8923384, 35.3134364),
 (-81.8919908, 35.3114702),
 (-81.8917353, 35.3096398),
 (-81.8914174, 35.3082845),
 (-81.8908974, 35.3053532),
 (-81.8906598, 35.3038063),
 (-81.890431, 35.3023588),
 (-81.8902111, 35.3009597),
 (-81.8898249, 35.2986611),
 (-81.889611, 35.2972208),
 (-81.8892604, 35.2949826),
 (-81.8888742, 35.2927759),
 (-81.8886157, 35.2911269),
 (-81.8884776, 35.2900644),
 (-81.8882851, 35.2897879),
 (-81.8882102, 35.289721)

In [7]:
df['parsed'].iloc[10][1]

IndexError: list index out of range