# Ripgrep Contextual Search Example
This notebook demonstrates how to use ripgrep to search for patterns in files, including lines before and after each match, and store the results in a pandas DataFrame for contextual analysis.

In [1]:
# Import required libraries
import re
import subprocess
import warnings

import pandas as pd

## Define ripgrep search function with context
This function runs ripgrep with options to include lines before and after each match, and parses the output into a DataFrame.

In [None]:
def ripgrep_search_with_context(pattern, path='.', before=2, after=2, extra_args=None):
    cmd = ['rg', '--vimgrep', f'-B{before}', f'-A{after}', pattern, path]
    if extra_args:
        cmd.extend(extra_args)
    result = subprocess.run(cmd, capture_output=True, text=True)
    lines = result.stdout.strip().split('\n')
    data = []
    for line in lines:
        line_type = 'match'
        if line.startswith('--') or line.strip() == '':
            continue  # skip separator or empty lines
        if '-' in line and not line.startswith('-'):
            # context line, e.g. file-path-line-text
            parts = line.split('-', 2)
            if len(parts) == 3:
                file, line_num, text = parts
                data.append({'file': file, 'line': int(line_num), 'col': None, 'text': text, 'type': 'context'})
            continue
        if ':' in line:
            # match line, e.g. file-path:line:col:text
            parts = line.split(':', 3)
            if len(parts) == 4:
                file, line_num, col_num, text = parts
                try:
                    line_num = int(line_num)
                    col_num = int(col_num)
                except ValueError:
                    continue
                data.append({'file': file, 'line': line_num, 'col': col_num, 'text': text, 'type': 'match'})
            continue
    return pd.DataFrame(data)

## Run a contextual search
Search for the word 'def' in Python files, including 2 lines before and after each match, and print a summary of the resulting DataFrame.

In [13]:
# Search the Python sources of the project for 'def', keeping two lines of
# context on either side of every hit, then summarise the parsed frame.
df = ripgrep_search_with_context(
    'def',
    path='C:/work/GitHub/dec-tree-py',
    before=2,
    after=2,
    extra_args=['--type', 'py'],
)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 0 entries
Empty DataFrame


In [12]:
# Diagnostic cell: dump ripgrep's raw --vimgrep output (with 2 lines of context)
# so it can be compared against what the parser produces.
cmd = [
    'rg', '--vimgrep', '-B2', '-A2',
    'def', 'C:/work/GitHub/dec-tree-py',
    '--type', 'py',
]
result = subprocess.run(cmd, capture_output=True, text=True)
print(result.stdout)

C:/work/GitHub/dec-tree-py\Python\tests\m_classifier_test.py-1-from src.dec_tree.m_classifier import M_Classifier
C:/work/GitHub/dec-tree-py\Python\tests\m_classifier_test.py-2-
C:/work/GitHub/dec-tree-py\Python\tests\m_classifier_test.py:3:1:def test_m_classifier():
C:/work/GitHub/dec-tree-py\Python\tests\m_classifier_test.py-4-    m_classifier = M_Classifier()
C:/work/GitHub/dec-tree-py\Python\tests\m_classifier_test.py-5-    assert m_classifier() < 1.0
--
C:/work/GitHub/dec-tree-py\Python\src\child_channel.py-14-    STDOUT_END = 'out_end_###'
C:/work/GitHub/dec-tree-py\Python\src\child_channel.py-15-
C:/work/GitHub/dec-tree-py\Python\src\child_channel.py:16:5:    def __init__(self):
C:/work/GitHub/dec-tree-py\Python\src\child_channel.py-17-        """
C:/work/GitHub/dec-tree-py\Python\src\child_channel.py-18-        docstring
--
C:/work/GitHub/dec-tree-py\Python\src\child_channel.py-22-        self.json_object = None
C:/work/GitHub/dec-tree-py\Python\src\child_channel.py-23-
C:/work