In [4]:
import ast
import csv
import functools
import operator as op
import random

# Supported operators: each AST operator node class is paired with the
# function from the `operator` module that is applied for it.
# Note that `op.and_` / `op.or_` are the two-argument `&` / `|` functions.
operators = dict([
    # comparisons
    (ast.Eq, op.eq),
    (ast.NotEq, op.ne),
    (ast.Lt, op.lt),
    (ast.LtE, op.le),
    (ast.Gt, op.gt),
    (ast.GtE, op.ge),
    # boolean connectives
    (ast.And, op.and_),
    (ast.Or, op.or_),
])

# Function to generate dummy CSV data with specified number of rows
def generate_dummy_csv(file_path, num_rows):
    """Write a CSV file containing `num_rows` randomly generated people.

    The file at `file_path` is created or overwritten. It starts with the
    header ``id,name,age,city``; ids count up from 1, names are
    ``Person <id>``, ages are random integers in [20, 60] and cities are
    picked at random from a fixed list of five.
    """
    columns = ['id', 'name', 'age', 'city']
    cities = ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix']

    def make_record(number):
        # Age is drawn before city so the sequence of random calls per row
        # stays: randint, then choice.
        return {
            'id': number,
            'name': f'Person {number}',
            'age': random.randint(20, 60),
            'city': random.choice(cities),
        }

    with open(file_path, 'w', newline='') as out:
        sink = csv.DictWriter(out, fieldnames=columns)
        sink.writeheader()
        for number in range(1, num_rows + 1):
            sink.writerow(make_record(number))

# Function to read CSV in chunks
def read_csv_in_chunks(file_path, chunk_size=10000):
    """Lazily yield the rows of a CSV file in lists of up to `chunk_size`.

    The first line of the file is used as the header; every following row
    is returned as a dict keyed by those column names (all values are
    strings, as produced by `csv.DictReader`).

    Args:
        file_path: Path of the CSV file to read.
        chunk_size: Maximum number of rows per yielded list. A value below
            1 results in one-row chunks, because a chunk is emitted as
            soon as its length reaches `chunk_size`.

    Yields:
        Lists of row dicts. Every list is full except possibly the last;
        an empty file (or one with only a header) yields nothing.
    """
    # newline='' is what the csv module requires for files handed to a
    # reader: without it, universal-newline translation rewrites "\r\n"
    # inside quoted fields to "\n" before the parser sees it.
    with open(file_path, 'r', newline='') as file:
        reader = csv.DictReader(file)
        chunk = []
        for row in reader:
            chunk.append(row)
            if len(chunk) >= chunk_size:
                yield chunk
                chunk = []
        # Flush the final, partially filled chunk.
        if chunk:
            yield chunk

# Comparison operators accepted inside a filter expression.
# Kept separate from the module-level `operators` table: that table's
# And/Or entries are the two-argument bitwise functions, which cannot
# express n-ary, short-circuiting boolean logic.
_COMPARE_OPS = {
    ast.Eq: op.eq,
    ast.NotEq: op.ne,
    ast.Lt: op.lt,
    ast.LtE: op.le,
    ast.Gt: op.gt,
    ast.GtE: op.ge,
}


@functools.lru_cache(maxsize=128)
def _parse_filter(expr):
    """Parse `expr` and return the root node of its expression tree.

    Results are cached so that applying one expression to many rows parses
    it only once. Failed parses raise and are therefore not cached.
    """
    try:
        return ast.parse(expr, mode='eval').body
    except SyntaxError as e:
        raise ValueError(f"Invalid filter expression: {expr}") from e


# Function to evaluate filter expression with type conversion
def eval_expr(expr, row):
    """Evaluate the filter expression `expr` against a single `row`.

    The expression is parsed with `ast` and interpreted directly (nothing
    is passed to `eval`). Supported syntax:

    * literals (numbers, strings, ``True``/``False``/``None``);
    * bare names, looked up as keys of `row`; a value that `int()` accepts
      is converted to an integer, anything else is returned unchanged;
    * comparisons ``==  !=  <  <=  >  >=``, including chains such as
      ``20 < age < 30``;
    * ``and`` / ``or`` with any number of operands, short-circuiting like
      Python does, and ``not``;
    * unary minus, so negative literals such as ``-1`` can be written.

    Args:
        expr: Filter expression source text.
        row: Mapping from column name to value.

    Returns:
        The value of the expression: a bool for comparisons and boolean
        operators, otherwise the literal or column value itself.

    Raises:
        ValueError: `expr` is not syntactically valid.
        KeyError: `expr` names a column that `row` does not contain.
        TypeError: `expr` uses an unsupported construct or operator, or
            compares values of incompatible types.
    """
    def _eval(node):
        # ast.Constant replaces ast.Num / ast.Str, which are deprecated
        # since Python 3.8 and no longer exist in 3.14.
        if isinstance(node, ast.Constant):
            return node.value
        if isinstance(node, ast.Name):
            value = row[node.id]
            try:
                return int(value)
            except (TypeError, ValueError):
                # Not an integer (or not a string at all, e.g. None for a
                # missing cell): compare the raw value.
                return value
        if isinstance(node, ast.BoolOp):
            # all()/any() over a generator evaluates operands lazily, so
            # later operands are skipped once the result is decided.
            operands = (_eval(value) for value in node.values)
            if isinstance(node.op, ast.And):
                return all(operands)
            return any(operands)
        if isinstance(node, ast.UnaryOp):
            if isinstance(node.op, ast.Not):
                return not _eval(node.operand)
            if isinstance(node.op, ast.USub):
                return -_eval(node.operand)
            raise TypeError(f"Unsupported operator: {type(node.op)}")
        if isinstance(node, ast.Compare):
            # `a < b < c` means `a < b and b < c`: walk every link of the
            # chain and stop at the first one that fails.
            left = _eval(node.left)
            for cmp_op, comparator in zip(node.ops, node.comparators):
                compare = _COMPARE_OPS.get(type(cmp_op))
                if compare is None:
                    raise TypeError(f"Unsupported operator: {type(cmp_op)}")
                right = _eval(comparator)
                if not compare(left, right):
                    return False
                left = right
            return True
        raise TypeError(f"Unsupported node type: {type(node)}")

    return _eval(_parse_filter(expr))

# Function to filter rows based on expression
def filter_rows(rows, filter_expression):
    """Return a list of the rows for which `filter_expression` is truthy.

    Each row is tested with `eval_expr`; the input order is preserved and
    `rows` itself is not modified.
    """
    matching = []
    for candidate in rows:
        if eval_expr(filter_expression, candidate):
            matching.append(candidate)
    return matching

# Function to paginate results
def paginate(rows, page, page_size):
    """Return the slice of `rows` that makes up page number `page`.

    Args:
        rows: Sliceable sequence of results.
        page: 1-based page number.
        page_size: Number of rows per page.

    Returns:
        Up to `page_size` items starting at offset ``(page - 1) * page_size``.
        The result is empty when the page lies past the end of `rows`, or
        when `page` or `page_size` is less than 1.
    """
    # A non-positive page or page size would produce negative slice bounds,
    # which Python counts from the end of the sequence, so rows from the
    # tail would be returned instead of an empty page.
    if page < 1 or page_size < 1:
        return rows[:0]
    start = (page - 1) * page_size
    return rows[start:start + page_size]

# Function to read and filter CSV with pagination
def read_and_filter_csv(file_path: str, filter_expression: str, page: int = 1, page_size: int = 50) -> list:
    """Return one page of the CSV rows that satisfy `filter_expression`.

    The file is read chunk by chunk via `read_csv_in_chunks`, each chunk is
    filtered with `filter_rows`, and the accumulated matches are sliced by
    `paginate`.

    Args:
        file_path: Path of the CSV file to query.
        filter_expression: Expression evaluated against every row.
        page: 1-based page number to return.
        page_size: Number of rows per page.

    Returns:
        The list of matching row dicts on the requested page.
    """
    # Number of matches that fully covers the requested page. Once that
    # many are collected, the rest of the file cannot affect the result.
    # For a non-positive page or page size there is no such bound, so the
    # whole file is read and the outcome is left entirely to `paginate`.
    rows_needed = page * page_size if page >= 1 and page_size >= 1 else None

    filtered_rows = []
    chunks = read_csv_in_chunks(file_path)
    try:
        for chunk in chunks:
            filtered_rows.extend(filter_rows(chunk, filter_expression))
            if rows_needed is not None and len(filtered_rows) >= rows_needed:
                break
    finally:
        # Closing the generator releases the underlying file right away
        # when the loop stops before the file is exhausted.
        chunks.close()
    return paginate(filtered_rows, page, page_size)

# Example usage and test
if __name__ == '__main__':
    # Create a 10,000-row sample file to query.
    csv_file = 'dummy_data.csv'
    generate_dummy_csv(csv_file, 10000)

    # Select people aged 30 or more who live in New York, and fetch the
    # first page of matches using the default page size.
    page_number = 1
    filter_expression = '(age >= 30) and (city == "New York")'
    results = read_and_filter_csv(
        csv_file,
        filter_expression,
        page_number,
    )

    # Show each matching row on its own line.
    for row in results:
        print(row)


{'id': '1', 'name': 'Person 1', 'age': '52', 'city': 'New York'}
{'id': '13', 'name': 'Person 13', 'age': '40', 'city': 'New York'}
{'id': '15', 'name': 'Person 15', 'age': '59', 'city': 'New York'}
{'id': '17', 'name': 'Person 17', 'age': '39', 'city': 'New York'}
{'id': '18', 'name': 'Person 18', 'age': '59', 'city': 'New York'}
{'id': '21', 'name': 'Person 21', 'age': '51', 'city': 'New York'}
{'id': '29', 'name': 'Person 29', 'age': '45', 'city': 'New York'}
{'id': '33', 'name': 'Person 33', 'age': '46', 'city': 'New York'}
{'id': '36', 'name': 'Person 36', 'age': '40', 'city': 'New York'}
{'id': '39', 'name': 'Person 39', 'age': '45', 'city': 'New York'}
{'id': '40', 'name': 'Person 40', 'age': '55', 'city': 'New York'}
{'id': '51', 'name': 'Person 51', 'age': '60', 'city': 'New York'}
{'id': '57', 'name': 'Person 57', 'age': '34', 'city': 'New York'}
{'id': '59', 'name': 'Person 59', 'age': '54', 'city': 'New York'}
{'id': '60', 'name': 'Person 60', 'age': '37', 'city': 'New York