# Edge Case Testing
This notebook tests edge cases and error handling for `qutePandas` against `pandas` baselines.

In [1]:
import os
import sys
import importlib
import pandas as pd
import numpy as np
import pykx as kx
# Put the parent directory and the current directory on sys.path before the
# project imports below (presumably where `qutePandas` and `test_utils` live
# relative to this notebook — verify against the repo layout).
sys.path.append(os.path.abspath('..'))
sys.path.append(os.path.abspath('.'))
import qutePandas as qpd
# Re-execute the already-imported package; presumably so that edits to
# qutePandas are picked up without restarting the kernel.
importlib.reload(qpd)
from test_utils import verify_correctness
# If a path named 'kdb_lic' exists next to the parent directory, expose it via
# the QLIC environment variable before connecting.
# NOTE(review): the recorded output contains a warning that QLIC is set to a
# non-directory value — confirm whether '../kdb_lic' is expected to be a
# directory, and whether QLIC must be set before `import pykx` to take effect.
local_lic = os.path.abspath('../kdb_lic')
if os.path.exists(local_lic): os.environ['QLIC'] = local_lic
qpd.connect()
print('Setup Complete')


  warn(f'Configuration value QLIC set to non directory value: {_qlic}')


Setup Complete


## Empty Tables
Tests handling of empty DataFrames (0 rows).

In [2]:
# Zero-row frame with two named columns, mirrored into a qutePandas table.
df_empty = pd.DataFrame(columns=['a', 'b'])
q_empty = qpd.DataFrame(df_empty)

# dropna on an empty table is compared against the pandas baseline.
expected_dropna = df_empty.dropna()
actual_dropna = qpd.dropna(q_empty, return_type='p')
assert verify_correctness(expected_dropna, actual_dropna)

# fillna on column 'a' of an empty table is compared against pandas as well.
expected_fillna = df_empty.fillna({'a': 0})
actual_fillna = qpd.fillna(q_empty, 'a', 0, return_type='p')
assert verify_correctness(expected_fillna, actual_fillna)

print('Passed: Empty Tables')


Passed: Empty Tables


## Single Column Tables
Tests operations on single-column DataFrames.

In [3]:
# One-column frame and its qutePandas counterpart.
df_single = pd.DataFrame({'a': [1, 2, 3]})
q_single = qpd.DataFrame(df_single)

# Renaming the only column is compared against the pandas baseline.
expected_renamed = df_single.rename(columns={'a': 'x'})
actual_renamed = qpd.rename(q_single, {'a': 'x'}, return_type='p')
assert verify_correctness(expected_renamed, actual_renamed)

print('Passed: Single Column Tables')


Passed: Single Column Tables


## Error Handling: Missing Columns
Tests that operations raise appropriate errors when targeting non-existent columns.

In [4]:
# Minimal one-row table; the column 'missing' does not exist in it.
df = pd.DataFrame({'a': [1]})
q_table = qpd.DataFrame(df)

# Dropping a non-existent column is expected to raise RuntimeError.
try:
    qpd.drop_col(q_table, 'missing', return_type='p')
except RuntimeError:
    pass
else:
    assert False, 'Should have raised RuntimeError'

# Grouping by a non-existent column is expected to raise RuntimeError.
try:
    qpd.groupby_sum(q_table, 'missing', 'a', return_type='p')
except RuntimeError:
    pass
else:
    assert False, 'Should have raised RuntimeError'

print('Passed: Error Handling: Missing Columns')


Passed: Error Handling: Missing Columns


## Error Handling: Incorrect inputs
Tests that operations raise errors for invalid input types (e.g., list instead of DataFrame).

In [5]:
# A plain Python list is not a table; dropna is expected to reject it with
# either ValueError or RuntimeError.
try:
    qpd.dropna([1, 2, 3], return_type='p')
except (ValueError, RuntimeError):
    pass
else:
    assert False, 'Should have raised ValueError/RuntimeError'

print('Passed: Error Handling: Incorrect inputs')


Passed: Error Handling: Incorrect inputs


## Null-Heavy Columns
Tests handling of columns whose values are mostly (here, entirely) null.

In [6]:
# Column 'a' is entirely None; column 'b' is fully populated (0..99).
df_nulls = pd.DataFrame({'a': [None] * 100, 'b': range(100)})
q_nulls = qpd.DataFrame(df_nulls)

# Whole-table dropna compared against the pandas baseline.
expected_any_null = df_nulls.dropna()
actual_any_null = qpd.dropna(q_nulls, return_type='p')
assert verify_correctness(expected_any_null, actual_any_null)

# Column-restricted dropna compared against filtering on notna() of 'a'.
a_present = df_nulls['a'].notna()
expected_col_null = df_nulls[a_present]
actual_col_null = qpd.dropna_col(q_nulls, 'a', return_type='p')
assert verify_correctness(expected_col_null, actual_col_null)

print('Passed: Null-Heavy Columns')


Passed: Null-Heavy Columns


## Mixed Data Types
Tests casting operations on mixed-type DataFrames.

In [7]:
# Frame mixing integer, string and float columns.
df_mixed = pd.DataFrame({'a': [1, 2], 'b': ['x', 'y'], 'c': [1.1, 2.2]})
q_mixed = qpd.DataFrame(df_mixed)

# Casting the integer column to float64 is compared against pandas astype.
expected_cast = df_mixed.astype({'a': 'float64'})
actual_cast = qpd.cast(q_mixed, 'a', 'float64', return_type='p')
assert verify_correctness(expected_cast, actual_cast)

print('Passed: Mixed Data Types')


Passed: Mixed Data Types


## Wide Tables
Tests operations on tables with many columns (500).

In [8]:
# Single-row frame with 500 columns named c0..c499, each holding its own index.
wide_df = pd.DataFrame({f'c{col_idx}': [col_idx] for col_idx in range(500)})
q_wide = qpd.DataFrame(wide_df)

# Dropping one column from the middle is compared against the pandas baseline.
expected_wide = wide_df.drop(columns=['c250'])
actual_wide = qpd.drop_col(q_wide, 'c250', return_type='p')
assert verify_correctness(expected_wide, actual_wide)

print('Passed: Wide Tables')


Passed: Wide Tables


## Duplicate Column Names
Tests handling (or graceful failure) of duplicate column names.

In [9]:
# Frame with two columns that share the name 'a'.
df_dup = pd.DataFrame([[1, 2]], columns=['a', 'a'])

# Either outcome is accepted here: conversion + dropna succeeding, or the
# library rejecting duplicate names with an ordinary exception.
try:
    q_dup = qpd.DataFrame(df_dup)
    q_res = qpd.dropna(q_dup, return_type='p')
    print('Passed: Duplicate Column Names')
except Exception:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit still propagate and the kernel can be
    # interrupted while this cell runs.
    print('Expected Failure: Duplicate Column Names (or passed with auto-renaming)')


Expected Failure: Duplicate Column Names (or passed with auto-renaming)


## Return Type Validation
Tests strict enforcement of 'p' and 'q' return types and error handling for invalid types.

In [10]:
# Fixture with one null in 'a', converted directly to a q table through pykx.
df_test = pd.DataFrame({'a': [1, 2, None], 'b': ['x', 'y', 'z']})
q_test = kx.toq(df_test)

# return_type='p' must yield a pandas DataFrame.
dropna_as_p = qpd.dropna(q_test, return_type='p')
assert isinstance(dropna_as_p, pd.DataFrame)

# return_type='q' must yield a pykx table (plain or keyed).
dropna_as_q = qpd.dropna(q_test, return_type='q')
assert isinstance(dropna_as_q, (kx.Table, kx.KeyedTable))

# Any other return_type string must be rejected with a descriptive message.
try:
    qpd.dropna(q_test, return_type='pandas')
except (ValueError, RuntimeError) as err:
    assert "Invalid return_type" in str(err)
else:
    assert False, "Should raise ValueError for 'pandas'"

# Aggregation helper honours return_type='p'.
groupby_as_p = qpd.groupby_sum(q_test, 'b', 'a', return_type='p')
assert isinstance(groupby_as_p, pd.DataFrame)

# apply with return_type='p' may come back as a DataFrame, Series or dict.
apply_as_p = qpd.apply(q_test, 'count', axis=0, return_type='p')
assert isinstance(apply_as_p, (pd.DataFrame, pd.Series, dict))

print('Passed: Return Type Validation')


Passed: Return Type Validation


## Introspection: dtypes
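Tests `qpd.dtypes` for both return types (`'q'` and `'p'`), for an empty table, and for a table with an all-null column.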

In [11]:
# Basic Functional & Return Type Test
# Fixture: small two-column frame converted straight to a q table via pykx.
df = pd.DataFrame({'a': [1, 2], 'b': ['x', 'y']})
q_table = kx.toq(df)

# return_type='q' is expected to hand back a pykx table (plain or keyed).
dtypes_as_q = qpd.dtypes(q_table, return_type='q')
assert isinstance(dtypes_as_q, (kx.Table, kx.KeyedTable)), "return_type='q' mismatch"

# return_type='p' is expected to hand back a pandas DataFrame that mentions
# column 'a' either in its index or in its first column.
dtypes_as_p = qpd.dtypes(q_table, return_type='p')
assert isinstance(dtypes_as_p, pd.DataFrame), "return_type='p' mismatch"
a_in_index = 'a' in dtypes_as_p.index
assert a_in_index or 'a' in dtypes_as_p.iloc[:, 0].values

# Zero-row table: one dtype entry per column is still expected.
df_empty = pd.DataFrame(columns=['a', 'b'])
q_empty = qpd.DataFrame(df_empty)
empty_dtypes = qpd.dtypes(q_empty, return_type='p')
assert len(empty_dtypes) == 2

# Table with an all-None column: both columns must still be reported.
df_null = pd.DataFrame({'a': [None, None], 'b': [1, 2]})
q_null = qpd.DataFrame(df_null)
null_dtypes = qpd.dtypes(q_null, return_type='p')
assert len(null_dtypes) == 2

print('Passed: dtypes Introspection')

Passed: dtypes Introspection


## Indexing Edge Cases
Tests for `loc` and `iloc` error handling and boundary conditions.

In [12]:
# Three-row, two-column fixture shared by all indexing checks in this cell.
df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
q_table = qpd.DataFrame(df)

# Unknown column in loc: selecting a column that does not exist must raise.
# If no error is raised the cell now fails, instead of only printing a message
# and letting the notebook report success.
try:
    qpd.loc(q_table, cols='z', return_type='p')
except (ValueError, RuntimeError) as e:
    print(f"Caught expected error for unknown col: {e}")
else:
    raise AssertionError("Failed to raise error for unknown col")

# Empty selection iloc: an empty row list must produce a zero-row result.
res = qpd.iloc(q_table, rows=[], return_type='p')
assert len(res) == 0, f"Expected 0 rows, got {len(res)}"
print("Empty selection verified")

# Out of bounds slice (iloc): the slice end is past the table length, and the
# result is expected to contain just the 3 existing rows.
res = qpd.iloc(q_table, rows=slice(0, 100), return_type='p')
assert len(res) == 3, f"Expected 3 rows (clipped), got {len(res)}"
print("Out of bounds slice verified")

Caught expected error for unknown col: Column 'z' not found in table.
Empty selection verified
Out of bounds slice verified


## Test print Edge Cases
Tests `qpd.print` with an empty table and with a large (1000-row) table printed using `n=10`.

In [None]:
# Case 1: print a table that has columns but no rows.
print('Testing qpd.print with empty table:')
empty_df = pd.DataFrame({'a': [], 'b': []})
q_empty = qpd.DataFrame(empty_df)
qpd.print(q_empty)

# Case 2: print a 1000-row table while passing n=10.
print('\nTesting qpd.print with large table (n=10):')
large_df = pd.DataFrame({'x': range(1000), 'y': range(1000, 2000)})
q_large = qpd.DataFrame(large_df)
qpd.print(q_large, n=10)

# Reaching this line means neither print call raised.
print('\nEdge case tests passed')