# Edge Case Testing
This notebook tests edge cases and error handling for `qutePandas` against `pandas` baselines.

In [None]:
import os
import sys

# Make the parent directory and the current directory importable so the local
# `qutePandas` package and the `test_utils` helper module can be resolved.
sys.path.append(os.path.abspath('..'))
sys.path.append(os.path.abspath('.'))

# Export the local license location BEFORE anything imports PyKX: PyKX reads
# its configuration (including QLIC) from the environment when it is first
# imported, so setting the variable afterwards is too late for that lookup.
local_lic = os.path.abspath('../kdb_lic')
if os.path.exists(local_lic):
    os.environ['QLIC'] = local_lic

# Import order is intentionally unchanged relative to the original cell.
import qutePandas as qpd
import pandas as pd
import numpy as np
import pykx as kx
from test_utils import verify_correctness

qpd.connect()
print('Setup Complete')


## Empty Tables
Tests handling of empty DataFrames (0 rows).

In [None]:
# Fixture: a frame that declares columns 'a' and 'b' but holds no rows, and
# the qutePandas table built from it.
df_empty = pd.DataFrame(columns=['a', 'b'])
q_empty = qpd.DataFrame(df_empty)

# Dropping nulls from an empty frame must agree with pandas.
pd_res, q_res = df_empty.dropna(), qpd.dropna(q_empty, return_type='p')
assert verify_correctness(pd_res, q_res)

# Filling column 'a' with 0 on an empty frame must agree with pandas.
pd_res, q_res = (
    df_empty.fillna({'a': 0}),
    qpd.fillna(q_empty, 'a', 0, return_type='p'),
)
assert verify_correctness(pd_res, q_res)

print('Passed: Empty Tables')


## Single Column Tables
Tests operations on single-column DataFrames.

In [None]:
# Fixture: a frame with a single column 'a', and the qutePandas table built
# from it.
df_single = pd.DataFrame({'a': [1, 2, 3]})
q_single = qpd.DataFrame(df_single)

# Renaming the only column ('a' -> 'x') must agree with pandas.
pd_res, q_res = (
    df_single.rename(columns={'a': 'x'}),
    qpd.rename(q_single, {'a': 'x'}, return_type='p'),
)
assert verify_correctness(pd_res, q_res)

print('Passed: Single Column Tables')


## Error Handling: Missing Columns
Tests that operations raise appropriate errors when targeting non-existent columns.

In [None]:
# Fixture: a one-row frame whose only column is 'a'; every call below targets
# the non-existent column 'missing'.
df = pd.DataFrame({'a': [1]})
q_table = qpd.DataFrame(df)

# Dropping a column that does not exist must raise RuntimeError.
try:
    qpd.drop_col(q_table, 'missing', return_type='p')
except RuntimeError:
    pass
else:
    # Reached only when the call returned without raising.
    assert False, 'Should have raised RuntimeError'

# Grouping by a column that does not exist must raise RuntimeError.
try:
    qpd.groupby_sum(q_table, 'missing', 'a', return_type='p')
except RuntimeError:
    pass
else:
    # Reached only when the call returned without raising.
    assert False, 'Should have raised RuntimeError'

print('Passed: Error Handling: Missing Columns')


## Error Handling: Incorrect inputs
Tests that operations raise errors for invalid input types (e.g., list instead of DataFrame).

In [None]:
# A plain Python list is not a table; the call must be rejected with either
# ValueError or RuntimeError.
try:
    qpd.dropna([1, 2, 3], return_type='p')
except (ValueError, RuntimeError):
    pass
else:
    # Reached only when the invalid input was silently accepted.
    assert False, 'Should have raised ValueError/RuntimeError'

print('Passed: Error Handling: Incorrect inputs')


## Null-Heavy Columns
Tests handling of columns consisting mostly (here, entirely) of null values.

In [None]:
# Fixture: 100 rows where column 'a' is None in every row and column 'b'
# holds 0..99, plus the qutePandas table built from it.
df_nulls = pd.DataFrame({'a': [None]*100, 'b': range(100)})
q_nulls = qpd.DataFrame(df_nulls)

# Whole-table dropna must agree with pandas.
pd_res, q_res = df_nulls.dropna(), qpd.dropna(q_nulls, return_type='p')
assert verify_correctness(pd_res, q_res)

# Dropping rows where only column 'a' is null is compared against a pandas
# boolean mask on that column.
pd_res, q_res = (
    df_nulls[df_nulls['a'].notna()],
    qpd.dropna_col(q_nulls, 'a', return_type='p'),
)
assert verify_correctness(pd_res, q_res)

print('Passed: Null-Heavy Columns')


## Mixed Data Types
Tests casting operations on mixed-type DataFrames.

In [None]:
# Fixture: one integer column, one string column and one float column, plus
# the qutePandas table built from it.
df_mixed = pd.DataFrame({'a': [1, 2], 'b': ['x', 'y'], 'c': [1.1, 2.2]})
q_mixed = qpd.DataFrame(df_mixed)

# Casting the integer column 'a' to float64 must agree with pandas `astype`.
pd_res, q_res = (
    df_mixed.astype({'a': 'float64'}),
    qpd.cast(q_mixed, 'a', 'float64', return_type='p'),
)
assert verify_correctness(pd_res, q_res)

print('Passed: Mixed Data Types')


## Wide Tables
Tests operations on wide tables with many columns (500 in this test).

In [None]:
# Fixture: a single-row frame with 500 columns named c0..c499, where column
# c<i> holds the value i, plus the qutePandas table built from it.
wide_df = pd.DataFrame({f'c{i}': [i] for i in range(500)})
q_wide = qpd.DataFrame(wide_df)

# Dropping one column from the middle of the table must agree with pandas.
pd_res, q_res = (
    wide_df.drop(columns=['c250']),
    qpd.drop_col(q_wide, 'c250', return_type='p'),
)
assert verify_correctness(pd_res, q_res)

print('Passed: Wide Tables')


## Duplicate Column Names
Tests handling (or graceful failure) of duplicate column names.

In [None]:
# Fixture: a one-row frame whose two columns share the label 'a'.
df_dup = pd.DataFrame([[1, 2]], columns=['a', 'a'])

# Either outcome is accepted here: the conversion and dropna succeed, or they
# fail with an ordinary exception.
try:
    q_dup = qpd.DataFrame(df_dup)
    q_res = qpd.dropna(q_dup, return_type='p')
except Exception as exc:
    # Catch `Exception` rather than using a bare `except:` so that
    # KeyboardInterrupt and SystemExit still propagate, and report what was
    # actually raised so an unrelated failure is not silently hidden.
    print('Expected Failure: Duplicate Column Names (or passed with auto-renaming)')
    print(f'  {type(exc).__name__}: {exc}')
else:
    print('Passed: Duplicate Column Names')


## Return Type Validation
Tests strict enforcement of 'p' and 'q' return types and error handling for invalid types.

In [None]:
# Fixture: column 'a' contains one missing value; the table is converted
# directly with `kx.toq` rather than through `qpd.DataFrame`.
df_test = pd.DataFrame({'a': [1, 2, None], 'b': ['x', 'y', 'z']})
q_test = kx.toq(df_test)

# return_type='p' must hand back a pandas DataFrame.
res_p = qpd.dropna(q_test, return_type='p')
assert isinstance(res_p, pd.DataFrame)

# return_type='q' must hand back a PyKX table (unkeyed or keyed).
res_q = qpd.dropna(q_test, return_type='q')
assert isinstance(res_q, (kx.Table, kx.KeyedTable))

# Any other spelling, such as 'pandas', must be rejected with a ValueError
# whose message mentions "Invalid return_type".
try:
    qpd.dropna(q_test, return_type='pandas')
except ValueError as err:
    assert "Invalid return_type" in str(err)
else:
    # Reached only when the invalid value was silently accepted.
    assert False, "Should raise ValueError for 'pandas'"

# Aggregations must honour return_type='p' as well.
res_sum_p = qpd.groupby_sum(q_test, 'b', 'a', return_type='p')
assert isinstance(res_sum_p, pd.DataFrame)

# `apply` may produce either a DataFrame or a Series.
res_apply_p = qpd.apply(q_test, 'sum', axis=0, return_type='p')
assert isinstance(res_apply_p, (pd.DataFrame, pd.Series))

print('Passed: Return Type Validation')
