Skip to content

Commit

Permalink
Fix FixedString pandas type (#357)
Browse files Browse the repository at this point in the history
  • Loading branch information
genzgd committed Jun 4, 2024
1 parent 8515e1d commit 402e21d
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 4 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ release (0.8.0), unrecognized arguments/keywords for these methods of creating a
instead of being passed as ClickHouse server settings. This is in conjunction with some refactoring in Client construction.
The supported method of passing ClickHouse server settings is to prefix such arguments/query parameters with `ch_`.

## 0.7.12, 2024-06-04
### Bug Fix
- When using `query_df` with a FixedString column with a read format of 'string' (and the default `query_df` setting
`use_extended_dtypes=True`), the resulting column in the dataframe will now be correctly set to the (extended) String dtype.
Fixes https://github.com/ClickHouse/clickhouse-connect/issues/356

## 0.7.11, 2024-05-26
### Improvement
- Python or Pandas float value to ClickHouse Decimal now correctly rounds Float values for more accurate conversions. Thanks
Expand Down
2 changes: 1 addition & 1 deletion clickhouse_connect/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
version = '0.7.11'
version = '0.7.12'
5 changes: 5 additions & 0 deletions clickhouse_connect/datatypes/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,11 @@ def _read_column_binary(self, source: ByteSource, num_rows: int, ctx: QueryConte
return source.read_fixed_str_col(self.byte_size, num_rows, ctx.encoding or self.encoding )
return source.read_bytes_col(self.byte_size, num_rows)

def _finalize_column(self, column: Sequence, ctx: QueryContext) -> Sequence:
    """Apply the pandas extended String dtype to a column read as strings.

    :param column: The column values as already read from the source.
    :param ctx: Query context; only ``use_extended_dtypes`` is consulted
        directly here, and the context is also passed to ``read_format``.
    :return: A pandas array with ``StringDtype`` when extended dtypes are
        requested and this column's read format is ``'string'``; otherwise
        ``column`` itself, unchanged.
    """
    # Any other read format is passed through untouched; only the 'string'
    # format is given the extended dtype.
    if ctx.use_extended_dtypes and self.read_format(ctx) == 'string':
        return pd.array(column, dtype=pd.StringDtype())
    return column

# pylint: disable=too-many-branches,duplicate-code
def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: bytearray, ctx: InsertContext):
ext = dest.extend
Expand Down
9 changes: 6 additions & 3 deletions tests/integration_tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,15 +247,18 @@ def test_pandas_date32(test_client: Client, table_context:Callable):


def test_pandas_row_df(test_client: Client, table_context: Callable):
    """Round-trip a one-row slice of a DataFrame, including a FixedString column.

    Inserts the second row of a two-row frame, reads it back with the ``fs``
    column formatted as ``'string'``, and checks the returned dtype and values.
    Also verifies that ``insert_df`` leaves the caller's DataFrame unmodified.
    """
    with table_context('test_pandas_row_df', ['key UInt64', 'dt DateTime64(6)', 'fs FixedString(5)']):
        df = pd.DataFrame({'key': [1, 2],
                           'dt': [pd.Timestamp(2023, 5, 4, 10, 20), pd.Timestamp(2023, 10, 15, 14, 50, 2, 4038)],
                           'fs': ['seven', 'bit']})
        # Keep only the second row so the frame's index no longer starts at 0.
        df = df.iloc[1:]
        source_df = df.copy()
        test_client.insert_df('test_pandas_row_df', df)
        result_df = test_client.query_df('SELECT * FROM test_pandas_row_df', column_formats={'fs': 'string'})
        # Look the dtype up by column label: positional `[]` access on a
        # label-indexed Series is deprecated in recent pandas releases.
        assert str(result_df.dtypes['fs']) == 'string'
        assert result_df.iloc[0]['key'] == 2
        assert result_df.iloc[0]['dt'] == pd.Timestamp(2023, 10, 15, 14, 50, 2, 4038)
        # 'bit' comes back as five characters from the FixedString(5) column,
        # with two trailing NULs.
        assert result_df.iloc[0]['fs'] == 'bit\0\0'
        assert len(result_df) == 1
        # The insert must not have mutated the caller's DataFrame.
        assert source_df.equals(df)

Expand Down

0 comments on commit 402e21d

Please sign in to comment.