Skip to content

Commit

Permalink
Fix #62 allow complex numpy/pandas columns like strings and datetimes (
Browse files Browse the repository at this point in the history
…#69)

* add dataframe tests

* working on string types

* finalize issue #62

* adjust failing test
  • Loading branch information
gijzelaerr committed Jun 30, 2020
1 parent 6547642 commit d5fbf98
Show file tree
Hide file tree
Showing 5 changed files with 241 additions and 25 deletions.
57 changes: 34 additions & 23 deletions monetdbe/_cffi.py
Expand Up @@ -5,7 +5,7 @@
import logging
from pathlib import Path
from re import compile, DOTALL
from typing import Optional, Any, Dict, Tuple, Callable, Type
from typing import Optional, Any, Dict, Tuple, Callable

import numpy as np

Expand Down Expand Up @@ -83,21 +83,21 @@ def check_error(msg: ffi.CData) -> None:


# format: monetdb type: (cast name, converter function, numpy type, monetdb null value)
#
# The numpy type is used to reinterpret the raw C column buffer, so its
# itemsize has to match the width of the C type named by the cast. Columns
# tagged with the object dtype ('=O') are converted value by value instead.
#
# The explicit sized scalar types (np.bool_, np.float32, np.float64) are used
# instead of the np.bool / np.float aliases: those aliases only referred to
# the Python builtins (so np.float was always 64 bit, also for the C "float"
# column) and they were removed in NumPy 1.24.
type_map: Dict[Any, Tuple[str, Optional[Callable], np.dtype, Optional[Any]]] = {
    lib.monetdbe_bool: ("bool", bool, np.dtype(np.bool_), None),
    lib.monetdbe_int8_t: ("int8_t", None, np.dtype(np.int8), np.iinfo(np.int8).min),
    lib.monetdbe_int16_t: ("int16_t", None, np.dtype(np.int16), np.iinfo(np.int16).min),
    lib.monetdbe_int32_t: ("int32_t", None, np.dtype(np.int32), np.iinfo(np.int32).min),
    lib.monetdbe_int64_t: ("int64_t", None, np.dtype(np.int64), np.iinfo(np.int64).min),
    lib.monetdbe_int128_t: ("int128_t", None, np.dtype(np.int64), None),  # todo: add 128bit support
    lib.monetdbe_size_t: ("size_t", None, np.dtype(np.uint), None),
    # NOTE(review): assumes the C "float" is 4 bytes wide and "double" 8 bytes - confirm against the header
    lib.monetdbe_float: ("float", py_float, np.dtype(np.float32), np.finfo(np.float32).min),
    lib.monetdbe_double: ("double", py_float, np.dtype(np.float64), np.finfo(np.float64).min),
    lib.monetdbe_str: ("str", make_string, np.dtype('=O'), None),
    lib.monetdbe_blob: ("blob", make_blob, np.dtype('=O'), None),
    lib.monetdbe_date: ("date", py_date, np.dtype('=O'), None),  # np.dtype('datetime64[D]')
    lib.monetdbe_time: ("time", py_time, np.dtype('=O'), None),  # np.dtype('datetime64[ns]')
    lib.monetdbe_timestamp: ("timestamp", py_timestamp, np.dtype('=O'), None),  # np.dtype('datetime64[ns]')
}


Expand Down Expand Up @@ -169,21 +169,29 @@ def cleanup_result(self, result: ffi.CData):
if result and self._connection:
check_error(lib.monetdbe_cleanup_result(self._connection, result))

def open(self, dbdir: Optional[Path] = None):
def open(
self,
dbdir: Optional[Path] = None,
memorylimit: int = 0,
querytimeout: int = 0,
sessiontimeout: int = 0,
nr_threads: int = 0,
have_hge: bool = False
):

if not dbdir:
url = ffi.NULL
else:
url = str(dbdir).encode() # ffi.new("char[]", str(dbdir).encode())
url = str(dbdir).encode()

p_connection = ffi.new("monetdbe_database *")
# p_options = ffi.new("monetdbe_options *")
# p_options.memorylimit = 0
# p_options.querytimeout = 0
# p_options.sessiontimeout = 0
# p_options.nr_threads = 0
# p_options.have_hge = False

p_options = ffi.NULL
p_options = ffi.new("monetdbe_options *")
p_options.memorylimit = memorylimit
p_options.querytimeout = querytimeout
p_options.sessiontimeout = sessiontimeout
p_options.nr_threads = nr_threads
p_options.have_hge = have_hge

result_code = lib.monetdbe_open(p_connection, url, p_options)
connection = p_connection[0]
Expand All @@ -204,7 +212,7 @@ def open(self, dbdir: Optional[Path] = None):

return connection

def close(self):
def close(self) -> None:
if self._connection:
if lib.monetdbe_close(self._connection):
raise exceptions.OperationalError("Failed to close database")
Expand Down Expand Up @@ -258,10 +266,13 @@ def result_fetch_numpy(self, monetdbe_result: ffi.CData):
rcol = p_rcol[0]
name = make_string(rcol.name)
cast_string, cast_function, numpy_type, monetdbe_null = type_map[rcol.type]
# todo (gijs): typing
buffer_size = monetdbe_result.nrows * numpy_type.itemsize # type: ignore
c_buffer = ffi.buffer(rcol.data, buffer_size)
np_col = np.frombuffer(c_buffer, dtype=numpy_type)

if numpy_type.char == 'O':
np_col: np.ndarray = np.array([extract(rcol, r) for r in range(monetdbe_result.nrows)])
else:
buffer_size = monetdbe_result.nrows * numpy_type.itemsize
c_buffer = ffi.buffer(rcol.data, buffer_size)
np_col = np.frombuffer(c_buffer, dtype=numpy_type)

if monetdbe_null:
mask = np_col == monetdbe_null
Expand Down
9 changes: 8 additions & 1 deletion monetdbe/monetize.py
Expand Up @@ -50,13 +50,20 @@ def monet_memoryview(data: memoryview) -> str:
return "'%s'" % data.tobytes().hex()


def monet_float(data: float) -> str:
    """Render a float as the text that is placed into a SQL statement.

    NaN is rendered as ``NULL``; every other value is rendered with ``str``.

    :param data: the float to render
    :return: ``'NULL'`` for NaN, otherwise ``str(data)``
    """
    # NaN is the only float value that does not compare equal to itself, so
    # this self-comparison is a dependency-free NaN check.
    if data != data:
        return 'NULL'
    return str(data)


mapping: List[Tuple[Type, Callable]] = [
(str, monet_escape),
(bytes, monet_bytes),
(memoryview, monet_memoryview),
(int, str),
(complex, str),
(float, str),
(float, monet_float),
(decimal.Decimal, str),
(datetime.datetime, monet_escape),
(datetime.time, monet_escape),
Expand Down
141 changes: 141 additions & 0 deletions notebooks/basic_example.ipynb
@@ -0,0 +1,141 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from monetdbe import connect, Timestamp\n",
"from datetime import datetime"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"con = connect(autocommit=True) # open an in-memory database"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"cur = con.execute(\"create table example(d timestamp, i int, f float)\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"_ = cur.executemany(\"insert into example(d, i, f) values (?, ?, ?)\", (\n",
" (datetime.now(), 10, 0.1),\n",
" (Timestamp(2004, 2, 14, 7, 15, 0, 510241), 20, 0.2),\n",
"))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"_ = cur.execute(\"select * from example\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[(datetime.datetime(2020, 6, 29, 15, 40, 35, 605000), 10, 0.1),\n",
" (datetime.datetime(2004, 2, 14, 7, 15, 0, 510000), 20, 0.2)]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cur.fetchall()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"_ = cur.execute(\"select * from example\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"array = cur.fetchdf()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(Timestamp('1970-01-01 02:24:39.017219613'),\n",
" Timestamp('1817-02-23 09:10:54.898455133'))"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tuple(array['d'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
57 changes: 57 additions & 0 deletions tests/test_dataframe.py
@@ -0,0 +1,57 @@
from datetime import datetime
from typing import List, Any
from unittest import TestCase
from math import isnan
from pandas import DataFrame

from monetdbe import connect, Timestamp


def _connect(values: List[Any], type: str) -> DataFrame:
    """Store ``values`` in a fresh single-column table and read them back.

    A new connection is opened with autocommit enabled, a table ``example``
    with one column ``d`` of SQL type ``type`` is created, every value is
    inserted as its own row and the whole table is fetched as a DataFrame.

    :param values: the values to insert, one row per value
    :param type: the SQL type used for column ``d``
    :return: the content of the table as returned by ``fetchdf``
    """
    connection = connect(autocommit=True)
    cursor = connection.execute(f"create table example(d {type})")

    # executemany expects one parameter tuple per row
    rows = ((value,) for value in values)
    cursor.executemany("insert into example(d) values (?)", rows)

    cursor.execute("select * from example")
    frame = cursor.fetchdf()
    return frame


class TestDataFrame(TestCase):
    """Round-trip tests: insert Python values and read them back with ``fetchdf``.

    Every test stores its values through ``_connect`` in a one-column table
    of the SQL type under test and compares column ``d`` of the resulting
    DataFrame with what was inserted.
    """

    def test_timestamp(self):
        # Strip the sub-second part of "now" so the comparison does not depend
        # on the timestamp precision of the database.
        # NOTE(review): the fixed value below keeps 510 milliseconds, so
        # millisecond precision is expected to survive the round trip.
        now = datetime.now().replace(microsecond=0)
        values = [
            now,
            Timestamp(2004, 2, 14, 7, 15, 0, 510000),
        ]
        df = _connect(values, 'timestamp')
        self.assertEqual(values, list(df['d']))

    def test_int(self):
        values = [5, 10, -100]
        df = _connect(values, 'int')
        self.assertEqual(values, list(df['d']))

    def test_float(self):
        values = [5.0, 10.0, -100.0, float('nan')]
        df = _connect(values, 'float')
        # NaN never compares equal to itself, so the trailing NaN is checked
        # separately from the ordinary values.
        self.assertEqual(values[:-1], list(df['d'])[:-1])
        self.assertTrue(isnan(df['d'].iloc[-1]))

    def test_char(self):
        values = ['a', 'i', 'é']
        df = _connect(values, 'char')
        self.assertEqual(values, list(df['d']))

    def test_string(self):
        values = ['asssssssssssssssss', 'iwwwwwwwwwwwwwww', 'éooooooooooooooooooooo']
        df = _connect(values, 'string')
        self.assertEqual(values, list(df['d']))

    def test_varchar(self):
        values = ['a', 'aa', 'éooooooooooooooooooooo']
        # Use an actual varchar column (wide enough for the longest value);
        # with 'string' this test merely repeated test_string.
        df = _connect(values, 'varchar(32)')
        self.assertEqual(values, list(df['d']))

    def test_uuid(self):
        values = ['6c49869d-45dc-4b00-ae55-5bd363c0c72c', '2ad49a96-ba10-11ea-b3de-0242ac130004']
        df = _connect(values, 'uuid')
        self.assertEqual(values, list(df['d']))
2 changes: 1 addition & 1 deletion tests/test_lite/test_dbapi05.py
Expand Up @@ -9,4 +9,4 @@ def test_description(self, monetdbe_cursor):
def test_description_fields(self, monetdbe_cursor):
    """Check the name and type code reported for a string result column."""
    monetdbe_cursor.execute('select name from sys.tables')
    assert monetdbe_cursor.description[0][0] == "name"
    # String columns are described with the object dtype; the former
    # fixed-width unicode expectation ('<U') contradicted this and is dropped.
    assert monetdbe_cursor.description[0][1] == numpy.dtype('O')

0 comments on commit d5fbf98

Please sign in to comment.