Merge pull request #48 from martinRenou/remove_data

Remove data
jupyter-widgets-contrib · Feb 28, 2019 · 89da796 · 89da796
2 parents 28673e8 + e1a3740
commit 89da796
Show file tree

Hide file tree

Showing 9 changed files with 2,198 additions and 2,408 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -19,7 +19,7 @@ before_install:
   - conda info -a
   - conda create -q -n test-environment python=$PYTHON_VERSION ipywidgets runipy
   - source activate test-environment
-  - conda install -c conda-forge pytest pytest-cov nodejs pscript flake8
+  - conda install -c conda-forge pytest pytest-cov nodejs pscript flake8 pandas numpy
   - if [[ $TRAVIS_OS_NAME == linux ]]; then conda install -c conda-forge jupyterlab; fi
   - pip install coveralls
 install:

diff --git a/examples/pandas.ipynb b/examples/pandas.ipynb
@@ -34,17 +34,6 @@
     "sheet2"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "sheet2.data"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,

diff --git a/ipysheet/easy.py b/ipysheet/easy.py
@@ -121,7 +121,7 @@ def cell(row, column, value=0., type=None, color=None, background_color=None,
 
 def row(row, value, column_start=0, column_end=None, choice=None,
         read_only=False, format='0.[000]', renderer=None,
-        color=None, background_color=None, font_style=None, font_weight=None):
+        color=None, background_color=None, font_style=None, font_weight=None, type=None):
     """Create a CellRange widget, representing multiple cells in a sheet, in a horizontal row
 
     Parameters
@@ -142,12 +142,12 @@ def row(row, value, column_start=0, column_end=None, choice=None,
     return cell_range(value, column_start=column_start, column_end=column_end, row_start=row, row_end=row,
                       squeeze_row=True, squeeze_column=False,
                       color=color, background_color=background_color,
-                      font_style=font_style, font_weight=font_weight)
+                      font_style=font_style, font_weight=font_weight, type=type)
 
 
 def column(column, value, row_start=0, row_end=None,  choice=None,
            read_only=False, format='0.[000]', renderer=None,
-           color=None, background_color=None, font_style=None, font_weight=None):
+           color=None, background_color=None, font_style=None, font_weight=None, type=None):
     """Create a CellRange widget, representing multiple cells in a sheet, in a vertical column
 
     Parameters
@@ -169,7 +169,7 @@ def column(column, value, row_start=0, row_end=None,  choice=None,
                       squeeze_row=False, squeeze_column=True,
                       read_only=read_only, format=format, renderer=renderer,
                       color=color, background_color=background_color,
-                      font_style=font_style, font_weight=font_weight)
+                      font_style=font_style, font_weight=font_weight, type=type)
 
 
 def cell_range(value, row_start=0, column_start=0, row_end=None, column_end=None, transpose=False,

diff --git a/ipysheet/pandas_loader.py b/ipysheet/pandas_loader.py
@@ -15,7 +15,7 @@ def _get_cell_type(dt):
         'M': 'date',
         'S': 'text',
         'U': 'text'
-    }.get(dt.kind, None)
+    }.get(dt.kind, 'text')
 
 
 def _format_date(date):
@@ -55,16 +55,18 @@ def from_dataframe(dataframe):
     >>> sheet = from_dataframe(df)
     >>> display(sheet)
     """
+    import numpy as np
+
     # According to pandas documentation: "NumPy arrays have one dtype for the
     # entire array, while pandas DataFrames have one dtype per column", so it
     # makes more sense to create the sheet and fill it column-wise
-    columns = dataframe.columns.to_numpy()
-    rows = dataframe.index.to_numpy()
+    columns = dataframe.columns.tolist()
+    rows = dataframe.index.tolist()
     cells = []
 
     idx = 0
     for c in columns:
-        arr = dataframe[c].to_numpy()
+        arr = np.array(dataframe[c].values)
         cells.append(Cell(
             value=_get_cell_value(arr),
             row_start=0,
@@ -86,26 +88,52 @@ def from_dataframe(dataframe):
     )
 
 
+def _extract_cell_data(cell, data):
+    for row in range(cell.row_start, cell.row_end + 1):
+        for col in range(cell.column_start, cell.column_end + 1):
+            value = cell.value
+            if cell.transpose:
+                if not cell.squeeze_column:
+                    value = value[col]
+                if not cell.squeeze_row:
+                    value = value[row]
+            else:
+                if not cell.squeeze_row:
+                    value = value[row]
+                if not cell.squeeze_column:
+                    value = value[col]
+
+            data[row][col]['value'] = value
+            data[row][col]['options']['type'] = cell.type
+
+
+def _extract_data(sheet):
+    data = []
+    for _ in range(sheet.rows):
+        data.append([
+            {'value': None, 'options': {'type': type(None)}}
+            for _ in range(sheet.columns)
+        ])
+
+    for cell in sheet.cells:
+        _extract_cell_data(cell, data)
+
+    return data
+
+
 def _extract_column(data, idx):
     import numpy as np
     import pandas as pd
 
-    if len(data) == 0:
-        return np.array()
-
     type = data[0][idx]['options'].get('type', 'text')
     arr = [row[idx]['value'] for row in data]
-    print(type)
+
     if type == 'date':
         d = pd.to_datetime(arr)
 
         return np.array(d, dtype='M')
-    elif type == 'checkbox':
-        return np.array(arr, dtype=np.bool)
-    elif type == 'numeric':
-        return np.array(arr, dtype='f')
     else:
-        return np.array(arr, dtype='U')
+        return np.array(arr)
 
 
 def to_dataframe(sheet):
@@ -133,19 +161,24 @@ def to_dataframe(sheet):
     """
     import pandas as pd
 
+    data = _extract_data(sheet)
+
+    if len(data) == 0:
+        return pd.DataFrame()
+
     if (type(sheet.column_headers) == bool):
-        column_headers = [chr(ord('A') + i) for i in range(len(sheet.data[0]))]
+        column_headers = [chr(ord('A') + i) for i in range(len(data[0]))]
     else:
         column_headers = list(sheet.column_headers)
 
     if (type(sheet.row_headers) == bool):
-        row_headers = [i for i in range(len(sheet.data))]
+        row_headers = [i for i in range(len(data))]
     else:
         row_headers = list(sheet.row_headers)
 
     return pd.DataFrame(
         {
-            header: _extract_column(sheet.data, idx)
+            header: _extract_column(data, idx)
             for idx, header in enumerate(column_headers)
         },
         index=row_headers,

diff --git a/ipysheet/sheet.py b/ipysheet/sheet.py
@@ -82,7 +82,6 @@ class Sheet(widgets.DOMWidget):
     _model_module_version = Unicode(semver_range_frontend).tag(sync=True)
     rows = CInt(3).tag(sync=True)
     columns = CInt(4).tag(sync=True)
-    data = List(Instance(list), [[]]).tag(sync=True)
     cells = Tuple().tag(sync=True, **widgets.widget_serialization)
     named_cells = Dict(value={}, allow_none=False).tag(sync=True, **widgets.widget_serialization)
     row_headers = Union([Bool(), List(Unicode())], default_value=True).tag(sync=True)

diff --git a/ipysheet/test_all.py b/ipysheet/test_all.py
@@ -1,3 +1,5 @@
+import numpy as np
+import pandas as pd
 import ipysheet
 import pytest
 from ipysheet.utils import transpose
@@ -245,3 +247,95 @@ def f(x):
     renderer = ipysheet.renderer(f, 'name2')
     assert "somefunction" in renderer.code
     assert renderer.name == 'name2'
+
+
+def _format_date(date):
+    import pandas as pd
+
+    return pd.to_datetime(str(date)).strftime('%Y/%m/%d')
+
+
+def test_to_dataframe():
+    sheet = ipysheet.sheet(rows=5, columns=4)
+    ipysheet.cell(0, 0, value=True)
+    ipysheet.row(1, value=[2, 34, 543, 23])
+    ipysheet.column(3, value=[1.2, 1.3, 1.4, 1.5, 1.6])
+
+    df = ipysheet.to_dataframe(sheet)
+    assert np.all(df['A'].tolist() == [True,   2, None, None, None])
+    assert np.all(df['B'].tolist() == [None,  34, None, None, None])
+    assert np.all(df['C'].tolist() == [None, 543, None, None, None])
+    assert np.all(df['D'].tolist() == [1.2,  1.3,  1.4,  1.5,  1.6])
+
+    sheet = ipysheet.sheet(rows=4, columns=4, column_headers=['c0', 'c1', 'c2', 'c3'], row_headers=['r0', 'r1', 'r2', 'r3'])
+    ipysheet.cell_range(
+        [
+            [2, 34, 543, 23],
+            [1,  1,   1,  1],
+            [2,  2, 222, 22],
+            [2,  0, 111, 11],
+        ],
+        row_start=0, column_start=0,
+        transpose=True
+    )
+
+    df = ipysheet.to_dataframe(sheet)
+    assert np.all(df['c0'].tolist() == [2, 34, 543, 23])
+    assert np.all(df['c1'].tolist() == [1,  1,   1,  1])
+    assert np.all(df['c2'].tolist() == [2,  2, 222, 22])
+    assert np.all(df['c3'].tolist() == [2,  0, 111, 11])
+
+    sheet = ipysheet.sheet(rows=4, columns=4, column_headers=['t0', 't1', 't2', 't3'])
+    ipysheet.cell_range(
+        [
+            [2, 34, 543, 23],
+            [1,  1,   1,  1],
+            [2,  2, 222, 22],
+            [2,  0, 111, 11],
+        ],
+        row_start=0, column_start=0,
+        transpose=False
+    )
+
+    df = ipysheet.to_dataframe(sheet)
+    assert np.all(df['t0'].tolist() == [2,   1,   2,   2])
+    assert np.all(df['t1'].tolist() == [34,  1,   2,   0])
+    assert np.all(df['t2'].tolist() == [543, 1, 222, 111])
+    assert np.all(df['t3'].tolist() == [23,  1,  22,  11])
+
+    sheet = ipysheet.sheet(rows=0, columns=0)
+
+    df = ipysheet.to_dataframe(sheet)
+    assert np.all(df == pd.DataFrame())
+
+    sheet = ipysheet.sheet(rows=4, columns=1)
+    ipysheet.column(0, ['2019/02/28', '2019/02/27', '2019/02/26', '2019/02/25'], type='date')
+
+    df = ipysheet.to_dataframe(sheet)
+    assert [_format_date(x) for x in df['A'].tolist()] == ['2019/02/28', '2019/02/27', '2019/02/26', '2019/02/25']
+
+
+def test_from_dataframe():
+    df = pd.DataFrame({
+        'A': 1.,
+        'B': pd.Timestamp('20130102'),
+        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
+        'D': np.array([False, True, False, False], dtype='bool'),
+        'S': pd.Categorical(["test", "train", "test", "train"]),
+        'T': 'foo'})
+
+    sheet = ipysheet.from_dataframe(df)
+    assert len(sheet.cells) == 6
+    assert sheet.column_headers == ['A', 'B', 'C', 'D', 'S', 'T']
+    assert sheet.cells[0].value == [1., 1., 1., 1.]
+    assert sheet.cells[0].type == 'numeric'
+    assert sheet.cells[1].value == ['2013/01/02', '2013/01/02', '2013/01/02', '2013/01/02']
+    assert sheet.cells[1].type == 'date'
+    assert sheet.cells[2].value == [1., 1., 1., 1.]
+    assert sheet.cells[2].type == 'numeric'
+    assert sheet.cells[3].value == [False, True, False, False]
+    assert sheet.cells[3].type == 'checkbox'
+    assert sheet.cells[4].value == ['test', 'train', 'test', 'train']
+    assert sheet.cells[4].type == 'text'
+    assert sheet.cells[5].value == ['foo', 'foo', 'foo', 'foo']
+    assert sheet.cells[5].type == 'text'