Skip to content

Commit

Permalink
Merge pull request #48 from martinRenou/remove_data
Browse files Browse the repository at this point in the history
Remove data
  • Loading branch information
martinRenou committed Feb 28, 2019
2 parents 28673e8 + e1a3740 commit 89da796
Show file tree
Hide file tree
Showing 9 changed files with 2,198 additions and 2,408 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ before_install:
- conda info -a
- conda create -q -n test-environment python=$PYTHON_VERSION ipywidgets runipy
- source activate test-environment
- conda install -c conda-forge pytest pytest-cov nodejs pscript flake8
- conda install -c conda-forge pytest pytest-cov nodejs pscript flake8 pandas numpy
- if [[ $TRAVIS_OS_NAME == linux ]]; then conda install -c conda-forge jupyterlab; fi
- pip install coveralls
install:
Expand Down
11 changes: 0 additions & 11 deletions examples/pandas.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,6 @@
"sheet2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"sheet2.data"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down
8 changes: 4 additions & 4 deletions ipysheet/easy.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def cell(row, column, value=0., type=None, color=None, background_color=None,

def row(row, value, column_start=0, column_end=None, choice=None,
read_only=False, format='0.[000]', renderer=None,
color=None, background_color=None, font_style=None, font_weight=None):
color=None, background_color=None, font_style=None, font_weight=None, type=None):
"""Create a CellRange widget, representing multiple cells in a sheet, in a horizontal column
Parameters
Expand All @@ -142,12 +142,12 @@ def row(row, value, column_start=0, column_end=None, choice=None,
return cell_range(value, column_start=column_start, column_end=column_end, row_start=row, row_end=row,
squeeze_row=True, squeeze_column=False,
color=color, background_color=background_color,
font_style=font_style, font_weight=font_weight)
font_style=font_style, font_weight=font_weight, type=type)


def column(column, value, row_start=0, row_end=None, choice=None,
read_only=False, format='0.[000]', renderer=None,
color=None, background_color=None, font_style=None, font_weight=None):
color=None, background_color=None, font_style=None, font_weight=None, type=None):
"""Create a CellRange widget, representing multiple cells in a sheet, in a vertical column
Parameters
Expand All @@ -169,7 +169,7 @@ def column(column, value, row_start=0, row_end=None, choice=None,
squeeze_row=False, squeeze_column=True,
read_only=read_only, format=format, renderer=renderer,
color=color, background_color=background_color,
font_style=font_style, font_weight=font_weight)
font_style=font_style, font_weight=font_weight, type=type)


def cell_range(value, row_start=0, column_start=0, row_end=None, column_end=None, transpose=False,
Expand Down
65 changes: 49 additions & 16 deletions ipysheet/pandas_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def _get_cell_type(dt):
'M': 'date',
'S': 'text',
'U': 'text'
}.get(dt.kind, None)
}.get(dt.kind, 'text')


def _format_date(date):
Expand Down Expand Up @@ -55,16 +55,18 @@ def from_dataframe(dataframe):
>>> sheet = from_dataframe(df)
>>> display(sheet)
"""
import numpy as np

# According to pandas documentation: "NumPy arrays have one dtype for the
# entire array, while pandas DataFrames have one dtype per column", so it
# makes more sense to create the sheet and fill it column-wise
columns = dataframe.columns.to_numpy()
rows = dataframe.index.to_numpy()
columns = dataframe.columns.tolist()
rows = dataframe.index.tolist()
cells = []

idx = 0
for c in columns:
arr = dataframe[c].to_numpy()
arr = np.array(dataframe[c].values)
cells.append(Cell(
value=_get_cell_value(arr),
row_start=0,
Expand All @@ -86,26 +88,52 @@ def from_dataframe(dataframe):
)


def _extract_cell_data(cell, data):
for row in range(cell.row_start, cell.row_end + 1):
for col in range(cell.column_start, cell.column_end + 1):
value = cell.value
if cell.transpose:
if not cell.squeeze_column:
value = value[col]
if not cell.squeeze_row:
value = value[row]
else:
if not cell.squeeze_row:
value = value[row]
if not cell.squeeze_column:
value = value[col]

data[row][col]['value'] = value
data[row][col]['options']['type'] = cell.type


def _extract_data(sheet):
data = []
for _ in range(sheet.rows):
data.append([
{'value': None, 'options': {'type': type(None)}}
for _ in range(sheet.columns)
])

for cell in sheet.cells:
_extract_cell_data(cell, data)

return data


def _extract_column(data, idx):
import numpy as np
import pandas as pd

if len(data) == 0:
return np.array()

type = data[0][idx]['options'].get('type', 'text')
arr = [row[idx]['value'] for row in data]
print(type)

if type == 'date':
d = pd.to_datetime(arr)

return np.array(d, dtype='M')
elif type == 'checkbox':
return np.array(arr, dtype=np.bool)
elif type == 'numeric':
return np.array(arr, dtype='f')
else:
return np.array(arr, dtype='U')
return np.array(arr)


def to_dataframe(sheet):
Expand Down Expand Up @@ -133,19 +161,24 @@ def to_dataframe(sheet):
"""
import pandas as pd

data = _extract_data(sheet)

if len(data) == 0:
return pd.DataFrame()

if (type(sheet.column_headers) == bool):
column_headers = [chr(ord('A') + i) for i in range(len(sheet.data[0]))]
column_headers = [chr(ord('A') + i) for i in range(len(data[0]))]
else:
column_headers = list(sheet.column_headers)

if (type(sheet.row_headers) == bool):
row_headers = [i for i in range(len(sheet.data))]
row_headers = [i for i in range(len(data))]
else:
row_headers = list(sheet.row_headers)

return pd.DataFrame(
{
header: _extract_column(sheet.data, idx)
header: _extract_column(data, idx)
for idx, header in enumerate(column_headers)
},
index=row_headers,
Expand Down
1 change: 0 additions & 1 deletion ipysheet/sheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ class Sheet(widgets.DOMWidget):
_model_module_version = Unicode(semver_range_frontend).tag(sync=True)
rows = CInt(3).tag(sync=True)
columns = CInt(4).tag(sync=True)
data = List(Instance(list), [[]]).tag(sync=True)
cells = Tuple().tag(sync=True, **widgets.widget_serialization)
named_cells = Dict(value={}, allow_none=False).tag(sync=True, **widgets.widget_serialization)
row_headers = Union([Bool(), List(Unicode())], default_value=True).tag(sync=True)
Expand Down
94 changes: 94 additions & 0 deletions ipysheet/test_all.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import numpy as np
import pandas as pd
import ipysheet
import pytest
from ipysheet.utils import transpose
Expand Down Expand Up @@ -245,3 +247,95 @@ def f(x):
renderer = ipysheet.renderer(f, 'name2')
assert "somefunction" in renderer.code
assert renderer.name == 'name2'


def _format_date(date):
import pandas as pd

return pd.to_datetime(str(date)).strftime('%Y/%m/%d')


def test_to_dataframe():
    """Check ``ipysheet.to_dataframe`` on several sheet layouts.

    Covers a sparsely filled sheet with default headers, transposed and
    non-transposed cell ranges with custom headers, an empty sheet, and a
    date-typed column.
    """

    def fresh_grid():
        # A new nested list per call so the two ranges never share state.
        return [
            [2, 34, 543, 23],
            [1, 1, 1, 1],
            [2, 2, 222, 22],
            [2, 0, 111, 11],
        ]

    def check_columns(frame, expected):
        # Compare the expected columns one by one, in the given order.
        for header, values in expected:
            assert np.all(frame[header].tolist() == values)

    # Single cell + one row + one column, default 'A'.. headers.
    sheet = ipysheet.sheet(rows=5, columns=4)
    ipysheet.cell(0, 0, value=True)
    ipysheet.row(1, value=[2, 34, 543, 23])
    ipysheet.column(3, value=[1.2, 1.3, 1.4, 1.5, 1.6])
    check_columns(ipysheet.to_dataframe(sheet), [
        ('A', [True, 2, None, None, None]),
        ('B', [None, 34, None, None, None]),
        ('C', [None, 543, None, None, None]),
        ('D', [1.2, 1.3, 1.4, 1.5, 1.6]),
    ])

    # Transposed range: each inner list ends up as one column.
    sheet = ipysheet.sheet(
        rows=4, columns=4,
        column_headers=['c0', 'c1', 'c2', 'c3'],
        row_headers=['r0', 'r1', 'r2', 'r3'],
    )
    ipysheet.cell_range(fresh_grid(), row_start=0, column_start=0, transpose=True)
    check_columns(ipysheet.to_dataframe(sheet), [
        ('c0', [2, 34, 543, 23]),
        ('c1', [1, 1, 1, 1]),
        ('c2', [2, 2, 222, 22]),
        ('c3', [2, 0, 111, 11]),
    ])

    # Non-transposed range: each inner list is one row.
    sheet = ipysheet.sheet(rows=4, columns=4, column_headers=['t0', 't1', 't2', 't3'])
    ipysheet.cell_range(fresh_grid(), row_start=0, column_start=0, transpose=False)
    check_columns(ipysheet.to_dataframe(sheet), [
        ('t0', [2, 1, 2, 2]),
        ('t1', [34, 1, 2, 0]),
        ('t2', [543, 1, 222, 111]),
        ('t3', [23, 1, 22, 11]),
    ])

    # Empty sheet gives an empty DataFrame.
    sheet = ipysheet.sheet(rows=0, columns=0)
    frame = ipysheet.to_dataframe(sheet)
    assert np.all(frame == pd.DataFrame())

    # Date-typed column survives the round trip once re-formatted.
    sheet = ipysheet.sheet(rows=4, columns=1)
    ipysheet.column(0, ['2019/02/28', '2019/02/27', '2019/02/26', '2019/02/25'], type='date')
    frame = ipysheet.to_dataframe(sheet)
    formatted = [_format_date(stamp) for stamp in frame['A'].tolist()]
    assert formatted == ['2019/02/28', '2019/02/27', '2019/02/26', '2019/02/25']


def test_from_dataframe():
    """Check that ``ipysheet.from_dataframe`` creates one typed cell per column."""
    frame = pd.DataFrame({
        'A': 1.,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.array([False, True, False, False], dtype='bool'),
        'S': pd.Categorical(["test", "train", "test", "train"]),
        'T': 'foo'})

    # Expected (value, type) of each cell, in column order.
    expected = [
        ([1., 1., 1., 1.], 'numeric'),
        (['2013/01/02', '2013/01/02', '2013/01/02', '2013/01/02'], 'date'),
        ([1., 1., 1., 1.], 'numeric'),
        ([False, True, False, False], 'checkbox'),
        (['test', 'train', 'test', 'train'], 'text'),
        (['foo', 'foo', 'foo', 'foo'], 'text'),
    ]

    sheet = ipysheet.from_dataframe(frame)
    assert len(sheet.cells) == 6
    assert sheet.column_headers == ['A', 'B', 'C', 'D', 'S', 'T']
    for position, (values, kind) in enumerate(expected):
        current = sheet.cells[position]
        assert current.value == values
        assert current.type == kind

0 comments on commit 89da796

Please sign in to comment.