Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor to allow subclasses to set their own time format #177

Merged
merged 12 commits into from
Jan 25, 2019
3 changes: 2 additions & 1 deletion RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@

# Next Release

- [#177](https://github.com/IAMconsortium/pyam/pull/177) Modified formatting of the time column on init so that subclasses can override it and avoid the pandas datetime range limitation (~1677-2262, see https://stackoverflow.com/a/37226672)
- [#176](https://github.com/IAMconsortium/pyam/pull/176) Corrected title setting operation in line_plot function
- [#175](https://github.com/IAMconsortium/pyam/pull/175) Update link to tutorial in readme.md
- [#175](https://github.com/IAMconsortium/pyam/pull/175) Update link to tutorial in readme.md
- [#174](https://github.com/IAMconsortium/pyam/pull/174) Add a function `difference()` to compare two IamDataFrames
- [#171](https://github.com/IAMconsortium/pyam/pull/171) Fix a bug when reading from an `ixmp.TimeSeries` object, refactor to mitigate circular dependency
- [#162](https://github.com/IAMconsortium/pyam/pull/162) Add a function to sum and append timeseries components to an aggregate variable
Expand Down
16 changes: 16 additions & 0 deletions pyam/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
read_files,
read_pandas,
format_data,
cast_years_to_int,
pattern_match,
years_match,
month_match,
Expand Down Expand Up @@ -72,7 +73,9 @@ def __init__(self, data, **kwargs):
_data = read_ix(data, **kwargs)
else:
_data = read_files(data, **kwargs)

self.data, self.time_col, self.extra_cols = _data
self._format_data_time_col()
self._LONG_IDX = IAMC_IDX + [self.time_col] + self.extra_cols

# define a dataframe for categorization and other metadata indicators
Expand All @@ -83,6 +86,19 @@ def __init__(self, data, **kwargs):
if 'exec' in run_control():
self._execute_run_control()

def _format_data_time_col(self):
znicholls marked this conversation as resolved.
Show resolved Hide resolved
# cast time_col to desired format
if self.time_col == 'year':
if not self.data['year'].dtype == 'int64':
self.data['year'] = cast_years_to_int(
pd.to_numeric(self.data['year'])
)
elif self.time_col == 'time':
self._format_datetime_col()

def _format_datetime_col(self):
self.data['time'] = pd.to_datetime(self.data['time'])

def __getitem__(self, key):
_key_check = [key] if isstr(key) else key
if set(_key_check).issubset(self.meta.columns):
Expand Down
13 changes: 5 additions & 8 deletions pyam/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import glob
import collections
import datetime
from dateutil import parser
import time

import numpy as np
Expand Down Expand Up @@ -172,8 +173,11 @@ def format_data(df):
try:
year_cols.append(i) if int(i) else None
except (ValueError, TypeError):
if isinstance(i, datetime.datetime):
time_cols.append(i)
continue
try:
pd.to_datetime([i])
parser.parse(i)
time_cols.append(i)
except ValueError:
extra_cols.append(i)
Expand All @@ -189,13 +193,6 @@ def format_data(df):
df = pd.melt(df, id_vars=IAMC_IDX + extra_cols, var_name=time_col,
value_vars=sorted(melt_cols), value_name='value')

# cast time_col to correct format
if time_col == 'year':
if not df.year.dtype == 'int64':
df['year'] = cast_years_to_int(pd.to_numeric(df['year']))
if time_col == 'time':
df['time'] = pd.to_datetime(df['time'])

# cast value columns to numeric, drop NaN's, sort data
df['value'] = df['value'].astype('float64')
df.dropna(inplace=True)
Expand Down
43 changes: 43 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,49 @@ def test_init_df_with_extra_col(test_pd_df):
tdf, check_like=True)


@pytest.mark.xfail(reason=(
    "pandas datetime is limited to the time period of ~1677-2262, see "
    "https://stackoverflow.com/a/37226672"
))
def test_init_df_long_timespan(test_pd_df):
    """Initialise an ``IamDataFrame`` whose time columns span 2005 to 3005.

    Marked as an expected failure: the upper bound lies outside the
    period pandas datetimes support (see the xfail reason).
    """
    tmin = datetime.datetime(2005, 6, 17)
    tmax = datetime.datetime(3005, 6, 17)

    # replace the two year columns with datetime column labels
    tdf = test_pd_df.copy().rename(
        {
            2005: tmin,
            2010: tmax,
        },
        axis="columns",
    )

    df = IamDataFrame(tdf)

    assert df["time"].max() == tmax
    assert df["time"].min() == tmin


def test_subclass_passesinit_df_long_timespan(test_pd_df):
    """Check that a subclass can keep times spanning 2005 to 3005.

    The subclass overrides ``_format_datetime_col`` with a no-op, so the
    datetime column labels are not converted during initialisation.
    """
    class TempSubClass(IamDataFrame):
        def _format_datetime_col(self):
            # deliberately skip the datetime conversion
            pass

    tmin = datetime.datetime(2005, 6, 17)
    tmax = datetime.datetime(3005, 6, 17)

    # replace the two year columns with datetime column labels
    tdf = test_pd_df.copy().rename(
        {
            2005: tmin,
            2010: tmax,
        },
        axis="columns",
    )

    df = TempSubClass(tdf)

    assert df["time"].max() == tmax
    assert df["time"].min() == tmin


def test_to_excel(test_df):
fname = 'foo_testing.xlsx'
Expand Down