ValueError: Trying to coerce float values to integers

ValueError                                Traceback (most recent call last)
Cell In[12], [line 1](vscode-notebook-cell:?execution_count=12&line=1)
----> [1](vscode-notebook-cell:?execution_count=12&line=1) X_train_selected, y_train = fwiz.fit_transform(X_train, y_train)
      2 X_test_selected = fwiz.transform(X_test)

File e:\MiniConda\envs\lol\Lib\site-packages\sklearn\utils\_set_output.py:316, in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)
    314 @wraps(f)
    315 def wrapped(self, X, *args, **kwargs):
--> [316](file:///E:/MiniConda/envs/lol/Lib/site-packages/sklearn/utils/_set_output.py:316)     data_to_wrap = f(self, X, *args, **kwargs)
    317     if isinstance(data_to_wrap, tuple):
    318         # only wrap the first output for cross decomposition
    319         return_tuple = (
    320             _wrap_data_with_container(method, data_to_wrap[0], X, self),
    321             *data_to_wrap[1:],
    322         )

File e:\MiniConda\envs\lol\Lib\site-packages\featurewiz\featurewiz.py:3247, in FeatureWiz.fit_transform(self, X, y)
   3245 def fit_transform(self, X, y):
   3246     self.fit(X, y)
-> [3247](file:///E:/MiniConda/envs/lol/Lib/site-packages/featurewiz/featurewiz.py:3247)     X_sel, y_sel = self.transform(X, y)
   3248     return X_sel, y_sel

File e:\MiniConda\envs\lol\Lib\site-packages\sklearn\utils\_set_output.py:316, in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)
    314 @wraps(f)
    315 def wrapped(self, X, *args, **kwargs):
--> [316](file:///E:/MiniConda/envs/lol/Lib/site-packages/sklearn/utils/_set_output.py:316)     data_to_wrap = f(self, X, *args, **kwargs)
    317     if isinstance(data_to_wrap, tuple):
    318         # only wrap the first output for cross decomposition
    319         return_tuple = (
    320             _wrap_data_with_container(method, data_to_wrap[0], X, self),
    321             *data_to_wrap[1:],
    322         )

File e:\MiniConda\envs\lol\Lib\site-packages\featurewiz\featurewiz.py:3410, in FeatureWiz.transform(self, X, y)
   3407 X_sel.index = X_index
   3409 #### Use lazytransform to transform all variables to numeric ###
-> [3410](file:///E:/MiniConda/envs/lol/Lib/site-packages/featurewiz/featurewiz.py:3410) X_sel, y_sel = self.lazy.fit_transform(X_sel, y)
   3412 ### Sometimes after imbalanced flag, this index becomes different!
   3413 X_index = X_sel.index

File e:\MiniConda\envs\lol\Lib\site-packages\sklearn\utils\_set_output.py:316, in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)
    314 @wraps(f)
    315 def wrapped(self, X, *args, **kwargs):
--> [316](file:///E:/MiniConda/envs/lol/Lib/site-packages/sklearn/utils/_set_output.py:316)     data_to_wrap = f(self, X, *args, **kwargs)
    317     if isinstance(data_to_wrap, tuple):
    318         # only wrap the first output for cross decomposition
    319         return_tuple = (
    320             _wrap_data_with_container(method, data_to_wrap[0], X, self),
    321             *data_to_wrap[1:],
    322         )

File e:\MiniConda\envs\lol\Lib\site-packages\lazytransform.py:1756, in LazyTransformer.fit_transform(self, X, y)
   1754 start_time = time.time()
   1755 self.fit(X,y)
-> [1756](file:///E:/MiniConda/envs/lol/Lib/site-packages/lazytransform.py:1756) X_trans =  self.xformer.transform(X)
   1757 X_trans.index = self.X_index
   1759 ### Here you can straight away fit and transform y ###

File e:\MiniConda\envs\lol\Lib\site-packages\sklearn\pipeline.py:903, in Pipeline.transform(self, X, **params)
    901 Xt = X
    902 for _, name, transform in self._iter():
--> [903](file:///E:/MiniConda/envs/lol/Lib/site-packages/sklearn/pipeline.py:903)     Xt = transform.transform(Xt, **routed_params[name].transform)
    904 return Xt

File e:\MiniConda\envs\lol\Lib\site-packages\sklearn\pipeline.py:903, in Pipeline.transform(self, X, **params)
    901 Xt = X
    902 for _, name, transform in self._iter():
--> [903](file:///E:/MiniConda/envs/lol/Lib/site-packages/sklearn/pipeline.py:903)     Xt = transform.transform(Xt, **routed_params[name].transform)
    904 return Xt

File e:\MiniConda\envs\lol\Lib\site-packages\sklearn\utils\_set_output.py:316, in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)
    314 @wraps(f)
    315 def wrapped(self, X, *args, **kwargs):
--> [316](file:///E:/MiniConda/envs/lol/Lib/site-packages/sklearn/utils/_set_output.py:316)     data_to_wrap = f(self, X, *args, **kwargs)
    317     if isinstance(data_to_wrap, tuple):
    318         # only wrap the first output for cross decomposition
    319         return_tuple = (
    320             _wrap_data_with_container(method, data_to_wrap[0], X, self),
    321             *data_to_wrap[1:],
    322         )

File e:\MiniConda\envs\lol\Lib\site-packages\sklearn\preprocessing\_function_transformer.py:252, in FunctionTransformer.transform(self, X)
    238 """Transform X using the forward function.
    239 
    240 Parameters
   (...)    249     Transformed input.
    250 """
    251 X = self._check_input(X, reset=False)
--> [252](file:///E:/MiniConda/envs/lol/Lib/site-packages/sklearn/preprocessing/_function_transformer.py:252) out = self._transform(X, func=self.func, kw_args=self.kw_args)
    253 output_config = _get_output_config("transform", self)["dense"]
    255 if hasattr(out, "columns") and self.feature_names_out is not None:
    256     # check the consistency between the column provided by `transform` and
    257     # the the column names provided by `get_feature_names_out`.

File e:\MiniConda\envs\lol\Lib\site-packages\sklearn\preprocessing\_function_transformer.py:379, in FunctionTransformer._transform(self, X, func, kw_args)
    376 if func is None:
    377     func = _identity
--> [379](file:///E:/MiniConda/envs/lol/Lib/site-packages/sklearn/preprocessing/_function_transformer.py:379) return func(X, **(kw_args if kw_args else {}))

File e:\MiniConda\envs\lol\Lib\site-packages\lazytransform.py:713, in create_column_names_onehot(Xt, nlpvars, catvars, discretevars, floatvars, intvars, datevars, onehot_dict, colsize_dict, datesize_dict)
    708 ### Xt is already a dense array, no need to convert it ##
    709 ### Remember don't combine the next 2 lines into one. That will be a disaster.
    710 ### Pandas infers data types autmatically and they always are float64. So
    711 ###  to avoid that I have split the data into two or three types 
    712 if num_len == 0:
--> [713](file:///E:/MiniConda/envs/lol/Lib/site-packages/lazytransform.py:713)     Xint = pd.DataFrame(Xt[:,:], columns = cols_names, dtype=np.int16)
    714     return Xint
    715 else:

File e:\MiniConda\envs\lol\Lib\site-packages\pandas\core\frame.py:827, in DataFrame.__init__(self, data, index, columns, dtype, copy)
    816         mgr = dict_to_mgr(
    817             # error: Item "ndarray" of "Union[ndarray, Series, Index]" has no
    818             # attribute "name"
   (...)    824             copy=_copy,
    825         )
    826     else:
--> [827](file:///E:/MiniConda/envs/lol/Lib/site-packages/pandas/core/frame.py:827)         mgr = ndarray_to_mgr(
    828             data,
    829             index,
    830             columns,
    831             dtype=dtype,
    832             copy=copy,
    833             typ=manager,
    834         )
    836 # For data is list-like, or Iterable (will consume into list)
    837 elif is_list_like(data):

File e:\MiniConda\envs\lol\Lib\site-packages\pandas\core\internals\construction.py:323, in ndarray_to_mgr(values, index, columns, dtype, copy, typ)
    319     values = _prep_ndarraylike(values, copy=copy_on_sanitize)
    321 if dtype is not None and values.dtype != dtype:
    322     # GH#40110 see similar check inside sanitize_array
--> [323](file:///E:/MiniConda/envs/lol/Lib/site-packages/pandas/core/internals/construction.py:323)     values = sanitize_array(
    324         values,
    325         None,
    326         dtype=dtype,
    327         copy=copy_on_sanitize,
    328         allow_2d=True,
    329     )
    331 # _prep_ndarraylike ensures that values.ndim == 2 at this point
    332 index, columns = _get_axes(
    333     values.shape[0], values.shape[1], index=index, columns=columns
    334 )

File e:\MiniConda\envs\lol\Lib\site-packages\pandas\core\construction.py:622, in sanitize_array(data, index, dtype, copy, allow_2d)
    618             subarr = subarr.copy()
    620     else:
    621         # we will try to copy by-definition here
--> [622](file:///E:/MiniConda/envs/lol/Lib/site-packages/pandas/core/construction.py:622)         subarr = _try_cast(data, dtype, copy)
    624 elif hasattr(data, "__array__"):
    625     # e.g. dask array GH#38645
    626     if not copy:

File e:\MiniConda\envs\lol\Lib\site-packages\pandas\core\construction.py:815, in _try_cast(arr, dtype, copy)
    810 # GH#15832: Check if we are requesting a numeric dtype and
    811 # that we can convert the data to the requested dtype.
    812 elif dtype.kind in "iu":
    813     # this will raise if we have e.g. floats
--> [815](file:///E:/MiniConda/envs/lol/Lib/site-packages/pandas/core/construction.py:815)     subarr = maybe_cast_to_integer_array(arr, dtype)
    816 elif not copy:
    817     subarr = np.asarray(arr, dtype=dtype)

File e:\MiniConda\envs\lol\Lib\site-packages\pandas\core\dtypes\cast.py:1710, in maybe_cast_to_integer_array(arr, dtype)
   1706     if not np.isfinite(arr).all():
   1707         raise IntCastingNaNError(
   1708             "Cannot convert non-finite values (NA or inf) to integer"
   1709         )
-> [1710](file:///E:/MiniConda/envs/lol/Lib/site-packages/pandas/core/dtypes/cast.py:1710)     raise ValueError("Trying to coerce float values to integers")
   1711 if arr.dtype == object:
   1712     raise ValueError("Trying to coerce float values to integers")

ValueError: Trying to coerce float values to integers

--------------------------------------------------------------------------------------------------
I am getting this error with my dataset when calling featurewiz as follows:
```
fwiz = FeatureWiz(feature_engg = '', nrows=None, transform_target=True,
        		category_encoders="auto", auto_encoders='CNN_ADD', ae_options={},
        		add_missing=False, imbalanced=False, verbose=0)
X_train_selected, y_train = fwiz.fit_transform(X_train, y_train)
X_test_selected = fwiz.transform(X_test)
```
My dataset looks like this (I stripped out the float columns because I initially thought they were causing the error, but that does not seem to be the cause):
year | playoffs | participantid | gamelength | kills | deaths | assists | teamkills | teamdeaths | totalgold
-- | -- | -- | -- | -- | -- | -- | -- | -- | --
2022 | 0 | 100 | 2539 | 16 | 10 | 29 | 16 | 10 | 76746
2022 | 0 | 100 | 1852 | 4 | 17 | 9 | 4 | 17 | 53592
2022 | 0 | 200 | 2241 | 13 | 4 | 35 | 13 | 4 | 67474
2022 | 0 | 100 | 1983 | 9 | 6 | 9 | 9 | 6 | 62614
2022 | 0 | 100 | 2403 | 11 | 10 | 32 | 11 | 10 | 70460

I also tried it on some of the example datasets in the examples folder. It works fine with `winequality.csv` but gives the same error with `boston.csv`. Can you please help me solve this issue?

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

ValueError: Trying to coerce float values to integers #132

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

year	participantid	gamelength	kills	deaths	assists	teamkills	teamdeaths	totalgold
2022	100	2539	16	10	29	16	10	76746
2022	100	1852	4	17	9	4	17	53592
2022	200	2241	13	4	35	13	4	67474
2022	100	1983	9	6	9	9	6	62614
2022	100	2403	11	10	32	11	10	70460

ValueError: Trying to coerce float values to integers #132

Description

Metadata

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Issue actions