-
Notifications
You must be signed in to change notification settings - Fork 98
Description
ValueError Traceback (most recent call last)
Cell In[12], line 1
----> 1 X_train_selected, y_train = fwiz.fit_transform(X_train, y_train)
2 X_test_selected = fwiz.transform(X_test)
File e:\MiniConda\envs\lol\Lib\site-packages\sklearn\utils\_set_output.py:316, in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)
314 @wraps(f)
315 def wrapped(self, X, *args, **kwargs):
--> 316 data_to_wrap = f(self, X, *args, **kwargs)
317 if isinstance(data_to_wrap, tuple):
318 # only wrap the first output for cross decomposition
319 return_tuple = (
320 _wrap_data_with_container(method, data_to_wrap[0], X, self),
321 *data_to_wrap[1:],
322 )
File e:\MiniConda\envs\lol\Lib\site-packages\featurewiz\featurewiz.py:3247, in FeatureWiz.fit_transform(self, X, y)
3245 def fit_transform(self, X, y):
3246 self.fit(X, y)
-> 3247 X_sel, y_sel = self.transform(X, y)
3248 return X_sel, y_sel
File e:\MiniConda\envs\lol\Lib\site-packages\sklearn\utils\_set_output.py:316, in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)
314 @wraps(f)
315 def wrapped(self, X, *args, **kwargs):
--> 316 data_to_wrap = f(self, X, *args, **kwargs)
317 if isinstance(data_to_wrap, tuple):
318 # only wrap the first output for cross decomposition
319 return_tuple = (
320 _wrap_data_with_container(method, data_to_wrap[0], X, self),
321 *data_to_wrap[1:],
322 )
File e:\MiniConda\envs\lol\Lib\site-packages\featurewiz\featurewiz.py:3410, in FeatureWiz.transform(self, X, y)
3407 X_sel.index = X_index
3409 #### Use lazytransform to transform all variables to numeric ###
-> 3410 X_sel, y_sel = self.lazy.fit_transform(X_sel, y)
3412 ### Sometimes after imbalanced flag, this index becomes different!
3413 X_index = X_sel.index
File e:\MiniConda\envs\lol\Lib\site-packages\sklearn\utils\_set_output.py:316, in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)
314 @wraps(f)
315 def wrapped(self, X, *args, **kwargs):
--> 316 data_to_wrap = f(self, X, *args, **kwargs)
317 if isinstance(data_to_wrap, tuple):
318 # only wrap the first output for cross decomposition
319 return_tuple = (
320 _wrap_data_with_container(method, data_to_wrap[0], X, self),
321 *data_to_wrap[1:],
322 )
File e:\MiniConda\envs\lol\Lib\site-packages\lazytransform.py:1756, in LazyTransformer.fit_transform(self, X, y)
1754 start_time = time.time()
1755 self.fit(X,y)
-> 1756 X_trans = self.xformer.transform(X)
1757 X_trans.index = self.X_index
1759 ### Here you can straight away fit and transform y ###
File e:\MiniConda\envs\lol\Lib\site-packages\sklearn\pipeline.py:903, in Pipeline.transform(self, X, **params)
901 Xt = X
902 for _, name, transform in self._iter():
--> 903 Xt = transform.transform(Xt, **routed_params[name].transform)
904 return Xt
File e:\MiniConda\envs\lol\Lib\site-packages\sklearn\pipeline.py:903, in Pipeline.transform(self, X, **params)
901 Xt = X
902 for _, name, transform in self._iter():
--> 903 Xt = transform.transform(Xt, **routed_params[name].transform)
904 return Xt
File e:\MiniConda\envs\lol\Lib\site-packages\sklearn\utils\_set_output.py:316, in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)
314 @wraps(f)
315 def wrapped(self, X, *args, **kwargs):
--> 316 data_to_wrap = f(self, X, *args, **kwargs)
317 if isinstance(data_to_wrap, tuple):
318 # only wrap the first output for cross decomposition
319 return_tuple = (
320 _wrap_data_with_container(method, data_to_wrap[0], X, self),
321 *data_to_wrap[1:],
322 )
File e:\MiniConda\envs\lol\Lib\site-packages\sklearn\preprocessing\_function_transformer.py:252, in FunctionTransformer.transform(self, X)
238 """Transform X using the forward function.
239
240 Parameters
(...) 249 Transformed input.
250 """
251 X = self._check_input(X, reset=False)
--> 252 out = self._transform(X, func=self.func, kw_args=self.kw_args)
253 output_config = _get_output_config("transform", self)["dense"]
255 if hasattr(out, "columns") and self.feature_names_out is not None:
256 # check the consistency between the column provided by transform and
257 # the the column names provided by get_feature_names_out.
File e:\MiniConda\envs\lol\Lib\site-packages\sklearn\preprocessing\_function_transformer.py:379, in FunctionTransformer._transform(self, X, func, kw_args)
376 if func is None:
377 func = _identity
--> 379 return func(X, **(kw_args if kw_args else {}))
File e:\MiniConda\envs\lol\Lib\site-packages\lazytransform.py:713, in create_column_names_onehot(Xt, nlpvars, catvars, discretevars, floatvars, intvars, datevars, onehot_dict, colsize_dict, datesize_dict)
708 ### Xt is already a dense array, no need to convert it ##
709 ### Remember don't combine the next 2 lines into one. That will be a disaster.
710 ### Pandas infers data types autmatically and they always are float64. So
711 ### to avoid that I have split the data into two or three types
712 if num_len == 0:
--> 713 Xint = pd.DataFrame(Xt[:,:], columns = cols_names, dtype=np.int16)
714 return Xint
715 else:
File e:\MiniConda\envs\lol\Lib\site-packages\pandas\core\frame.py:827, in DataFrame.__init__(self, data, index, columns, dtype, copy)
816 mgr = dict_to_mgr(
817 # error: Item "ndarray" of "Union[ndarray, Series, Index]" has no
818 # attribute "name"
(...) 824 copy=_copy,
825 )
826 else:
--> 827 mgr = ndarray_to_mgr(
828 data,
829 index,
830 columns,
831 dtype=dtype,
832 copy=copy,
833 typ=manager,
834 )
836 # For data is list-like, or Iterable (will consume into list)
837 elif is_list_like(data):
File e:\MiniConda\envs\lol\Lib\site-packages\pandas\core\internals\construction.py:323, in ndarray_to_mgr(values, index, columns, dtype, copy, typ)
319 values = _prep_ndarraylike(values, copy=copy_on_sanitize)
321 if dtype is not None and values.dtype != dtype:
322 # GH#40110 see similar check inside sanitize_array
--> 323 values = sanitize_array(
324 values,
325 None,
326 dtype=dtype,
327 copy=copy_on_sanitize,
328 allow_2d=True,
329 )
331 # _prep_ndarraylike ensures that values.ndim == 2 at this point
332 index, columns = _get_axes(
333 values.shape[0], values.shape[1], index=index, columns=columns
334 )
File e:\MiniConda\envs\lol\Lib\site-packages\pandas\core\construction.py:622, in sanitize_array(data, index, dtype, copy, allow_2d)
618 subarr = subarr.copy()
620 else:
621 # we will try to copy by-definition here
--> 622 subarr = _try_cast(data, dtype, copy)
624 elif hasattr(data, "array"):
625 # e.g. dask array GH#38645
626 if not copy:
File e:\MiniConda\envs\lol\Lib\site-packages\pandas\core\construction.py:815, in _try_cast(arr, dtype, copy)
810 # GH#15832: Check if we are requesting a numeric dtype and
811 # that we can convert the data to the requested dtype.
812 elif dtype.kind in "iu":
813 # this will raise if we have e.g. floats
--> 815 subarr = maybe_cast_to_integer_array(arr, dtype)
816 elif not copy:
817 subarr = np.asarray(arr, dtype=dtype)
File e:\MiniConda\envs\lol\Lib\site-packages\pandas\core\dtypes\cast.py:1710, in maybe_cast_to_integer_array(arr, dtype)
1706 if not np.isfinite(arr).all():
1707 raise IntCastingNaNError(
1708 "Cannot convert non-finite values (NA or inf) to integer"
1709 )
-> 1710 raise ValueError("Trying to coerce float values to integers")
1711 if arr.dtype == object:
1712 raise ValueError("Trying to coerce float values to integers")
ValueError: Trying to coerce float values to integers
I get this error with my dataset when I call featurewiz as follows:
fwiz = FeatureWiz(feature_engg = '', nrows=None, transform_target=True,
category_encoders="auto", auto_encoders='CNN_ADD', ae_options={},
add_missing=False, imbalanced=False, verbose=0)
X_train_selected, y_train = fwiz.fit_transform(X_train, y_train)
X_test_selected = fwiz.transform(X_test)
My dataset looks like this (I removed the float columns because I initially thought they were causing the error, but the error still occurs without them):
| year | playoffs | participantid | gamelength | kills | deaths | assists | teamkills | teamdeaths | totalgold |
|---|---|---|---|---|---|---|---|---|---|
| 2022 | 0 | 100 | 2539 | 16 | 10 | 29 | 16 | 10 | 76746 |
| 2022 | 0 | 100 | 1852 | 4 | 17 | 9 | 4 | 17 | 53592 |
| 2022 | 0 | 200 | 2241 | 13 | 4 | 35 | 13 | 4 | 67474 |
| 2022 | 0 | 100 | 1983 | 9 | 6 | 9 | 9 | 6 | 62614 |
| 2022 | 0 | 100 | 2403 | 11 | 10 | 32 | 11 | 10 | 70460 |
I also tried featurewiz on some of the example datasets in the examples folder. It works fine with winequality.csv but raises the same error with boston.csv. Could you please help me solve this issue?