In [1]:
import numpy as np
from sklearn.preprocessing import TargetEncoder
import polars as pl

# Step 1: Build a toy dataset with one categorical feature and a numeric target.
# 88 rows in total: 20 "dog", 30 "cat", 38 "snake". The target values below are
# laid out in the same row order, so the two lists must have equal length.
X = ["dog"] * 20 + ["cat"] * 30 + ["snake"] * 38
y = [90.3] * 5 + [80.1] * 15 + [20.4] * 5 + [20.1] * 25 + [21.2] * 8 + [49] * 30
assert len(X) == len(y), "feature and target must have the same number of rows"

df = pl.DataFrame({'col1': X, 'col2': y})

# Step 2: Apply Target Encoding to 'col1'.
# fit_transform encodes the training rows via cross fitting on shuffled folds
# (which is why rows of the same category get slightly different values), so
# the seed is pinned to make the encoded column identical on every re-run.
enc_auto = TargetEncoder(smooth="auto", random_state=0)
X_trans = enc_auto.fit_transform(
    df.select('col1').to_numpy(),  # 2-D array of shape (n_samples, 1)
    df['col2'].to_numpy(),         # 1-D target array of shape (n_samples,)
)

# Step 3: Replace 'col1' with its encoded values while keeping 'col2'.
# A Series carrying the existing column name overwrites that column in place.
df = df.with_columns(pl.Series('col1', X_trans.ravel()))

# Step 4: Print the modified DataFrame to check the result
print(df)

shape: (88, 2)
┌───────────┬──────┐
│ col1      ┆ col2 │
│ ---       ┆ ---  │
│ f64       ┆ f64  │
╞═══════════╪══════╡
│ 82.570967 ┆ 90.3 │
│ 82.571418 ┆ 90.3 │
│ 82.728915 ┆ 90.3 │
│ 82.571126 ┆ 90.3 │
│ 82.431867 ┆ 90.3 │
│ …         ┆ …    │
│ 43.440872 ┆ 49.0 │
│ 43.627347 ┆ 49.0 │
│ 41.600696 ┆ 49.0 │
│ 43.632816 ┆ 49.0 │
│ 43.627347 ┆ 49.0 │
└───────────┴──────┘


In [3]:
import numpy as np
from sklearn.preprocessing import TargetEncoder
import polars as pl

# Nominal (unordered categorical) columns to encode, and the target column.
# The order of feat_cat_nominal defines the column order of the encoder input,
# independent of the column order in the DataFrame.
feat_cat_nominal = ["col1", "col2", "col3"]  # Example list of column names
feat_target_variable = "col_target"  # Example target variable

# Example DataFrame (modify as needed)
df = pl.DataFrame({
    "col1": ["dog", "cat", "snake"] * 10,
    "col3": ["red", "green", "blue"] * 10,
    "col2": ["apple", "orange", "banana"] * 10,
    "col_target": [1, 2, 3] * 10
})

# target_type is set explicitly. With the default "auto", an integer target
# such as the one above is inferred as multiclass, and the encoder then emits
# one column per (feature, class) pair instead of one column per feature --
# which would silently mislabel the columns in the positional rename below.
# If the target really is categorical, name the outputs with
# enc_auto.get_feature_names_out(feat_cat_nominal) instead.
# random_state pins the shuffled cross fitting so re-runs give the same values.
enc_auto = TargetEncoder(smooth="auto", target_type="continuous", random_state=0)

# Fit the TargetEncoder and transform the nominal features based on the target variable
X_trans = enc_auto.fit_transform(
    df.select(feat_cat_nominal).to_numpy(),
    df.select(pl.col(feat_target_variable)).to_numpy().ravel()
)

# Guard the one-column-per-feature assumption the rename mapping relies on.
if X_trans.shape[1] != len(feat_cat_nominal):
    raise ValueError(
        f"Expected {len(feat_cat_nominal)} encoded columns, got {X_trans.shape[1]}"
    )

# pl.DataFrame names the columns of a 2-D array "column_0", "column_1", ...;
# map each positional name back to the original feature name.
column_rename_dict = {f"column_{i}": name for i, name in enumerate(feat_cat_nominal)}
encoded_features_df = pl.DataFrame(X_trans).rename(column_rename_dict)

# Replace the original columns in the DataFrame with the encoded values
# (columns are matched by name, so 'col_target' is left untouched).
df = df.with_columns(encoded_features_df)

# Print the rename mapping to verify how encoded columns map to feature names
print(column_rename_dict)

{'column_0': 'col1', 'column_1': 'col2', 'column_2': 'col3'}


In [4]:
# Two sample iterables: a list of words and a plain string.
l1 = ["eat", "sleep", "repeat"]
s1 = "geek"

# enumerate() wraps an iterable in an iterator of (index, item) pairs.
obj1 = enumerate(l1)
obj2 = enumerate(s1)

print("Return type:", type(obj1))

# Materialise a fresh enumeration of the list; counting starts at 0 by default.
pairs_from_zero = list(enumerate(l1))
print(pairs_from_zero)

# The optional start argument shifts the first index (here 2 instead of 0).
pairs_from_two = list(enumerate(s1, start=2))
print(pairs_from_two)

Return type: <class 'enumerate'>
[(0, 'eat'), (1, 'sleep'), (2, 'repeat')]
[(2, 'g'), (3, 'e'), (4, 'e'), (5, 'k')]
