# Feature Optimization 

In [1]:
# Imports
import polars as pl
import main
import variables

## Information about the Data

In [2]:
# Build the data dictionary and register the default classifications on it.
dictionary: main.DataDictionary = main.DataDictionary()
dictionary.apply_assign(
    variables.default_classifications,
    dictionary.assign_classification,
)
# Dictionary entries as a dataframe (assigned here, not displayed).
dictionary_df: pl.DataFrame = dictionary.frame_dictionary()

# Dataset wrapper and a direct handle on the frame it exposes.
dataset = main.Dataset()
data = dataset.data

# Labels for the transposed describe() output, by position: the transpose
# emits generic names column_0 .. column_8, which are mapped onto these.
# NOTE(review): this relies on describe() producing its statistics in
# exactly this order - confirm against the installed polars version.
stat_labels = [
    "count",
    "null_count",
    "mean",
    "std",
    "min",
    "25%",
    "50%",
    "75%",
    "max",
]
column_renames = {
    "stat": "field",
    **{f"column_{position}": label for position, label in enumerate(stat_labels)},
}

# Everything is cast to strings first so that transpose works on mixed dtypes.
described_as_text = dataset.data.describe().with_columns(pl.all().cast(pl.Utf8))
# One row per original column; the original column names land in "stat".
transposed_stats = described_as_text.transpose(include_header=True, header_name="stat")
# slice(1) skips the first transposed row (presumably the one coming from
# describe()'s own label column rather than from a data field).
descriptive_stats: pl.DataFrame = transposed_stats.rename(column_renames).slice(1)



In [6]:
# Feature columns whose nulls are replaced by the column's own median.
median_filled_columns = [
    "ident_monitor_opt",
    "income",
    "days_from_registration",
    "days_from_login",
    "max_bkaccts",
]

# Reduced frame: the features above followed by the "success" column,
# which is selected as-is and not imputed.
selected_subset = dataset.data.select([*median_filled_columns, "success"])
small_data = selected_subset.with_columns(
    *(
        pl.col(column_name).fill_null(pl.median(column_name))
        for column_name in median_filled_columns
    )
)

# Row count per value of "success".
success_counts = small_data.group_by('success').len()
print(success_counts)

shape: (2, 2)
┌─────────┬──────┐
│ success ┆ len  │
│ ---     ┆ ---  │
│ bool    ┆ u32  │
╞═════════╪══════╡
│ true    ┆ 3619 │
│ false   ┆ 1081 │
└─────────┴──────┘


## 