# 🍄 Project: Mushroom Toxicity Classification (Kaggle Challenge)

## 🎯 Project Goal
The objective of this project, based on the **Kaggle Playground Series - Season 4, Episode 8** challenge, was to build a machine learning classification model to predict with the highest possible accuracy whether a given mushroom is **edible** or **poisonous** based on its physical characteristics.  
https://www.kaggle.com/competitions/playground-series-s4e8
## 📈 Kaggle Performance
**Accuracy Score: 0.98481**
**Ranking (rank/all competitors): 555/2422**  
https://www.kaggle.com/competitions/playground-series-s4e8/leaderboard?search=MAkowski+A
## 🛠️ Methodology and Pipeline
1. **Exploratory Data Analysis (EDA):** Investigation of the distribution and relationship of categorical features.
2. **Data Preprocessing:** Handling categorical variables using **One-Hot Encoding**.
3. **Modeling:** Implementation of a **Gradient Boosting Classifier (or specify your chosen model, e.g., Logistic Regression, XGBoost, or Random Forest)**.
4. **Evaluation:** Assessment using key metrics such as **Accuracy** and **F1-Score**.

---

In [1]:
# Standard Libraries Imports
import pandas as pd
import numpy as np

# --- Pandas Display Options ---
# Ensure all columns are displayed when printing a DataFrame to the console.
pd.set_option('display.max_columns', None) 
# Prevent printing long DataFrames on multiple lines (keeps all columns on one line).
pd.set_option('display.expand_frame_repr', False)

import time
from datetime import timedelta, datetime
import os
import pickle
import gc
import matplotlib.pyplot as plt
import scipy.stats
from tqdm import tqdm # For progress bar in the imputer

# ML/Statistics Libraries
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import matthews_corrcoef, roc_auc_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.impute import KNNImputer

# Model Libraries
from catboost import CatBoostClassifier, Pool
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, Input, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# === GLOBAL VARIABLES AND CONFIGURATION ===

# Column Names
ID_COLUMN = 'id'
TARGET_COLUMN = 'class'

# Data Splitting Parameters
TRAIN_SPLIT_FRACTION = 1.0 # 1.0 means using the entire training set
VALIDATION_SPLIT_SIZE = 0.035
RANDOM_STATE = 42

# Data Paths
DATA_SUBFOLDER = 'playground-series-s4e8'
TRAIN_FILE_PATH = os.path.join(DATA_SUBFOLDER, "train.csv")
TEST_FILE_PATH = os.path.join(DATA_SUBFOLDER, "test.csv")

# SUBMISSION_DIR will be used to create a unique folder for all results and submission files
SUBMISSION_DIR = datetime.now().strftime('%Y-%m-%d_%H-%M-%S_submission')

## Loading and Initial Data Preprocessing

In [2]:
try:
    df_test_received  = pd.read_csv(TEST_FILE_PATH)
    df_train_received = pd.read_csv(TRAIN_FILE_PATH)
except FileNotFoundError:
    print("Error: train.csv or test.csv not found. Please ensure they are in the correct path.")
    raise # Stop execution if files are not found

print('df_test_received unique cap-shape:', df_test_received['cap-shape'].nunique())
df_test_received = df_test_received.set_index(ID_COLUMN)
test_indices = df_test_received.index.values

print('df_train_received unique cap-shape:', df_train_received['cap-shape'].nunique())
df_train_received = df_train_received.set_index(ID_COLUMN)
print('df_train_received shape:', df_train_received.shape)

# Apply training split fraction (if TRAIN_SPLIT_FRACTION < 1.0)
df_train_received = df_train_received[:int(len(df_train_received) * TRAIN_SPLIT_FRACTION)]
# Store full training indices for later use (before validation split)
train_indices_full = df_train_received.index.values

# Remove the target column from the training set before combining/preprocessing
print('\ndifrence between train and test ',df_train_received.columns.drop(df_test_received.columns))

y_train_full = df_train_received[TARGET_COLUMN]
X_train_full = df_train_received.drop(columns=[TARGET_COLUMN])
print("\ny_train_full\n", y_train_full.head())
X_train_full.head()


df_test_received unique cap-shape: 62
df_train_received unique cap-shape: 74
df_train_received shape: (3116945, 21)

difrence between train and test  Index(['class'], dtype='object')

y_train_full
 id
0    e
1    p
2    e
3    e
4    e
Name: class, dtype: object


Unnamed: 0_level_0,cap-diameter,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-height,stem-width,stem-root,stem-surface,stem-color,veil-type,veil-color,has-ring,ring-type,spore-print-color,habitat,season
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
0,8.8,f,s,u,f,a,c,w,4.51,15.39,,,w,,,f,f,,d,a
1,4.51,x,h,o,f,a,c,n,4.79,6.48,,y,o,,,t,z,,d,w
2,6.94,f,s,b,f,x,c,w,6.85,9.93,,s,n,,,f,f,,l,w
3,3.88,f,y,g,f,s,,g,4.16,6.53,,,w,,,f,f,,d,u
4,5.85,x,l,w,f,d,,w,3.37,8.36,,,w,,,f,f,,g,a


# Feature Analysis and Selection  
This phase focuses on understanding data quality (missing values) and identifying features with significant predictive power using the  
**Chi-Squared Test for independence** ($ \chi^2 $). Features found to be statistically independent of the target variable will be removedd.

In [3]:
def get_feature_lists(df_train_R, df_test_R, target_series):
    """
    Identifies numerical/categorical columns and performs initial feature selection
    using the Chi-squared test for categorical features against the target.
    """
    
    # Placeholder for Missing Data Check (replaces custom class method)
    print('\n--- Missing Data Check (Train) ---')
    print(df_train_R.isnull().sum()[df_train_R.isnull().sum() > 0] / len(df_train_R))
    print('\n--- Missing Data Check (Test) ---')
    print(df_test_R.isnull().sum()[df_test_R.isnull().sum() > 0] / len(df_test_R))
    
    # Numerical and Categorical columns identified from the dataset description
    list_num_columns = ['cap-diameter', 'stem-height', 'stem-width']
    list_cat_columns = [
        'cap-shape', 'cap-surface', 'cap-color', 'does-bruise-or-bleed', 
        'gill-attachment', 'gill-spacing', 'gill-color', 'stem-root',
        'stem-surface', 'stem-color', 'veil-type', 'veil-color', 
        'has-ring', 'ring-type', 'spore-print-color', 'habitat', 'season'
    ]
    
    # --- Chi-squared Test for Categorical Feature Importance ---
    alpha = 0.05
    unimportant_cols = []
    
    print("\n--- Chi-squared Feature Importance Check (Alpha=0.05) ---")
    
    for col in list_cat_columns:
        # Handle potential NaNs by treating them as a 'MISSING' category
        feature_series = df_train_R[col].astype('category')
        feature_series = feature_series.cat.add_categories('MISSING')
        feature_series = feature_series.fillna('MISSING')
            
        # Create contingency table
        category_vs_class_counts = pd.crosstab(feature_series, target_series)
            
        try:
            # Check for minimum dimensions and non-zero counts
            if category_vs_class_counts.min().min() == 0 or category_vs_class_counts.shape[0] < 2 or category_vs_class_counts.shape[1] < 2:
                raise ValueError("Contingency table has zero counts or too few dimensions.")
                    
            # Perform Chi-squared test
            chi2, p, dof, expected = scipy.stats.chi2_contingency(category_vs_class_counts.values)
                
            if p >= alpha:
                print(f"-> {col} is NOT important (p = {p:.6f}). Adding to removal list.")
                unimportant_cols.append(col)
                    
        except ValueError:
            # If the test fails (e.g., zero variance), treat as unimportant
            unimportant_cols.append(col) 
                
    print(f'\nUnimportant columns identified: {unimportant_cols}')
    
    # Remove unimportant columns from feature lists
    # Dla kolumn kategorycznych
    filtered_cat_columns = []
    for col in list_cat_columns:
        if col not in unimportant_cols:
            filtered_cat_columns.append(col)
    list_cat_columns = filtered_cat_columns
    
    #list_num_columns = [col for col in list_num_columns if col not in unimportant_cols]
    
    return list_num_columns, list_cat_columns, unimportant_cols

In [4]:
# Execute the function to get final feature lists
NUM_FEATURES, CAT_FEATURES, UNIMPORTANT_FEATURES = get_feature_lists(
    df_train_received, df_test_received, y_train_full)

print("UNIMPORTANT_FEATURES ",UNIMPORTANT_FEATURES)

# === Combining DataFrames for Consistent Preprocessing ===

# Drop the unimportant features from both datasets
df_train_received = df_train_received.drop(columns=UNIMPORTANT_FEATURES, errors='ignore')
df_test_received = df_test_received.drop(columns=UNIMPORTANT_FEATURES, errors='ignore')
print(df_train_received.columns)
# Concatenate for global imputation and scaling/encoding
df_combined = pd.concat([df_train_received, df_test_received], axis=0)
print(f'\nCombined DataFrame shape: {df_combined.shape}')


--- Missing Data Check (Train) ---
cap-diameter            0.000001
cap-shape               0.000013
cap-surface             0.215282
cap-color               0.000004
does-bruise-or-bleed    0.000003
gill-attachment         0.168093
gill-spacing            0.403740
gill-color              0.000018
stem-root               0.884527
stem-surface            0.635514
stem-color              0.000012
veil-type               0.948843
veil-color              0.879370
has-ring                0.000008
ring-type               0.041348
spore-print-color       0.914255
habitat                 0.000014
dtype: float64

--- Missing Data Check (Test) ---
cap-diameter            3.368682e-06
cap-shape               1.491845e-05
cap-surface             2.150682e-01
cap-color               6.256124e-06
does-bruise-or-bleed    4.812403e-06
gill-attachment         1.683480e-01
gill-spacing            4.040469e-01
gill-color              2.358077e-05
stem-height             4.812403e-07
stem-root           

## DATA SPLIT

In [5]:
# --- DATA SPLIT ---
print('\n--- DATA SPLIT ---')
X_train_full_data = df_train_received.drop([TARGET_COLUMN], axis=1)
y_train_full_data = df_train_received[TARGET_COLUMN]

X_train_split, X_valid_split, y_train_split, y_valid_split = train_test_split(
    X_train_full_data, y_train_full_data,
    test_size=VALIDATION_SPLIT_SIZE,
    random_state=RANDOM_STATE,
    stratify=y_train_full_data
)
train_indices = X_train_split.index.values
valid_indices = X_valid_split.index.values

# Free memory
del X_train_split, X_valid_split, X_train_full_data, y_train_full_data
gc.collect()



--- DATA SPLIT ---


4

# Resolut after split
indices  
train_indices  
valid_indices  

values  
y_train_split   
y_valid_split

## preprocess_data Transform Function 
1 Usuwanie nieistotnych kolumn  
2 Normalizacja cech numerycznych Skalowanie  
3 Imputacja braków (KNNImputer)  
4 Czyszczenie cech kategorycznych  
5 One-Hot Encoding  
6 Konwersja numerycznych na string (dla CatBoost)  

In [6]:
def preprocess_data(df, list_num_columns, list_cat_columns, unimportant_cols,
                    drop_unimportant_columns, normalization_method, knn_imputer_method, knn_neighbors, 
                    cleaning_cat_features, threshold, cat_one_hot_encoding, num_columns_to_str):
    
    # 1. Dropping unimportant columns
    if drop_unimportant_columns:
        print(f"Dropping unimportant columns: {unimportant_cols}")
        df = df.drop(columns=unimportant_cols, errors='ignore') # errors='ignore' na wypadek
        
    # 2. Scaling numerical features Normalization
    if normalization_method == 'mm':
        print('Applying MinMaxScaler (data in range[0,1])')
        scaler = MinMaxScaler()
        df[list_num_columns] = scaler.fit_transform(df[list_num_columns])
    elif normalization_method == 'ss':
        print('Applying StandardScaler (datas average 0 std 1)')
        scaler = StandardScaler()
        df[list_num_columns] = scaler.fit_transform(df[list_num_columns])

    # 3. Imputation (KNNImputer)
    if knn_imputer_method == 'knn_cat_and_num':

        # Numeric encoding for categorical features required for KNN
        df_temp = df.copy()
        for col in tqdm(list_cat_columns, desc="Converting categorical to codes for KNN"):
            df_temp[col] = df_temp[col].astype('category').cat.codes.replace(-1, np.nan)
        
        # Subset of columns for imputation
        cat_cols_for_knn = ['cap-shape','cap-color','does-bruise-or-bleed','gill-color','stem-color','has-ring','habitat','cap-surface']
        combined_list = cat_cols_for_knn + list_num_columns
        
        # Imputation
        imputer = KNNImputer(n_neighbors=knn_neighbors)
        arr_imputed = imputer.fit_transform(df_temp[combined_list])
        df[combined_list] = pd.DataFrame(arr_imputed, columns=combined_list, index=df.index)
        del df_temp, arr_imputed
        gc.collect()
        
    elif knn_imputer_method == 'knn_num_only':
        print('Applying KNNImputer on numerical columns only')
        imputer = KNNImputer(n_neighbors=knn_neighbors)
        arr_imputed = imputer.fit_transform(df[list_num_columns])
        df[list_num_columns] = pd.DataFrame(arr_imputed, columns=list_num_columns, index=df.index)
        del arr_imputed
        gc.collect()
        
    # 4. Cleaning categorical features (missing/noise)
    if cleaning_cat_features:
        print(f'Cleaning categorical features with threshold {threshold}')
        for col in list_cat_columns:
            # Ensure column is of type 'category'
            if df[col].dtype.name != 'category':
                 df[col] = df[col].astype('category')
            
            # Add 'missing' category and fill NaNs
            if 'missing' not in df[col].cat.categories:
                df[col] = df[col].cat.add_categories('missing')
            df[col] = df[col].fillna('missing')
            
            # Add 'noise' category and group rare values
            if 'noise' not in df[col].cat.categories:
                df[col] = df[col].cat.add_categories('noise')

            # Group rare values below threshold into 'noise'
            count = df[col].value_counts(dropna=False)
            less_freq = count[count < threshold].index
            df[col] = df[col].apply(lambda x: 'noise' if x in less_freq else x)
    else:
        # Otherwise, simply convert to category type
        df[list_cat_columns] = df[list_cat_columns].astype('category') 
        
    # 5. One-Hot Encoding
    if cat_one_hot_encoding:
        print('Applying One-Hot Encoding')
        df = pd.get_dummies(df, columns=list_cat_columns, drop_first=True, dtype=int)
        
    # 6. Converting numerical to string (For CatBoost if we want to treat them as categorical)
    if num_columns_to_str:
        print('Converting numerical columns to string (for CatBoost)')
        df[list_num_columns] = df[list_num_columns].astype('str') 
        
    print('\n--- Final Missing Data Check ---')
    print(df.isnull().sum()[df.isnull().sum() > 0] / len(df))
    
    return df

## Konfiguracja i Zastosowanie Transformacji
Ujednolicam nazwy kluczy w słowniku data_parameters.

In [7]:
# The combined dataframe df_combined was created in the previous step.
df_train_test = pd.concat([df_train_received.drop(columns=[TARGET_COLUMN], errors='ignore'), 
                           df_test_received])

# --- Parameters for Preprocessing ---
data_parameters_catboost = {
    'drop_unimportant_columns': True,
    'normalization_method': 'mm',
    'knn_imputer_method': 'knn_num_only', # Zmiana nazwy na czytelniejszą
    'knn_neighbors': 64,
    'cleaning_cat_features': True,
    'threshold': 101,
    'cat_one_hot_encoding': False, # CatBoost działa lepiej bez OHE
    'num_columns_to_str': False
}
# Execute Preprocessing on the combined data
# We MUST pass UNIMPORTANT_FEATURES as a separate argument because it's required by the function signature
df_all_data = preprocess_data(
    df_train_test, NUM_FEATURES, CAT_FEATURES, UNIMPORTANT_FEATURES,
    **data_parameters_catboost
)
# --- Splitting Data After Transformation ---
print('\n--- SPLITTING PROCESSED DATA ---')

# Separate processed train and test sets
X_train_processed = df_all_data.loc[train_indices_full]
X_test_processed = df_all_data.loc[test_indices]
# Clean up memory
del df_all_data
gc.collect()

# Final Validation Split (Stratified)
X_train_final, X_val, y_train_final, y_val = train_test_split(
    X_train_processed, y_train_full,
    test_size=VALIDATION_SPLIT_SIZE,
    random_state=RANDOM_STATE,
    stratify=y_train_full
)

# Ensure categorical columns remain 'category' dtype after splitting for CatBoost
for col in CAT_FEATURES:
    if col in X_train_final.columns:
        X_train_final[col] = X_train_final[col].astype('category')
        X_val[col] = X_val[col].astype('category')


print(f'Final Training set shape: {X_train_final.shape}')
print(f'Validation set shape: {X_val.shape}')
print(f'Test set shape (processed): {X_test_processed.shape}')

# Next: Train the CatBoost Model

Dropping unimportant columns: ['cap-shape', 'cap-surface', 'cap-color', 'does-bruise-or-bleed', 'gill-attachment', 'gill-spacing', 'gill-color', 'stem-root', 'stem-surface', 'stem-color', 'veil-type', 'veil-color', 'has-ring', 'ring-type', 'spore-print-color', 'habitat']
Applying MinMaxScaler (data in range[0,1])
Applying KNNImputer on numerical columns only
Cleaning categorical features with threshold 101

--- Final Missing Data Check ---
Series([], dtype: float64)

--- SPLITTING PROCESSED DATA ---
Final Training set shape: (3007851, 4)
Validation set shape: (109094, 4)
Test set shape (processed): (2077964, 4)


##  Trening CatBoost

In [8]:
# === Model 1: CatBoost Classifier Training ===
print('\n--- Model 1: CatBoost Training Started ---')

# Prepare CatBoost Pool (optimal data format for CatBoost)
train_pool = Pool(
    data=X_train_final, 
    label=y_train_final, 
    cat_features=CAT_FEATURES
)

val_pool = Pool(
    data=X_val, 
    label=y_val, 
    cat_features=CAT_FEATURES
)

# CatBoost Hyperparameters (using standard/conservative values)
cat_params = {
    'iterations': 5000,
    'learning_rate': 0.01,
    'depth': 6,
    'l2_leaf_reg': 3,
    'random_seed': RANDOM_STATE,
    'loss_function': 'Logloss',
    'eval_metric': 'AUC',
    'verbose': 500, # Print status every 500 iterations
    'early_stopping_rounds': 500,
    'allow_writing_files': False,
    'od_type': 'Iter'
}

cat_model = CatBoostClassifier(**cat_params)
cat_model.fit(
    train_pool, 
    eval_set=val_pool,
    cat_features=CAT_FEATURES 
)

# Evaluation
cat_val_preds = cat_model.predict_proba(X_val)[:, 1]
cat_val_auc = roc_auc_score(y_val, cat_val_preds)
print(f'CatBoost Validation AUC: {cat_val_auc:.5f}')


--- Model 1: CatBoost Training Started ---


CatBoostError: cat_features, text_features, embedding_features, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline should have the None type when X has catboost.Pool type.

## Przygotowanie Danych i Trening Keras

In [None]:
# === Preparing Data for Keras (One-Hot Encoding) ===
print('\n--- Preparing Data for Keras (One-Hot Encoding) ---')

# --- 1. One-Hot Encoding (OHE) for Keras ---
# Apply OHE globally to ensure test data is handled correctly
# Note: We only apply OHE to the relevant columns from all three sets (train, val, test)

# Identify the columns needed for OHE (all categorical features)
ohe_cols = [col for col in CAT_FEATURES if col in X_train_final.columns]

# Perform OHE on the full processed dataset subset (train + val + test)
df_ohe = pd.concat([X_train_final, X_val, X_test_processed], axis=0)

# Perform OHE only on the categorical columns
df_ohe = pd.get_dummies(df_ohe, columns=ohe_cols, dummy_na=False, drop_first=False)

# Separate back into Keras-ready sets
X_train_keras = df_ohe.loc[X_train_final.index].drop(columns=ohe_cols, errors='ignore')
X_val_keras = df_ohe.loc[X_val.index].drop(columns=ohe_cols, errors='ignore')
X_test_keras = df_ohe.loc[X_test_processed.index].drop(columns=ohe_cols, errors='ignore')

# Drop the original categorical columns (now OHE)
X_train_keras = X_train_keras.select_dtypes(exclude=['category'])
X_val_keras = X_val_keras.select_dtypes(exclude=['category'])
X_test_keras = X_test_keras.select_dtypes(exclude=['category'])

print(f'Keras Training set size: {X_train_keras.shape[1]} features')

# --- 2. Keras Model Definition and Training ---

def create_keras_model(input_shape):
    """Defines and compiles the sequential Keras model."""
    tf.random.set_seed(RANDOM_STATE) # Set seed for reproducibility
    
    model = Sequential([
        Input(shape=(input_shape,)),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        
        Dense(32, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        
        # Output layer for binary classification
        Dense(1, activation='sigmoid')
    ])
    
    model.compile(
        optimizer='adam', 
        loss='binary_crossentropy', 
        metrics=[tf.keras.metrics.AUC(name='auc')]
    )
    return model

# Initialize and Compile the Model
input_dim = X_train_keras.shape[1]
keras_model = create_keras_model(input_dim)
print(keras_model.summary())

# Callbacks
callbacks = [
    EarlyStopping(monitor='val_auc', patience=20, mode='max', restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
]

# Training
print('\n--- Model 2: Keras Training Started ---')
keras_model.fit(
    X_train_keras, y_train_final,
    validation_data=(X_val_keras, y_val),
    epochs=100, 
    batch_size=256,
    callbacks=callbacks,
    verbose=1
)

# Evaluation
keras_val_preds = keras_model.predict(X_val_keras).flatten()
keras_val_auc = roc_auc_score(y_val, keras_val_preds)
print(f'Keras Validation AUC: {keras_val_auc:.5f}')

## Predykcje i Submisja

In [None]:
# === Final Predictions and Ensemble ===
print('\n--- Final Predictions and Submission ---')

# 1. Predictions on the Test Set
cat_test_preds = cat_model.predict_proba(X_test_processed)[:, 1]
keras_test_preds = keras_model.predict(X_test_keras).flatten()

# 2. Ensemble (Simple Averaging)
ensemble_test_preds = (cat_test_preds + keras_test_preds) / 2

print(f'Ensemble Test Preds Mean: {ensemble_test_preds.mean():.4f}')

# 3. Create Submission File and Save Models
submission_df = pd.DataFrame({
    ID_COLUMN: test_indices,
    TARGET_COLUMN: ensemble_test_preds
})
submission_df = submission_df.set_index(ID_COLUMN)

# Define file paths using the SUBMISSION_DIR variable
submission_folder_path = SUBMISSION_DIR
os.makedirs(submission_folder_path, exist_ok=True)
submission_file_path = os.path.join(submission_folder_path, 'submission.csv')

submission_df.to_csv(submission_file_path)

print(f'\nSUCCESS: Submission file saved to: {submission_file_path}')

# 4. Save Models
catboost_model_path = os.path.join(submission_folder_path, 'catboost_model.bin')
keras_model_path = os.path.join(submission_folder_path, 'keras_model.h5')

cat_model.save_model(catboost_model_path)
keras_model.save(keras_model_path)

print('Models saved successfully.')