# **Preprocessing Component Test Notebook**

### **Summary of Preprocessing Results**
- ✅ **Loaded Dataset**: Verified dataset name and shape.
- ✅ **Handled Missing Values**: Checked how many rows were dropped.
- ✅ **Encoded Categorical Features**: Ensured categorical variables are transformed properly.

Running the cells below and inspecting the printed shapes and previews confirms that the preprocessing functions work as expected.


In [None]:
import pandas as pd
from src.preprocessing.data_loader import load_dataset
from src.preprocessing.missing_value_handler import handle_missing_values
from src.preprocessing.encoding import encode_categorical_features

# Notebook-wide settings read by the cells below.
# Dataset location; a relative path is resolved against the kernel's current working directory.
original_dataset_path = "../datasets/original/sample_dataset.csv"  # Modify with actual dataset
separator = ","  # Separator handed to load_dataset; adjust based on dataset format
target_column = "Target"  # Column name handed to encode_categorical_features; adjust based on dataset


In [None]:
# Load dataset
# load_dataset's result is unpacked into the loaded data and a dataset name.
# NOTE(review): original_data is presumably a pandas DataFrame (it exposes .shape and .head()) — confirm in data_loader.
original_data, dataset_name = load_dataset(original_dataset_path, separator)
print(f"Dataset Name: {dataset_name}")
print(f"Original Data Shape: {original_data.shape}")
# Trailing expression so the notebook renders the .head() preview as the cell output.
original_data.head()


In [None]:
# Handle missing values
# Runs the helper with strategy="drop"; its result is unpacked into the cleaned data and
# a second value that is reported below as the number of dropped rows.
cleaned_data, dropped_rows = handle_missing_values(original_data, strategy="drop")
print(f"Dropped {dropped_rows} rows due to missing values")
print(f"Cleaned Data Shape: {cleaned_data.shape}")
# Trailing expression so the notebook renders the .head() preview as the cell output.
cleaned_data.head()


In [None]:

# Encode categorical features of the cleaned data, passing the target column name to the helper.
# NOTE(review): the original comment states Binary Encoding is used; the scheme is chosen inside
# encode_categorical_features and is not visible here — confirm in src.preprocessing.encoding.
encoded_data = encode_categorical_features(cleaned_data, target_column)
print(f"Encoded Data Shape: {encoded_data.shape}")
# Trailing expression so the notebook renders the .head() preview as the cell output.
encoded_data.head()