# Data Preprocessing Notebook
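This notebook loads a dataset from a CSV file, drops rows with missing values, label-encodes categorical columns, standardizes the features, and saves the processed features and labels to CSV files.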

In [None]:

# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load the dataset
def load_dataset(file_path):
    """Read a CSV file into a pandas DataFrame.

    Args:
        file_path: Location of the CSV file, passed straight to
            ``pandas.read_csv``.

    Returns:
        The DataFrame produced by ``pandas.read_csv`` with default options.
    """
    frame = pd.read_csv(filepath_or_buffer=file_path)
    return frame

# Data cleaning and preprocessing
def preprocess_data(data, label_column='class'):
    """Clean the dataset and split it into scaled features and labels.

    Steps, in order:
      1. Drop every row that contains a missing value.
      2. Label-encode every object-dtype column (this includes the label
         column when it is object-dtype).
      3. Standardize every column except ``label_column`` with
         ``StandardScaler``.

    Args:
        data: pandas DataFrame holding the feature columns and the label
            column. It is not modified; all work happens on a copy.
        label_column: Name of the column that holds the target labels.
            Defaults to ``'class'``.

    Returns:
        A tuple ``(normalized_features, labels, label_encoders)`` where
        ``normalized_features`` is the output of
        ``StandardScaler.fit_transform`` on the feature columns, ``labels``
        is the (possibly label-encoded) label column, and ``label_encoders``
        maps each encoded column name to its fitted ``LabelEncoder``.

    Raises:
        KeyError: If ``label_column`` is not a column of ``data``.
        ValueError: If no rows remain after dropping missing values.
    """
    # Fail early with a clear message instead of a KeyError raised deep
    # inside DataFrame.drop after the encoding work has already been done.
    if label_column not in data.columns:
        raise KeyError(
            f"Label column {label_column!r} not found in dataset columns"
        )

    # Handling missing values. The explicit copy guarantees that the column
    # assignments below act on an independent frame, which avoids
    # chained-assignment warnings on some pandas versions.
    cleaned = data.dropna().copy()
    if cleaned.empty:
        raise ValueError(
            "No rows left after dropping rows with missing values"
        )

    # Encoding categorical features if any. Each fitted encoder is kept so
    # callers can map the integer codes back to the original values.
    label_encoders = {}
    for column in cleaned.select_dtypes(include=['object']).columns:
        encoder = LabelEncoder()
        cleaned[column] = encoder.fit_transform(cleaned[column])
        label_encoders[column] = encoder

    # Feature normalization: every column except the label is standardized.
    features = cleaned.drop(label_column, axis=1)
    labels = cleaned[label_column]
    normalized_features = StandardScaler().fit_transform(features)

    return normalized_features, labels, label_encoders

# Save preprocessed data to a file
def save_preprocessed_data(features, labels, feature_path, label_path):
    """Write the processed features and labels to two CSV files.

    Each input is wrapped in a pandas DataFrame and written without the
    index column.

    Args:
        features: Feature data accepted by the ``pandas.DataFrame`` constructor.
        labels: Label data accepted by the ``pandas.DataFrame`` constructor.
        feature_path: Destination path for the features CSV.
        label_path: Destination path for the labels CSV.
    """
    # Features are written first, then labels.
    outputs = ((features, feature_path), (labels, label_path))
    for payload, destination in outputs:
        frame = pd.DataFrame(payload)
        frame.to_csv(destination, index=False)


In [None]:

# Example usage: run the whole pipeline end to end
file_path = 'data/kddcup99.csv'  # Replace with the actual path to your dataset

# Load the raw CSV, then clean, encode, and scale it
data = load_dataset(file_path)
features, labels, encoders = preprocess_data(data)

# Write features and labels to separate CSV files
save_preprocessed_data(
    features,
    labels,
    feature_path='processed_features.csv',
    label_path='processed_labels.csv',
)
