# In [None]:
# 01_data_wrangling.ipynb
"""
This notebook covers data cleaning and preprocessing techniques.
"""

# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Load a dataset
# Reads the customer-churn CSV into `df`. If the file is missing, a small
# synthetic frame is substituted so the rest of the notebook can still run.
file_path = r"C:\Users\tahsi\OneDrive\Desktop\Research Papers\Customer Churn\Customer-churn-records.csv"
try:
    df = pd.read_csv(file_path)
except FileNotFoundError:
    print(f"File not found at {file_path}. Please check the file path.")
    # Generate synthetic data if file is missing:
    # three integer columns, ten rows each, values drawn from [0, 100).
    df = pd.DataFrame({
        "A": np.random.randint(0, 100, 10),
        "B": np.random.randint(0, 100, 10),
        "C": np.random.randint(0, 100, 10),
    })
except Exception as e:
    # Any other failure (parse error, permissions, ...) leaves `df` unbound.
    # Report it and re-raise so the real cause is surfaced here instead of a
    # confusing NameError on the first later use of `df`.
    print(f"An error occurred: {e}")
    raise
else:
    # Only reached when read_csv succeeded.
    print("Dataset loaded successfully.")

# Preview the dataset
print("Dataset Preview:")
print(df.head())

# Check dataset info and missing values
print("\nDataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would add a stray "None" line.
df.info()
print("\nMissing Values:")
# Per-column count of missing (NaN/None) entries.
print(df.isnull().sum())

# Handle missing values
# Fill missing numeric values with the column mean. numeric_only=True keeps
# text columns out of the mean computation (it would otherwise raise on
# recent pandas); missing values in non-numeric columns are left as they are.
df.fillna(df.mean(numeric_only=True), inplace=True)

# Drop duplicate rows
df.drop_duplicates(inplace=True)

# Normalize numerical features
# MinMaxScaler cannot handle string columns, so only numeric/bool columns are
# rescaled to [0, 1]; every other column is carried over unchanged.
numeric_cols = df.select_dtypes(include=["number", "bool"]).columns
scaler = MinMaxScaler()
# reset_index returns a new frame with a fresh 0..n-1 index (drop_duplicates
# may have left gaps), so `df` itself is not modified by the scaling below.
df_scaled = df.reset_index(drop=True)
if len(numeric_cols) > 0:  # fit_transform rejects a frame with zero columns
    df_scaled[numeric_cols] = pd.DataFrame(
        scaler.fit_transform(df_scaled[numeric_cols]),
        columns=numeric_cols,
        index=df_scaled.index,
    )

# Save the cleaned dataset
# Writes the scaled frame as CSV without the row index. A failure is reported
# and not raised, so the notebook cell still finishes (best-effort save).
output_path = r"C:\Users\tahsi\OneDrive\Desktop\Research Papers\Customer Churn\cleaned_customer_churn.csv"
try:
    # Only the write itself is guarded, so the message below is never
    # misattributed to a save failure.
    df_scaled.to_csv(output_path, index=False)
except Exception as e:
    print(f"Error saving the file: {e}")
else:
    print(f"Cleaned dataset saved successfully at {output_path}.")