In [None]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.compose import ColumnTransformer
import numpy as np

def preprocess_dataset(file_path):
    """Load a churn CSV, one-hot encode categoricals and min-max scale numerics.

    The identifier columns ``RowNumber``, ``CustomerId`` and ``Surname`` are
    removed when present. ``Geography`` and ``Gender`` are one-hot encoded
    with the first category of each dropped, and every column listed in
    ``numeric_features`` is rescaled with ``MinMaxScaler``. Any other column
    found in the file is passed through unchanged and placed last.

    A summary of the result (``DataFrame.describe()``) is printed.

    Args:
        file_path: Path (or anything ``pandas.read_csv`` accepts) of the CSV.

    Returns:
        pandas.DataFrame whose columns are, in order: the encoded categorical
        columns, the scaled numeric columns, then any passthrough columns.
    """
    df = pd.read_csv(file_path)

    # Remove identifier-like columns (RowNumber, CustomerId, Surname).
    # errors='ignore' lets a file that was already stripped of them load too.
    df = df.drop(columns=['RowNumber', 'CustomerId', 'Surname'],
                 errors='ignore')

    categorical_features = ['Geography', 'Gender']
    # NOTE(review): 'Exited' is scaled together with the features; presumably
    # it is a 0/1 label, in which case scaling only turns it into floats --
    # confirm against the dataset.
    numeric_features = ['CreditScore', 'Age', 'Tenure', 'Balance',
                        'NumOfProducts', 'HasCrCard', 'IsActiveMember',
                        'EstimatedSalary', 'Exited']

    preprocessor = ColumnTransformer(
        transformers=[
            ('cat', OneHotEncoder(drop='first'), categorical_features),
            ('num', MinMaxScaler(), numeric_features)
        ],
        remainder='passthrough',
        # The encoder emits a sparse matrix; a threshold of 0 makes the
        # combined output always dense so it can be wrapped in a DataFrame.
        sparse_threshold=0
    )

    processed_data = preprocessor.fit_transform(df)

    cat_encoder = preprocessor.named_transformers_['cat']
    new_cat_columns = cat_encoder.get_feature_names_out(categorical_features)

    # Columns not claimed by a transformer are passed through and appended
    # after the transformed ones in their original order; they need labels
    # too, otherwise the DataFrame constructor fails on a width mismatch.
    handled = set(categorical_features) | set(numeric_features)
    passthrough_columns = [col for col in df.columns if col not in handled]

    all_columns = [*new_cat_columns, *numeric_features, *passthrough_columns]

    processed_df = pd.DataFrame(processed_data, columns=all_columns)

    print("processed")
    print(processed_df.describe())

    return processed_df

# Locations of the raw input file and of the preprocessed output file.
source_csv = r"C:\Users\26332\Desktop\Churn_Modelling.csv"
target_csv = r"G:\DataSets\Churn.csv"

# Preprocess the raw file, then write the result out.
processed_data = preprocess_dataset(source_csv)

# index=False keeps the DataFrame's row index out of the written file.
processed_data.to_csv(target_csv, index=False)