In [None]:
import pandas as pd
import numpy as np
import warnings

from sklearn.experimental import enable_iterative_imputer

from sklearn.impute import SimpleImputer
from sklearn.impute import IterativeImputer

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer, MinMaxScaler, OneHotEncoder

from sklearn.pipeline import Pipeline

from sdv.datasets.demo import download_demo

warnings.filterwarnings("ignore")

def merge_data(
		PATHS = [
			"../data/campaign_engage.csv",
			"../data/churn_modelling.csv"
		],
		num_pipeline = Pipeline(steps=[("imputer", IterativeImputer(random_state=0)),]),
		cat_pipeline = Pipeline(steps=[("imputer", SimpleImputer(strategy="most_frequent")),]),
		label = 'exited'
	):
	"""Stack two CSV files row-wise, impute the gaps and split features from the label.

	The two files are concatenated along the row axis, so a column that exists
	in only one file is NaN for every row coming from the other file. Those
	gaps (and any missing values already in the files) are filled by
	``num_pipeline`` for non-object columns and ``cat_pipeline`` for
	object columns. After imputation each numeric column is cast back to the
	dtype it had when read from disk, and each object column becomes a
	pandas ``category``.

	Parameters
	----------
	PATHS : sequence of two str
		Paths of the CSV files; ``PATHS[0]`` and ``PATHS[1]`` are read.
	num_pipeline : transformer
		Applied to the columns whose merged dtype is not ``object``.
	cat_pipeline : transformer
		Applied to the columns whose merged dtype is ``object``.
	label : str
		Name of the target column.

	Returns
	-------
	X : pandas.DataFrame
		Every processed column except ``label`` (numeric columns first, then
		categorical ones).
	y : pandas.DataFrame
		Single-column frame holding ``label``.

	Raises
	------
	KeyError
		If ``label`` is not a column of the merged data.
	"""
	df1 = pd.read_csv(PATHS[0])
	df2 = pd.read_csv(PATHS[1])

	# Remember the dtype each column had on disk. When a column name occurs in
	# both files, the dtype from the first file wins.
	source_dtypes = pd.concat([df1.dtypes, df2.dtypes], axis=0)
	source_dtypes = source_dtypes[~source_dtypes.index.duplicated()]

	# Stack the rows; ignore_index=True already yields a fresh 0..n-1 index.
	merged_df = pd.concat([df1, df2], axis=0, ignore_index=True)

	# Split columns into categorical (object dtype) and numerical (the rest).
	is_object = np.array(
		[dtype == np.dtype('O') for dtype in merged_df.dtypes],
		dtype=bool,
	)
	categorical_columns = merged_df.columns[is_object].values
	numerical_columns = merged_df.columns[~is_object].values
	all_columns = np.concatenate([numerical_columns, categorical_columns])

	# Reorder to numeric-then-categorical: this is the column order the
	# ColumnTransformer below produces, so the names line up afterwards.
	merged_df = merged_df.loc[:, all_columns]
	merged_df[categorical_columns] = merged_df.loc[:, categorical_columns].astype('category')

	# Target dtypes to restore once imputation is done.
	cat_dtypes = dict(merged_df.dtypes[categorical_columns])
	num_dtypes = dict(source_dtypes[numerical_columns])
	target_dtypes = num_dtypes | cat_dtypes

	preprocessor = ColumnTransformer(transformers=[
			("num_pipeline", num_pipeline, numerical_columns),
			("cat_pipeline", cat_pipeline, categorical_columns),
	])
	processed_data = preprocessor.fit_transform(merged_df)

	# Wrap the transformed array in a DataFrame with the original names.
	processed_df = pd.DataFrame(processed_data, columns=all_columns)

	# Numeric columns come back from the transformer as generic objects.
	# Convert them to numbers, and round the ones that must return to an
	# integer dtype: a bare astype(int) would truncate imputed values toward
	# zero (0.9 -> 0) instead of picking the nearest integer.
	for column in numerical_columns:
		values = pd.to_numeric(processed_df[column])
		if pd.api.types.is_integer_dtype(num_dtypes[column]):
			values = values.round()
		processed_df[column] = values

	processed_df = processed_df.astype(target_dtypes)

	X = processed_df.loc[:, processed_df.columns != label]
	y = processed_df[[label]]

	return X, y

# Build the feature frame X and the label frame y with merge_data's default
# CSV paths, imputation pipelines and label column.
X, y = merge_data()

In [13]:
# Display the feature frame returned by merge_data (last expression of the cell).
X

Unnamed: 0,CustomerID,Age,Income,AdSpend,ClickThroughRate,ConversionRate,WebsiteVisits,PagesPerVisit,TimeOnSite,SocialShares,...,Gender,CampaignChannel,CampaignType,AdvertisingPlatform,AdvertisingTool,surname,geography,gender,hascrcard,isactivemember
0,8000,56,136912,6497.870068,0.043919,0.088031,0,2.399017,7.396803,19,...,Female,Social Media,Awareness,IsConfid,ToolConfid,Smith,France,Male,yes,yes
1,8001,69,41760,3898.668606,0.155725,0.182725,42,2.917138,5.352549,5,...,Male,Email,Retention,IsConfid,ToolConfid,Smith,France,Male,yes,yes
2,8002,46,88456,1546.429596,0.277490,0.076423,2,8.223619,13.794901,0,...,Female,PPC,Awareness,IsConfid,ToolConfid,Smith,France,Male,yes,yes
3,8003,32,44085,539.525936,0.137611,0.088004,47,4.540939,14.688363,89,...,Female,PPC,Conversion,IsConfid,ToolConfid,Smith,France,Male,yes,yes
4,8004,60,83964,1678.043573,0.252851,0.109940,0,2.046847,13.993370,6,...,Female,PPC,Conversion,IsConfid,ToolConfid,Smith,France,Male,yes,yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17995,11999,43,84664,5000.944830,0.154829,0.104389,24,5.549299,7.727718,49,...,Female,Referral,Conversion,IsConfid,ToolConfid,Obijiaku,France,Male,yes,no
17996,11999,43,84664,5000.944830,0.154829,0.104389,24,5.549299,7.727718,49,...,Female,Referral,Conversion,IsConfid,ToolConfid,Johnstone,France,Male,yes,yes
17997,11999,43,84664,5000.944830,0.154829,0.104389,24,5.549299,7.727718,49,...,Female,Referral,Conversion,IsConfid,ToolConfid,Liu,France,Female,no,yes
17998,11999,43,84664,5000.944830,0.154829,0.104389,24,5.549299,7.727718,49,...,Female,Referral,Conversion,IsConfid,ToolConfid,Sabbatini,Germany,Male,yes,no
