In [None]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Columns that are routed through the scaling + PCA pipeline below.
col_correlated = [
    'totalworkingyears',
    'years_atcompany',
    'years_currentrole',
    'years_lastpromotion',
    'years_withmanager',
]

# Columns that are passed through unchanged; anything not listed here
# (and not in col_correlated) is dropped by the transformers below.
keep_cols = [
    'age',
    'gender',
    'businesstravel',
    'distancefromhome',
    'education',
    'joblevel',
    'maritalstatus',
    'monthlyincome',
    'numcompaniesworked',
    'overtime',
    'percentsalaryhike',
    'stockoptionlevels',
    'trainingtimeslastyear',
]
    
# Standardise the inputs, then run PCA with n_components=0.8 (a fractional
# value asks PCA to pick the component count from the explained variance).
std_pca = Pipeline(steps=[
    ('std', StandardScaler()),
    ('pca', PCA(n_components=0.8)),
])

# Passes the keep_cols columns through untouched and drops every other column.
col_dropper = ColumnTransformer(
    transformers=[('drop_unused_cols', 'passthrough', keep_cols)],
    remainder='drop',
)

# The correlated columns go through std_pca; all remaining columns are handed
# to col_dropper, so the output is [PCA components..., keep_cols...].
corr_transformer = ColumnTransformer(
    transformers=[('pipe_std_pca_corrcols', std_pca, col_correlated)],
    remainder=col_dropper,
)

# Labels for the transformed output.
# NOTE(review): assumes the fitted PCA keeps exactly two components; with
# n_components=0.8 that count depends on the training data -- confirm after fit.
pca_col_names = ['pca_years_0', 'pca_years_1']
col_names = pca_col_names + keep_cols

In [None]:
# Load the train / test splits.
df_train = pd.read_csv('attrition_train.csv')
df_test = pd.read_csv('attrition_test.csv')

# Separate the features from the 'attrition' target column.
features_train = df_train.drop(columns='attrition')
features_test = df_test.drop(columns='attrition')

# Fit on the training features only and keep the transformed result instead of
# discarding it; the fitted transformer is then reused on the test features.
train_clean = corr_transformer.fit_transform(features_train)

# PCA(n_components=0.8) decides how many components to keep from the data, so
# read the fitted count rather than assuming two; a mismatch would otherwise
# make the DataFrame construction below fail on the column count.
fitted_pca = (corr_transformer
              .named_transformers_['pipe_std_pca_corrcols']
              .named_steps['pca'])
col_names = (['pca_years_{}'.format(i) for i in range(fitted_pca.n_components_)]
             + keep_cols)

# NOTE(review): if any passthrough column holds strings, the stacked array is
# object-dtype and every column below will be object too -- confirm dtypes.
features_train_clean = pd.DataFrame(train_clean, columns=col_names)
features_test_clean = pd.DataFrame(corr_transformer.transform(features_test),
                                   columns=col_names)

# Last expression: display the cleaned test features.
features_test_clean