In [1]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV, cross_validate, RepeatedKFold
from xgboost import XGBClassifier
import numpy as np
import pandas as pd

In [2]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
X = train[list(test.columns)]
y = train[train.columns[~train.columns.isin(test.columns)]]

In [3]:
from sklearn.base import BaseEstimator, TransformerMixin

class AtributosDesejados(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        self.colunasIndesejadas = ['PassengerId', 'Name', 'Ticket', 'Cabin']
        return self
    def transform(self, X, y=None):
        return X.drop(self.colunasIndesejadas,axis=1)


class AtributosNumericos(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        self.colunasNumericas = X.select_dtypes(include='number').columns
        return self
    def transform(self, X, y=None):
        return X[self.colunasNumericas]

class AtributosCategoricos(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        self.colunasCategoricas = X.select_dtypes(include='object').columns
        return self
    def transform(self, X, y=None):
        return X[self.colunasCategoricas]

In [4]:

pipecat = Pipeline([
    ('atributos_categoricos', AtributosCategoricos()),
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder())
])

pipenum = Pipeline([
    ('atributos_numericos', AtributosNumericos()),
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

unecaracteristicas = FeatureUnion([
    ('pipenum', pipenum),
    ('pipecat', pipecat)
])

In [6]:
preproc = Pipeline([
    ('atributos_desejados', AtributosDesejados()),
    ('unecaracteristicas', unecaracteristicas)    
])

pipetotal = Pipeline([
    ('preproc', preproc),
    ('arvore', DecisionTreeClassifier())
    # ('classificador', XGBClassifier(
    #     nthread=4,
    #     seed=47
    # ))
])

parametros = {
    'arvore__max_depth': [None] + list(range(1,20,2)),
    'arvore__criterion': ['gini', 'entropy']
}

# parametros = {
#     'max_depth': [10],
#     'n_estimators': [100],
#     'learning_rate': [0.1]
# }

modelo = GridSearchCV(pipetotal, param_grid=parametros)

scores = cross_validate(modelo, X, y, scoring = 'roc_auc',
                        cv=RepeatedKFold(n_splits=10, n_repeats=3, random_state=47)
)
print(scores)
print(np.mean(scores['test_score']))
print(np.std(scores['test_score']))

{'fit_time': array([6.90446758, 6.05168271, 5.5429914 , 5.53277206, 6.24699116,
       6.98619056, 9.02211285, 5.58198476, 5.50723982, 6.00785899,
       5.51637459, 5.73922491, 6.21163106, 5.59863281, 5.95674801,
       6.31542063, 5.77839947, 6.30803132, 5.54453516, 5.56310272,
       5.90472198, 5.73435116, 5.922261  , 5.84968519, 5.66764522,
       7.01298261, 5.62689996, 5.56172705, 6.42032099, 5.55916882]), 'score_time': array([0.01534891, 0.0139997 , 0.01472926, 0.01303267, 0.02353787,
       0.03900361, 0.01208234, 0.01962924, 0.01105022, 0.01299524,
       0.01477885, 0.03351426, 0.01474476, 0.01422071, 0.01454091,
       0.01401114, 0.01522183, 0.01494646, 0.01476383, 0.0227654 ,
       0.01650786, 0.01468754, 0.0174458 , 0.01208425, 0.0147028 ,
       0.01932526, 0.01650834, 0.01486683, 0.01490688, 0.01464176]), 'test_score': array([0.7962963 , 0.78101604, 0.91069519, 0.78896104, 0.83622449,
       0.7990625 , 0.77222222, 0.86042781, 0.83622449, 0.85734149,
       0.9072302 

In [7]:
modelo.fit(X,y)
y_pred = modelo.predict(test)
result = test[['PassengerId']]
result['Survived'] = y_pred
result.to_csv('submission.csv',index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
