# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.base import BaseEstimator, TransformerMixin
def drop_columns(df, columns):
    """Remove ``columns`` from ``df`` in place and return the same frame.

    Args:
        df: DataFrame to modify; it is mutated rather than copied.
        columns: Column label or list of labels to remove.

    Returns:
        The very same ``df`` object, now without ``columns``.
    """
    df.drop(columns=columns, inplace=True)
    return df

def fill_NA(df, columns, value):
    """Replace missing values in the listed columns of ``df`` with ``value``.

    Args:
        df: DataFrame to modify; it is mutated rather than copied.
        columns: Iterable of column labels whose missing entries are filled.
        value: Replacement passed to ``Series.fillna``.

    Returns:
        The same ``df`` object with the listed columns filled.
    """
    for c in columns:
        # Assign the filled column back instead of calling
        # ``df[c].fillna(..., inplace=True)``: that form operates on a
        # temporary Series, which newer pandas warns about and which leaves
        # ``df`` untouched when Copy-on-Write is enabled.
        df[c] = df[c].fillna(value)
    return df

def get_dummies_for_category_columns(df, columns):
    """Return a new frame where ``columns`` are replaced by dummy columns.

    The dummy encoding is produced by ``pd.get_dummies`` on ``df[columns]``
    and appended to the right of the existing columns; the source columns
    are then removed. The input ``df`` itself is left unchanged.

    Args:
        df: Source DataFrame.
        columns: Labels of the columns to encode.

    Returns:
        A new DataFrame with the encoded columns appended and the source
        columns dropped.
    """
    encoded = pd.get_dummies(df[columns])
    combined = pd.concat([df, encoded], axis=1)
    return combined.drop(columns=columns)

def drop_huge_buildings(df, max_first_floor_sf=4000, max_lot_area=50000,
                        max_basement_full_baths=3):
    """Return only the rows that fall strictly below every size limit.

    The defaults reproduce the original fixed cut-offs; pass other values to
    tighten or loosen the filter. Rows where any of the compared values is
    missing are dropped, because a comparison against NaN is False.

    Args:
        df: DataFrame with ``1stFlrSF``, ``LotArea`` and ``BsmtFullBath``
            columns.
        max_first_floor_sf: Exclusive upper bound for ``1stFlrSF``.
        max_lot_area: Exclusive upper bound for ``LotArea``.
        max_basement_full_baths: Exclusive upper bound for ``BsmtFullBath``.

    Returns:
        A filtered DataFrame; ``df`` itself is not modified.
    """
    # One combined mask selects the same rows as filtering three times in a
    # row, without building the intermediate frames.
    keep = (
        (df["1stFlrSF"] < max_first_floor_sf)
        & (df["LotArea"] < max_lot_area)
        & (df["BsmtFullBath"] < max_basement_full_baths)
    )
    return df[keep]

def set_type(df, column, type_name):
    """Cast ``df[column]`` to ``type_name`` and store it back on ``df``.

    Args:
        df: DataFrame to modify; it is mutated rather than copied.
        column: Label of the column to convert.
        type_name: Target dtype, forwarded unchanged to ``astype``.

    Returns:
        The same ``df`` object with the converted column.
    """
    converted = df[column].astype(type_name)
    df[column] = converted
    return df

def drop_non_correlated_columns(df, min_corr, target='SalePrice'):
    """Keep only the columns whose correlation with ``target`` is strong.

    A column is kept when the absolute value of its correlation with
    ``target`` is strictly greater than ``min_corr``. Columns that take no
    part in the correlation (non-numeric ones) and columns whose correlation
    is NaN are dropped.

    Args:
        df: Source DataFrame; it is not modified.
        min_corr: Exclusive threshold on the absolute correlation.
        target: Label of the column to correlate against. Defaults to
            ``'SalePrice'``, the column the original code hard-coded.

    Returns:
        A DataFrame restricted to the sufficiently correlated columns, in
        their original order.

    Raises:
        KeyError: If ``target`` is not among the numeric columns of ``df``.
    """
    # ``DataFrame.corr()`` raises on non-numeric columns with pandas >= 2.0,
    # where older versions silently skipped them. Restricting the frame up
    # front gives the skip-non-numeric behaviour on every version.
    numeric = df.select_dtypes(include=[np.number, 'bool'])
    target_corr = numeric.corr()[target]
    # NaN correlations compare False and are therefore excluded.
    strong = np.logical_or(target_corr < -min_corr, target_corr > min_corr)
    return df[target_corr[strong].index]

def log_tranform(df, columns):
    """Swap each listed column for its ``log1p`` transform.

    For every name in ``columns`` a new ``log_<name>`` column holding
    ``np.log1p`` of the original values is written to ``df``; the original
    columns are then removed through ``drop_columns``.

    Args:
        df: DataFrame to modify; new columns are assigned on it directly.
        columns: Iterable of column labels to transform.

    Returns:
        Whatever ``drop_columns(df, columns)`` returns.
    """
    for source in columns:
        transformed = np.log1p(df[source])
        df['log_' + source] = transformed
    return drop_columns(df, columns)


def create_exists_column(df, column):
    """Add a boolean ``exists_<column>`` flag that is True where the value is > 0.

    Args:
        df: DataFrame to modify; the flag column is assigned on it directly.
        column: Label of the column to test.

    Returns:
        The same ``df`` object with the extra flag column.
    """
    is_positive = df[column] > 0
    df['exists_' + column] = is_positive
    return df

def drawTFModel(history, model, trainX, trainy, testX, testy):
    """Print train/test error and plot the square root of the loss history.

    Args:
        history: Object whose ``history`` mapping provides the ``'loss'`` and
            ``'val_loss'`` sequences to plot.
        model: Object exposing ``evaluate(X, y, verbose=0)``; the result is
            presumably a mean-squared-error value (see the plot title).
        trainX: Training inputs passed to ``model.evaluate``.
        trainy: Training targets passed to ``model.evaluate``.
        testX: Test inputs passed to ``model.evaluate``.
        testy: Test targets passed to ``model.evaluate``.
    """
    # Evaluate on the training split first, then on the test split.
    splits = ((trainX, trainy), (testX, testy))
    errors = [model.evaluate(inputs, targets, verbose=0) for inputs, targets in splits]
    print('Train Error: %.3f, Test Error: %.3f' % tuple(np.sqrt(err) for err in errors))

    # Plot the square-rooted loss curves on a fixed y-range.
    plt.ylim(0, 0.3)
    plt.title('sqrt mean_squared_error')
    for history_key, curve_label in (('loss', 'train'), ('val_loss', 'test')):
        plt.plot(np.sqrt(history.history[history_key]), label=curve_label)
    plt.legend()
    plt.show()

class ColumnSelector(BaseEstimator, TransformerMixin):
    """Transformer that keeps only a configured set of DataFrame columns."""

    def __init__(self, columns):
        # Labels to select in ``transform``.
        self.columns = columns

    def fit(self, X, y=None):
        """Nothing is learned; return the transformer itself."""
        return self

    def transform(self, X):
        """Return ``X[self.columns]``.

        Raises:
            AssertionError: If ``X`` is not a DataFrame.
            KeyError: If the selection fails; the message lists the
                configured columns that are absent from ``X``.
        """
        assert isinstance(X, pd.DataFrame)

        try:
            selected = X[self.columns]
        except KeyError:
            missing = list(set(self.columns) - set(X.columns))
            raise KeyError("The DataFrame does not include the columns: %s" % missing)
        return selected
class DropColumns(BaseEstimator, TransformerMixin):
    """Transformer that removes a configured set of columns from a DataFrame."""

    def __init__(self, columns):
        # Labels to remove in ``transform``.
        self.columns = columns

    def fit(self, X, y=None):
        """Nothing is learned; return the transformer itself."""
        return self

    def transform(self, X):
        """Drop ``self.columns`` from ``X`` in place and return ``X``.

        Raises:
            AssertionError: If ``X`` is not a DataFrame.
            KeyError: If any configured column is absent from ``X``; the
                message lists the missing ones.
        """
        assert isinstance(X, pd.DataFrame)

        try:
            # Drop only the configured columns. The previous code passed
            # ``X.columns`` here, which removed every column of the frame.
            X.drop(self.columns, axis=1, inplace=True)
        except KeyError as err:
            cols_error = list(set(self.columns) - set(X.columns))
            raise KeyError("The DataFrame does not include the columns: %s" % cols_error) from err
        return X

class DummiesTransformer(BaseEstimator, TransformerMixin):
    """Transformer that one-hot encodes one column against a fixed key list.

    Because the category list is fixed up front, the produced dummy columns
    are the same for every input frame, whichever values actually occur.
    """

    def __init__(self, column, keys):
        # Label of the column to encode.
        self.column = column
        # Full list of categories; one dummy column is produced per key.
        self.keys = keys

    def fit(self, X, y=None):
        """Nothing is learned; return the transformer itself."""
        return self

    def transform(self, X):
        """Return a new frame with ``self.column`` replaced by dummy columns.

        The dummies are named ``<column>_<key>`` and appended to the right of
        the remaining columns. ``X`` itself is not modified.

        Raises:
            AssertionError: If ``X`` is not a DataFrame.
            KeyError: If ``self.column`` is absent from ``X``.
        """
        assert isinstance(X, pd.DataFrame)

        try:
            source = X[self.column]
        except KeyError as err:
            # The old handler read ``self.columns``, which this class never
            # defines, so an AttributeError hid the real problem.
            raise KeyError(
                "The DataFrame does not include the columns: %s" % [self.column]
            ) from err

        # Convert on a local Series rather than writing back with
        # ``X.loc[:, column] = ...``: that wrote into the caller's frame, and
        # on recent pandas a full-column ``.loc`` assignment may keep the
        # existing dtype, so the categorical dtype (and with it the fixed set
        # of dummy columns) was not reliably applied.
        categorical = source.astype(pd.api.types.CategoricalDtype(categories=self.keys))
        dummies = pd.get_dummies(categorical, prefix=self.column)
        return pd.concat([X.drop(columns=self.column), dummies], axis=1)

            
class CategoryTransformer(BaseEstimator, TransformerMixin):
    """Transformer that casts one column to a categorical dtype with fixed keys."""

    def __init__(self, column, keys):
        # Label of the column to convert.
        self.column = column
        # Categories of the resulting categorical dtype.
        self.keys = keys

    def fit(self, X, y=None):
        """Nothing is learned; return the transformer itself."""
        return self

    def transform(self, X):
        """Cast ``X[self.column]`` to the configured categories and return ``X``.

        The column is reassigned on ``X`` itself, so the caller's frame is
        modified.

        Raises:
            AssertionError: If ``X`` is not a DataFrame.
            KeyError: If ``self.column`` is absent from ``X``.
        """
        assert isinstance(X, pd.DataFrame)

        category_dtype = pd.api.types.CategoricalDtype(categories=self.keys)
        try:
            X[self.column] = X[self.column].astype(category_dtype)
        except KeyError as err:
            # The old handler read ``self.columns``, which this class never
            # defines, so an AttributeError hid the real problem.
            raise KeyError(
                "The DataFrame does not include the columns: %s" % [self.column]
            ) from err
        return X
 
            
class FillNA(BaseEstimator, TransformerMixin):
    """Transformer that fills missing values in selected columns with a constant."""

    def __init__(self, columns, fill_value):
        # Labels of the columns to fill.
        self.columns = columns
        # Stored under the constructor parameter's own name: scikit-learn's
        # ``get_params`` (used by ``clone`` and ``repr``) looks attributes up
        # by the ``__init__`` argument names.
        self.fill_value = fill_value

    @property
    def val(self):
        """Backward-compatible alias for ``fill_value``."""
        return self.fill_value

    @val.setter
    def val(self, value):
        self.fill_value = value

    def fit(self, X, y=None):
        """Nothing is learned; return the transformer itself."""
        return self

    def transform(self, X):
        """Fill missing values in ``self.columns`` of ``X`` and return ``X``.

        The filled columns are reassigned on ``X`` itself, so the caller's
        frame is modified.

        Raises:
            AssertionError: If ``X`` is not a DataFrame.
            KeyError: If a configured column is absent from ``X``; the
                message lists the missing ones.
        """
        assert isinstance(X, pd.DataFrame)

        try:
            for c in self.columns:
                # Assign the result back instead of calling
                # ``X[c].fillna(..., inplace=True)``: that form operates on a
                # temporary Series, which newer pandas warns about and which
                # leaves ``X`` untouched when Copy-on-Write is enabled.
                X[c] = X[c].fillna(self.fill_value)
        except KeyError as err:
            cols_error = list(set(self.columns) - set(X.columns))
            raise KeyError("The DataFrame does not include the columns: %s" % cols_error) from err
        return X

class ApplyTransformer(BaseEstimator, TransformerMixin):
    """Transformer that stores the result of a row-wise function in a column."""

    def __init__(self, fn, name):
        # Callable handed to ``DataFrame.apply`` with ``axis=1``.
        self.fn = fn
        # Label of the column that receives the result.
        self.name = name

    def fit(self, X, y=None):
        """Nothing is learned; return the transformer itself."""
        return self

    def transform(self, X):
        """Apply ``self.fn`` to each row of ``X`` and write it to ``self.name``.

        The column is assigned on ``X`` itself, so the caller's frame is
        modified, and ``X`` is returned.

        Raises:
            AssertionError: If ``X`` is not a DataFrame.
        """
        assert isinstance(X, pd.DataFrame)
        row_results = X.apply(self.fn, axis=1)
        X.loc[:, self.name] = row_results
        return X
    
    