# In [None]:
import pandas as pd
import numpy as np
if 'transformer' not in globals():
    from mage_ai.data_preparation.decorators import transformer
if 'test' not in globals():
    from mage_ai.data_preparation.decorators import test


# Columns handled as categorical: gaps are filled with the column's most
# frequent value, then the values are label-encoded to integers.
_CATEGORICAL_COLS = ['A1', 'A4', 'A5', 'A6', 'A7', 'A9', 'A10', 'A11', 'A12', 'A13']
# Columns cast to float once the "?" placeholders have become NaN.
_FLOAT_CAST_COLS = ['A2', 'A14']
# Columns removed before modelling.
_DROPPED_COLS = ['A5', 'A11', 'A12', 'A13']
# Model inputs, selected by name so that neither `Target` nor `Customer_ID`
# can slip into the feature matrix if the upstream column order changes.
_FEATURE_COLS = ['A1', 'A2', 'A3', 'A4', 'A6', 'A7', 'A8', 'A9', 'A10', 'A14', 'A15']
# Categorical features that get a `<name>_ID` key column in the fact table.
_DIMENSION_COLS = ['A1', 'A4', 'A6', 'A7', 'A9', 'A10']
# Integer codes for the raw target labels.
_TARGET_CODES = {'+': 0, '-': 1}
# Column layout of the returned fact table.
_FACT_COLS = (
    ['Customer_ID']
    + [f'{col}_ID' for col in _DIMENSION_COLS]
    + ['A2', 'A3', 'A8', 'A14', 'A15', 'predictions', 'Target']
)


def _clean_and_encode(df):
    """Return a cleaned, numerically encoded copy of the raw frame.

    Adds `Customer_ID` (the incoming index), turns "?" into NaN, imputes
    categorical columns with their most frequent value and numeric columns
    with their mean, label-encodes the categorical columns, encodes
    `Target` as integers and drops the columns in `_DROPPED_COLS`.
    """
    from sklearn.preprocessing import LabelEncoder

    # Work on a copy so the upstream block's frame is never modified.
    df = df.copy()
    df['Customer_ID'] = df.index

    df = df.replace(to_replace="?", value=np.nan)

    df[_CATEGORICAL_COLS] = df[_CATEGORICAL_COLS].apply(
        lambda col: col.fillna(col.value_counts().index[0])
    )
    df[_FLOAT_CAST_COLS] = df[_FLOAT_CAST_COLS].astype(float)

    numeric_cols = df.select_dtypes(include=np.number).columns
    df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].mean())

    df[_CATEGORICAL_COLS] = df[_CATEGORICAL_COLS].apply(
        lambda col: LabelEncoder().fit_transform(col)
    )

    # Encode only the target column and cast explicitly: relying on a
    # frame-wide `replace` to downcast an object column to int is deprecated
    # in pandas, and an object-dtype target is rejected by scikit-learn.
    # Labels outside `_TARGET_CODES` pass through unchanged before the cast.
    df['Target'] = (
        df['Target']
        .map(lambda label: _TARGET_CODES.get(label, label))
        .astype(int)
    )

    return df.drop(columns=_DROPPED_COLS)


def _fit_and_predict(X, y):
    """Fit a logistic regression and return predictions for every row.

    The rows are split 75/25 (random_state=0), features are min-max scaled
    using statistics from the training split only, and accuracy, F1 and
    mean squared error on the test split are printed.

    Returns:
        A Series named `predictions`, indexed like `X` (training rows first,
        then test rows).
    """
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import f1_score
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import MinMaxScaler

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=0
    )

    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_X_train = pd.DataFrame(
        scaler.fit_transform(X_train), index=X_train.index, columns=X_train.columns
    )
    scaled_X_test = pd.DataFrame(
        scaler.transform(X_test), index=X_test.index, columns=X_test.columns
    )

    log_reg = LogisticRegression()
    log_reg.fit(scaled_X_train, y_train)

    y_train_pred = log_reg.predict(scaled_X_train)
    y_test_pred = log_reg.predict(scaled_X_test)

    # NOTE(review): with '+' -> 0 and '-' -> 1, f1_score's default
    # pos_label=1 scores the '-' class -- confirm that is intended.
    print("Model Accuracy: ", log_reg.score(scaled_X_test, y_test))
    print("Model F1 Score", f1_score(y_test, y_test_pred))
    print("Model Mean Squared Error", mean_squared_error(y_test, y_test_pred))

    return pd.concat([
        pd.Series(y_train_pred, index=X_train.index),
        pd.Series(y_test_pred, index=X_test.index),
    ]).rename('predictions')


@transformer
def transform(df, *args, **kwargs):
    """
    Clean the upstream frame, score it with a logistic regression and
    return a fact table of keys, measures, predictions and targets.

    Args:
        df: The output from the upstream parent block. It must provide the
            columns A1-A15 referenced below and a `Target` column whose
            "+" / "-" labels are encoded as 0 / 1. It is not modified.
        args: The output from any additional upstream blocks (unused).

    Returns:
        A data frame with one row per input row, ordered by the input index,
        with columns `Customer_ID`, `A1_ID`, `A4_ID`, `A6_ID`, `A7_ID`,
        `A9_ID`, `A10_ID`, `A2`, `A3`, `A8`, `A14`, `A15`, `predictions`
        and `Target`. The measure columns hold the unscaled values.
    """
    df = _clean_and_encode(df)

    predictions = _fit_and_predict(df[_FEATURE_COLS], df['Target'])

    # Attach predictions by index label, then restore the input ordering and
    # hand back a fresh 0..n-1 index.
    fact_table = df.join(predictions).sort_index().reset_index(drop=True)

    # Every dimension is keyed one-row-per-customer, so its key is the
    # customer's own id. Deriving it from Customer_ID (instead of joining
    # against positionally numbered lookup tables) keeps all rows even when
    # the incoming index is not 0..n-1.
    for col in _DIMENSION_COLS:
        fact_table[f'{col}_ID'] = fact_table['Customer_ID']

    return fact_table[_FACT_COLS]

@test
def test_output(output, *args) -> None:
    """
    Check that the transformer block produced an output.

    Raises an AssertionError when `output` is None.
    """
    output_is_defined = output is not None
    assert output_is_defined, 'The output is undefined'

