# Ensemble Model
Combine all the sub-models using the Bagging method

In [43]:
import numpy as np
import pandas as pd
import scipy
import json
import seaborn as sns
from sklearn.base import TransformerMixin
from sklearn import preprocessing
from sklearn import metrics
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split, learning_curve, StratifiedKFold
from sklearn.metrics import roc_auc_score, roc_curve, classification_report, confusion_matrix, plot_confusion_matrix
from sklearn.pipeline import make_pipeline, Pipeline
import joblib
import matplotlib.pyplot as plt

### Configure the basic values

In [44]:
# Directory prefix of the serialized sub-models; a model file name is appended
# to it directly, so the trailing slash is required.
# NOTE(review): the directory is spelled 'Choesn_model' — presumably this
# matches the name on disk; confirm before renaming.
model_path = 'Choesn_model/'
# Path of the JSON configuration file that describes the sub-models.
configure_file_path = 'Choesn_model/Configuration.json'

# CSV file names for the training and testing data.
TRAINFILE = 'keyword.csv'
TESTFILE = 'key_word_test.csv'
# String that a configuration entry's 'preprocess' value is compared against
# to derive a real bool (the configuration stores the flag as text).
boolean = 'True'

## Define the MODEL object

In [45]:
class Model:
    """Bundle a fitted estimator with the metadata that describes it.

    The constructor stores its arguments unchanged as the attributes
    ``model``, ``name``, ``test_set``, ``topic``, ``is_preprocess`` and
    ``level``.
    """

    def __init__(self, model, name, test_set, topic, is_preprocess, level):
        self.model, self.name = model, name
        self.test_set, self.topic = test_set, topic
        self.is_preprocess, self.level = is_preprocess, level

    def show(self):
        """Print the metadata fields (everything except ``model``) on one line.

        Fields are separated by a tab character surrounded by single spaces.
        """
        fields = (
            self.name,
            self.test_set,
            self.topic,
            self.is_preprocess,
            self.level,
        )
        print(' \t '.join(map(str, fields)))

    def predict(self, x):
        """Return the wrapped estimator's ``predict`` result for ``x``."""
        prediction = self.model.predict(x)
        return prediction

    def predict_proba(self, x):
        """Return the wrapped estimator's ``predict_proba`` result for ``x``."""
        probabilities = self.model.predict_proba(x)
        return probabilities
        


## Load the model details from the JSON configuration file

In [46]:
def load_configuration_to_model(file_path):
    '''
    Load every sub-model listed in a JSON configuration file.

    file_path               str: Path of the JSON configuration file. The file
                            must contain a sequence of entries, each with the
                            keys 'model_name', 'test_set', 'topic',
                            'preprocess' and 'level'.

    Returns a two-element list [layer_1, layer_2] of Model objects:
    entries whose level is 1 go to layer_1, every other level to layer_2.

    Raises whatever open(), json.load() or joblib.load() raise for a missing
    or malformed file, KeyError for an entry lacking a required key, and
    ValueError if 'level' cannot be converted to int.
    '''
    # Read the configuration the caller asked for, not a module-level default.
    with open(file_path, 'r') as json_fp:
        configuration = json.load(json_fp)

    layer_1 = []
    layer_2 = []
    for model_figure in configuration:
        name = model_figure['model_name']

        # SECURITY: joblib.load unpickles the file, which can execute
        # arbitrary code -- only load model files from a trusted source.
        model_file = joblib.load(model_path + name)

        test_set = model_figure['test_set']
        topic = model_figure['topic']
        # The flag is stored as text; it is true only when it equals the
        # module-level `boolean` string.
        is_preprocess = boolean == model_figure['preprocess']
        level = int(model_figure['level'])

        model = Model(model_file, name, test_set, topic, is_preprocess, level)

        # Anything that is not level 1 is treated as a second-layer model.
        target_layer = layer_1 if level == 1 else layer_2
        target_layer.append(model)

    return [layer_1, layer_2]
    

## Prepare the testing data and the preprocessing vectorizer

In [73]:
def get_vector(df, fit_column):
    '''
    Fit a TfidfVectorizer (default settings) on one column and return it.

    df                      The data to fit on; indexed as df[fit_column]
    fit_column              Key of the column for the vectorizer to fit
    '''
    tfidf = TfidfVectorizer()
    tfidf.fit(df[fit_column])
    return tfidf

def preprocess(df, column_name_list, fit_column='article_words', fit_df=None):
    '''
    Vectorize several text columns of df with one shared fitted vectorizer.

    df                      The data to transform; indexed as df[column]
    column_name_list        Keys of the columns to transform, in output order
    fit_column              Key of the column the vectorizer is fitted on
                            (defaults to 'article_words')
    fit_df                  The data the vectorizer is fitted on. When None
                            (the default) the vectorizer is fitted on df
                            itself. Pass the training data here so the
                            vocabulary matches the one used at training time.

    Returns a list holding the result of vector.transform(df[column]) for
    each column in column_name_list, in the same order. An empty
    column_name_list gives an empty list.
    '''
    if fit_df is None:
        fit_df = df

    # One vectorizer is fitted once and reused, so every output matrix
    # shares the same feature space.
    vector = get_vector(fit_df, fit_column)

    return [vector.transform(df[column]) for column in column_name_list]


In [74]:
# Read the test CSV and vectorize its two text columns.
# NOTE(review): the list returned by preprocess() is not bound to a name here,
# so it is only available as the cell's displayed result — confirm intent.
df = pd.read_csv(TESTFILE)
preprocess(df, ['key_word_100', 'article_words'])

(500, 8112)
(500, 8112)
