# Train the functional use classifier

In [3]:
import sys
sys.path.append("./src") # append to system path

import json
import pandas as pd
import numpy as np

import modeling_tool as mt
from make_training_data import data_sampler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt 

# Static parameters (module-level constants).
# NOTE(review): neither constant is referenced in the cells visible here;
# confirm whether other code still relies on them.
RUGULARIZATION = 0.0  # sic: name kept as-is so existing references still resolve
BATCH_SIZE = 1


# Load and prepare the data

In [6]:
# Load the descriptor table; the first CSV row is used as the column header.
df = pd.read_csv('./data/descs/0314_6_functional_use_descs.csv',header=0)


# Sample and split the data into training and testing parts.
# NOTE(review): num_test_left=30 presumably holds out 30 test samples per
# class (the recorded test-set counts are 30 for every class) -- confirm in
# make_training_data.data_sampler.
this_data = data_sampler()
this_data.sample_data(df, num_test_left=30)

trn_X_raw = this_data.trn_data['descs']

# Add a bias column: column 0 stays at 1.0 and the raw descriptors fill
# columns 1..M of the new matrix.
N,M = trn_X_raw.shape
trn_X = np.ones((N, M+1))
trn_X[:, 1:] = trn_X_raw
trn_Y = this_data.trn_data['target']

# Same bias-column construction for the test descriptors.
tst_X_raw = this_data.tst_data['descs']
N_tst, M_tst = tst_X_raw.shape
tst_X = np.ones((N_tst, M_tst+1))
tst_X[:, 1:] = tst_X_raw
tst_Y = this_data.tst_data['target']

# Sorted unique class labels of the training set, used later to label the
# rows of the classification report.
target_names = np.unique(this_data.trn_data['class'])

# Show the class distribution of both splits.  The single-argument print(...)
# form prints the same text on Python 2 and is valid syntax on Python 3.
from collections import Counter
print(Counter(this_data.trn_data['class']))
print(Counter(this_data.tst_data['class']))


Counter({'Fragrances': 400, 'Antibacterial': 400, 'Pesticides': 400, 'Antioxidant': 298, 'Chelating Agents': 211, 'Colorants': 190, 'Surfactants': 167, 'Solvents': 158, 'Oxidizing Agent': 87})
Counter({'Fragrances': 30, 'Oxidizing Agent': 30, 'Antioxidant': 30, 'Chelating Agents': 30, 'Antibacterial': 30, 'Solvents': 30, 'Surfactants': 30, 'Pesticides': 30, 'Colorants': 30})


# Initialize the training process

In [7]:
# Create the classifier object through the project's modeling_tool module.
this_classifier = mt.create_functional_use_classifier('./net/tensorflow_classifier_Mar14')

# Scale the feature matrices with a StandardScaler; fit_scaler hands back the
# transformed training and testing matrices plus a third value kept in `vec`.
# NOTE(review): if fit_scaler standardizes every column, the constant bias
# column (all ones) built while preparing trn_X/tst_X becomes all zeros --
# confirm this is intended.
trn_X, tst_X, vec = this_classifier.fit_scaler(StandardScaler(),trn_X, tst_X)

# training
# `num_neroun` (sic) is kept as spelled here because it is passed as a keyword
# argument and must match the name that train() accepts.
this_classifier.train(trn_X,trn_Y,tst_X,tst_Y, num_epoch=600, num_neroun=128,learning_rate=0.01)

# Compute the true and predicted test labels once and reuse them for both
# reports instead of running prediction twice.
# NOTE(review): argmax over axis 1 presumably decodes one-hot targets -- confirm.
tst_true_labels = np.argmax(tst_Y, axis=1)
tst_pred_labels = this_classifier.predict(tst_X)

# print out the training results
# The single-argument print(...) form prints the same text on Python 2 and is
# valid syntax on Python 3.
print(classification_report(tst_true_labels, tst_pred_labels, target_names=target_names))
print(confusion_matrix(tst_true_labels, tst_pred_labels))



Epoch = 1, Training Accuracy = 10.00%, Testing Accuracy = 11.11%
Epoch = 2, Training Accuracy = 10.00%, Testing Accuracy = 11.11%
Epoch = 3, Training Accuracy = 10.00%, Testing Accuracy = 11.11%
Epoch = 4, Training Accuracy = 10.00%, Testing Accuracy = 11.11%
Epoch = 5, Training Accuracy = 10.00%, Testing Accuracy = 11.11%
Epoch = 6, Training Accuracy = 10.00%, Testing Accuracy = 11.11%
Epoch = 7, Training Accuracy = 10.00%, Testing Accuracy = 11.11%
Epoch = 8, Training Accuracy = 10.00%, Testing Accuracy = 11.11%
Epoch = 9, Training Accuracy = 10.00%, Testing Accuracy = 11.11%
Epoch = 10, Training Accuracy = 10.00%, Testing Accuracy = 11.11%
Epoch = 11, Training Accuracy = 10.00%, Testing Accuracy = 11.11%
Epoch = 12, Training Accuracy = 10.00%, Testing Accuracy = 11.11%
Epoch = 13, Training Accuracy = 10.00%, Testing Accuracy = 11.11%
Epoch = 14, Training Accuracy = 10.00%, Testing Accuracy = 11.11%
Epoch = 15, Training Accuracy = 10.00%, Testing Accuracy = 11.11%
Epoch = 16, Trainin

KeyboardInterrupt: 

# Save the Model

In [None]:
# Save the classifier through its save_model method to the given JSON path.
# NOTE(review): this file name is tagged "Jan12" while the classifier was
# created from a "Mar14" path -- confirm the intended output name.
this_classifier.save_model('./net/tensorflow_classifier_Jan12.json')

## Save the classification results to CSV

In [None]:
def classifaction_report_csv(report):
    """Convert a text classification report into a per-class DataFrame.

    The report is expected to have the layout produced by
    ``sklearn.metrics.classification_report``: a header line, a blank line,
    one row per class, a blank line, and then one or more summary rows
    (``avg / total`` or ``accuracy`` / ``macro avg`` / ``weighted avg``).
    Only the per-class rows are returned.

    Rows are parsed from the right: the last four whitespace-separated
    tokens are precision, recall, f1-score and support, and everything
    before them is the class name.  This keeps class names that contain
    spaces intact and does not depend on the exact column padding.

    Args:
        report: The report text.

    Returns:
        A ``pandas.DataFrame`` with the columns ``class``, ``precision``,
        ``recall``, ``f1_score`` and ``support`` (all numeric columns as
        floats), one row per class.  It is empty, but still has those
        columns, when the report contains no class rows.

    Raises:
        ValueError: If a per-class row does not have a class name followed
            by four numeric fields.
    """
    columns = ['class', 'precision', 'recall', 'f1_score', 'support']
    lines = iter(report.split('\n'))

    # Skip any leading blank lines and the header line itself.
    for line in lines:
        if line.strip():
            break

    report_data = []
    for line in lines:
        if not line.strip():
            if report_data:
                # The blank line after the class rows starts the summary
                # section, which is not part of the per-class table.
                break
            # Blank separator between the header and the first class row.
            continue
        fields = line.rsplit(None, 4)
        if len(fields) != 5:
            raise ValueError(
                'Unexpected classification report row: %r' % (line,))
        report_data.append({
            'class': fields[0].strip(),
            'precision': float(fields[1]),
            'recall': float(fields[2]),
            'f1_score': float(fields[3]),
            'support': float(fields[4]),
        })
    # Explicit columns keep the column order deterministic and keep the
    # columns present even when there are no rows.
    return pd.DataFrame(report_data, columns=columns)
   

# Compute the true and predicted test labels once and reuse them for both
# the report and the confusion matrix instead of running prediction twice.
# NOTE(review): argmax over axis 1 presumably decodes one-hot targets -- confirm.
tst_true_labels = np.argmax(tst_Y, axis=1)
tst_pred_labels = this_classifier.predict(tst_X)

# Tabulate the text report per class and build the confusion matrix.
this_report = classifaction_report_csv(classification_report(tst_true_labels, tst_pred_labels))
this_confusion_matrix = confusion_matrix(tst_true_labels, tst_pred_labels)

# Write both results to CSV files under ./results.
this_report.to_csv('./results/classification_report_classifier_Jan12.csv')
np.savetxt('./results/conf_matrix_classifier_Jan12.csv',this_confusion_matrix, delimiter=',')

