#Import Data

In [None]:
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold, GridSearchCV, cross_val_score 
from sklearn.metrics import confusion_matrix, f1_score
from sklearn.metrics import precision_recall_fscore_support as score

# Locations of the two datasets (source domain and target domain) on the
# mounted drive.
source_csv_path = "/content/drive/MyDrive/Final_Data_Source.csv"
target_csv_path = "/content/drive/MyDrive/Final_Data_Target.csv"

# Load each CSV into its own DataFrame with pandas' default parsing options.
Final_Data_Source = pd.read_csv(source_csv_path)
Final_Data_Target = pd.read_csv(target_csv_path)

#Data Preparation

In [None]:
# Seed NumPy's global RNG so the DataFrame.sample() shuffles below (which are
# called without an explicit random_state) are repeatable.
np.random.seed(32)

# ---- Source domain --------------------------------------------------------
# Round labels to whole numbers, then fold classes 3 and 4 into class 2.
Final_Data_Source['labels'] = Final_Data_Source['labels'].round()
Final_Data_Source.loc[Final_Data_Source['labels'].isin([3.0, 4.0]), 'labels'] = 2.0
# Divisor equals the number of per-channel power columns dropped on the next
# line (presumably turning a sum into a per-channel mean -- TODO confirm).
Final_Data_Source['sum_power'] = Final_Data_Source['sum_power'] / 5
Final_Data_Source.drop(['power1', 'power2', 'power3', 'power5', 'power6', 'window'],
                       axis=1, inplace=True)

# ---- Target domain --------------------------------------------------------
Final_Data_Target.drop(['power1', 'power2', 'power3', 'power4'], axis=1, inplace=True)
# Same scaling as the source, with the target's four power columns.
Final_Data_Target['sum_power'] = Final_Data_Target['sum_power'] / 4
# Positional rename: requires exactly eight remaining columns, in this order.
Final_Data_Target.columns = ['time', 'co2', 'Door_contact', 'rms', 'motion', 'labels',
                             'sum_power', 'derivative_co2']
# NOTE(review): unlike the source, target labels are not rounded before the
# class merge -- confirm they are already whole numbers.
Final_Data_Target.loc[Final_Data_Target['labels'].isin([3.0, 4.0]), 'labels'] = 2.0

# Give the source frame the same columns, in the same order, as the target.
Final_Data_Source = Final_Data_Source[Final_Data_Target.columns]

# Shuffle both frames (target first, then source, to keep the RNG draw order).
Final_Data_Target = Final_Data_Target.sample(frac=1)
Final_Data_Source = Final_Data_Source.sample(frac=1)

# Move the first few shuffled target rows into the source training pool; the
# remaining target rows are kept apart from it.
n_target_rows_in_train = 1
F_Train = Final_Data_Target.iloc[:n_target_rows_in_train, :]
F_Test = Final_Data_Target.iloc[n_target_rows_in_train:, :]

# Source training pool = seeded target rows + all source rows, reshuffled.
F_Train = pd.concat([F_Train, Final_Data_Source])
Final_Data_Source = F_Train.sample(frac=1)

# Build the target features/labels from F_Test only. Using the full target
# frame here would let rows that were just added to the source training pool
# show up again in the held-out target test split (train/test leakage).
y_train_T = F_Test['labels']
X_train_T = F_Test.drop(['labels', 'time'], axis=1)

y_train_S = Final_Data_Source['labels']
X_train_S = Final_Data_Source.drop(['labels', 'time'], axis=1)



In [None]:
# Hold out 35% of the rows in X_train_T / y_train_T for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X_train_T, y_train_T, test_size=0.35
)

# Three parallel lists: position i in each one describes the same candidate
# model (its pipeline, its hyper-parameter grid, and its display name).
pipelines = [
    Pipeline([('clf', DecisionTreeClassifier())]),  # DecisionTreeClassifier
]
params = [
    {
        'clf__max_features': [None],
        'clf__min_samples_split': [2],
        'clf__min_samples_leaf': [1],
        'clf__class_weight': ['balanced'],
    },
]
names = ['DecisionTreeClassifier']


 
def model(pipeline, parameters, name, X, y):
    """Grid-search ``pipeline`` over ``parameters`` and return the best fit.

    Runs a 10-fold shuffled, stratified cross-validation scored with
    micro-averaged F1, prints the best mean CV score prefixed by ``name``,
    and returns the winning estimator trained on all of ``X``/``y``.

    Args:
        pipeline: Estimator (here a scikit-learn ``Pipeline``) to tune.
        parameters: Parameter grid passed to ``GridSearchCV``.
        name: Label printed next to the best cross-validation score.
        X: Training features.
        y: Training labels.

    Returns:
        The best estimator found, already fitted on the full ``X``/``y``.
    """
    cv = StratifiedKFold(n_splits=10, shuffle=True)

    # refit=True (the library default, spelled out here) makes GridSearchCV
    # retrain the best parameter combination on the whole of X, y, so no
    # additional fit call is needed afterwards.
    grid_obj = GridSearchCV(
        estimator=pipeline,
        param_grid=parameters,
        cv=cv,
        scoring='f1_micro',
        n_jobs=-1,
        refit=True,
    )
    grid_obj.fit(X, y)

    print(name, 'F1-measure:', grid_obj.best_score_)
    return grid_obj.best_estimator_






def evaluate_models(estimators, names, X_test, y_test):
    """Print test-set metrics for each fitted estimator.

    For every estimator/name pair this prints the confusion matrix, the
    micro-averaged F1 score, and the per-class precision, recall, F-score
    and support computed on ``X_test``/``y_test``.

    Evaluation is best-effort: if one estimator fails with an ordinary
    exception, the reason is printed and the remaining estimators are still
    evaluated. ``KeyboardInterrupt`` and ``SystemExit`` are not intercepted.

    Args:
        estimators: Fitted estimators exposing ``predict``.
        names: Display names, aligned with ``estimators``.
        X_test: Features to predict on.
        y_test: True labels for ``X_test``.

    Returns:
        None. All results are written to standard output.
    """
    for estimator, name in zip(estimators, names):
        try:
            print('\nPerformance of', name)

            y_pred = estimator.predict(X_test)
            print('\nConfusion matrix\n', confusion_matrix(y_test, y_pred), '\n')
            print('F1-measure', f1_score(y_test, y_pred, average='micro'), '\n')
            precision, recall, fscore, support = score(y_test, y_pred)

            print('precision: {}'.format(precision))
            print('recall: {}'.format(recall))
            print('fscore: {}'.format(fscore))
            print('support: {}'.format(support))
        except Exception as exc:
            # Keep going with the other models, but say why this one has no
            # (or only a partial) report instead of failing silently.
            print('Evaluation of', name, 'failed:', repr(exc))
            continue
# Run the same tune-then-evaluate procedure twice, changing only the training
# data: first the target training split, then the source-based training pool.
# Both runs are scored on the same held-out target split (X_test, y_test).
experiments = [
    ('Target results without TL:', X_train, y_train),
    ('Target results with TL:', X_train_S, y_train_S),
]

for title, X_fit, y_fit in experiments:
    print(title)
    estimators = [
        model(pipeline, grid, name, X_fit, y_fit)
        for pipeline, grid, name in zip(pipelines, params, names)
    ]
    # Pass the DataFrame/Series as they are: the estimators were fitted on
    # DataFrames, so keeping the column names at predict time lets
    # scikit-learn match features by name, and the metrics take 1-D labels
    # without needing a reshape to a column vector.
    evaluate_models(estimators, names, X_test, y_test)


Target results without TL:
DecisionTreeClassifier F1-measure: 0.8994073748902546

Performance of DecisionTreeClassifier

Confusion matrix
 [[254   8   3]
 [  6  44  11]
 [  1  10  28]] 

F1-measure 0.8931506849315068 

precision: [0.97318008 0.70967742 0.66666667]
recall: [0.95849057 0.72131148 0.71794872]
fscore: [0.96577947 0.71544715 0.69135802]
support: [265  61  39]
Target results with TL:
DecisionTreeClassifier F1-measure: 0.9496274217585693

Performance of DecisionTreeClassifier

Confusion matrix
 [[222  18  25]
 [ 58   1   2]
 [ 36   1   2]] 

F1-measure 0.6164383561643836 

precision: [0.70253165 0.05       0.06896552]
recall: [0.83773585 0.01639344 0.05128205]
fscore: [0.76419966 0.02469136 0.05882353]
support: [265  61  39]
