In [1]:
#import module

import pandas
import pycaret
import numpy as np
import math
import matplotlib.pyplot as plt
from pycaret.classification import *
from sklearn.metrics import r2_score

from pycaret.regression import load_model

In [2]:
filename = "Data_2021_08_17_v1_m.csv"

In [6]:
INPUT_MEANS = [11.52298422039056, 11.456883174462424, 1.250509864774989, 1.2538539126579504, 0, 2.296427822897579, 2.298618931500766, -0.22316660284959997, -0.22387698756574836, 3498.3822575119407, 3.751657665101436, 20.142866427520758, 3.749931581386913, 20.117956611112046, 3.5000719453457, 3.5003074303218167, 13.929453255677942, 42.464752826424295, 52.22957014167389, 59.56812639809758, 5.02519452690933, 4.981609450455427, 33.70174577586959, 32.88914204821819, 0.995559901354316, 5.058896272683628, 5.014498592502798, 4.472254444692369, 58.21624804897175, 57.724760906550095, 0.37850598975131616, 0.3799372194302846, 0.008550744438473683, 0.012024154640011416, 0, 0]
INPUT_STDS = [4.051942143540716, 3.995097310727702, 0.4316337047899765, 0.4294732374156009, 1, 0.6957729777394037, 0.6977392847897756, 9.669547184856619, 9.669288866198748, 864.415584487189, 1.4757152190445155, 5.821673397813458, 1.4760283962164227, 5.824862857828931, 0.8345365926161037, 0.8325621560324485, 3.4611454895181297, 10.451871407488593, 16.258845716118564, 23.037792312255746, 4.547030895189253, 4.568923127681277, 24.953547369232165, 23.935996346040913, 0.0027699777076083173, 4.564224228050054, 4.58539660986043, 3.5122816008824467, 52.61663562827219, 52.86936082709284, 0.20188697414812357, 0.20143747396635506, 0.01081075942750983, 0.015381580408170992, 1, 1]

def data_preprocessing(parameter, lower_limit, upper_limit, new_feature_names = []):

    #load data
    csv = pandas.read_csv(filename)
    csv = csv.dropna(axis=0).reset_index(drop=True)#drop NaN

    csv = csv[csv['Lmt']>0.1].reset_index(drop=True)
    csv = csv[csv['Lmr']>0.1].reset_index(drop=True)
    csv = csv[csv['k']>0].reset_index(drop=True)

    #calculate R
    array_len = len(csv)
    R1 = np.zeros(array_len)
    R2 = np.zeros(array_len)
    R1 = csv["copperloss_tx"]/csv["I1"]**2
    R2 = csv["copperloss_rx"]/csv["I2"]**2

    csv = csv.assign(R1 = R1)
    csv = csv.assign(R2 = R2)

    #output data pre processing
    if lower_limit != -1 :
        csv = csv[csv[parameter] > lower_limit].reset_index(drop=True)
    if upper_limit != -1 :
        csv = csv[csv[parameter] < upper_limit].reset_index(drop=True)


    # unpack
    N1,N2,d1,d2,freq,move_tx,move_rx,offset_tx,offset_rx,per,space1,space2,space3,space4,space5,space6,l1,l2,h1,w1,Lmt,Lmr,Llt,Llr,k,Lt,Lr,Lm,Rt,Rr,I1,I2,copperloss_tx,copperloss_rx,R1,R2 = np.hsplit(csv.to_numpy(),36)
    input_arrs = [N1,N2,d1,d2,freq,move_tx,move_rx,offset_tx,offset_rx,per,space1,space2,space3,space4,space5,space6,l1, l2, h1, w1, I1, I2] # 22



    #Generae Features
    N1s = N1**2
    N2s = N2**2
    offset = abs(offset_tx-offset_rx)
    length1 = N1 * (l1*2 + space1*2 + w1 + space3*2)/2 + N1 * (l1*2 + space1*2 + space5 *2 + w1 + space3*2)/2
    length4 = N2 * (l1*2 + space2*2 + w1 + space4*2)/2 + N2 * (l2*2 + space2*2 + space6 *2 + w1 + space4*2)/2
    window2 = (l1 * 2 + space2 *2) * (w1 + space4 *2)
    from_l2 = (l2) - (space2) - (space5)

    new_features_dict = {
        "N1s"       : N1s,
        "N2s"       : N2s,
        "offset"    : offset,
        "length1"   : length1,
        "length4"   : length4,
        "window2"  : window2,
        "from_l2"   : from_l2
    }
    #filter by 'new_features'
    new_features_arr = [v for (k,v) in new_features_dict.items() if k in new_feature_names]
    new_features_arr = pandas.DataFrame(np.array(new_features_arr).squeeze().transpose(), columns = new_feature_names)


    # output parameter
    output_arrs = csv[parameter]



    # Standrize
    col_names = ["N1", "N2", "d1" , "d2", "freq", "move_tx", "move_rx", "offset_tx", "offset_rx", "per", "space1", "space2", "space3", "space4", "space5", "space6", "l1", "l2", "h1", "w1", "I1", "I2"]
    #input_arrs = [csv[col].to_numpy() for col in col_names]

    normalized_inp = [ (inp-mean)/std  for i,(inp, mean,std) in enumerate(zip(input_arrs, INPUT_MEANS,INPUT_STDS)) ]
    normalized_arr = pandas.DataFrame(np.array(normalized_inp).squeeze().transpose(), columns = col_names)


    # Merge
    processed_data = pandas.concat([normalized_arr, new_features_arr, output_arrs],axis=1)

    return processed_data



def classification_data(pre_data, parameter, model_name) :


    dataset = pre_data


    # split data for ML
    data = dataset.sample(frac=0.9, random_state=786).reset_index(drop=True)
    data_unseen = dataset.drop(data.index).reset_index(drop=True)

    print('Data for Modeling: ' + str(data.shape))
    print('Unseen Data For Predictions: ' + str(data_unseen.shape))


    # regresion setting
    exp_reg101 = setup(data = data, target = parameter, session_id=123, silent=True, use_gpu=False) 


    # create model
    lgbm = create_model('lightgbm',num_leaves=40)


    # save model
    save_model(lgbm, model_name)

    # plot model
    plot_model(lgbm)

    return [lgbm, data, data_unseen]



In [25]:
temp = data_preprocessing("Lmt", 5, 20, new_feature_names = ["N1s","offset","length1"])

parameter = "Lmt"

dataset = temp
#temp.hist('N1',bins=50)

#split data for ML
data = dataset.sample(frac=0.9, random_state=786).reset_index(drop=True)
data_unseen = dataset.drop(data.index).reset_index(drop=True)

print('Data for Modeling: ' + str(data.shape))
print('Unseen Data For Predictions: ' + str(data_unseen.shape))

# regresion setting
exp_reg101 = setup(data = data, target = parameter, session_id=123, silent=True, use_gpu=False) 


# create model
lgbm = create_model('lightgbm',num_leaves=40)

data["freq"]

Unnamed: 0,Description,Value
0,session_id,123
1,Target,Lmt
2,Target Type,Multiclass
3,Label Encoded,
4,Original Data,"(31676, 26)"
5,Missing Values,False
6,Numeric Features,25
7,Categorical Features,0
8,Ordinal Features,False
9,High Cardinality Features,False


IntProgress(value=0, description='Processing: ', max=4)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC


ValueError: Supported target types are: ('binary', 'multiclass'). Got 'continuous' instead.

In [8]:
[model_Lmt, data_Lmt, data_unseen_Lmt] = classification_data(data_preprocessing("Lmt", 5, 20, new_feature_names = ["N1s","offset","length1"]), "Lmt", "ML_model2_v3_2021_08_17_v1_Lmt_c")


Unnamed: 0,Description,Value
0,session_id,123
1,Target,Lmt
2,Target Type,Multiclass
3,Label Encoded,
4,Original Data,"(31676, 26)"
5,Missing Values,False
6,Numeric Features,25
7,Categorical Features,0
8,Ordinal Features,False
9,High Cardinality Features,False


IntProgress(value=0, description='Processing: ', max=4)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC


ValueError: Supported target types are: ('binary', 'multiclass'). Got 'continuous' instead.

In [None]:
[model_Lmr, data_Lmr, data_unseen_Lmr] = classification_data(data_preprocessing("Lmr", 5, 20, new_feature_names = ["N2s","offset","length4"]), "Lmr", "ML_model2_v3_2021_08_17_v1_Lmr_c")
verify_data(model_Lmr, data_Lmr, data_unseen_Lmr, "Lmr")

In [None]:
[model_Llt, data_Llt, data_unseen_Llt] = classification_data(data_preprocessing("Llt", 5, 50, new_feature_names = ["offset","length1","length4","window2","from_l2"]), "Llt", "ML_model2_v3_2021_08_17_v1_Llt_c")
verify_data(model_Llt, data_Llt, data_unseen_Llt, "Llt")

In [None]:
[model_Llr, data_Llr, data_unseen_Llr] = classification_data(data_preprocessing("Llr", 5, 50, new_feature_names = ["offset","length1","length4","window2","from_l2"]), "Llr", "ML_model2_v3_2021_08_17_v1_Llr_c")
verify_data(model_Llr, data_Llr, data_unseen_Llr, "Llr")

In [None]:
[model_R1, data_R1, data_unseen_R1] = classification_data(data_preprocessing("R1", 0.03, 0.15, new_feature_names = ["N1s","offset","length1"]), "R1", "ML_model2_v3_2021_08_17_v1_R1_c")
verify_data(model_R1, data_R1, data_unseen_R1, "R1")

In [None]:
[model_R2, data_R2, data_unseen_R2] = classification_data(data_preprocessing("R2", -0.03, 0.15, new_feature_names = ["N2s","offset","length4"]), "R2", "ML_model2_v3_2021_08_17_v1_R2_c")
verify_data(model_R2, data_R2, data_unseen_R2, "R2")

Unnamed: 0,Description,Value
0,session_id,123
1,Target,R2
2,Target Type,Multiclass
3,Label Encoded,
4,Original Data,"(82264, 26)"
5,Missing Values,False
6,Numeric Features,25
7,Categorical Features,0
8,Ordinal Features,False
9,High Cardinality Features,False


IntProgress(value=0, description='Processing: ', max=4)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC


ValueError: Supported target types are: ('binary', 'multiclass'). Got 'continuous' instead.