<a href="https://colab.research.google.com/github/MichalMaczek/praca_dyplomowa_Maczek_Myslajek/blob/main/Data_preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import os
import glob


# %tensorflow_version 2.x
# import tensorflow as tf
# device_name = tf.test.gpu_device_name()
# if device_name != '/device:GPU:0':
#   raise SystemError('GPU device not found')
# print('Found GPU at: {}'.format(device_name))


# Mount Google Drive so that the /content/drive/MyDrive/... paths used below resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# 1.

**Get data**

ALL SCENARIOS

In [None]:
# Load the 13 CTU scenario files into a dict keyed by 'CTU_<n>'.
path = '/content/drive/MyDrive/Inż/Flows/CTU-'
scenario_numbers = range(1,14)
files = [path + str(number) + '.binetflow' for number in scenario_numbers]
names = ['CTU_' + str(number) for number in scenario_numbers]

# names[k] and files[k] refer to the same scenario, so they can be paired directly.
scenarios = {name: pd.read_csv(file) for name, file in zip(names, files)}

**CONSTANTS**


In [None]:
# Values of the 'Proto' column for which per-host flows are split and histogrammed below.
protocols = ['tcp', 'udp', 'icmp']

In [None]:
# For every protocol: the flow attributes that contribute to a host signature, mapped to
# the upper limit of the value range that is histogrammed for that attribute.
# 65536 == 2 ** 16; 2 ** 32 is used for DstAddr after it has been converted to an integer.
# For ICMP the 'Sport'/'Dport' columns are renamed to 'Type'/'Code' in the hex-conversion cell.
attributes = {
  "tcp": {'Sport': 65536,
          'Dport': 65536,
          'DstAddr': 2 ** 32
  },
  "udp": {'Sport': 65536,
          'Dport': 65536,
          'DstAddr': 2 ** 32
  },
  "icmp": {'Type': 65536,
           'Code': 65536,
           'DstAddr': 2 ** 32
  }
}

# 2.

**Drop unnecessary features**

In [None]:
# Flow columns removed from every scenario before the signatures are built.
dropped_columns = ['StartTime', 'Dur', 'Dir', 'State', 'sTos', 'dTos', 'TotPkts', 'TotBytes', 'SrcBytes']
for name in names:
    scenarios[name] = scenarios[name].drop(columns=dropped_columns)

**Separate internal network**

In [None]:
# Keep only the flows whose source address lies in the 147.32.0.0/16 network.
# The pattern is a raw string: '\.' is not a valid Python escape sequence and a plain
# literal triggers an "invalid escape sequence" warning on newer interpreters.
re_internal = r'^147\.32\..*'
for name in names:
  # na=False treats a missing SrcAddr as "not internal" (same rows as comparing with True).
  is_internal = scenarios[name]['SrcAddr'].str.match(re_internal, na=False)
  scenarios[name] = scenarios[name][is_internal]

In [None]:
# hosts[scenario] -> every distinct source address in the scenario
# bots[scenario]  -> the source addresses that appear in flows whose Label contains 'Botnet'
hosts = {}
bots = {}
for name in names:
  scenario = scenarios[name]
  is_botnet_flow = scenario['Label'].str.contains('Botnet')
  hosts[name] = scenario['SrcAddr'].unique()
  bots[name] = scenario.loc[is_botnet_flow, 'SrcAddr'].unique()
  print("No of all hosts in internal network: ", len(hosts[name]))
  print("No of all bots in internal network:", len(bots[name]))

No of all hosts in internal network:  425
No of all bots in internal network: 1
No of all hosts in internal network:  392
No of all bots in internal network: 1
No of all hosts in internal network:  504
No of all bots in internal network: 1
No of all hosts in internal network:  374
No of all bots in internal network: 1
No of all hosts in internal network:  254
No of all bots in internal network: 1
No of all hosts in internal network:  354
No of all bots in internal network: 1
No of all hosts in internal network:  294
No of all bots in internal network: 1
No of all hosts in internal network:  478
No of all bots in internal network: 1
No of all hosts in internal network:  398
No of all bots in internal network: 10
No of all hosts in internal network:  413
No of all bots in internal network: 10
No of all hosts in internal network:  272
No of all bots in internal network: 3
No of all hosts in internal network:  303
No of all bots in internal network: 3
No of all hosts in internal network:  

# 3.

**Split flows depending on protocol**

In [None]:
from IPython.display import clear_output

# flows[scenario][host][protocol] -> DataFrame with that host's flows of the given protocol.
flows = {}
count = 1

for name in names:
  flows[name] = {}
  # Group the scenario once by source address instead of re-scanning the whole frame
  # for every host. sort=False keeps the rows of each group in their original order.
  flows_by_host = scenarios[name].groupby('SrcAddr', sort=False)
  for host in hosts[name]:
    # wait=True replaces the previous progress line only when the new one is printed.
    clear_output(wait=True)
    print("Info: host no. ",count)
    flows[name][host] = {}
    flows_of_specified_host = flows_by_host.get_group(host)
    for protocol in protocols:
      protocol_flows = flows_of_specified_host.loc[flows_of_specified_host['Proto']==protocol]
      if protocol_flows.empty:
        # Store an independent copy: later cells assign columns on this frame, which
        # must not be a slice of the scenario frame.
        flows[name][host][protocol] = protocol_flows.copy()
      else:
        # Missing values become 0; fillna already returns a new frame.
        flows[name][host][protocol] = protocol_flows.fillna(0)
    count +=1

Info: host no.  4894


In [None]:
flows['CTU_8']

Unnamed: 0,Proto,SrcAddr,Sport,DstAddr,Dport,Label
137772,udp,2468369573,21920,977014567,8000,flow=From-Botnet-V49-UDP-Established-Custom-En...
137773,udp,2468369573,21921,977014309,8000,flow=From-Botnet-V49-UDP-Established-Custom-En...
137774,udp,2468369573,21922,3682941988,8000,flow=From-Botnet-V49-UDP-Established-Custom-En...
137826,udp,2468369573,21923,3074175265,8000,flow=From-Botnet-V49-UDP-Established-Custom-En...
137853,udp,2468369573,21924,3074175079,8000,flow=From-Botnet-V49-UDP-Established-Custom-En...
...,...,...,...,...,...,...
325667,udp,2468369573,29144,1884968632,8000,flow=From-Botnet-V49-UDP-Established-Custom-En...
326214,udp,2468369573,29145,977014567,8000,flow=From-Botnet-V49-UDP-Established-Custom-En...
326269,udp,2468369573,29146,3074175081,8000,flow=From-Botnet-V49-UDP-Established-Custom-En...
328601,udp,2468369573,29147,977014309,8000,flow=From-Botnet-V49-UDP-Established-Custom-En...


# 4.


**Threshold**

In [None]:
# A host with at most THRESHOLD flows of a protocol gets an all-zero signature part
# for that protocol.
THRESHOLD = 150

# Number of histogram bins per attribute. The signature cells below read this name but
# no cell defined it, so a fresh kernel failed with NameError. 1024 matches the saved
# '*_1024_150.csv' files and the 9216 (= 9 attributes x 1024) feature columns.
no_of_bins = 1024

In [None]:
# for threshold = 0
# Starts from an empty mapping; the following cell re-creates hosts_below_threshold
# and fills it per scenario and protocol.
hosts_below_threshold = {}

In [None]:
# hosts_below_threshold[scenario][protocol] -> hosts that have at most THRESHOLD flows
# of that protocol.
hosts_below_threshold = {}
for name in names:
  hosts_below_threshold[name] = {}
  for protocol in protocols:
    below = []
    for host in hosts[name]:
      if len(flows[name][host][protocol]) <= THRESHOLD:
        below.append(host)
        # The running number is the position in the list, so it starts at 1 and the
        # total printed after the loop is the exact number of hosts (the previous
        # counter started at 1 and was incremented before printing: off by one).
        print(str(len(below))+ ' Host: ' + name + '_' + str(host) + ', Proto: ' + str(protocol))
    hosts_below_threshold[name][protocol] = below

    print(len(below))

# 5.

**Convert hex values {ICMP Type, ICMP Code} to integers**

In [None]:
def hexa2int(hexa):
    """Convert an ICMP type/code field to an integer.

    Parameters
    ----------
    hexa : str, int, float or missing value
        A string with an optional base prefix (e.g. '0x0303' or '8'), a number,
        or a missing value.

    Returns
    -------
    int
        0 for a missing value (None, NaN, pd.NA or the string 'nan'), otherwise
        the integer value of the input.

    Raises
    ------
    ValueError
        If the input is a non-integral float or a string that ``int(text, 0)``
        cannot parse.
    """
    # pd.isna also covers None and pd.NA; the string test keeps accepting 'nan'.
    if pd.isna(hexa) or str(hexa) == 'nan':
        return 0
    if isinstance(hexa, (int, np.integer)):
        return int(hexa)
    if isinstance(hexa, (float, np.floating)):
        # A float such as 8.0 would become the text '8.0', which int() rejects.
        if not float(hexa).is_integer():
            raise ValueError('not an integral value: ' + repr(hexa))
        return int(hexa)
    # Base 0 makes int() honour a '0x' prefix and read unprefixed text as decimal.
    return int(str(hexa), 0)

In [None]:
# For ICMP flows the 'Sport'/'Dport' columns are converted with hexa2int and renamed
# to 'Type'/'Code'.
for name in names:
  for host in hosts[name]:
    icmp_flows = flows[name][host]['icmp']
    if 'Sport' not in icmp_flows.columns:
      # Already converted by an earlier run of this cell; running it again is a no-op
      # instead of a KeyError.
      continue
    # assign() replaces the two columns in place of the old ones on a new frame, so
    # nothing is written into a frame that may be a slice of the scenario data.
    flows[name][host]['icmp'] = icmp_flows.assign(
        Sport=icmp_flows['Sport'].apply(hexa2int),
        Dport=icmp_flows['Dport'].apply(hexa2int),
    ).rename(columns={"Sport": "Type", "Dport": "Code"})

In [None]:
# for addresses in flows:
#   print('\n')
#   print('Host: ' + str(addresses))
#   print(flows[addresses]['icmp'])

In [None]:
# # check
# print(flows)

# 5.

**Convert IP addresses to integers**

In [None]:
import struct
import socket
def ip2int(addr):
    """Return the dotted-quad IPv4 address ``addr`` as an unsigned 32-bit integer.

    Example: 255.255.255.255 -> 255*(256^3) + 255*(256^2) + 255*256 + 255 = 4294967295
    """
    packed = socket.inet_aton(addr)  # the address as 4 bytes in network byte order
    return int.from_bytes(packed, byteorder='big')

In [None]:
# Replace the textual source and destination addresses by their integer form.
for name in names:
  for host in hosts[name]:
    for protocol in protocols:
      protocol_flows = flows[name][host][protocol]
      # assign() builds a new frame, so no column is written into a frame that may
      # still be a slice of the scenario data (avoids SettingWithCopyWarning).
      flows[name][host][protocol] = protocol_flows.assign(
          SrcAddr=protocol_flows['SrcAddr'].apply(ip2int),
          DstAddr=protocol_flows['DstAddr'].apply(ip2int),
      )

In [None]:
# # check
# print(flows)

# 6.

**Host signatures**

Regular Bins

In [None]:
# signatures[scenario][host] -> flat list with no_of_bins histogram counts for every
# (protocol, attribute) pair, in the order given by protocols and attributes.
signatures = {}
for name in names:
  signatures[name] = {}
  for host in hosts[name]:
    host_signature = []
    for protocol in protocols:
      # The threshold decision is the same for every attribute of a protocol.
      below_threshold = host in hosts_below_threshold[name][protocol]
      for attribute, limit in attributes[protocol].items():
        if below_threshold:
          hist = np.zeros(no_of_bins, dtype=int)
        else:
          attribute_values = flows[name][host][protocol][attribute].astype(int).to_numpy()
          # Equal-width bins over [0, limit]; the counts are integers, so no NaN can occur.
          hist, _ = np.histogram(attribute_values, bins=no_of_bins, range=[0, limit])
        # extend adds the individual values; append would add the whole array as one item
        host_signature.extend(hist)
    signatures[name][host] = host_signature

Adaptive bins

In [None]:
# ad_bins['<attribute>_<protocol>'] -> list of adaptive bin edges for that feature.
ad_bins = {}
# One row per feature, looked up by '<attribute>_<protocol>'.
# NOTE(review): presumably each row holds per-interval value counts over 1000 equal-width
# intervals of the attribute range - confirm against the notebook that writes values.csv.
values = pd.read_csv('/content/drive/MyDrive/Inż/values.csv', sep=',', index_col=0)

features = []
for protocol in protocols:
  for attribute in attributes[protocol]:

    feature = attribute + '_' + protocol
    features.append(feature)

    # 1000 equally spaced candidate edges over [0, limit).
    limit = attributes[protocol][attribute]
    candidate_edges = np.arange(0, limit, limit/1000)

    # Running total of the feature's row, with a leading 0.
    cumulative = np.cumsum(values.loc[feature].to_numpy())
    cumulative = np.insert(cumulative, 0, 0.0)

    # Levels of the running total, spaced total / no_of_bins apart. Each level is mapped
    # to the candidate edge of the interval in which the running total passes it.
    total = max(cumulative)
    levels = np.arange(0, total, total / no_of_bins)

    ad_bins[feature] = []
    for level in levels:
      for idx in range(len(cumulative) - 1):
        if cumulative[idx] <= level < cumulative[idx + 1]:
          ad_bins[feature].append(candidate_edges[idx])

signatures = {}
for name in names:
  signatures[name] = {}
  for host in hosts[name]:
    signatures[name][host] = []
    for protocol in protocols:
      for attribute in attributes[protocol]:
        # Look the edges up for THIS attribute and protocol. Without this line the name
        # 'feature' still held the last value from the loop above, so every attribute
        # was binned with the edges of the last feature.
        feature = attribute + '_' + protocol
        edges = ad_bins[feature]
        if host not in hosts_below_threshold[name][protocol]:

          list_of_attribute_values = flows[name][host][protocol][attribute].astype(int).tolist()

          hist, _ = np.histogram(list_of_attribute_values, bins=edges, density=True)

          # density=True divides by the bin width; replace NaN results by 0
          hist[np.isnan(hist)] = 0
        else:
          # A histogram over len(edges) edges has len(edges) - 1 entries; the zero
          # placeholder must have the same length so all signatures are equally long.
          hist = np.zeros(len(edges) - 1, dtype=int)
        signatures[name][host].extend(hist)

In [None]:
# discard the hosts below threshold
# A host whose signature consists only of zeros is removed from signatures; every other
# host is recorded in hosts_above_threshold[scenario].
hosts_above_threshold = {}
for name in names:
  kept_hosts = []
  for host in hosts[name]:
    # kept_hosts.append(host) # if threshold == 0
    signature = signatures[name].get(host)
    if signature is None:
      # Removed by an earlier run of this cell; skipping keeps the cell re-runnable
      # instead of raising KeyError.
      continue
    if all(x == 0 for x in signature): # if threshold != 0
      del signatures[name][host]
    else:
      kept_hosts.append(host)
  hosts_above_threshold[name] = kept_hosts

# 7.

**Label the signatures**

In [None]:
# labels[scenario][host] -> 1 if the host is one of the scenario's bots, else 0.
labels = {
    name: {host: int(host in bots[name]) for host in hosts_above_threshold[name]}
    for name in names
}

In [None]:
# Flatten the per-scenario dicts into single dicts keyed by '<scenario>_<host>'.
labels_merged = {}
signatures_merged = {}
for name in names:
  prefix = name + '_'
  for host, label in labels[name].items():
    labels_merged[prefix + str(host)] = label
  for host, signature in signatures[name].items():
    signatures_merged[prefix + str(host)] = signature

In [None]:
# check for correct order of both dicts
# Build each key list once and compare position by position. The earlier version rebuilt
# both lists and searched them for every key, and raised ValueError when a key was
# missing from signatures_merged instead of reporting it.
label_keys = list(labels_merged)
signature_keys = list(signatures_merged)
for position, key in enumerate(label_keys):
  if position >= len(signature_keys) or signature_keys[position] != key:
    print('Wrong order: ' + str(key))

In [None]:
# check for length
print(len(labels_merged))

print(len(signatures_merged))

1633
1633


# 8.

**Feature scaling**

In [None]:
# # FROM FILE
# XY = pd.read_csv('/content/drive/MyDrive/Inż/to_classify/XY_512_150.csv', sep=',', index_col=0)

# features = []
# for (columnName, columnData) in XY.iteritems():
#   features.append(columnName)

# # # X = pd.DataFrame(XY, columns= features[:-1])
# # # Y = pd.DataFrame(XY, columns = ["Label"])

In [None]:
# One row per '<scenario>_<host>' key; the entries of each signature become the columns.
df_final = pd.DataFrame.from_dict(signatures_merged, orient='index')
# Assigning a Series aligns on the index, so every row receives the label stored under
# the same key in labels_merged.
df_final["Label"] = pd.Series(labels_merged)

In [None]:
# XY

In [None]:
df_final

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,9177,9178,9179,9180,9181,9182,9183,9184,9185,9186,9187,9188,9189,9190,9191,9192,9193,9194,9195,9196,9197,9198,9199,9200,9201,9202,9203,9204,9205,9206,9207,9208,9209,9210,9211,9212,9213,9214,9215,Label
CTU_1_147.32.86.194,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,91,114,115,126,111,95,92,74,67,95,88,91,94,94,95,102,116,125,113,100,111,126,116,97,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
CTU_1_147.32.80.13,0,51,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
CTU_1_147.32.84.229,0,67,0,0,0,0,232,0,0,0,0,0,0,0,0,0,7,0,0,0,9,32,35,31,33,31,41,45,18,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
CTU_1_147.32.84.59,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,475,586,531,571,544,544,574,595,562,601,590,568,520,528,499,469,484,525,574,577,566,536,584,555,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
CTU_1_147.32.84.138,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CTU_13_147.32.85.30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
CTU_13_147.32.86.151,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
CTU_13_147.32.86.208,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
CTU_13_147.32.86.160,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# check
df_final[df_final['Label']==1]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,9177,9178,9179,9180,9181,9182,9183,9184,9185,9186,9187,9188,9189,9190,9191,9192,9193,9194,9195,9196,9197,9198,9199,9200,9201,9202,9203,9204,9205,9206,9207,9208,9209,9210,9211,9212,9213,9214,9215,Label
CTU_1_147.32.84.165,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,295,238,179,172,232,223,181,198,203,193,171,188,185,166,185,178,195,188,210,183,167,161,188,208,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
CTU_2_147.32.84.165,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,347,340,358,297,307,353,390,370,366,339,353,364,372,357,379,395,370,338,357,383,351,369,359,360,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
CTU_3_147.32.84.165,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,463,499,511,512,508,512,508,513,512,511,511,510,511,511,509,511,513,469,459,444,448,444,447,444,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
CTU_4_147.32.84.165,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,10,26,23,30,26,14,16,36,10,22,26,8,29,28,0,22,11,11,15,21,8,12,22,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
CTU_5_147.32.84.165,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,48,7,1,14,49,44,48,47,48,47,46,50,48,51,50,56,64,64,63,4,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
CTU_6_147.32.84.165,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,85,81,62,109,54,88,66,48,105,43,89,42,66,88,67,75,105,87,83,70,58,171,35,65,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
CTU_8_147.32.84.165,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,94,143,56,97,107,65,137,72,91,64,104,77,101,110,61,122,80,110,67,106,80,106,97,85,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
CTU_9_147.32.84.165,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,146,124,95,116,147,155,175,149,139,149,150,152,142,151,142,144,141,151,142,133,148,118,134,131,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
CTU_9_147.32.84.191,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,142,119,97,101,144,146,140,136,144,147,141,149,134,146,138,137,145,146,142,157,152,160,142,146,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
CTU_9_147.32.84.192,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,146,134,120,119,181,179,178,171,176,182,172,162,162,152,155,171,162,155,184,156,162,163,157,160,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


In [None]:
# Separate X from Y
# X: every column except 'Label'; Y: a one-column DataFrame holding 'Label'.
X = df_final.drop(columns='Label')
Y = df_final[['Label']]

In [None]:
# # Separate X from Y
# X = XY.loc[ : , XY.columns != 'Label']
# Y = XY.loc[ : , XY.columns == 'Label']

In [None]:
X.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1633 entries, CTU_1_147.32.86.194 to CTU_13_147.32.85.114
Columns: 9216 entries, 0 to 9215
dtypes: int64(9216)
memory usage: 114.8+ MB


In [None]:
Y.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1633 entries, CTU_1_147.32.86.194 to CTU_13_147.32.85.114
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Label   1633 non-null   int64
dtypes: int64(1)
memory usage: 25.5+ KB


In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the feature matrix, then print the fitted estimator.
# NOTE(review): the scaler is fitted on all rows of X; if a train/test split happens
# downstream, confirm that fitting before the split is intended.
scaler = StandardScaler()
scaler.fit(X)
print(scaler)
# print(scaler.mean_)

StandardScaler()


In [None]:
# transform() returns a plain array, so the row index and the column labels of X are
# re-attached explicitly; without columns= the labels would be replaced by 0..N-1.
scaled_features = scaler.transform(X)
X_scaled = pd.DataFrame(scaled_features, index=X.index, columns=X.columns)

In [None]:
# COMPARE X TO X_SCALED
X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,9176,9177,9178,9179,9180,9181,9182,9183,9184,9185,9186,9187,9188,9189,9190,9191,9192,9193,9194,9195,9196,9197,9198,9199,9200,9201,9202,9203,9204,9205,9206,9207,9208,9209,9210,9211,9212,9213,9214,9215
CTU_1_147.32.86.194,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,91,114,115,126,111,95,92,74,67,95,88,91,94,94,95,102,116,125,113,100,111,126,116,97,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
CTU_1_147.32.80.13,0,51,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
CTU_1_147.32.84.229,0,67,0,0,0,0,232,0,0,0,0,0,0,0,0,0,7,0,0,0,9,32,35,31,33,31,41,45,18,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
CTU_1_147.32.84.59,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,475,586,531,571,544,544,574,595,562,601,590,568,520,528,499,469,484,525,574,577,566,536,584,555,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
CTU_1_147.32.84.138,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CTU_13_147.32.85.30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
CTU_13_147.32.86.151,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
CTU_13_147.32.86.208,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
CTU_13_147.32.86.160,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
X_scaled

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,9176,9177,9178,9179,9180,9181,9182,9183,9184,9185,9186,9187,9188,9189,9190,9191,9192,9193,9194,9195,9196,9197,9198,9199,9200,9201,9202,9203,9204,9205,9206,9207,9208,9209,9210,9211,9212,9213,9214,9215
CTU_1_147.32.86.194,-0.058624,-0.100705,-0.049368,0.0,0.0,0.0,-0.098425,0.0,-0.076405,-0.036508,0.0,0.0,0.0,0.0,0.0,-0.06182,0.936708,1.132092,1.141447,1.274905,1.082332,0.904593,0.872823,0.678147,0.604870,0.913365,0.843740,0.870709,0.906267,0.911181,0.920305,0.993870,1.137912,1.250441,1.132501,0.978919,1.100635,1.265004,1.137059,0.937220,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CTU_1_147.32.80.13,-0.058624,3.545808,-0.049368,0.0,0.0,0.0,-0.098425,0.0,-0.076405,-0.036508,0.0,0.0,0.0,0.0,0.0,-0.06182,-0.126933,-0.126169,-0.122829,-0.123713,-0.128660,-0.128513,-0.128203,-0.126850,-0.126265,-0.124419,-0.126491,-0.123154,-0.122240,-0.120036,-0.121215,-0.122761,-0.127015,-0.122434,-0.121530,-0.118242,-0.117229,-0.117996,-0.116540,-0.114224,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CTU_1_147.32.84.229,-0.058624,4.689812,-0.049368,0.0,0.0,0.0,16.874261,0.0,-0.076405,-0.036508,0.0,0.0,0.0,0.0,0.0,-0.06182,-0.045114,-0.126169,-0.122829,-0.123713,-0.030471,0.219481,0.252622,0.210379,0.233846,0.214226,0.325548,0.368317,0.074708,-0.120036,-0.121215,-0.122761,-0.127015,-0.122434,-0.121530,-0.118242,-0.117229,-0.117996,-0.116540,-0.114224,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CTU_1_147.32.84.59,-0.058624,-0.100705,-0.049368,0.0,0.0,0.0,-0.098425,0.0,-0.076405,-0.036508,0.0,0.0,0.0,0.0,0.0,-0.06182,5.425040,6.341736,5.714829,6.214468,5.806289,5.787381,6.117331,6.345760,6.006541,6.440929,6.378468,6.080297,5.567371,5.672330,5.349503,5.011548,5.150783,5.643642,6.248505,6.212378,6.092781,5.765242,6.194682,5.901769,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CTU_1_147.32.84.138,-0.058624,-0.100705,-0.049368,0.0,0.0,0.0,-0.098425,0.0,-0.076405,-0.036508,0.0,0.0,0.0,0.0,0.0,-0.06182,-0.126933,-0.126169,-0.122829,-0.123713,-0.128660,-0.128513,-0.128203,-0.126850,-0.126265,-0.124419,-0.126491,-0.123154,-0.122240,-0.120036,-0.121215,-0.122761,-0.127015,-0.122434,-0.121530,-0.118242,-0.117229,-0.117996,-0.116540,-0.114224,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CTU_13_147.32.85.30,-0.058624,-0.100705,-0.049368,0.0,0.0,0.0,-0.098425,0.0,-0.076405,-0.036508,0.0,0.0,0.0,0.0,0.0,-0.06182,-0.126933,-0.126169,-0.122829,-0.123713,-0.128660,-0.128513,-0.128203,-0.126850,-0.126265,-0.124419,-0.126491,-0.123154,-0.122240,-0.120036,-0.121215,-0.122761,-0.127015,-0.122434,-0.121530,-0.118242,-0.117229,-0.117996,-0.116540,-0.114224,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CTU_13_147.32.86.151,-0.058624,-0.100705,-0.049368,0.0,0.0,0.0,-0.098425,0.0,-0.076405,-0.036508,0.0,0.0,0.0,0.0,0.0,-0.06182,-0.126933,-0.126169,-0.122829,-0.123713,-0.128660,-0.128513,-0.128203,-0.126850,-0.126265,-0.124419,-0.126491,-0.123154,-0.122240,-0.120036,-0.121215,-0.122761,-0.127015,-0.122434,-0.121530,-0.118242,-0.117229,-0.117996,-0.116540,-0.114224,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CTU_13_147.32.86.208,-0.058624,-0.100705,-0.049368,0.0,0.0,0.0,-0.098425,0.0,-0.076405,-0.036508,0.0,0.0,0.0,0.0,0.0,-0.06182,-0.126933,-0.126169,-0.122829,-0.123713,-0.128660,-0.128513,-0.128203,-0.126850,-0.126265,-0.124419,-0.126491,-0.123154,-0.122240,-0.120036,-0.121215,-0.122761,-0.127015,-0.122434,-0.121530,-0.118242,-0.117229,-0.117996,-0.116540,-0.114224,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CTU_13_147.32.86.160,-0.058624,-0.100705,-0.049368,0.0,0.0,0.0,-0.098425,0.0,-0.076405,-0.036508,0.0,0.0,0.0,0.0,0.0,-0.06182,-0.126933,-0.126169,-0.122829,-0.123713,-0.128660,-0.128513,-0.128203,-0.126850,-0.126265,-0.124419,-0.126491,-0.123154,-0.122240,-0.120036,-0.121215,-0.122761,-0.127015,-0.122434,-0.121530,-0.118242,-0.117229,-0.117996,-0.116540,-0.114224,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Merge X_scaled with Y
# Both frames carry the same row index, so Y's 'Label' column is attached row by row.
XY_scaled = X_scaled.join(Y)

In [None]:
# COMPARE XY TO XY_SCALED
df_final[df_final['Label']==1]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,9177,9178,9179,9180,9181,9182,9183,9184,9185,9186,9187,9188,9189,9190,9191,9192,9193,9194,9195,9196,9197,9198,9199,9200,9201,9202,9203,9204,9205,9206,9207,9208,9209,9210,9211,9212,9213,9214,9215,Label
CTU_1_147.32.84.165,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,295,238,179,172,232,223,181,198,203,193,171,188,185,166,185,178,195,188,210,183,167,161,188,208,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
CTU_2_147.32.84.165,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,347,340,358,297,307,353,390,370,366,339,353,364,372,357,379,395,370,338,357,383,351,369,359,360,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
CTU_3_147.32.84.165,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,463,499,511,512,508,512,508,513,512,511,511,510,511,511,509,511,513,469,459,444,448,444,447,444,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
CTU_4_147.32.84.165,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,70,10,26,23,30,26,14,16,36,10,22,26,8,29,28,0,22,11,11,15,21,8,12,22,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
CTU_5_147.32.84.165,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,48,7,1,14,49,44,48,47,48,47,46,50,48,51,50,56,64,64,63,4,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
CTU_6_147.32.84.165,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,85,81,62,109,54,88,66,48,105,43,89,42,66,88,67,75,105,87,83,70,58,171,35,65,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
CTU_8_147.32.84.165,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,94,143,56,97,107,65,137,72,91,64,104,77,101,110,61,122,80,110,67,106,80,106,97,85,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
CTU_9_147.32.84.165,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,146,124,95,116,147,155,175,149,139,149,150,152,142,151,142,144,141,151,142,133,148,118,134,131,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
CTU_9_147.32.84.191,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,142,119,97,101,144,146,140,136,144,147,141,149,134,146,138,137,145,146,142,157,152,160,142,146,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
CTU_9_147.32.84.192,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,146,134,120,119,181,179,178,171,176,182,172,162,162,152,155,171,162,155,184,156,162,163,157,160,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


In [None]:
XY_scaled[XY_scaled['Label']==1]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,9177,9178,9179,9180,9181,9182,9183,9184,9185,9186,9187,9188,9189,9190,9191,9192,9193,9194,9195,9196,9197,9198,9199,9200,9201,9202,9203,9204,9205,9206,9207,9208,9209,9210,9211,9212,9213,9214,9215,Label
CTU_1_147.32.84.165,-0.058624,-0.100705,-0.049368,0.0,0.0,0.0,-0.098425,0.0,-0.076405,-0.036508,0.0,0.0,0.0,0.0,0.0,-0.06182,3.321134,2.500727,1.845044,1.785512,2.402421,2.296568,1.841207,2.027061,2.088966,1.983921,1.758845,1.930101,1.901949,1.701049,1.907007,1.825868,1.999371,1.94237,2.20897,1.889563,1.715053,1.649171,1.915155,2.140418,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
CTU_2_147.32.84.165,-0.058624,-0.100705,-0.049368,0.0,0.0,0.0,-0.098425,0.0,-0.076405,-0.036508,0.0,0.0,0.0,0.0,0.0,-0.06182,3.928929,3.62654,3.812918,3.173029,3.220659,3.710293,4.115278,3.898135,3.867698,3.578831,3.765459,3.852297,3.94802,3.796393,4.0339,4.201444,3.907665,3.589821,3.840321,4.083886,3.733855,3.932219,3.763132,3.788042,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
CTU_3_147.32.84.165,-0.058624,-0.100705,-0.049368,0.0,0.0,0.0,-0.098425,0.0,-0.076405,-0.036508,0.0,0.0,0.0,0.0,0.0,-0.06182,5.284779,5.381484,5.494955,5.55956,5.413535,5.439387,5.399203,5.453737,5.460918,5.457765,5.507465,5.446846,5.468897,5.485834,5.459137,5.471337,5.467015,5.028594,4.972278,4.753154,4.798114,4.755433,4.714138,4.698571,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
CTU_4_147.32.84.165,-0.058624,-0.100705,-0.049368,0.0,0.0,0.0,-0.098425,0.0,-0.076405,-0.036508,0.0,0.0,0.0,0.0,0.0,-0.06182,0.691253,-0.015796,0.163007,0.131591,0.198635,0.154232,0.024127,0.047204,0.266584,-0.015179,0.116067,0.160807,-0.034707,0.198105,0.18576,-0.122761,0.112885,-0.001621,0.000544,0.046332,0.113178,-0.030186,0.013143,0.124247,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
CTU_5_147.32.84.165,-0.058624,-0.100705,-0.049368,0.0,0.0,0.0,-0.098425,0.0,-0.076405,-0.036508,0.0,0.0,0.0,0.0,0.0,-0.06182,0.434109,-0.048908,-0.111836,0.031689,0.405922,0.349978,0.394071,0.384432,0.397533,0.389011,0.380675,0.422925,0.402955,0.439454,0.426953,0.490291,0.570876,0.580478,0.57762,-0.074356,-0.117229,-0.117996,-0.11654,-0.114224,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
CTU_6_147.32.84.165,-0.058624,-0.100705,-0.049368,0.0,0.0,0.0,-0.098425,0.0,-0.076405,-0.036508,0.0,0.0,0.0,0.0,0.0,-0.06182,0.866578,0.767858,0.558781,1.086203,0.460471,0.82847,0.589924,0.395311,1.019544,0.345315,0.854765,0.335552,0.599903,0.845358,0.613331,0.698291,1.017962,0.833087,0.799572,0.649771,0.519132,1.758933,0.261701,0.590351,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
CTU_8_147.32.84.165,-0.058624,-0.100705,-0.049368,0.0,0.0,0.0,-0.098425,0.0,-0.076405,-0.036508,0.0,0.0,0.0,0.0,0.0,-0.06182,0.971773,1.452176,0.492818,0.953001,1.038692,0.578349,1.362456,0.656391,0.866769,0.57472,1.020146,0.717807,0.982858,1.086707,0.54755,1.212817,0.745348,1.085696,0.62201,1.044749,0.760511,1.04548,0.931728,0.807144,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
CTU_9_147.32.84.165,-0.058624,-0.100705,-0.049368,0.0,0.0,0.0,-0.098425,0.0,-0.076405,-0.036508,0.0,0.0,0.0,0.0,0.0,-0.06182,1.579568,1.242466,0.921573,1.163904,1.475086,1.557082,1.775923,1.494022,1.390568,1.503263,1.527312,1.536925,1.431462,1.536493,1.435583,1.453658,1.410525,1.535999,1.454332,1.340982,1.50659,1.177195,1.331583,1.305767,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
CTU_9_147.32.84.191,-0.058624,-0.100705,-0.049368,0.0,0.0,0.0,-0.098425,0.0,-0.076405,-0.036508,0.0,0.0,0.0,0.0,0.0,-0.06182,1.532815,1.187279,0.94356,0.997402,1.442356,1.459208,1.395098,1.352604,1.44513,1.481415,1.428084,1.50416,1.343929,1.481641,1.391729,1.377027,1.454144,1.481084,1.454332,1.604301,1.550477,1.638195,1.418038,1.468361,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
CTU_9_147.32.84.192,-0.058624,-0.100705,-0.049368,0.0,0.0,0.0,-0.098425,0.0,-0.076405,-0.036508,0.0,0.0,0.0,0.0,0.0,-0.06182,1.579568,1.35284,1.196416,1.197204,1.84602,1.818077,1.808565,1.733346,1.794329,1.863756,1.76987,1.64614,1.650293,1.547463,1.578106,1.749237,1.639521,1.579931,1.920432,1.593329,1.660194,1.671123,1.580141,1.620116,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1


In [None]:
# # COMPARE XY TO XY_SCALED
# XY[XY['Label']==1]

In [None]:
# XY_scaled[XY_scaled['Label']==1]

# 9.

**Save files for ML usage**

In [None]:
# scaled - for ML use
# The file names carry the bin count and the flow threshold. They are derived from
# no_of_bins and THRESHOLD so the names cannot drift from the parameters actually used
# (for 1024 bins and threshold 150 the paths are the same as the former literals).
output_dir = '/content/drive/MyDrive/Inż/to_classify/scaled/'
run_tag = str(no_of_bins) + '_' + str(THRESHOLD)
X_scaled.to_csv(output_dir + 'X_scaled_' + run_tag + '.csv')
Y.to_csv(output_dir + 'Y_' + run_tag + '.csv')
XY_scaled.to_csv(output_dir + 'XY_scaled_' + run_tag + '.csv')

# scaled - for ML use
# X_scaled.to_csv('/content/drive/MyDrive/Inż/to_classify/X_scaled.csv')
# Y.to_csv('/content/drive/MyDrive/Inż/to_classify/Y.csv')
# XY_scaled.to_csv('/content/drive/MyDrive/Inż/to_classify/XY_scaled.csv')

In [None]:
# # not scaled - for comparison
# X.to_csv('/content/drive/MyDrive/Inż/to_classify/X.csv')
# df_final.to_csv('/content/drive/MyDrive/Inż/to_classify/XY.csv')