In [9]:
import os
import sys

# Put the parent directory on the import path so the local slim_gsgp package resolves.
sys.path.insert(0, os.path.abspath(".."))

import numpy as np
import pandas as pd
import torch
from imblearn.over_sampling import SMOTE, SMOTENC
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

from slim_gsgp.datasets.data_loader import load_pandas_df
from slim_gsgp.main_gp import gp
from slim_gsgp.main_gsgp import gsgp
from slim_gsgp.main_slim import slim
from slim_gsgp.utils.utils import train_test_split

In [10]:
def print_scores(y_test, predictions):
    """Print ROC AUC, F1 and accuracy of ``predictions`` against ``y_test``.

    All three metrics receive the very same ``predictions`` object, so when the
    caller passes hard 0/1 labels the ROC score is computed on those labels too.

    Args:
        y_test: Ground-truth labels.
        predictions: Predicted labels, aligned with ``y_test``.

    Returns:
        None. The three scores are written to stdout, one per line.
    """
    scorers = (
        ("Roc Score:", roc_auc_score),
        ("F1 Score:", f1_score),
        ("Accuracy Score:", accuracy_score),
    )
    for label, scorer in scorers:
        print(label, scorer(y_test, predictions))

In [11]:
# Load the raw BankChurners export, drop the CLIENTNUM column and the last two
# columns, then show the number of missing values per remaining column.
# Built as one non-mutating chain so re-running the cell always starts from the
# CSV; CLIENTNUM is listed once (the label was previously duplicated).
df = (
    pd.read_csv('data/BankChurners.csv')
    .drop(columns=['CLIENTNUM'])
    .iloc[:, :-2]
)
df.isna().sum()

Attrition_Flag              0
Customer_Age                0
Gender                      0
Dependent_count             0
Education_Level             0
Marital_Status              0
Income_Category             0
Card_Category               0
Months_on_book              0
Total_Relationship_Count    0
Months_Inactive_12_mon      0
Contacts_Count_12_mon       0
Credit_Limit                0
Total_Revolving_Bal         0
Avg_Open_To_Buy             0
Total_Amt_Chng_Q4_Q1        0
Total_Trans_Amt             0
Total_Trans_Ct              0
Total_Ct_Chng_Q4_Q1         0
Avg_Utilization_Ratio       0
dtype: int64

In [12]:
# Disabled preprocessing for the BankChurners frame (kept for reference only):
#   1. encode Attrition_Flag as 0 ('Existing Customer') / 1 ('Attrited Customer'),
#   2. keep only the numeric columns,
#   3. move the first column to the last position.
# NOTE(review): nothing below uses this; delete the cell or re-enable it deliberately.
# df['Attrition_Flag'] = df['Attrition_Flag'].map({'Existing Customer': 0, 'Attrited Customer': 1})
# df = df.select_dtypes(include=np.number)
# df = df[df.columns[1:].tolist() + [df.columns[0]]]
# df

In [13]:
# Load the prepared "blood" dataset. This rebinds `df`, so the BankChurners frame
# assigned to the same name earlier in the notebook is discarded from here on.
df = pd.read_csv('data_prepared/blood.csv')

In [14]:
# Split the frame into features X and target y using slim_gsgp's loader.
# NOTE(review): presumably the last column is taken as the target and both parts
# come back as torch tensors — confirm against load_pandas_df.
X, y = load_pandas_df(df, X_y=True)

In [15]:
# Two-stage split: first carve a hold-out part off the full data (p_test=0.2),
# then split that hold-out again (p_test=0.2) into validation and final test sets.
# NOTE(review): the final test set is only a p_test=0.2 share of the hold-out,
# i.e. a small slice of the data — confirm that this is intended.
X_train, X_holdout, y_train, y_holdout = train_test_split(X, y, p_test=0.2)
X_val, X_test, y_val, y_test = train_test_split(X_holdout, y_holdout, p_test=0.2)

In [None]:
# SLIM run (version 'SLIM+SIG2'), maximising the fitness; the validation split is
# handed to the library as its X_test/y_test.
# NOTE(review): the variable and the log file are named "f1", but the fitness
# function passed here is 'accuracy' — confirm which of the two is intended.
slim_f1 = slim(
    X_train=X_train,
    y_train=y_train,
    X_test=X_val,
    y_test=y_val,
    slim_version='SLIM+SIG2',
    fitness_function='accuracy',
    minimization=False,
    pop_size=200,
    n_iter=1000,
    ms_lower=0,
    ms_upper=0.3,
    p_inflate=0.5,
    seed=1,
    log_path='log/slim_f1.csv',
)

# Raw model output -> sigmoid -> rounded to hard 0/1 labels, scored on the test split.
predictions = torch.round(torch.sigmoid(slim_f1.predict(X_test)))
print_scores(y_test, predictions)



Verbose Reporter
-----------------------------------------------------------------------------------------------------------------------------------------
|         Dataset         |  Generation  |     Train Fitness     |       Test Fitness       |        Timing          |      Nodes       |
-----------------------------------------------------------------------------------------------------------------------------------------
|     dataset_1           |       0      |   0.7612687945365906  |   0.75                   |   0.09656405448913574  |      7           |
|     dataset_1           |       1      |   0.7612687945365906  |   0.75                   |   0.1450967788696289   |      7           |
|     dataset_1           |       2      |   0.7612687945365906  |   0.75                   |   0.1792125701904297   |      7           |
|     dataset_1           |       3      |   0.7629382014274597  |   0.7583333253860474     |   0.1441800594329834   |      20          |
|     dataset_1  

In [None]:
# Oversample the training split with SMOTE (fixed random_state) and convert both
# resampled outputs to float tensors for the runs that train on *_over.
smote = SMOTE(random_state=42)
X_train_over, y_train_over = (
    torch.tensor(resampled).float()
    for resampled in smote.fit_resample(X_train, y_train)
)

In [None]:
# SLIM run (version 'SLIM+SIG2') minimising 'sigmoid_rmse'; the validation split is
# handed to the library as its X_test/y_test.
# NOTE(review): this run trains on X_train, whereas the gsgp/gp 'sigmoid_rmse' runs
# train on the SMOTE-oversampled X_train_over, and no seed is passed here unlike
# the other runs — confirm both are intentional.
slim_rmse = slim(
    X_train=X_train,
    y_train=y_train,
    X_test=X_val,
    y_test=y_val,
    slim_version='SLIM+SIG2',
    fitness_function='sigmoid_rmse',
    minimization=True,
    pop_size=200,
    n_iter=1000,
    ms_lower=0,
    ms_upper=0.5,
    p_inflate=0.5,
    log_path='log/slim_rmse.csv',
)

# Raw model output -> sigmoid -> rounded to hard 0/1 labels, scored on the test split.
predictions = torch.round(torch.sigmoid(slim_rmse.predict(X_test)))
print_scores(y_test, predictions)



Verbose Reporter
-----------------------------------------------------------------------------------------------------------------------------------------
|         Dataset         |  Generation  |     Train Fitness     |       Test Fitness       |        Timing          |      Nodes       |
-----------------------------------------------------------------------------------------------------------------------------------------
|     dataset_1           |       0      |   0.43729355931282043 |   0.4102255702018738     |   0.08507895469665527  |      9           |
|     dataset_1           |       1      |   0.43384045362472534 |   0.4082792103290558     |   0.10199856758117676  |      24          |
|     dataset_1           |       2      |   0.43384045362472534 |   0.4082792103290558     |   0.10250282287597656  |      24          |
|     dataset_1           |       3      |   0.4325994551181793  |   0.40630215406417847    |   0.10356354713439941  |      57          |
|     dataset_1  

In [None]:
# GSGP run maximising 'f1_score' on the plain training split; the validation
# split is handed to the library as its X_test/y_test.
gsgp_f1 = gsgp(
    X_train=X_train,
    y_train=y_train,
    X_test=X_val,
    y_test=y_val,
    fitness_function='f1_score',
    minimization=False,
    pop_size=500,
    n_iter=500,
    reconstruct=True,
    seed=0,
    log_path='log/gsgp_f1.csv',
)

# Raw model output -> sigmoid -> rounded to hard 0/1 labels, scored on the test split.
predictions = torch.round(torch.sigmoid(gsgp_f1.predict(X_test)))
print_scores(y_test, predictions)



Verbose Reporter
-----------------------------------------------------------------------------------------------------------------------------------------
|         Dataset         |  Generation  |     Train Fitness     |       Test Fitness       |        Timing          |      Nodes       |
-----------------------------------------------------------------------------------------------------------------------------------------
|     dataset_1           |       0      |   0.5467127561569214  |   0.4146341383457184     |   0.5616695880889893   |      127         |
|     dataset_1           |       1      |   0.559999942779541   |   0.37974685430526733    |   0.28173089027404785  |      11          |
|     dataset_1           |       2      |   0.559999942779541   |   0.37974685430526733    |   0.2803232669830322   |      11          |
|     dataset_1           |       3      |   0.559999942779541   |   0.37974685430526733    |   0.2505354881286621   |      11          |
|     dataset_1  

In [None]:
# GSGP run minimising 'sigmoid_rmse', trained on the SMOTE-oversampled training
# data; the validation split is handed to the library as its X_test/y_test.
gsgp_rmse = gsgp(
    X_train=X_train_over,
    y_train=y_train_over,
    X_test=X_val,
    y_test=y_val,
    fitness_function='sigmoid_rmse',
    minimization=True,
    pop_size=500,
    n_iter=500,
    reconstruct=True,
    seed=0,
    log_path='log/gsgp_rmse.csv',
)

# Raw model output -> sigmoid -> rounded to hard 0/1 labels, scored on the test split.
predictions = torch.round(torch.sigmoid(gsgp_rmse.predict(X_test)))
print_scores(y_test, predictions)

Verbose Reporter
-----------------------------------------------------------------------------------------------------------------------------------------
|         Dataset         |  Generation  |     Train Fitness     |       Test Fitness       |        Timing          |      Nodes       |
-----------------------------------------------------------------------------------------------------------------------------------------
|     dataset_1           |       0      |   0.478540301322937   |   0.5302056670188904     |   0.9723749160766602   |      5           |
|     dataset_1           |       1      |   0.4644612669944763  |   0.4699842631816864     |   0.625159740447998    |      11          |
|     dataset_1           |       2      |   0.46438002586364746 |   0.49265673756599426    |   0.6738924980163574   |      19          |
|     dataset_1           |       3      |   0.4607837200164795  |   0.4389142394065857     |   0.6179013252258301   |      23          |
|     dataset_1  

In [None]:
# Standard GP run maximising 'f1_score' on the plain training split; the
# validation split is handed to the library as its X_test/y_test.
gp_f1 = gp(
    X_train=X_train,
    y_train=y_train,
    X_test=X_val,
    y_test=y_val,
    fitness_function='f1_score',
    minimization=False,
    pop_size=200,
    n_iter=100,
    p_xo=0.2,
    seed=0,
)

# Raw model output -> sigmoid -> rounded to hard 0/1 labels, scored on the test split.
predictions = torch.round(torch.sigmoid(gp_f1.predict(X_test)))
print_scores(y_test, predictions)

Verbose Reporter
-----------------------------------------------------------------------------------------------------------------------------------------
|         Dataset         |  Generation  |     Train Fitness     |       Test Fitness       |        Timing          |      Nodes       |
-----------------------------------------------------------------------------------------------------------------------------------------
|     dataset_1           |       0      |   0.44525253772735596 |   0.45308926701545715    |   0.18932867050170898  |      5           |
|     dataset_1           |       1      |   0.44525253772735596 |   0.45308926701545715    |   0.2543361186981201   |      5           |
|     dataset_1           |       2      |   0.44525253772735596 |   0.45308926701545715    |   0.25008082389831543  |      5           |
|     dataset_1           |       3      |   0.44525253772735596 |   0.45308926701545715    |   0.31145787239074707  |      5           |
|     dataset_1  

In [None]:
# Standard GP run minimising 'sigmoid_rmse', trained on the SMOTE-oversampled
# training data; the validation split is handed to the library as its X_test/y_test.
gp_rmse = gp(
    X_train=X_train_over,
    y_train=y_train_over,
    X_test=X_val,
    y_test=y_val,
    fitness_function='sigmoid_rmse',
    minimization=True,
    pop_size=200,
    n_iter=100,
    seed=0,
)

# Raw model output -> sigmoid -> rounded to hard 0/1 labels, scored on the test split.
predictions = torch.round(torch.sigmoid(gp_rmse.predict(X_test)))
print_scores(y_test, predictions)



Verbose Reporter
-----------------------------------------------------------------------------------------------------------------------------------------
|         Dataset         |  Generation  |     Train Fitness     |       Test Fitness       |        Timing          |      Nodes       |
-----------------------------------------------------------------------------------------------------------------------------------------
|     dataset_1           |       0      |   0.47669273614883423 |   0.5389356017112732     |   0.18155336380004883  |      5           |
|     dataset_1           |       1      |   0.47669273614883423 |   0.5389356017112732     |   0.1466360092163086   |      5           |
|     dataset_1           |       2      |   0.47613951563835144 |   0.5272106528282166     |   0.14223527908325195  |      5           |
|     dataset_1           |       3      |   0.4737030863761902  |   0.5282784700393677     |   0.1464700698852539   |      5           |
|     dataset_1  

In [None]:
# Baseline classifiers trained on the plain training split and scored on the
# test split with the same metrics as the GP-family runs.
# random_state is fixed on both estimators so that re-running the cell
# reproduces the same scores (both models are randomised by default).
# RandomForestClassifier / GradientBoostingClassifier are imported in the
# notebook's top import cell.
rf = RandomForestClassifier(max_depth=10, random_state=0)
rf.fit(X_train, y_train)
predictions = rf.predict(X_test)
print_scores(y_test, predictions)

gb = GradientBoostingClassifier(max_depth=2, random_state=0)
gb.fit(X_train, y_train)
predictions = gb.predict(X_test)
print_scores(y_test, predictions)

Roc Score: 0.5826802507836991
F1 Score: 0.35294117647058826
Accuracy Score: 0.7046979865771812
Roc Score: 0.5890804597701149
F1 Score: 0.3548387096774194
Accuracy Score: 0.7315436241610739


In [None]:
# Standard GP run with a deeper tree limit (max_depth=15), logged under the
# dataset name 'xyz'. No fitness_function or minimization flag is passed, so the
# library defaults apply; the validation split is handed over as X_test/y_test.
final_tree = gp(
    X_train=X_train,
    y_train=y_train,
    X_test=X_val,
    y_test=y_val,
    dataset_name='xyz',
    max_depth=15,
    pop_size=200,
    n_iter=100,
    seed=2444,
    log_level=1,
    log_path='log/test.csv',
)

Verbose Reporter
-----------------------------------------------------------------------------------------------------------------------------------------
|         Dataset         |  Generation  |     Train Fitness     |       Test Fitness       |        Timing          |      Nodes       |
-----------------------------------------------------------------------------------------------------------------------------------------
|     xyz                 |       0      |   0.3630543351173401  |   0.3774508535861969     |   0.10421562194824219  |      5           |
|     xyz                 |       1      |   0.35974404215812683 |   0.3771507441997528     |   0.10279154777526855  |      15          |
|     xyz                 |       2      |   0.35974404215812683 |   0.3771507441997528     |   0.10576272010803223  |      15          |
|     xyz                 |       3      |   0.35974404215812683 |   0.3771507441997528     |   0.1434328556060791   |      15          |
|     xyz        

Unnamed: 0,AF3,F7,F3,FC5,T7,P7,O1,O2,P8,T8,FC6,F4,F8,AF4,eyeDetection
0,4329.23,4009.23,4289.23,4148.21,4350.26,4586.15,4096.92,4641.03,4222.05,4238.46,4211.28,4280.51,4635.90,4393.85,b'0'
1,4324.62,4004.62,4293.85,4148.72,4342.05,4586.67,4097.44,4638.97,4210.77,4226.67,4207.69,4279.49,4632.82,4384.10,b'0'
2,4327.69,4006.67,4295.38,4156.41,4336.92,4583.59,4096.92,4630.26,4207.69,4222.05,4206.67,4282.05,4628.72,4389.23,b'0'
3,4328.72,4011.79,4296.41,4155.90,4343.59,4582.56,4097.44,4630.77,4217.44,4235.38,4210.77,4287.69,4632.31,4396.41,b'0'
4,4326.15,4011.79,4292.31,4151.28,4347.69,4586.67,4095.90,4627.69,4210.77,4244.10,4212.82,4288.21,4632.82,4398.46,b'0'
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14975,4281.03,3990.26,4245.64,4116.92,4333.85,4614.36,4074.87,4625.64,4203.08,4221.54,4171.28,4269.23,4593.33,4340.51,b'1'
14976,4276.92,3991.79,4245.13,4110.77,4332.82,4615.38,4073.33,4621.54,4194.36,4217.44,4162.56,4259.49,4590.26,4333.33,b'1'
14977,4277.44,3990.77,4246.67,4113.85,4333.33,4615.38,4072.82,4623.59,4193.33,4212.82,4160.51,4257.95,4591.79,4339.49,b'1'
14978,4284.62,3991.79,4251.28,4122.05,4334.36,4616.41,4080.51,4628.72,4200.00,4220.00,4165.64,4267.18,4596.41,4350.77,b'1'


Unnamed: 0,season,age,child_diseases,accident,surgical_intervention,high_fevers,alcohol,smoking,hr_sittings,diagnosis
0,-0.33,0.69,0,1,1,0,0.8,0,0.88,N
1,-0.33,0.94,1,0,1,0,0.8,1,0.31,O
2,-0.33,0.5,1,0,0,0,1.0,-1,0.5,N
3,-0.33,0.75,0,1,1,0,1.0,-1,0.38,N
4,-0.33,0.67,1,1,0,0,0.8,-1,0.5,O
5,-0.33,0.67,1,0,1,0,0.8,0,0.5,N
6,-0.33,0.67,0,0,0,-1,0.8,-1,0.44,N
7,-0.33,1.0,1,1,1,0,0.6,-1,0.38,N
8,1.0,0.64,0,0,1,0,0.8,-1,0.25,N


Unnamed: 0,class,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V961,V962,V963,V964,V965,V966,V967,V968,V969,V970
0,b'0',0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,b'1',0.0,222.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,b'0',236.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,252.0,...,0.0,0.0,0.0,0.0,0.0,0.0,209.0,0.0,0.0,0.0
3,b'0',0.0,0.0,0.0,211.0,0.0,0.0,0.0,0.0,0.0,...,87.0,0.0,0.0,0.0,0.0,0.0,132.0,0.0,0.0,0.0
4,b'1',0.0,0.0,196.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,185.0,0.0,0.0,0.0,253.0,0.0,0.0,247.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3148,b'0',69.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,26.0,0.0,0.0,0.0,0.0,0.0,196.0,0.0,137.0,0.0
3149,b'0',0.0,0.0,252.0,0.0,0.0,0.0,0.0,0.0,0.0,...,253.0,0.0,0.0,0.0,0.0,0.0,252.0,0.0,0.0,252.0
3150,b'0',233.0,0.0,53.0,106.0,0.0,0.0,213.0,0.0,0.0,...,203.0,0.0,0.0,0.0,0.0,145.0,0.0,0.0,0.0,0.0
3151,b'0',0.0,0.0,220.0,0.0,0.0,0.0,0.0,0.0,254.0,...,0.0,0.0,0.0,0.0,0.0,0.0,254.0,0.0,0.0,0.0


Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V92,V93,V94,V95,V96,V97,V98,V99,V100,Class
0,39.02,36.49,38.20,38.85,39.38,39.74,37.02,39.53,38.81,38.79,...,36.62,36.92,38.80,38.52,38.07,36.73,39.46,37.50,39.10,b'0'
1,1.83,1.71,1.77,1.77,1.68,1.78,1.80,1.70,1.75,1.78,...,1.80,1.79,1.77,1.74,1.74,1.80,1.78,1.75,1.69,b'1'
2,68177.69,66138.42,72981.88,74304.33,67549.66,69367.34,69169.41,73268.61,74465.84,72503.37,...,73438.88,71053.35,71112.62,74916.48,72571.58,66348.97,71063.72,67404.27,74920.24,b'1'
3,44889.06,39191.86,40728.46,38576.36,45876.06,47034.00,46611.43,37668.32,40980.89,38466.15,...,42625.67,40684.20,46960.73,44546.80,45410.53,47139.44,43095.68,40888.34,39615.19,b'0'
4,5.70,5.40,5.28,5.38,5.27,5.61,6.00,5.38,5.34,5.87,...,5.17,5.67,5.60,5.94,5.73,5.22,5.30,5.73,5.91,b'0'
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1207,13.00,12.87,13.27,13.04,13.19,12.53,14.31,13.33,13.63,14.55,...,12.48,12.15,13.15,12.35,13.58,13.86,12.88,13.87,13.51,b'1'
1208,48.66,50.11,48.55,50.43,50.09,49.67,48.95,48.65,48.63,48.61,...,46.93,49.61,47.16,48.17,47.94,49.81,49.89,47.43,47.77,b'0'
1209,10160.65,9048.63,8994.94,9514.39,9814.74,10195.24,10031.47,10202.28,9152.99,9591.75,...,9068.11,9191.80,9275.04,9848.18,9074.17,9601.74,10366.24,8997.60,9305.77,b'1'
1210,34.81,35.07,34.98,32.37,34.16,34.03,33.31,32.48,35.63,32.48,...,32.76,35.03,32.89,31.91,33.85,35.28,32.49,32.83,34.82,b'1'


Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,Class
0,65.0,0.0,0.7,0.1,187.0,16.0,18.0,6.8,3.3,0.90,b'1'
1,62.0,1.0,10.9,5.5,699.0,64.0,100.0,7.5,3.2,0.74,b'1'
2,62.0,1.0,7.3,4.1,490.0,60.0,68.0,7.0,3.3,0.89,b'1'
3,58.0,1.0,1.0,0.4,182.0,14.0,20.0,6.8,3.4,1.00,b'1'
4,72.0,1.0,3.9,2.0,195.0,27.0,59.0,7.3,2.4,0.40,b'1'
...,...,...,...,...,...,...,...,...,...,...,...
578,60.0,1.0,0.5,0.1,500.0,20.0,34.0,5.9,1.6,0.37,b'2'
579,40.0,1.0,0.6,0.1,98.0,35.0,31.0,6.0,3.2,1.10,b'1'
580,52.0,1.0,0.8,0.2,245.0,48.0,49.0,6.4,3.2,1.00,b'1'
581,31.0,1.0,1.3,0.5,184.0,29.0,32.0,6.8,3.4,1.00,b'1'


Unnamed: 0,mcv,alkphos,sgpt,sgot,gammagt,drinks,selector
0,85.0,92.0,45.0,27.0,31.0,0.0,b'1'
1,85.0,64.0,59.0,32.0,23.0,0.0,b'2'
2,86.0,54.0,33.0,16.0,54.0,0.0,b'2'
3,91.0,78.0,34.0,24.0,36.0,0.0,b'2'
4,87.0,70.0,12.0,28.0,10.0,0.0,b'2'
...,...,...,...,...,...,...,...
340,99.0,75.0,26.0,24.0,41.0,12.0,b'1'
341,96.0,69.0,53.0,43.0,203.0,12.0,b'2'
342,98.0,77.0,55.0,35.0,89.0,15.0,b'1'
343,91.0,68.0,27.0,26.0,14.0,16.0,b'1'


Unnamed: 0,molecule_name,conformation_name,f1,f2,f3,f4,f5,f6,f7,f8,...,f158,f159,f160,f161,f162,f163,f164,f165,f166,class
0,0.0,0.0,42.0,-198.0,-109.0,-75.0,-117.0,11.0,23.0,-88.0,...,-74.0,-129.0,-120.0,-38.0,30.0,48.0,-37.0,6.0,30.0,b'1'
1,0.0,1.0,42.0,-191.0,-142.0,-65.0,-117.0,55.0,49.0,-170.0,...,-302.0,60.0,-120.0,-39.0,31.0,48.0,-37.0,5.0,30.0,b'1'
2,0.0,2.0,42.0,-191.0,-142.0,-75.0,-117.0,11.0,49.0,-161.0,...,-73.0,-127.0,-120.0,-38.0,30.0,48.0,-37.0,5.0,31.0,b'1'
3,0.0,3.0,42.0,-198.0,-110.0,-65.0,-117.0,55.0,23.0,-95.0,...,-302.0,60.0,-120.0,-39.0,30.0,48.0,-37.0,6.0,30.0,b'1'
4,1.0,4.0,42.0,-198.0,-102.0,-75.0,-117.0,10.0,24.0,-87.0,...,-73.0,-127.0,51.0,128.0,144.0,43.0,-30.0,14.0,26.0,b'1'
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
471,91.0,471.0,49.0,-199.0,-161.0,29.0,-95.0,-86.0,-48.0,2.0,...,-246.0,-209.0,33.0,152.0,134.0,47.0,-43.0,-15.0,-10.0,b'0'
472,91.0,472.0,38.0,-123.0,-139.0,30.0,-117.0,-88.0,214.0,-13.0,...,-226.0,-210.0,20.0,55.0,119.0,79.0,-28.0,4.0,74.0,b'0'
473,91.0,473.0,43.0,-102.0,-20.0,-101.0,-116.0,200.0,-166.0,66.0,...,32.0,136.0,-15.0,143.0,121.0,55.0,-37.0,-19.0,-36.0,b'0'
474,91.0,474.0,39.0,-58.0,27.0,31.0,-117.0,-92.0,85.0,21.0,...,-232.0,-206.0,13.0,45.0,116.0,79.0,-28.0,3.0,74.0,b'0'


Unnamed: 0,WSR0,WSR1,WSR2,WSR3,WSR4,WSR5,WSR6,WSR7,WSR8,WSR9,...,RH50,U50,V50,HT50,KI,TT,SLP,SLP_,Precp,Class
0,b'0.8',b'1.8',b'2.4',b'2.1',b'2',b'2.1',b'1.5',b'1.7',b'1.9',b'2.3',...,b'0.15',b'10.67',b'-1.56',b'5795',b'-12.1',b'17.9',b'10330',b'-55',b'0',0.0
1,b'2.8',b'3.2',b'3.3',b'2.7',b'3.3',b'3.2',b'2.9',b'2.8',b'3.1',b'3.4',...,b'0.48',b'8.39',b'3.84',b'5805',b'14.05',b'29',b'10275',b'-55',b'0',0.0
2,b'2.9',b'2.8',b'2.6',b'2.1',b'2.2',b'2.5',b'2.5',b'2.7',b'2.2',b'2.5',...,b'0.6',b'6.94',b'9.8',b'5790',b'17.9',b'41.3',b'10235',b'-40',b'0',0.0
3,b'4.7',b'3.8',b'3.7',b'3.8',b'2.9',b'3.1',b'2.8',b'2.5',b'2.4',b'3.1',...,b'0.49',b'8.73',b'10.54',b'5775',b'31.15',b'51.7',b'10195',b'-40',b'2.08',0.0
4,b'2.6',b'2.1',b'1.6',b'1.4',b'0.9',b'1.5',b'1.2',b'1.4',b'1.3',b'1.4',...,b'?',b'?',b'?',b'?',b'?',b'?',b'?',b'?',b'0.58',0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2531,b'0.3',b'0.4',b'0.5',b'0.5',b'0.2',b'0.3',b'0.4',b'0.4',b'1.3',b'2.2',...,b'0.07',b'7.93',b'-4.41',b'5800',b'-25.6',b'21.8',b'10295',b'65',b'0',0.0
2532,b'1',b'1.4',b'1.1',b'1.7',b'1.5',b'1.7',b'1.8',b'1.5',b'2.1',b'2.4',...,b'0.04',b'5.95',b'-1.14',b'5845',b'-19.4',b'19.1',b'10310',b'15',b'0',0.0
2533,b'0.8',b'0.8',b'1.2',b'0.9',b'0.4',b'0.6',b'0.8',b'1.1',b'1.5',b'1.5',...,b'0.06',b'7.8',b'-0.64',b'5845',b'-9.6',b'35.2',b'10275',b'-35',b'0',0.0
2534,b'1.3',b'0.9',b'1.5',b'1.2',b'1.6',b'1.8',b'1.1',b'1',b'1.9',b'2',...,b'0.25',b'7.72',b'-0.89',b'5845',b'-19.6',b'34.2',b'10245',b'-30',b'0.05',0.0


Unnamed: 0,loc,v(g),ev(g),iv(G),N,V,L,D,I,E,...,lOCode,lOComment,locCodeAndComment,lOBlank,uniq_Op,uniq_Opnd,total_Op,total_Opnd,branchCount,defects
0,1.1,1.4,1.4,1.4,1.3,1.30,1.30,1.30,1.30,1.30,...,2.0,2.0,2.0,2.0,1.2,1.2,1.2,1.2,1.4,b'false'
1,1.0,1.0,1.0,1.0,1.0,1.00,1.00,1.00,1.00,1.00,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,b'true'
2,91.0,9.0,3.0,2.0,318.0,2089.21,0.04,27.68,75.47,57833.24,...,80.0,44.0,11.0,31.0,29.0,66.0,192.0,126.0,17.0,b'true'
3,109.0,21.0,5.0,18.0,381.0,2547.56,0.04,28.37,89.79,72282.68,...,97.0,41.0,12.0,24.0,28.0,75.0,229.0,152.0,38.0,b'true'
4,505.0,106.0,41.0,82.0,2339.0,20696.93,0.01,75.93,272.58,1571506.88,...,457.0,71.0,48.0,49.0,64.0,397.0,1397.0,942.0,178.0,b'true'
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1104,6.0,4.0,4.0,1.0,26.0,96.21,0.08,13.33,7.22,1282.82,...,6.0,0.0,0.0,2.0,10.0,3.0,18.0,8.0,7.0,b'false'
1105,10.0,5.0,5.0,1.0,43.0,182.66,0.05,21.00,8.70,3835.88,...,10.0,0.0,0.0,1.0,14.0,5.0,28.0,15.0,9.0,b'false'
1106,5.0,3.0,3.0,1.0,17.0,62.91,0.21,4.80,13.11,301.96,...,5.0,0.0,0.0,0.0,8.0,5.0,11.0,6.0,5.0,b'false'
1107,18.0,8.0,5.0,5.0,111.0,613.12,0.04,22.92,26.75,14050.56,...,18.0,0.0,0.0,1.0,22.0,24.0,61.0,50.0,15.0,b'false'


Unnamed: 0,LOC_BLANK,BRANCH_COUNT,CALL_PAIRS,LOC_CODE_AND_COMMENT,LOC_COMMENTS,CONDITION_COUNT,CYCLOMATIC_COMPLEXITY,CYCLOMATIC_DENSITY,DECISION_COUNT,DECISION_DENSITY,...,NODE_COUNT,NORMALIZED_CYLOMATIC_COMPLEXITY,NUM_OPERANDS,NUM_OPERATORS,NUM_UNIQUE_OPERANDS,NUM_UNIQUE_OPERATORS,NUMBER_OF_LINES,PERCENT_COMMENTS,LOC_TOTAL,c
0,2.0,1.0,0.0,0.0,0.0,0.0,1.0,0.10,0.0,0.00,...,2.0,0.08,28.0,29.0,9.0,5.0,13.0,0.00,10.0,b'FALSE'
1,1.0,1.0,4.0,0.0,0.0,0.0,1.0,0.07,0.0,0.00,...,6.0,0.06,52.0,55.0,26.0,14.0,16.0,0.00,14.0,b'FALSE'
2,27.0,19.0,1.0,4.0,13.0,26.0,11.0,0.26,12.0,2.17,...,25.0,0.13,58.0,78.0,30.0,24.0,83.0,30.91,42.0,b'FALSE'
3,2.0,17.0,2.0,0.0,0.0,24.0,9.0,0.47,8.0,3.00,...,25.0,0.41,73.0,81.0,23.0,20.0,22.0,0.00,19.0,b'FALSE'
4,6.0,1.0,1.0,0.0,2.0,0.0,1.0,0.11,0.0,0.00,...,3.0,0.06,19.0,23.0,15.0,7.0,18.0,18.18,9.0,b'FALSE'
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1558,6.0,3.0,3.0,0.0,8.0,4.0,2.0,0.40,2.0,2.00,...,7.0,0.10,16.0,20.0,13.0,9.0,20.0,61.54,5.0,b'FALSE'
1559,3.0,5.0,3.0,0.0,0.0,6.0,3.0,0.33,2.0,3.00,...,9.0,0.23,20.0,32.0,10.0,14.0,13.0,0.00,9.0,b'FALSE'
1560,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.00,0.0,0.00,...,2.0,1.00,0.0,2.0,0.0,2.0,1.0,0.00,0.0,b'FALSE'
1561,0.0,7.0,0.0,0.0,0.0,10.0,4.0,0.36,4.0,2.50,...,9.0,0.33,19.0,23.0,13.0,16.0,12.0,0.00,11.0,b'FALSE'


NotImplementedError: String attributes not supported yet, sorry

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,Class
0,1.0,1.0,22.0,22.0,22.0,19.0,18.0,14.0,49.895756,17.775994,5.270920,0.771761,0.018632,0.006864,0.003923,0.003923,0.486903,0.100025,1.0,b'0'
1,1.0,1.0,24.0,24.0,22.0,18.0,16.0,13.0,57.709936,23.799994,3.325423,0.234185,0.003903,0.003903,0.003903,0.003903,0.520908,0.144414,0.0,b'0'
2,1.0,1.0,62.0,60.0,59.0,54.0,47.0,33.0,55.831441,27.993933,12.687485,4.852282,1.393889,0.373252,0.041817,0.007744,0.530904,0.128548,0.0,b'1'
3,1.0,1.0,55.0,53.0,53.0,50.0,43.0,31.0,40.467228,18.445954,9.118901,3.079428,0.840261,0.272434,0.007653,0.001531,0.483284,0.114790,0.0,b'0'
4,1.0,1.0,44.0,44.0,44.0,41.0,39.0,27.0,18.026254,8.570709,0.410381,0.000000,0.000000,0.000000,0.000000,0.000000,0.475935,0.123572,0.0,b'1'
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1146,1.0,1.0,34.0,34.0,34.0,33.0,31.0,24.0,6.071765,0.937472,0.031145,0.003115,0.000000,0.000000,0.000000,0.000000,0.537470,0.116795,0.0,b'0'
1147,1.0,1.0,49.0,49.0,49.0,49.0,45.0,37.0,63.197145,27.377668,8.067688,0.979548,0.001552,0.000000,0.000000,0.000000,0.516733,0.124190,0.0,b'0'
1148,1.0,0.0,49.0,48.0,48.0,45.0,43.0,33.0,30.461898,13.966980,1.763305,0.137858,0.011221,0.000000,0.000000,0.000000,0.560632,0.129843,0.0,b'0'
1149,1.0,1.0,39.0,36.0,29.0,23.0,13.0,7.0,40.525739,12.604947,4.740919,1.077570,0.563518,0.326860,0.239568,0.174584,0.485972,0.106690,1.0,b'1'


Unnamed: 0,Att1,Att2,Att3,Att4,Att5,Att6,Att7,Att8,Att9,Att10,...,Att291,Att292,Att293,Att294,Beach,Sunset,FallFoliage,Field,Mountain,Urban
0,0.646467,0.666435,0.685047,0.699053,0.652746,0.407864,0.150309,0.535193,0.555689,0.580782,...,0.157332,0.247298,0.014025,0.029709,b'TRUE',b'FALSE',b'FALSE',b'FALSE',b'TRUE',b'FALSE'
1,0.770156,0.767255,0.761053,0.745630,0.742231,0.688086,0.708416,0.757351,0.760633,0.740314,...,0.251454,0.137833,0.082672,0.036320,b'TRUE',b'FALSE',b'FALSE',b'FALSE',b'FALSE',b'TRUE'
2,0.793984,0.772096,0.761820,0.762213,0.740569,0.734361,0.722677,0.849128,0.839607,0.812746,...,0.017166,0.051125,0.112506,0.083924,b'TRUE',b'FALSE',b'FALSE',b'FALSE',b'FALSE',b'FALSE'
3,0.938563,0.949260,0.955621,0.966743,0.968649,0.869619,0.696925,0.953460,0.959631,0.966320,...,0.019267,0.031290,0.049780,0.090959,b'TRUE',b'FALSE',b'FALSE',b'FALSE',b'FALSE',b'FALSE'
4,0.512130,0.524684,0.520020,0.504467,0.471209,0.417654,0.364292,0.562266,0.588592,0.584449,...,0.198151,0.238796,0.164270,0.184290,b'TRUE',b'FALSE',b'FALSE',b'FALSE',b'FALSE',b'FALSE'
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2402,0.875782,0.901653,0.926227,0.721366,0.795826,0.867642,0.794125,0.899067,0.908963,0.895336,...,0.215147,0.279607,0.254413,0.134350,b'FALSE',b'FALSE',b'FALSE',b'FALSE',b'FALSE',b'TRUE'
2403,0.657706,0.669877,0.692338,0.713920,0.727374,0.750354,0.684372,0.718770,0.719916,0.730645,...,0.217201,0.199491,0.048747,0.041638,b'FALSE',b'FALSE',b'FALSE',b'FALSE',b'FALSE',b'TRUE'
2404,0.952281,0.944987,0.905556,0.836604,0.875916,0.957034,0.953938,0.967956,0.819636,0.707311,...,0.028002,0.031900,0.017547,0.019734,b'FALSE',b'FALSE',b'FALSE',b'FALSE',b'FALSE',b'TRUE'
2405,0.883990,0.899004,0.901019,0.904298,0.846402,0.858145,0.851362,0.852472,0.876665,0.908187,...,0.239041,0.256158,0.226332,0.223070,b'FALSE',b'FALSE',b'FALSE',b'FALSE',b'FALSE',b'TRUE'


Unnamed: 0,word_freq_make,word_freq_address,word_freq_all,word_freq_3d,word_freq_our,word_freq_over,word_freq_remove,word_freq_internet,word_freq_order,word_freq_mail,...,char_freq_%3B,char_freq_%28,char_freq_%5B,char_freq_%21,char_freq_%24,char_freq_%23,capital_run_length_average,capital_run_length_longest,capital_run_length_total,class
0,0.00,0.64,0.64,0.0,0.32,0.00,0.00,0.00,0.00,0.00,...,0.000,0.000,0.0,0.778,0.000,0.000,3.756,61.0,278.0,b'1'
1,0.21,0.28,0.50,0.0,0.14,0.28,0.21,0.07,0.00,0.94,...,0.000,0.132,0.0,0.372,0.180,0.048,5.114,101.0,1028.0,b'1'
2,0.06,0.00,0.71,0.0,1.23,0.19,0.19,0.12,0.64,0.25,...,0.010,0.143,0.0,0.276,0.184,0.010,9.821,485.0,2259.0,b'1'
3,0.00,0.00,0.00,0.0,0.63,0.00,0.31,0.63,0.31,0.63,...,0.000,0.137,0.0,0.137,0.000,0.000,3.537,40.0,191.0,b'1'
4,0.00,0.00,0.00,0.0,0.63,0.00,0.31,0.63,0.31,0.63,...,0.000,0.135,0.0,0.135,0.000,0.000,3.537,40.0,191.0,b'1'
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4596,0.31,0.00,0.62,0.0,0.00,0.31,0.00,0.00,0.00,0.00,...,0.000,0.232,0.0,0.000,0.000,0.000,1.142,3.0,88.0,b'0'
4597,0.00,0.00,0.00,0.0,0.00,0.00,0.00,0.00,0.00,0.00,...,0.000,0.000,0.0,0.353,0.000,0.000,1.555,4.0,14.0,b'0'
4598,0.30,0.00,0.30,0.0,0.00,0.00,0.00,0.00,0.00,0.00,...,0.102,0.718,0.0,0.000,0.000,0.000,1.404,6.0,118.0,b'0'
4599,0.96,0.00,0.00,0.0,0.32,0.00,0.00,0.00,0.00,0.00,...,0.000,0.057,0.0,0.000,0.000,0.000,1.147,5.0,78.0,b'0'


Unnamed: 0,OVERALL_DIAGNOSIS,F1,F2,F3,F4,F5,F6,F7,F8,F9,...,F13,F14,F15,F16,F17,F18,F19,F20,F21,F22
0,b'1',b'0',b'0',b'0',b'1',b'0',b'0',b'0',b'1',b'1',...,b'1',b'1',b'0',b'0',b'0',b'0',b'0',b'0',b'0',b'0'
1,b'1',b'0',b'0',b'1',b'1',b'0',b'0',b'0',b'1',b'1',...,b'1',b'1',b'0',b'0',b'0',b'0',b'0',b'0',b'0',b'1'
2,b'1',b'1',b'0',b'1',b'0',b'1',b'0',b'0',b'1',b'0',...,b'1',b'0',b'0',b'0',b'0',b'0',b'0',b'0',b'0',b'0'
3,b'1',b'0',b'0',b'0',b'0',b'0',b'0',b'0',b'0',b'0',...,b'0',b'0',b'0',b'0',b'0',b'0',b'0',b'1',b'1',b'1'
4,b'1',b'0',b'0',b'0',b'0',b'0',b'0',b'0',b'1',b'0',...,b'1',b'0',b'1',b'1',b'0',b'0',b'0',b'0',b'0',b'0'
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
262,b'0',b'0',b'0',b'0',b'0',b'0',b'0',b'0',b'0',b'0',...,b'0',b'0',b'0',b'0',b'0',b'0',b'0',b'0',b'0',b'0'
263,b'0',b'1',b'1',b'0',b'0',b'0',b'1',b'0',b'0',b'0',...,b'0',b'0',b'0',b'1',b'0',b'0',b'0',b'0',b'0',b'0'
264,b'0',b'1',b'0',b'1',b'0',b'1',b'0',b'0',b'1',b'0',...,b'1',b'0',b'1',b'1',b'0',b'0',b'0',b'0',b'0',b'0'
265,b'0',b'1',b'0',b'1',b'0',b'1',b'0',b'0',b'1',b'1',...,b'0',b'1',b'0',b'1',b'0',b'0',b'0',b'0',b'0',b'0'
