# **Stunting Classification Using Random Forest and Logistic Regression**

---



In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [None]:
# List the files in the current working directory (shell escape).
!ls

sample_data


# **Import Dataset**

In [None]:
# Disabled alternative: upload the workbook by hand through the Colab file picker.
# from google.colab import files
# uploaded = files.upload()

# Download the dataset from Google Drive by file ID; per the recorded output it is
# saved as "Data Stunting.xlsx" in the working directory.
!gdown 1U4rB8x2uZcvV9MEOB3qYPBa1LzaObUi6

Downloading...
From: https://drive.google.com/uc?id=1U4rB8x2uZcvV9MEOB3qYPBa1LzaObUi6
To: /content/Data Stunting.xlsx
  0% 0.00/300k [00:00<?, ?B/s]100% 300k/300k [00:00<00:00, 29.9MB/s]


In [None]:
# Load the stunting dataset from the downloaded Excel workbook
DATASET_PATH = 'Data Stunting.xlsx'
df = pd.read_excel(DATASET_PATH)

In [None]:
# Display the loaded DataFrame (rendered as the cell's last expression)
df

Unnamed: 0,Nama,JK,Usia,Berat,Tinggi,Status
0,FAISAL FAKIH RUKMANA,L,2.466667,4.9,58,Tidak Stunting
1,MUHAMMAD HAFIDZ ATHAFARIZ S,L,58.466667,17.0,109,Tidak Stunting
2,VIONA FEBBY YASMIN,P,5.166667,7.1,68.5,Tidak Stunting
3,GHEGHE KALISTA PUTRI,P,45.300000,13.8,95,Tidak Stunting
4,LINGGA OKTAVIAN WIRATAMA,L,53.633333,18.6,106,Tidak Stunting
...,...,...,...,...,...,...
6672,KAUTSARRAKY,L,0.000000,3.0,50,Tidak Stunting
6673,ALEENA H,P,0.000000,3.0,50,Tidak Stunting
6674,M EMRAN,L,0.000000,3.0,50,Stunting
6675,REZVAN A P,L,0.000000,3.0,50,Tidak Stunting


# **Data Cleaning**

In [None]:
# Missing-value check: collect every row that has at least one NaN cell
rows_with_nan_mask = df.isna().any(axis=1)
nan_data = df.loc[rows_with_nan_mask]
nan_data

Unnamed: 0,Nama,JK,Usia,Berat,Tinggi,Status


In [None]:
# Number of missing cells in each column
missing_values = df.isna().sum()
print(missing_values)

Nama      0
JK        0
Usia      0
Berat     0
Tinggi    0
Status    0
dtype: int64


In [None]:
# Check the column dtypes for mixed-type columns: a measurement column reported as
# `object` instead of a numeric dtype holds values of more than one Python type.
df.dtypes

Nama       object
JK         object
Usia      float64
Berat     float64
Tinggi     object
Status     object
dtype: object

In [None]:
# Find the rows whose 'Tinggi' (height) value cannot be parsed as a number.
# Testing isinstance(x, float) is not enough in an object column: every integer height
# (58, 109, ...) is "not a float" too, so that test lists almost the whole table rather
# than the offending rows. Coercing to numeric turns only unparseable entries into NaN.
tinggi_numeric = pd.to_numeric(df['Tinggi'], errors='coerce')
non_float_rows = df[tinggi_numeric.isna()]
print(non_float_rows)

                              Nama  JK       Usia  Berat Tinggi  \
0            FAISAL FAKIH RUKMANA   L    2.466667    4.9     58   
1     MUHAMMAD HAFIDZ ATHAFARIZ S   L   58.466667   17.0    109   
3            GHEGHE KALISTA PUTRI   P   45.300000   13.8     95   
4        LINGGA OKTAVIAN WIRATAMA   L   53.633333   18.6    106   
6              ASTI RAHMA SAPITRI   P   59.533333   18.5    107   
...                            ...  ..        ...    ...    ...   
6672                  KAUTSARRAKY   L    0.000000    3.0     50   
6673                     ALEENA H   P    0.000000    3.0     50   
6674                      M EMRAN   L    0.000000    3.0     50   
6675                   REZVAN A P   L    0.000000    3.0     50   
6676     APRIANZA QEISYAM GUNAWAN   L    0.000000    2.8     48   

              Status  
0     Tidak Stunting  
1     Tidak Stunting  
3     Tidak Stunting  
4     Tidak Stunting  
6     Tidak Stunting  
...              ...  
6672  Tidak Stunting  
6673  Tidak

In [None]:
# Replace missing heights with an empty string.
# NOTE(review): the missing-value check earlier in this notebook reports 0 nulls, so this
# is a no-op on this dataset; an '' entry could not be cast by the later astype(float)
# either - confirm whether this cell is still needed.
df['Tinggi'] = df['Tinggi'].fillna('')

In [None]:
# Converting 'Tinggi' to float raised an error: some rows contain a '-' instead of a number
tinggi_text = df['Tinggi'].astype(str)
has_dash = tinggi_text.str.contains('-')
strip_rows = df[has_dash]
print(strip_rows)

                  Nama  JK       Usia  Berat Tinggi          Status
2087  ARKENZY ZAVIYAR   L   37.633333   13.4      -  Tidak Stunting


In [None]:
# Remove the rows whose height is the '-' placeholder. Filtering by value instead of by
# the hard-coded index label 2087 keeps the cell idempotent (df.drop(2087) raises KeyError
# when re-run) and keeps it correct if the workbook's row order ever changes.
# .copy() gives an independent frame so later column assignments do not warn.
df = df[~df['Tinggi'].astype(str).str.contains('-', regex=False)].copy()

In [None]:
# Cast the height column to float now that the non-numeric row is gone, then confirm the dtype
tinggi_as_float = df['Tinggi'].astype(float)
df['Tinggi'] = tinggi_as_float
print(df['Tinggi'].dtype)

float64


In [None]:
# Display the DataFrame again after the 'Tinggi' cleanup
df

Unnamed: 0,Nama,JK,Usia,Berat,Tinggi,Status
0,FAISAL FAKIH RUKMANA,L,2.466667,4.9,58.0,Tidak Stunting
1,MUHAMMAD HAFIDZ ATHAFARIZ S,L,58.466667,17.0,109.0,Tidak Stunting
2,VIONA FEBBY YASMIN,P,5.166667,7.1,68.5,Tidak Stunting
3,GHEGHE KALISTA PUTRI,P,45.300000,13.8,95.0,Tidak Stunting
4,LINGGA OKTAVIAN WIRATAMA,L,53.633333,18.6,106.0,Tidak Stunting
...,...,...,...,...,...,...
6672,KAUTSARRAKY,L,0.000000,3.0,50.0,Tidak Stunting
6673,ALEENA H,P,0.000000,3.0,50.0,Tidak Stunting
6674,M EMRAN,L,0.000000,3.0,50.0,Stunting
6675,REZVAN A P,L,0.000000,3.0,50.0,Tidak Stunting


In [None]:
# Remove the column that is not needed for modelling (the child's name)
df = df.drop(columns=['Nama'])
df

Unnamed: 0,JK,Usia,Berat,Tinggi,Status
0,L,2.466667,4.9,58.0,Tidak Stunting
1,L,58.466667,17.0,109.0,Tidak Stunting
2,P,5.166667,7.1,68.5,Tidak Stunting
3,P,45.300000,13.8,95.0,Tidak Stunting
4,L,53.633333,18.6,106.0,Tidak Stunting
...,...,...,...,...,...
6672,L,0.000000,3.0,50.0,Tidak Stunting
6673,P,0.000000,3.0,50.0,Tidak Stunting
6674,L,0.000000,3.0,50.0,Stunting
6675,L,0.000000,3.0,50.0,Tidak Stunting


# **Data Processing**

**1. Mengecek Kemungkinan Imbalance Data**

In [None]:
# Size of the minority class: number of rows labelled 'Stunting'
is_stunting = df['Status'] == 'Stunting'
min_label_len = int(is_stunting.sum())
print(min_label_len)

64


In [None]:
# Index labels of all majority-class ('Tidak Stunting') rows
is_not_stunting = df['Status'] == 'Tidak Stunting'
max_label_index = df.index[is_not_stunting]
print(max_label_index)

Int64Index([   0,    1,    2,    3,    4,    5,    6,    7,    8,    9,
            ...
            6664, 6665, 6667, 6669, 6670, 6671, 6672, 6673, 6675, 6676],
           dtype='int64', length=6612)


In [None]:
# The classes are imbalanced: only 64 'Stunting' rows versus 6612 'Tidak Stunting' rows

**2. Undersampling**

In [None]:
# Undersample the majority class: draw as many 'Tidak Stunting' index labels as there are
# 'Stunting' rows, without replacement. The generator is seeded so that the sample - and
# therefore every score computed from it - is the same after a kernel restart.
UNDERSAMPLE_SEED = 42
undersample_rng = np.random.default_rng(UNDERSAMPLE_SEED)
random_max_index = undersample_rng.choice(max_label_index,
                                          min_label_len,
                                          replace=False)
print(len(random_max_index))

64


In [None]:
# Index labels of all minority-class ('Stunting') rows
stunting_mask = df['Status'] == 'Stunting'
min_label_index = df.index[stunting_mask]
print(min_label_index)

Int64Index([  63,  101,  198,  200,  227,  242,  244,  283,  407,  423,  443,
             524,  544,  575,  591,  594,  796,  911, 1033, 1042, 1080, 1129,
            1154, 1245, 1644, 1650, 1836, 1883, 1930, 2089, 2184, 2187, 2434,
            2485, 2748, 2804, 2883, 2911, 2936, 2976, 3154, 3531, 3576, 4203,
            4665, 4767, 5019, 5114, 5134, 5346, 5358, 6557, 6559, 6560, 6561,
            6623, 6626, 6634, 6649, 6653, 6661, 6666, 6668, 6674],
           dtype='int64')


In [None]:
# Balanced subset: every minority-class row followed by the sampled majority-class rows
under_sample_index = np.concatenate([min_label_index, random_max_index])
under_sample = df.loc[under_sample_index]
print(under_sample_index)

# Split into features (all columns except 'Status') and label (a one-column DataFrame)
x = under_sample.drop(columns=['Status'])
y = under_sample[['Status']]

[  63  101  198  200  227  242  244  283  407  423  443  524  544  575
  591  594  796  911 1033 1042 1080 1129 1154 1245 1644 1650 1836 1883
 1930 2089 2184 2187 2434 2485 2748 2804 2883 2911 2936 2976 3154 3531
 3576 4203 4665 4767 5019 5114 5134 5346 5358 6557 6559 6560 6561 6623
 6626 6634 6649 6653 6661 6666 6668 6674 1116 5854  672 3687 1257 3458
 2905 4421 4950 5489 3376 1709 2827 5892 1032  686 6604 1854  752 2797
 3093 4117 2177 1768 6163 1441 2337 3163 3842 6510 6536  991 5644 5047
 2809 3945 6167 3130 1554 4003 2571 2681 4721  214 6514 2136  735 1341
 6483 6574 1704 4642 6264 2858   43 3210 2694 3041 3234 2968 4651 1255
 1305 4453]


In [None]:
# Print the feature table of the undersampled set
print(x)

      JK       Usia  Berat  Tinggi
63    P   19.300000    5.7    72.0
101   P   35.300000   10.7    82.2
198   L   14.500000   10.0    73.0
200   L   50.866667   12.0    95.0
227   L    4.966667    6.6    60.3
...   ..        ...    ...     ...
2968  L   13.633333    9.0    78.0
4651  L   37.000000   14.5    96.0
1255  P   33.766667   12.0    95.0
1305  P   14.100000    9.8    79.0
4453  P   16.333333   10.5    78.0

[128 rows x 4 columns]


In [None]:
# Print the label column of the undersampled set
print(y)

              Status
63          Stunting
101         Stunting
198         Stunting
200         Stunting
227         Stunting
...              ...
2968  Tidak Stunting
4651  Tidak Stunting
1255  Tidak Stunting
1305  Tidak Stunting
4453  Tidak Stunting

[128 rows x 1 columns]


**2b. Oversample (alternative, currently disabled)**

In [None]:
# pip install pandas scikit-learn

In [None]:
# from imblearn.over_sampling import RandomOverSampler
# from collections import Counter

# x = df.drop('Status', axis=1)
# y = df['Status']

# # Display class distribution before oversampling
# print("Class distribution before oversampling:", Counter(y))

# # Apply oversampling
# oversampler = RandomOverSampler()
# x_resampled, y_resampled = oversampler.fit_resample(x, y)

# # Display class distribution after oversampling
# print("Class distribution after oversampling:", Counter(y_resampled))


In [None]:
# # Combine the oversampled features and labels into a new DataFrame
# df_resampled = pd.concat([pd.DataFrame(x_resampled, columns=x.columns), pd.Series(y_resampled, name='Status')], axis=1)

# # # Save the oversampled dataset to a new CSV file
# # df_resampled.to_csv('path/to/oversampled/dataset.csv', index=False)


In [None]:
# df_resampled

In [None]:
# min_label_len = len(df_resampled[df_resampled['Status']=='Stunting'])
# print(min_label_len)

In [None]:
# max_label_index = df_resampled[df_resampled['Status']=='Tidak Stunting'].index
# print(max_label_index)

In [None]:
# print(x_resampled)

In [None]:
# print(y_resampled)

# TRANSFORMASI

In [None]:
# Transform and encode the feature columns: one-hot encode column 0 ('JK') and pass the
# remaining columns through unchanged
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

sex_encoder_step = ('encoder', OneHotEncoder(), [0])
column_transformer = ColumnTransformer(transformers=[sex_encoder_step], remainder='passthrough')
encoded_features = column_transformer.fit_transform(x)
x = np.array(encoded_features)

In [None]:
# Print the feature matrix after encoding (x is now a NumPy array)
print(x)

[[  0.           1.          19.3          5.7         72.        ]
 [  0.           1.          35.3         10.7         82.2       ]
 [  1.           0.          14.5         10.          73.        ]
 [  1.           0.          50.86666667  12.          95.        ]
 [  1.           0.           4.96666667   6.6         60.3       ]
 [  0.           1.           7.2          5.5         62.3       ]
 [  1.           0.          20.93333333   9.5         79.        ]
 [  0.           1.          20.2         10.5         74.        ]
 [  0.           1.          58.86666667  12.5         98.        ]
 [  0.           1.          28.33333333   9.          80.        ]
 [  1.           0.          41.63333333  10.          90.        ]
 [  0.           1.           0.           1.8         47.        ]
 [  0.           1.          34.          12.          86.        ]
 [  0.           1.          21.           8.          75.        ]
 [  1.           0.          23.          10.8  

In [None]:
# Encode the label column as integers. LabelEncoder numbers the classes in sorted order,
# so 'Stunting' becomes 0 and 'Tidak Stunting' becomes 1.
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
# y arrives as a one-column DataFrame; flatten it to 1-D first so scikit-learn does not
# emit its "column-vector y was passed" DataConversionWarning.
y = label_encoder.fit_transform(np.ravel(y))

  y = column_or_1d(y, warn=True)


In [None]:
# Show the encoded labels; the slice bound is larger than the 128 labels, so all are printed
print(y[:1000])
# y_resampled[-1:-501:-1]

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


**3. Split Data**

In [None]:
# Hold out 20% of the samples as the test set.
# - stratify=y keeps the 64/64 class balance in both folds; an unstratified draw on a
#   26-sample test fold can drift noticeably (the recorded run ended up 15 vs 11).
# - random_state fixes the shuffle so the scores below are reproducible.
from sklearn.model_selection import train_test_split
SPLIT_SEED = 42
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=SPLIT_SEED, stratify=y)

**4. Scaling Data**

In [None]:
from sklearn.preprocessing import StandardScaler
standard_scal = StandardScaler()

# Columns 0-1 hold the one-hot 'JK' indicators and stay as they are; only the numeric
# columns from index 2 onward are standardised.
numeric_cols = slice(2, None)

# Learn mean/std on the training fold only, then apply the same scaling to the test fold
x_train[:, numeric_cols] = standard_scal.fit_transform(x_train[:, numeric_cols])
x_test[:, numeric_cols] = standard_scal.transform(x_test[:, numeric_cols])

In [None]:
# Print the training matrix after standardisation
print(x_train)

[[ 1.00000000e+00  0.00000000e+00  5.50684090e-01  9.78332352e-01
   3.08187065e-01]
 [ 1.00000000e+00  0.00000000e+00 -1.69103330e-01  3.87026512e-01
   3.49521819e-01]
 [ 1.00000000e+00  0.00000000e+00 -2.40068287e-01 -3.45066434e-01
  -4.49616763e-01]
 [ 0.00000000e+00  1.00000000e+00 -5.58396808e-01 -9.36372274e-01
  -6.56290535e-01]
 [ 0.00000000e+00  1.00000000e+00  8.95371023e-01  1.25990656e+00
   8.59317122e-01]
 [ 1.00000000e+00  0.00000000e+00  9.32284728e-03 -1.76121908e-01
  -1.82359210e-03]
 [ 1.00000000e+00  0.00000000e+00  3.25623798e-01 -2.32436750e-01
   1.70404551e-01]
 [ 0.00000000e+00  1.00000000e+00  6.74365872e-01  2.18081986e-01
   5.83752093e-01]
 [ 1.00000000e+00  0.00000000e+00  9.85359360e-02  6.68600722e-01
   6.11308596e-01]
 [ 1.00000000e+00  0.00000000e+00  4.47278010e-01  8.93860089e-01
   1.49737174e-01]
 [ 1.00000000e+00  0.00000000e+00 -9.15249163e-01 -3.73223854e-01
  -3.80725506e-01]
 [ 0.00000000e+00  1.00000000e+00  9.39977568e-01  8.09387826e-01

In [None]:
# Shapes of the training features and the training labels
for train_part in (x_train, y_train):
    print(train_part.shape)

(102, 5)
(102,)


# **Model**

## Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [None]:
# Logistic-regression classifier: regularisation strength C=0.1, balanced class weights
logistic_params = dict(C=0.1, solver='lbfgs', max_iter=1000, class_weight='balanced')
logistic_model = LogisticRegression(**logistic_params)

# Train on the training fold
logistic_model.fit(x_train, y_train)

In [None]:
# Accuracy of the logistic model on the data it was trained on
y_pred_train_logistic = logistic_model.predict(x_train)
accuracy_train = accuracy_score(y_train, y_pred_train_logistic)

print(f"Training Set Accuracy: {accuracy_train:.4f}")

Training Set Accuracy: 0.7941


In [None]:
# Predict labels for the held-out test fold
y_pred_logistic = logistic_model.predict(x_test)

# Overall test accuracy
accuracy_logistic = accuracy_score(y_test, y_pred_logistic)
print(f"Accuracy: {accuracy_logistic:.4f}")

# Per-class precision / recall / F1, printed to four decimals
logistic_report_text = classification_report(y_test, y_pred_logistic, digits=4)
print(logistic_report_text)

# Confusion matrix of true versus predicted labels
confusion_logistic = confusion_matrix(y_test, y_pred_logistic)
print("Confusion Matrix:")
print(confusion_logistic)


Accuracy: 0.8462
              precision    recall  f1-score   support

           0     0.8667    0.8667    0.8667        15
           1     0.8182    0.8182    0.8182        11

    accuracy                         0.8462        26
   macro avg     0.8424    0.8424    0.8424        26
weighted avg     0.8462    0.8462    0.8462        26

Confusion Matrix:
[[13  2]
 [ 2  9]]


## Random Forest


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [None]:
# Random forest with 100 trees, each limited to depth 10. A fixed random_state makes the
# randomness inside the forest repeatable, so the scores in the following cells do not
# change from run to run.
RF_SEED = 42
rf_model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=RF_SEED)

# Fit the model to the training data
rf_model.fit(x_train, y_train)

In [None]:
# Accuracy of the random forest on the data it was trained on
# (accuracy_train is reused here and overwrites the logistic-regression value)
y_pred_train_rf = rf_model.predict(x_train)
accuracy_train = accuracy_score(y_train, y_pred_train_rf)

print(f"Training Set Accuracy: {accuracy_train:.4f}")

Training Set Accuracy: 1.0000


In [None]:
# Predict labels for the held-out test fold
y_pred_rf = rf_model.predict(x_test)

# Overall test accuracy
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print(f"Accuracy: {accuracy_rf :.4f}")

# Per-class precision / recall / F1, printed to four decimals
rf_report_text = classification_report(y_test, y_pred_rf, digits=4)
print(rf_report_text)

# Confusion matrix of true versus predicted labels
confusion_rf = confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix:")
print(confusion_rf)

Accuracy: 0.8077
              precision    recall  f1-score   support

           0     0.8571    0.8000    0.8276        15
           1     0.7500    0.8182    0.7826        11

    accuracy                         0.8077        26
   macro avg     0.8036    0.8091    0.8051        26
weighted avg     0.8118    0.8077    0.8086        26

Confusion Matrix:
[[12  3]
 [ 2  9]]


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# Repeated hold-out evaluation: split, scale, train and score both models num_iterations
# times, then average the metrics. x and y are the encoded feature matrix and label vector.
# Each iteration uses its own seed (BASE_SEED + i), so the run is reproducible while the
# splits still differ; stratify=y keeps both classes equally represented in every test fold.
# NOTE(review): both models use scikit-learn defaults here, unlike the tuned
# LogisticRegression(C=0.1, class_weight='balanced') and RandomForestClassifier(max_depth=10)
# cells earlier in the notebook - confirm that this is intended.
num_iterations = 100
BASE_SEED = 42
METRIC_KEYS = ('accuracy', 'precision_0', 'recall_0', 'f1_0', 'precision_1', 'recall_1', 'f1_1')
CLASS_LABELS = ('0', '1')  # keys used by classification_report(output_dict=True)

average_metrics_rf = dict.fromkeys(METRIC_KEYS, 0.0)
average_metrics_lr = dict.fromkeys(METRIC_KEYS, 0.0)


def _flatten_scores(y_true, y_pred):
    """Return accuracy plus per-class precision/recall/F1 as a flat dict keyed like METRIC_KEYS."""
    report = classification_report(y_true, y_pred, output_dict=True)
    scores = {'accuracy': accuracy_score(y_true, y_pred)}
    for label in CLASS_LABELS:
        scores['precision_' + label] = report[label]['precision']
        scores['recall_' + label] = report[label]['recall']
        scores['f1_' + label] = report[label]['f1-score']
    return scores


def _print_average_metrics(model_name, averages):
    """Print the averaged metrics of one model in the notebook's report format."""
    print("\nOverall Average Metrics for {}:".format(model_name))
    print("Average Accuracy: {:.3f}".format(averages['accuracy']))
    for label in CLASS_LABELS:
        print("Average Precision (Class {}): {:.3f}".format(label, averages['precision_' + label]))
        print("Average Recall (Class {}): {:.3f}".format(label, averages['recall_' + label]))
        print("Average F1-Score (Class {}): {:.3f}".format(label, averages['f1_' + label]))


for i in range(num_iterations):
    iteration_seed = BASE_SEED + i

    # Split the data into training and testing sets
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=iteration_seed, stratify=y)

    # Standardise the numeric columns (index 2 onward); the scaler is fitted on the
    # training fold only and then applied unchanged to the test fold
    standard_scal = StandardScaler()
    x_train[:, 2:] = standard_scal.fit_transform(x_train[:, 2:])
    x_test[:, 2:] = standard_scal.transform(x_test[:, 2:])

    # Random forest: train, predict, score
    rf_model = RandomForestClassifier(random_state=iteration_seed)
    rf_model.fit(x_train, y_train)
    y_pred_rf = rf_model.predict(x_test)
    scores_rf = _flatten_scores(y_test, y_pred_rf)
    accuracy_rf = scores_rf['accuracy']
    print(f"Iteration {i + 1} - RandomForest - Accuracy: {accuracy_rf:.4f}")

    # Logistic regression: train, predict, score
    lr_model = LogisticRegression()
    lr_model.fit(x_train, y_train)
    y_pred_lr = lr_model.predict(x_test)
    scores_lr = _flatten_scores(y_test, y_pred_lr)
    accuracy_lr = scores_lr['accuracy']
    print(f"Iteration {i + 1} - LogisticRegression - Accuracy: {accuracy_lr:.4f}")

    # Accumulate this iteration's metrics for averaging
    for metric in METRIC_KEYS:
        average_metrics_rf[metric] += scores_rf[metric]
        average_metrics_lr[metric] += scores_lr[metric]

# Turn the accumulated sums into averages
for metric in METRIC_KEYS:
    average_metrics_rf[metric] /= num_iterations
    average_metrics_lr[metric] /= num_iterations

# Print the overall average metrics for both models
_print_average_metrics("RandomForestClassifier", average_metrics_rf)
_print_average_metrics("LogisticRegression", average_metrics_lr)

Iteration 1 - RandomForest - Accuracy: 0.8462
Iteration 1 - LogisticRegression - Accuracy: 0.8846
Iteration 2 - RandomForest - Accuracy: 0.7308
Iteration 2 - LogisticRegression - Accuracy: 0.9231
Iteration 3 - RandomForest - Accuracy: 0.8077
Iteration 3 - LogisticRegression - Accuracy: 0.9231
Iteration 4 - RandomForest - Accuracy: 0.8846
Iteration 4 - LogisticRegression - Accuracy: 0.9231
Iteration 5 - RandomForest - Accuracy: 0.8077
Iteration 5 - LogisticRegression - Accuracy: 0.8077
Iteration 6 - RandomForest - Accuracy: 0.8462
Iteration 6 - LogisticRegression - Accuracy: 0.8846
Iteration 7 - RandomForest - Accuracy: 0.9231
Iteration 7 - LogisticRegression - Accuracy: 0.8077
Iteration 8 - RandomForest - Accuracy: 0.9231
Iteration 8 - LogisticRegression - Accuracy: 0.8846
Iteration 9 - RandomForest - Accuracy: 0.9231
Iteration 9 - LogisticRegression - Accuracy: 0.8462
Iteration 10 - RandomForest - Accuracy: 1.0000
Iteration 10 - LogisticRegression - Accuracy: 0.9231
Iteration 11 - Ran

In [None]:
# Print x_train as left by the final iteration of the repeated-split loop in the previous cell
print(x_train)

[[ 0.00000000e+00  1.00000000e+00 -1.34530690e+00 -1.58488593e+00
  -1.43925910e+00]
 [ 1.00000000e+00  0.00000000e+00 -1.25511738e+00 -5.97747927e-01
  -7.84274070e-01]
 [ 0.00000000e+00  1.00000000e+00  6.12215622e-01  7.37791727e-01
   7.68783217e-01]
 [ 1.00000000e+00  0.00000000e+00 -3.90117902e-01 -2.78379749e-01
   9.35409184e-02]
 [ 1.00000000e+00  0.00000000e+00 -8.96409068e-01 -2.78379749e-01
  -7.16749840e-01]
 [ 0.00000000e+00  1.00000000e+00  2.88353257e-01  3.02289666e-01
   7.68783217e-01]
 [ 1.00000000e+00  0.00000000e+00 -1.52345532e-01  4.47457020e-01
   2.28589378e-01]
 [ 0.00000000e+00  1.00000000e+00 -5.45899799e-01 -1.33212395e-01
  -6.49225610e-01]
 [ 1.00000000e+00  0.00000000e+00  7.00355380e-01  1.17329379e+00
   1.17392860e+00]
 [ 1.00000000e+00  0.00000000e+00  1.32963225e+00  1.69589626e+00
   1.44402552e+00]
 [ 1.00000000e+00  0.00000000e+00 -5.90994559e-01 -2.78379749e-01
  -3.11604461e-01]
 [ 1.00000000e+00  0.00000000e+00  9.91421556e-01  8.82959081e-01