In [4]:
import pandas as pd
pd.read_excel('Train/Belt 1 9 Months negative data.xlsx')

Unnamed: 0,Name,Timestamp,Status,Description,Vibration Frequency,Vibration Amplitude,Bearing Temperature,Motor Temperature,Belt Load,Torque,Noise Levels,Current and Voltage,Hydraulic Pressure,Belt Thickness,Roller Condition
0,Conveyor Belt 4,2025-08-15 00:00:00,Running,,1490.82,0.04,77.130,96.902,1.36,318.07,55.12,15.79,382.09,1.58000,86.000
1,Conveyor Belt 4,2025-08-15 00:15:00,Running,,1498.37,0.04,77.076,96.975,1.07,295.50,59.68,14.34,376.48,1.57950,85.854
2,Conveyor Belt 4,2025-08-15 00:30:00,Running,,1503.22,0.06,77.307,96.755,1.21,314.38,58.20,15.03,384.20,1.57925,85.781
3,Conveyor Belt 4,2025-08-15 00:45:00,Running,,1508.11,0.04,77.474,97.661,1.29,311.84,56.16,15.43,379.79,1.57900,85.708
4,Conveyor Belt 4,2025-08-15 01:00:00,Running,,1498.13,0.06,77.785,97.471,1.07,317.14,55.39,14.35,383.95,1.57875,85.635
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26204,Conveyor Belt 4,2026-05-13 23:00:00,Running,,1502.84,0.05,76.119,95.287,1.39,305.42,62.72,15.93,377.00,1.63700,68.610
26205,Conveyor Belt 4,2026-05-13 23:15:00,Running,,1503.36,0.05,75.889,95.888,1.06,312.37,64.70,14.28,377.08,1.63675,68.537
26206,Conveyor Belt 4,2026-05-13 23:30:00,Running,,1498.34,0.04,76.001,95.738,1.28,287.36,57.81,15.42,384.19,1.63650,68.464
26207,Conveyor Belt 4,2026-05-13 23:45:00,Running,,1495.26,0.05,76.106,95.848,1.30,297.35,58.84,15.49,376.62,1.63625,68.391


In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
import xgboost as xgb
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# 1. Load Data and Preprocess
df = pd.read_excel('Train/Belt 1 9 Months negative data.xlsx', parse_dates=['Timestamp'])
df = df.set_index('Timestamp')

# Define the expected ranges as a dictionary
expected_ranges_data = {
    'Vibration Frequency': {'Lower Bound': 1490, 'Upper Bound': 1510},
    'Vibration Amplitude': {'Lower Bound': 0.04, 'Upper Bound': 0.06},
    'Bearing Temperature': {'Lower Bound': 60, 'Upper Bound': 80},
    'Motor Temperature': {'Lower Bound': 80, 'Upper Bound': 100},
    'Belt Load': {'Lower Bound': 1.0, 'Upper Bound': 1.4},
    'Torque': {'Lower Bound': 280, 'Upper Bound': 320},
    'Noise Levels': {'Lower Bound': 55, 'Upper Bound': 65},
    'Current and Voltage': {'Lower Bound': 14, 'Upper Bound': 16},
    'Hydraulic Pressure': {'Lower Bound': 375, 'Upper Bound': 385},
    'Belt Thickness': {'Lower Bound': 1.5, 'Upper Bound': 1.7},
    'Roller Condition': {'Lower Bound': 65, 'Upper Bound': 100}
}

# Create a Pandas DataFrame from the dictionary
expected_ranges = pd.DataFrame(expected_ranges_data).T

def create_anomaly_features(df, expected_ranges):
    for col in df.columns:
        if col in expected_ranges.index:
            lower_bound = expected_ranges.loc[col, 'Lower Bound']
            upper_bound = expected_ranges.loc[col, 'Upper Bound']
            if pd.api.types.is_numeric_dtype(df[col]):
                df[f'{col}_anomaly'] = ((df[col] < lower_bound) | (df[col] > upper_bound)).astype(int)
                df[f'{col}_deviation'] = np.where(df[col] < lower_bound, df[col] - lower_bound, np.where(df[col] > upper_bound, df[col] - upper_bound, 0))
            else:
                print(f"Warning: Column '{col}' is not numeric and will be skipped.")
    return df

df = create_anomaly_features(df, expected_ranges)

def create_rolling_lag_features(df, column, window_sizes, lag_sizes):
    for window in window_sizes:
        df[f'{column}_rolling_mean_{window}h'] = df[column].rolling(window * 4).mean()
        df[f'{column}_rolling_std_{window}h'] = df[column].rolling(window * 4).std()
    for lag in lag_sizes:
        df[f'{column}_lag_{lag}'] = df[column].shift(lag)
    return df

for col in ['Motor Temperature', 'Roller Condition', 'Torque', 'Vibration Amplitude']:
    df = create_rolling_lag_features(df, col, [1, 24], [1, 4])

df['Maintenance'] = (df['Status'] == 'Maintenance').astype(int)
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoded_descriptions = encoder.fit_transform(df[['Description']])
encoded_df = pd.DataFrame(encoded_descriptions, index=df.index)
df = pd.concat([df, encoded_df], axis=1)

df = df.drop(['Status', 'Description'], axis=1)
df = df.dropna()

# 2. Prepare Data for XGBoost
X = df.drop(['Maintenance', 'Name'], axis=1) # Remove the name column
y = df['Maintenance']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# 3. Train XGBoost Model
model = xgb.XGBClassifier(
    objective='binary:logistic',
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5,
    random_state=42
)

model.fit(X_train, y_train)

# 4. Make Predictions and Evaluate
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1-score: {f1:.4f}')

# 5. Feature Importance.
feature_importance = pd.DataFrame(model.feature_importances_, index=X_train.columns, columns=['importance']).sort_values('importance', ascending=False)
print('\nFeature Importance:\n', feature_importance.head(10))

Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1-score: 1.0000

Feature Importance:
                          importance
Motor Temperature          0.993974
Torque                     0.001894
Noise Levels               0.001633
Vibration Frequency        0.001412
Bearing Temperature        0.001087
Roller Condition_lag_4     0.000000
Torque_lag_4               0.000000
Torque_lag_1               0.000000
Torque_rolling_std_24h     0.000000
Torque_rolling_mean_24h    0.000000


In [18]:
# 1. Load Data and Preprocess
df = pd.read_excel('Train/Belt 1 9 Months negative data.xlsx', parse_dates=['Timestamp'])
df = df.set_index('Timestamp')
# Create a Pandas DataFrame from the dictionary
expected_ranges = pd.DataFrame(expected_ranges_data).T

def create_anomaly_features(df, expected_ranges):
    for col in df.columns:
        if col in expected_ranges.index:
            lower_bound = expected_ranges.loc[col, 'Lower Bound']
            upper_bound = expected_ranges.loc[col, 'Upper Bound']
            if pd.api.types.is_numeric_dtype(df[col]):
                df[f'{col}_anomaly'] = ((df[col] < lower_bound) | (df[col] > upper_bound)).astype(int)
                df[f'{col}_deviation'] = np.where(df[col] < lower_bound, df[col] - lower_bound, np.where(df[col] > upper_bound, df[col] - upper_bound, 0))
            else:
                print(f"Warning: Column '{col}' is not numeric and will be skipped.")
    return df

df = create_anomaly_features(df, expected_ranges)
df

Unnamed: 0_level_0,Name,Status,Description,Vibration Frequency,Vibration Amplitude,Bearing Temperature,Motor Temperature,Belt Load,Torque,Noise Levels,...,Noise Levels_anomaly,Noise Levels_deviation,Current and Voltage_anomaly,Current and Voltage_deviation,Hydraulic Pressure_anomaly,Hydraulic Pressure_deviation,Belt Thickness_anomaly,Belt Thickness_deviation,Roller Condition_anomaly,Roller Condition_deviation
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-08-15 00:00:00,Conveyor Belt 4,Running,,1490.82,0.04,77.130,96.902,1.36,318.07,55.12,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
2025-08-15 00:15:00,Conveyor Belt 4,Running,,1498.37,0.04,77.076,96.975,1.07,295.50,59.68,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
2025-08-15 00:30:00,Conveyor Belt 4,Running,,1503.22,0.06,77.307,96.755,1.21,314.38,58.20,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
2025-08-15 00:45:00,Conveyor Belt 4,Running,,1508.11,0.04,77.474,97.661,1.29,311.84,56.16,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
2025-08-15 01:00:00,Conveyor Belt 4,Running,,1498.13,0.06,77.785,97.471,1.07,317.14,55.39,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2026-05-13 23:00:00,Conveyor Belt 4,Running,,1502.84,0.05,76.119,95.287,1.39,305.42,62.72,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
2026-05-13 23:15:00,Conveyor Belt 4,Running,,1503.36,0.05,75.889,95.888,1.06,312.37,64.70,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
2026-05-13 23:30:00,Conveyor Belt 4,Running,,1498.34,0.04,76.001,95.738,1.28,287.36,57.81,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
2026-05-13 23:45:00,Conveyor Belt 4,Running,,1495.26,0.05,76.106,95.848,1.30,297.35,58.84,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
import xgboost as xgb
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# 1. Load Data and Preprocess
df = pd.read_excel('Train/Belt 1 9 Months negative data.xlsx', parse_dates=['Timestamp'])
df = df.set_index('Timestamp')

# Define the expected ranges as a dictionary
expected_ranges_data = {
    'Vibration Frequency': {'Lower Bound': 1490, 'Upper Bound': 1510},
    'Vibration Amplitude': {'Lower Bound': 0.04, 'Upper Bound': 0.06},
    'Bearing Temperature': {'Lower Bound': 60, 'Upper Bound': 80},
    'Motor Temperature': {'Lower Bound': 80, 'Upper Bound': 100},
    'Belt Load': {'Lower Bound': 1.0, 'Upper Bound': 1.4},
    'Torque': {'Lower Bound': 280, 'Upper Bound': 320},
    'Noise Levels': {'Lower Bound': 55, 'Upper Bound': 65},
    'Current and Voltage': {'Lower Bound': 14, 'Upper Bound': 16},
    'Hydraulic Pressure': {'Lower Bound': 375, 'Upper Bound': 385},
    'Belt Thickness': {'Lower Bound': 1.5, 'Upper Bound': 1.7},
    'Roller Condition': {'Lower Bound': 65, 'Upper Bound': 100}
}

# Create a Pandas DataFrame from the dictionary
expected_ranges = pd.DataFrame(expected_ranges_data).T

def create_anomaly_features(df, expected_ranges):
    for col in df.columns:
        if col in expected_ranges.index:
            lower_bound = expected_ranges.loc[col, 'Lower Bound']
            upper_bound = expected_ranges.loc[col, 'Upper Bound']
            if pd.api.types.is_numeric_dtype(df[col]):
                df[f'{col}_anomaly'] = ((df[col] < lower_bound) | (df[col] > upper_bound)).astype(int)
                df[f'{col}_deviation'] = np.where(df[col] < lower_bound, df[col] - lower_bound, np.where(df[col] > upper_bound, df[col] - upper_bound, 0))
            else:
                print(f"Warning: Column '{col}' is not numeric and will be skipped.")
    return df

df = create_anomaly_features(df, expected_ranges)

def create_rolling_lag_features(df, column, window_sizes, lag_sizes):
    for window in window_sizes:
        df[f'{column}_rolling_mean_{window}h'] = df[column].rolling(window * 4).mean()
        df[f'{column}_rolling_std_{window}h'] = df[column].rolling(window * 4).std()
    for lag in lag_sizes:
        df[f'{column}_lag_{lag}'] = df[column].shift(lag)
    return df

for col in ['Motor Temperature', 'Roller Condition', 'Torque', 'Vibration Amplitude']:
    df = create_rolling_lag_features(df, col, [1, 24], [1, 4])

df['Maintenance'] = (df['Status'] == 'Maintenance').astype(int)
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoded_descriptions = encoder.fit_transform(df[['Description']])
encoded_df = pd.DataFrame(encoded_descriptions, index=df.index)
df = pd.concat([df, encoded_df], axis=1)

df = df.drop(['Status', 'Description'], axis=1)
df = df.dropna()

# 2. Prepare Data for XGBoost
X = df.drop(['Maintenance', 'Name'], axis=1) # Remove the name column
y = df['Maintenance']

In [26]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load and preprocess data
data = pd.read_excel('Train/Belt 1 9 Months negative data.xlsx')

# Define the expected ranges as a dictionary
expected_ranges_data = {
    'Vibration Frequency': {'Lower Bound': 1490, 'Upper Bound': 1510},
    'Vibration Amplitude': {'Lower Bound': 0.04, 'Upper Bound': 0.06},
    'Bearing Temperature': {'Lower Bound': 60, 'Upper Bound': 80},
    'Motor Temperature': {'Lower Bound': 80, 'Upper Bound': 100},
    'Belt Load': {'Lower Bound': 1.0, 'Upper Bound': 1.4},
    'Torque': {'Lower Bound': 280, 'Upper Bound': 320},
    'Noise Levels': {'Lower Bound': 55, 'Upper Bound': 65},
    'Current and Voltage': {'Lower Bound': 14, 'Upper Bound': 16},
    'Hydraulic Pressure': {'Lower Bound': 375, 'Upper Bound': 385},
    'Belt Thickness': {'Lower Bound': 1.5, 'Upper Bound': 1.7},
    'Roller Condition': {'Lower Bound': 65, 'Upper Bound': 100}
}

# List of sensor columns
sensor_cols = ['Vibration Frequency', 'Vibration Amplitude', 'Bearing Temperature',
               'Motor Temperature', 'Belt Load', 'Torque', 'Noise Levels',
               'Current and Voltage', 'Hydraulic Pressure', 'Belt Thickness', 'Roller Condition']

# Create spike features using the expected ranges from the dictionary
for col in sensor_cols:
    exp_min = expected_ranges_data[col]['Lower Bound']
    exp_max = expected_ranges_data[col]['Upper Bound']
    data[f'{col}_spike'] = ((data[col] < exp_min) | (data[col] > exp_max)).astype(int)
    
# Create rolling mean features (e.g., over 3 intervals)
for col in sensor_cols:
    data[f'{col}_roll_mean'] = data[col].rolling(window=3).mean()

# Create label column: assume 'Status' is a string where "Running" means normal operation
data['label'] = data['Status'].apply(lambda x: 0 if x.lower() == 'running' else 1)
data.dropna(inplace=True)

# Define features for the model: original sensor readings, spike indicators, and rolling means
feature_cols = sensor_cols + [f'{col}_spike' for col in sensor_cols] + [f'{col}_roll_mean' for col in sensor_cols]
X = data[feature_cols]
y = data['label']

# Split data (keeping time order in mind)
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0)

# Train an XGBoost classifier
model = xgb.XGBClassifier(objective='binary:logistic', eval_metric='logloss')
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
print('Test Accuracy:', accuracy_score(y_test, y_pred))


InvalidParameterError: The 'test_size' parameter of train_test_split must be a float in the range (0.0, 1.0), an int in the range [1, inf) or None. Got 0 instead.

In [42]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load your training and test data
train_data = pd.read_excel('Train/Belt 1 9 Months negative data.xlsx')
test_data  = pd.read_excel('Test/Belt 1 Test Data.xlsx')

# Define expected ranges as a dictionary
expected_ranges_data = {
    'Vibration Frequency': {'Lower Bound': 1490, 'Upper Bound': 1510},
    'Vibration Amplitude': {'Lower Bound': 0.04, 'Upper Bound': 0.06},
    'Bearing Temperature': {'Lower Bound': 60, 'Upper Bound': 80},
    'Motor Temperature': {'Lower Bound': 80, 'Upper Bound': 100},
    'Belt Load': {'Lower Bound': 1.0, 'Upper Bound': 1.4},
    'Torque': {'Lower Bound': 280, 'Upper Bound': 320},
    'Noise Levels': {'Lower Bound': 55, 'Upper Bound': 65},
    'Current and Voltage': {'Lower Bound': 14, 'Upper Bound': 16},
    'Hydraulic Pressure': {'Lower Bound': 375, 'Upper Bound': 385},
    'Belt Thickness': {'Lower Bound': 1.5, 'Upper Bound': 1.7},
    'Roller Condition': {'Lower Bound': 65, 'Upper Bound': 100}
}

# List of sensor columns
sensor_cols = ['Vibration Frequency', 'Vibration Amplitude', 'Bearing Temperature',
               'Motor Temperature', 'Belt Load', 'Torque', 'Noise Levels',
               'Current and Voltage', 'Hydraulic Pressure', 'Belt Thickness', 'Roller Condition']

# Process training data
for col in sensor_cols:
    exp_min = expected_ranges_data[col]['Lower Bound']
    exp_max = expected_ranges_data[col]['Upper Bound']
    train_data[f'{col}_spike'] = ((train_data[col] < exp_min) | (train_data[col] > exp_max)).astype(int)

for col in sensor_cols:
    train_data[f'{col}_roll_mean'] = train_data[col].rolling(window=3).mean()

# Create label column for training data: adjust lambda if needed based on your actual status strings
train_data['label'] = train_data['Status'].apply(lambda x: 0 if x.lower() == 'running' else 1)
train_data.dropna(inplace=True)

# Define features for training
feature_cols = sensor_cols + [f'{col}_spike' for col in sensor_cols] + [f'{col}_roll_mean' for col in sensor_cols]
X = train_data[feature_cols]
y = train_data['label']

# Split your training data for internal evaluation (if possible)
X_train, X_val, y_train, y_val = train_test_split(X, y, shuffle=False, test_size=0.2)

model = xgb.XGBClassifier(objective='binary:logistic', eval_metric='logloss')
model.fit(X_train, y_train)

# Evaluate on your validation set
y_val_pred = model.predict(X_val)
print('Validation Accuracy:', accuracy_score(y_val, y_val_pred))

# Process the test data similarly (note: test data may not have labels)
for col in sensor_cols:
    exp_min = expected_ranges_data[col]['Lower Bound']
    exp_max = expected_ranges_data[col]['Upper Bound']
    test_data[f'{col}_spike'] = ((test_data[col] < exp_min) | (test_data[col] > exp_max)).astype(int)

for col in sensor_cols:
    test_data[f'{col}_roll_mean'] = test_data[col].rolling(window=3).mean()

test_data.dropna(inplace=True)
test_features = sensor_cols + [f'{col}_spike' for col in sensor_cols] + [f'{col}_roll_mean' for col in sensor_cols]
X_test_live = test_data[test_features]

# Predict probabilities on test data
predicted_probs = model.predict_proba(X_test_live)[:, 1]  # Probability of "Down" event
test_data['predicted_down_prob'] = predicted_probs

print(test_data[['Timestamp', 'predicted_down_prob']])


ValueError: Invalid classes inferred from unique values of `y`.  Expected: [0], got [1]

In [32]:
train_data = pd.read_excel('Train/Belt 1 9 Months negative data.xlsx')
train_data["Status"].unique()

array(['Running', 'Maintenance', 'Down'], dtype=object)

In [41]:
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load your training and test data
train_data = pd.read_excel('Train/Belt 1 9 Months negative data.xlsx')
test_data  = pd.read_excel('Test/Belt 1 Test Data.xlsx')

# Define expected ranges as a dictionary
expected_ranges_data = {
    'Vibration Frequency': {'Lower Bound': 1490, 'Upper Bound': 1510},
    'Vibration Amplitude': {'Lower Bound': 0.04, 'Upper Bound': 0.06},
    'Bearing Temperature': {'Lower Bound': 60, 'Upper Bound': 80},
    'Motor Temperature': {'Lower Bound': 80, 'Upper Bound': 100},
    'Belt Load': {'Lower Bound': 1.0, 'Upper Bound': 1.4},
    'Torque': {'Lower Bound': 280, 'Upper Bound': 320},
    'Noise Levels': {'Lower Bound': 55, 'Upper Bound': 65},
    'Current and Voltage': {'Lower Bound': 14, 'Upper Bound': 16},
    'Hydraulic Pressure': {'Lower Bound': 375, 'Upper Bound': 385},
    'Belt Thickness': {'Lower Bound': 1.5, 'Upper Bound': 1.7},
    'Roller Condition': {'Lower Bound': 65, 'Upper Bound': 100}
}

# List of sensor columns
sensor_cols = ['Vibration Frequency', 'Vibration Amplitude', 'Bearing Temperature',
               'Motor Temperature', 'Belt Load', 'Torque', 'Noise Levels',
               'Current and Voltage', 'Hydraulic Pressure', 'Belt Thickness', 'Roller Condition']

# Process training data
for col in sensor_cols:
    exp_min = expected_ranges_data[col]['Lower Bound']
    exp_max = expected_ranges_data[col]['Upper Bound']
    train_data[f'{col}_spike'] = ((train_data[col] < exp_min) | (train_data[col] > exp_max)).astype(int)

for col in sensor_cols:
    train_data[f'{col}_roll_mean'] = train_data[col].rolling(window=3).mean()

# Create label column for training data: adjust lambda if needed based on your actual status strings
train_data['label'] = train_data['Status'].apply(lambda x: 0 if (x.lower() == 'running') else 1)
print(f"unique is {train_data['label'].unique()}")

# Define features for training
feature_cols = sensor_cols + [f'{col}_spike' for col in sensor_cols] + [f'{col}_roll_mean' for col in sensor_cols]
X = train_data[feature_cols]
y = train_data['label']

X_train, X_val, y_train, y_val = train_test_split(X, y, shuffle=False, test_size=0.2)

# Train an XGBoost classifier
model = xgb.XGBClassifier(objective='binary:logistic', eval_metric='logloss')
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_val)
print('Test Accuracy:', accuracy_score(y_val, y_pred))

unique is [0 1]
Test Accuracy: 1.0
