In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
import pickle

# Read the check-valve quality-control records into a DataFrame.
df = pd.read_csv("check_valve_qc_dataset.csv")

# Columns fed to the classifier as inputs.
# NOTE(review): 'valve_id' looks like an identifier rather than a
# measurement -- confirm it is really meant to be a model feature.
feature_columns = [
    'valve_id',
    'body_height',
    'inlet_radius',
    'outlet_radius',
    'disc_thickness',
    'spring_length',
]
X = df[feature_columns]
# Target is kept as a single-column DataFrame; it is flattened at fit time.
y = df[['defective']]

# Hold out one third of the rows for testing; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=1/3,
    random_state=0,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits so the test rows never influence them.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Hyper-parameter search space for the k-nearest-neighbours classifier:
# 1..5 neighbours, both weighting schemes, and Minkowski power p of 1 or 2.
param_grid = dict(
    n_neighbors=np.arange(1, 6),
    weights=['uniform', 'distance'],
    algorithm=['auto'],
    p=[1, 2],
)

# Exhaustive search over the grid, scored by weighted F1 and run on all
# available cores. `cv` is not passed, so the library default applies.
# refit=True retrains the best configuration on the full training data.
# NOTE(review): X_train_scaled was scaled with statistics from the whole
# training split, so every CV fold shares those statistics; wrapping the
# scaler and classifier in a Pipeline would avoid that if it matters.
grid = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=param_grid,
    scoring='f1_weighted',
    n_jobs=-1,
    refit=True,
)
# The target is a single-column DataFrame; flatten it to a 1-D array.
grid.fit(X_train_scaled, y_train.values.ravel())

# Save the model and scaler
# Both objects are pickled: the fitted search object and the scaler that
# was fit on the training features, so inference code can load the pair.
# The context managers guarantee each file is flushed and closed even if
# pickling raises, instead of leaving the handle open until garbage
# collection.
with open("knn_check_valve_model.pkl", "wb") as model_file:
    pickle.dump(grid, model_file)
with open("knn_check_valve_scaler.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)