# Train KNN Model

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import joblib

# Train a K-nearest-neighbours classifier that predicts 'Overall Quality'
# from the remaining columns of the agriculture quality dataset, report
# hold-out accuracy, and persist the model together with the fitted
# preprocessing objects needed to use it later.

# Load data
data = pd.read_csv('agriculture_quality_expanded_with_reordered_batch.csv')

# Encode categorical variables as integer codes. The encoders are kept so
# the codes can be mapped back to the original labels at inference time.
# NOTE(review): LabelEncoder is documented for target labels; using integer
# codes for the 'Type' feature gives a distance-based model an arbitrary
# ordering between categories -- consider one-hot encoding; confirm intent.
le_type = LabelEncoder()
le_quality = LabelEncoder()
data['Type'] = le_type.fit_transform(data['Type'])
data['Overall Quality'] = le_quality.fit_transform(data['Overall Quality'])

# Separate features and target. 'Product' is dropped from the features.
X = data.drop(['Product', 'Overall Quality'], axis=1)
y = data['Overall Quality']

# Remember the feature order: after scaling, the training data is a plain
# array, so column names are no longer carried along with the model.
feature_columns = list(X.columns)

# Split the data (80% train / 20% test, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features. The scaler is fitted on the training split only
# and then applied to the test split, so test statistics do not leak into
# training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train KNN model
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# Predict on the test set
y_pred = knn.predict(X_test)

# Calculate accuracy
# NOTE(review): the recorded run reports a perfect score; that may indicate
# a feature that directly determines the target (e.g. a batch/ordering
# column) -- verify before relying on this number.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Save the preprocessing objects alongside the model. The classifier was
# trained on encoded and standardized inputs, so it only gives meaningful
# predictions when new data goes through the same fitted encoder and scaler,
# in the same column order; le_quality decodes predictions back to labels.
joblib.dump(
    {
        'scaler': scaler,
        'le_type': le_type,
        'le_quality': le_quality,
        'feature_columns': feature_columns,
    },
    'knn_preprocessing.pkl',
)

# Save the model
joblib.dump(knn, 'knn_model.pkl')
print('Model saved as knn_model.pkl')


Accuracy: 1.00
Model saved as knn_model.pkl
