In [2]:
import numpy as np
import pandas as pd
import pickle

# Load the dataset
df = pd.read_csv('Fertilizer.csv')

# Display basic information about the dataset
print(df.head())
# DataFrame.info() prints its summary itself and returns None, so it must not
# be wrapped in print() (that would emit a stray "None" after the summary).
df.info()

# Check for missing values
print(df.isnull().sum())

# Split the data into features and target.
# Columns are selected by name, so the feature order is N, P, K even though
# the CSV stores them as Nitrogen, Potassium, Phosphorous. Any point passed to
# the model later must use this same N, P, K order.
x = df[['Nitrogen', 'Phosphorous', 'Potassium']].values
y = df['Fertilizer Name'].values  # 'Fertilizer Name' is the target column

# Encode target labels as integers: unique_labels holds the sorted distinct
# fertilizer names and y_encoded[i] is the index of y[i] in unique_labels,
# so unique_labels[y_encoded] reconstructs y.
unique_labels, y_encoded = np.unique(y, return_inverse=True)

   Nitrogen  Potassium  Phosphorous                Fertilizer Name
0        37          0            0                           Urea
1        12          0           36                            DAP
2         7          9           30  Fourteen-Thirty Five-Fourteen
3        22          0           20      Twenty Eight-Twenty Eight
4        35          0            0                           Urea
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99 entries, 0 to 98
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Nitrogen         99 non-null     int64 
 1   Potassium        99 non-null     int64 
 2   Phosphorous      99 non-null     int64 
 3   Fertilizer Name  99 non-null     object
dtypes: int64(3), object(1)
memory usage: 3.2+ KB
None
Nitrogen           0
Potassium          0
Phosphorous        0
Fertilizer Name    0
dtype: int64


In [3]:
class KNNModel:
    """Minimal k-nearest-neighbours classifier using Euclidean distance.

    ``fit`` only stores the training data; all work happens in ``predict``,
    which compares each query point against every stored training point and
    returns the most frequent label among the ``k`` closest ones.
    """

    def __init__(self, k=3):
        """Create a classifier that votes among the ``k`` nearest points.

        Args:
            k: Number of neighbours consulted for each prediction.

        Raises:
            ValueError: If ``k`` is smaller than 1 (no neighbour could vote).
        """
        if k < 1:
            raise ValueError(f"k must be at least 1, got {k!r}")
        self.k = k

    def fit(self, x_train, y_train):
        """Store the training samples and their labels.

        Args:
            x_train: Array-like of numeric feature rows, one row per sample.
            y_train: Array-like of labels, one per row of ``x_train``.

        Raises:
            ValueError: If the number of samples and labels differ.
        """
        # Features are kept as floats so that differences between unsigned
        # or small-integer values cannot wrap around during subtraction.
        x_train = np.asarray(x_train, dtype=float)
        # Labels become an array so they can be fancy-indexed in predict().
        y_train = np.asarray(y_train)
        if len(x_train) != len(y_train):
            raise ValueError(
                f"x_train has {len(x_train)} samples but y_train has "
                f"{len(y_train)} labels"
            )
        self.x_train = x_train
        self.y_train = y_train

    def euclidean_distance(self, a, b):
        """Return the Euclidean (L2) distance between points ``a`` and ``b``."""
        # Cast before subtracting: with unsigned dtypes ``a - b`` and the
        # squaring would otherwise overflow silently and give wrong distances.
        diff = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
        return np.sqrt(np.sum(diff ** 2))

    def predict(self, x_test):
        """Predict a label for every point in ``x_test``.

        Args:
            x_test: Array-like of numeric query points, one per row, with the
                same feature order as the data given to ``fit``.

        Returns:
            numpy.ndarray with one predicted label per query point. When
            several labels are equally frequent among the neighbours, the
            smallest label (first in ``np.unique`` order) is returned.
        """
        x_test = np.asarray(x_test, dtype=float)
        labels = np.asarray(self.y_train)
        predictions = []
        for test_point in x_test:
            distances = np.array(
                [self.euclidean_distance(test_point, train_point)
                 for train_point in self.x_train]
            )
            # A stable sort breaks distance ties by training order, so the
            # chosen neighbours do not depend on the sort implementation.
            nearest_indices = np.argsort(distances, kind="stable")[:self.k]
            nearest_labels = labels[nearest_indices]
            # Majority vote among the selected neighbours.
            unique, counts = np.unique(nearest_labels, return_counts=True)
            predictions.append(unique[np.argmax(counts)])
        return np.array(predictions)


In [4]:
# Split the data into training and testing sets
def train_test_split(x, y, test_size=0.25, random_state=None):
    """Shuffle the samples and split them into training and test subsets.

    Args:
        x: NumPy array of features with one sample per row.
        y: NumPy array of labels, one per row of ``x``.
        test_size: Fraction of the samples placed in the test subset; the
            test subset holds ``int(n_samples * test_size)`` rows.
        random_state: Optional integer seed. When given (including ``0``),
            the global NumPy random generator is reseeded with it so the
            split is reproducible. ``None`` leaves the generator untouched.

    Returns:
        Tuple ``(x_train, x_test, y_train, y_test)``.

    Raises:
        ValueError: If ``x`` and ``y`` contain a different number of samples.
    """
    n_samples = x.shape[0]
    if len(y) != n_samples:
        raise ValueError(
            f"x has {n_samples} samples but y has {len(y)} labels"
        )

    # Compare against None explicitly: a plain truthiness test would treat
    # the valid seed 0 as "no seed" and silently give a non-reproducible split.
    if random_state is not None:
        np.random.seed(random_state)
    indices = np.arange(n_samples)
    np.random.shuffle(indices)

    # The first chunk of the shuffled indices is the test set, the rest train.
    test_set_size = int(n_samples * test_size)
    test_indices = indices[:test_set_size]
    train_indices = indices[test_set_size:]

    x_train, x_test = x[train_indices], x[test_indices]
    y_train, y_test = y[train_indices], y[test_indices]

    return x_train, x_test, y_train, y_test

# Hold out a quarter of the rows for testing; the fixed seed makes the
# shuffle, and therefore the split, repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y_encoded,
    test_size=0.25,
    random_state=1,
)

# Build a 3-nearest-neighbour classifier and hand it the training rows.
knn_model = KNNModel(k=3)
knn_model.fit(x_train, y_train)

# Persist the fitted model together with the label names, so the integer
# classes it predicts can be mapped back to fertilizer names after loading.
# NOTE(review): pickle stores KNNModel by reference, so whoever loads this
# file needs the same class definition available under the same module path.
pkl_filename = "knn_fertilizer_model.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(
        {'model': knn_model, 'labels': unique_labels},
        file,
    )
