In [2]:
import numpy as np
import pandas as pd

# Load the dataset
df = pd.read_csv('Crop_recommendation.csv')

# Display basic information about the dataset
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()

# Preprocess the data
# Separate the feature columns (everything except 'label') from the target
# column and convert both to NumPy arrays
x = df.drop('label', axis=1).values
y = df['label'].values

# Encode target labels: unique_labels holds the sorted distinct label values
# and y_encoded holds, for every row, the index of its label in unique_labels
unique_labels, y_encoded = np.unique(y, return_inverse=True)

# Function to compute Euclidean distance
def euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between ``a`` and ``b``.

    The element-wise difference ``a - b`` is squared, summed over all of its
    elements, and the square root of that total is returned.
    """
    delta = a - b
    squared_total = np.sum(delta ** 2)
    return np.sqrt(squared_total)

# k-NN prediction function
def predict(x_train, y_train, x_test, k=3):
    """Classify samples by majority vote among their k nearest training points.

    Distances are Euclidean. When several labels are equally frequent among
    the neighbours, the smallest label (in ``np.unique`` order) wins.

    Parameters
    ----------
    x_train : array-like
        Training samples along the first axis, e.g. shape
        ``(n_train, n_features)``.
    y_train : array-like
        One label per training sample.
    x_test : array-like
        Samples to classify, laid out like ``x_train``. An input with exactly
        one dimension fewer than ``x_train`` (for instance a single 1-D
        feature vector) is treated as one sample.
    k : int, default 3
        Number of neighbours that vote.

    Returns
    -------
    numpy.ndarray
        Predicted label for every test sample, in input order.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1, the training set is empty, or
        ``x_train`` and ``y_train`` disagree on the number of samples.
    """
    # A negative k would otherwise slice "all but the last |k|" neighbours and
    # silently return a meaningless vote.
    if k < 1:
        raise ValueError("k must be a positive integer, got {!r}".format(k))

    x_train = np.asarray(x_train)
    y_train = np.asarray(y_train)
    if x_train.shape[0] == 0:
        raise ValueError("x_train must contain at least one sample")
    if x_train.shape[0] != y_train.shape[0]:
        raise ValueError(
            "x_train and y_train must have the same number of samples, got "
            "{} and {}".format(x_train.shape[0], y_train.shape[0])
        )

    x_test = np.asarray(x_test)
    # Promote a lone sample to a one-row batch; iterating it directly would
    # compare each scalar feature against whole training rows.
    if x_test.size and x_test.ndim == x_train.ndim - 1:
        x_test = x_test[np.newaxis, ...]

    n_train = x_train.shape[0]
    predictions = []
    for test_point in x_test:
        # Distances to every training sample in one vectorised step; the
        # reshape sums over all per-sample elements whatever their layout.
        squared = ((x_train - test_point) ** 2).reshape(n_train, -1)
        distances = np.sqrt(np.sum(squared, axis=1))
        # Indices of the k closest training samples
        nearest_indices = np.argsort(distances)[:k]
        nearest_labels = y_train[nearest_indices]
        # Majority vote among the neighbours
        unique, counts = np.unique(nearest_labels, return_counts=True)
        predictions.append(unique[np.argmax(counts)])
    return np.array(predictions)

# Split the data into training and testing sets
def train_test_split(x, y, test_size=0.25, random_state=None):
    """Shuffle the rows of ``x`` and ``y`` together and split them in two.

    Parameters
    ----------
    x : numpy.ndarray
        Samples along the first axis.
    y : numpy.ndarray
        Targets, indexed in parallel with ``x``.
    test_size : float, default 0.25
        Fraction of rows, between 0 and 1 inclusive, placed in the test
        subset; the resulting row count is truncated toward zero.
    random_state : int or None, default None
        When not None, passed to ``np.random.seed`` before shuffling so the
        split is reproducible. When None, the current global NumPy random
        state is used as-is.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)``.

    Raises
    ------
    ValueError
        If ``test_size`` lies outside ``[0, 1]``.
    """
    if not 0 <= test_size <= 1:
        raise ValueError(
            "test_size must be between 0 and 1, got {!r}".format(test_size)
        )

    # Compare against None explicitly: a truthiness test would silently skip
    # seeding for the perfectly valid seed 0.
    if random_state is not None:
        np.random.seed(random_state)

    indices = np.arange(x.shape[0])
    np.random.shuffle(indices)

    # The first `test_set_size` shuffled positions form the test subset,
    # the remainder forms the training subset.
    test_set_size = int(x.shape[0] * test_size)
    test_indices = indices[:test_set_size]
    train_indices = indices[test_set_size:]

    return x[train_indices], x[test_indices], y[train_indices], y[test_indices]

# Hold out a quarter of the rows for testing; random_state=1 seeds the shuffle
x_train, x_test, y_train, y_test = train_test_split(
    x, y_encoded, test_size=0.25, random_state=1
)

# Classify one new sample. The values are positional and line up with the
# feature columns: N, P, K, temperature, humidity, ph, rainfall
new_sample = np.array([[
    14, 5, 36,
    24.92639065, 85.192744, 5.802985, 104.735536,
]])
predicted_class_encoded = predict(x_train, y_train, new_sample, k=3)
# Translate the encoded prediction back into its original label
predicted_class = unique_labels[predicted_class_encoded[0]]
print("Predicted crop for the new sample:", predicted_class)


    N   P   K  temperature   humidity        ph    rainfall label
0  90  42  43    20.879744  82.002744  6.502985  202.935536  rice
1  85  58  41    21.770462  80.319644  7.038096  226.655537  rice
2  60  55  44    23.004459  82.320763  7.840207  263.964248  rice
3  74  35  40    26.491096  80.158363  6.980401  242.864034  rice
4  78  42  42    20.130175  81.604873  7.628473  262.717340  rice
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   N            2200 non-null   int64  
 1   P            2200 non-null   int64  
 2   K            2200 non-null   int64  
 3   temperature  2200 non-null   float64
 4   humidity     2200 non-null   float64
 5   ph           2200 non-null   float64
 6   rainfall     2200 non-null   float64
 7   label        2200 non-null   object 
dtypes: float64(4), int64(3), object(1)
memory usage: 137.6+ KB
None
Predicted

In [4]:
import numpy as np
import pandas as pd
import pickle

# Read the crop recommendation data from disk
df = pd.read_csv('Crop_recommendation.csv')

# Split the frame into a target vector and a feature matrix (every column
# except 'label'), both as NumPy arrays
y = df['label'].values
x = df.drop(columns='label').values

# Integer-encode the labels: unique_labels lists the sorted distinct values,
# y_encoded gives each row's position in that list
unique_labels, y_encoded = np.unique(y, return_inverse=True)

class KNNModel:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    ``fit`` only stores the training data; all distance work happens in
    ``predict``. Instances carry three attributes: ``k``, ``x_train`` and
    ``y_train`` (the latter two exist only after ``fit``).
    """

    def __init__(self, k=3):
        """Remember ``k``, the number of neighbours that vote per prediction."""
        self.k = k

    def fit(self, x_train, y_train):
        """Store the training samples and their labels.

        Both inputs are converted with ``np.asarray`` so that lists are
        accepted and label lookup by index array works.

        Raises
        ------
        ValueError
            If the training set is empty or the two inputs disagree on the
            number of samples.
        """
        x_train = np.asarray(x_train)
        y_train = np.asarray(y_train)
        if x_train.shape[0] == 0:
            raise ValueError("x_train must contain at least one sample")
        if x_train.shape[0] != y_train.shape[0]:
            raise ValueError(
                "x_train and y_train must have the same number of samples, "
                "got {} and {}".format(x_train.shape[0], y_train.shape[0])
            )
        self.x_train = x_train
        self.y_train = y_train

    def euclidean_distance(self, a, b):
        """Return the Euclidean distance between ``a`` and ``b``."""
        return np.sqrt(np.sum((a - b) ** 2))

    def predict(self, x_test):
        """Predict a label for every sample in ``x_test``.

        Each sample receives the most frequent label among its ``k`` nearest
        training samples; on a tie the smallest label (in ``np.unique``
        order) wins. An input with exactly one dimension fewer than the
        training data (e.g. a single 1-D feature vector) is treated as one
        sample.

        Returns
        -------
        numpy.ndarray
            Predicted labels in input order.

        Raises
        ------
        AttributeError
            If called before ``fit``.
        ValueError
            If ``k`` is smaller than 1.
        """
        if not (hasattr(self, "x_train") and hasattr(self, "y_train")):
            raise AttributeError(
                "KNNModel has not been fitted; call fit() before predict()"
            )
        # Checked here rather than in __init__ because k is a plain attribute
        # that can be reassigned, and unpickling does not run __init__. A
        # negative k would otherwise slice "all but the last |k|" neighbours.
        if self.k < 1:
            raise ValueError(
                "k must be a positive integer, got {!r}".format(self.k)
            )

        x_train = np.asarray(self.x_train)
        y_train = np.asarray(self.y_train)
        x_test = np.asarray(x_test)
        # Promote a lone sample to a one-row batch; iterating it directly
        # would compare each scalar feature against whole training rows.
        if x_test.size and x_test.ndim == x_train.ndim - 1:
            x_test = x_test[np.newaxis, ...]

        n_train = x_train.shape[0]
        predictions = []
        for test_point in x_test:
            # Distances to every training sample in one vectorised step
            squared = ((x_train - test_point) ** 2).reshape(n_train, -1)
            distances = np.sqrt(np.sum(squared, axis=1))
            # Indices of the k closest training samples
            nearest_indices = np.argsort(distances)[:self.k]
            nearest_labels = y_train[nearest_indices]
            # Majority vote among the neighbours
            unique, counts = np.unique(nearest_labels, return_counts=True)
            predictions.append(unique[np.argmax(counts)])
        return np.array(predictions)

# Split the data into training and testing sets
def train_test_split(x, y, test_size=0.25, random_state=None):
    """Shuffle the rows of ``x`` and ``y`` together and split them in two.

    Parameters
    ----------
    x : numpy.ndarray
        Samples along the first axis.
    y : numpy.ndarray
        Targets, indexed in parallel with ``x``.
    test_size : float, default 0.25
        Fraction of rows, between 0 and 1 inclusive, placed in the test
        subset; the resulting row count is truncated toward zero.
    random_state : int or None, default None
        When not None, passed to ``np.random.seed`` before shuffling so the
        split is reproducible. When None, the current global NumPy random
        state is used as-is.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)``.

    Raises
    ------
    ValueError
        If ``test_size`` lies outside ``[0, 1]``.
    """
    if not 0 <= test_size <= 1:
        raise ValueError(
            "test_size must be between 0 and 1, got {!r}".format(test_size)
        )

    # Compare against None explicitly: a truthiness test would silently skip
    # seeding for the perfectly valid seed 0.
    if random_state is not None:
        np.random.seed(random_state)

    indices = np.arange(x.shape[0])
    np.random.shuffle(indices)

    # The first `test_set_size` shuffled positions form the test subset,
    # the remainder forms the training subset.
    test_set_size = int(x.shape[0] * test_size)
    test_indices = indices[:test_set_size]
    train_indices = indices[test_set_size:]

    return x[train_indices], x[test_indices], y[train_indices], y[test_indices]

# Hold out a quarter of the rows for testing; random_state=1 seeds the shuffle
x_train, x_test, y_train, y_test = train_test_split(
    x, y_encoded, test_size=0.25, random_state=1
)

# Build the classifier and hand it the training data
knn_model = KNNModel(k=3)
knn_model.fit(x_train, y_train)

# Serialise the fitted model to disk
pkl_filename = "knn_model.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(knn_model, file)

print("Model saved to", pkl_filename)

# Read the model back from the file that was just written.
# NOTE: unpickling executes code embedded in the file, so only load pickles
# from a trusted source.
with open(pkl_filename, 'rb') as file:
    loaded_knn_model = pickle.load(file)

# Classify one new sample with the restored model. The values are positional
# and must follow the order of the feature columns used for training.
new_sample = np.array([[
    14, 5, 36,
    24.92639065, 85.192744, 5.802985, 104.735536,
]])
predicted_class_encoded = loaded_knn_model.predict(new_sample)
# Translate the encoded prediction back into its original label
predicted_class = unique_labels[predicted_class_encoded[0]]
print("Predicted crop for the new sample:", predicted_class)


Model saved to knn_model.pkl
Predicted crop for the new sample: pomegranate


In [5]:
import pickle
import numpy as np
import pandas as pd

# Read the crop recommendation data from disk
df = pd.read_csv('Crop_recommendation.csv')

# Target vector first, then the feature matrix built from every column
# except 'label'; both are NumPy arrays
y = df['label'].values
x = df.drop(columns='label').values

# Integer-encode the labels: unique_labels lists the sorted distinct values,
# y_encoded gives each row's position in that list
unique_labels, y_encoded = np.unique(y, return_inverse=True)

class KNNModel:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    ``fit`` only stores the training data; all distance work happens in
    ``predict``. Instances carry three attributes: ``k``, ``x_train`` and
    ``y_train`` (the latter two exist only after ``fit``).
    """

    def __init__(self, k=3):
        """Remember ``k``, the number of neighbours that vote per prediction."""
        self.k = k

    def fit(self, x_train, y_train):
        """Store the training samples and their labels.

        Both inputs are converted with ``np.asarray`` so that lists are
        accepted and label lookup by index array works.

        Raises
        ------
        ValueError
            If the training set is empty or the two inputs disagree on the
            number of samples.
        """
        x_train = np.asarray(x_train)
        y_train = np.asarray(y_train)
        if x_train.shape[0] == 0:
            raise ValueError("x_train must contain at least one sample")
        if x_train.shape[0] != y_train.shape[0]:
            raise ValueError(
                "x_train and y_train must have the same number of samples, "
                "got {} and {}".format(x_train.shape[0], y_train.shape[0])
            )
        self.x_train = x_train
        self.y_train = y_train

    def euclidean_distance(self, a, b):
        """Return the Euclidean distance between ``a`` and ``b``."""
        return np.sqrt(np.sum((a - b) ** 2))

    def predict(self, x_test):
        """Predict a label for every sample in ``x_test``.

        Each sample receives the most frequent label among its ``k`` nearest
        training samples; on a tie the smallest label (in ``np.unique``
        order) wins. An input with exactly one dimension fewer than the
        training data (e.g. a single 1-D feature vector) is treated as one
        sample.

        Returns
        -------
        numpy.ndarray
            Predicted labels in input order.

        Raises
        ------
        AttributeError
            If called before ``fit``.
        ValueError
            If ``k`` is smaller than 1.
        """
        if not (hasattr(self, "x_train") and hasattr(self, "y_train")):
            raise AttributeError(
                "KNNModel has not been fitted; call fit() before predict()"
            )
        # Checked here rather than in __init__ because k is a plain attribute
        # that can be reassigned, and unpickling does not run __init__. A
        # negative k would otherwise slice "all but the last |k|" neighbours.
        if self.k < 1:
            raise ValueError(
                "k must be a positive integer, got {!r}".format(self.k)
            )

        x_train = np.asarray(self.x_train)
        y_train = np.asarray(self.y_train)
        x_test = np.asarray(x_test)
        # Promote a lone sample to a one-row batch; iterating it directly
        # would compare each scalar feature against whole training rows.
        if x_test.size and x_test.ndim == x_train.ndim - 1:
            x_test = x_test[np.newaxis, ...]

        n_train = x_train.shape[0]
        predictions = []
        for test_point in x_test:
            # Distances to every training sample in one vectorised step
            squared = ((x_train - test_point) ** 2).reshape(n_train, -1)
            distances = np.sqrt(np.sum(squared, axis=1))
            # Indices of the k closest training samples
            nearest_indices = np.argsort(distances)[:self.k]
            nearest_labels = y_train[nearest_indices]
            # Majority vote among the neighbours
            unique, counts = np.unique(nearest_labels, return_counts=True)
            predictions.append(unique[np.argmax(counts)])
        return np.array(predictions)

# Split the data into training and testing sets
def train_test_split(x, y, test_size=0.25, random_state=None):
    """Shuffle the rows of ``x`` and ``y`` together and split them in two.

    Parameters
    ----------
    x : numpy.ndarray
        Samples along the first axis.
    y : numpy.ndarray
        Targets, indexed in parallel with ``x``.
    test_size : float, default 0.25
        Fraction of rows, between 0 and 1 inclusive, placed in the test
        subset; the resulting row count is truncated toward zero.
    random_state : int or None, default None
        When not None, passed to ``np.random.seed`` before shuffling so the
        split is reproducible. When None, the current global NumPy random
        state is used as-is.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)``.

    Raises
    ------
    ValueError
        If ``test_size`` lies outside ``[0, 1]``.
    """
    if not 0 <= test_size <= 1:
        raise ValueError(
            "test_size must be between 0 and 1, got {!r}".format(test_size)
        )

    # Compare against None explicitly: a truthiness test would silently skip
    # seeding for the perfectly valid seed 0.
    if random_state is not None:
        np.random.seed(random_state)

    indices = np.arange(x.shape[0])
    np.random.shuffle(indices)

    # The first `test_set_size` shuffled positions form the test subset,
    # the remainder forms the training subset.
    test_set_size = int(x.shape[0] * test_size)
    test_indices = indices[:test_set_size]
    train_indices = indices[test_set_size:]

    return x[train_indices], x[test_indices], y[train_indices], y[test_indices]

# Hold out a quarter of the rows for testing; random_state=1 seeds the shuffle
x_train, x_test, y_train, y_test = train_test_split(
    x, y_encoded, test_size=0.25, random_state=1
)

# Build the classifier and hand it the training data
knn_model = KNNModel(k=3)
knn_model.fit(x_train, y_train)

# Persist the model together with the label lookup table, so that encoded
# predictions can be mapped back to label names after loading
pkl_filename = "knn_model.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(
        {
            'model': knn_model,
            'labels': unique_labels,
        },
        file,
    )
