In [43]:
import kagglehub
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder

# Download latest version
path = kagglehub.dataset_download("atharvaingle/crop-recommendation-dataset")

print("Path to dataset files:", path)

Path to dataset files: /home/galich/.cache/kagglehub/datasets/atharvaingle/crop-recommendation-dataset/versions/1


In [44]:
# Load the dataset
df = pd.read_csv(f"{path}/Crop_recommendation.csv")

# Display the first few rows
print(df.head())

# Check column names
print(df.columns)


    N   P   K  temperature   humidity        ph    rainfall label
0  90  42  43    20.879744  82.002744  6.502985  202.935536  rice
1  85  58  41    21.770462  80.319644  7.038096  226.655537  rice
2  60  55  44    23.004459  82.320763  7.840207  263.964248  rice
3  74  35  40    26.491096  80.158363  6.980401  242.864034  rice
4  78  42  42    20.130175  81.604873  7.628473  262.717340  rice
Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'label'], dtype='object')


In [45]:
# Encode labels as integers
le = LabelEncoder()
df['label'] = le.fit_transform(df['label'])  # Converts crop names to integers

# Standardize input features
scaler = StandardScaler()
x = scaler.fit_transform(df.drop(columns=['label']))

# One-hot encode labels for multi-class classification
encoder = OneHotEncoder(sparse_output=False)
y_one_hot = encoder.fit_transform(df[['label']])  # Convert labels to one-hot

# Convert to float
x = x.astype(float)
y_one_hot = y_one_hot.astype(float)

print(x.shape, y_one_hot.shape)

(2200, 7) (2200, 22)


In [65]:
def outer_product(vec_a, vec_b):
    assert len(vec_a) > 0 and len(vec_b) > 0, "Input vectors must not be empty"

    out = np.zeros((len(vec_a), len(vec_b)))

    for i in range(len(vec_a)):  # Iterate over vec_a (rows)
        for j in range(len(vec_b)):  # Iterate over vec_b (columns)
            out[i][j] = vec_a[i] * vec_b[j]  # Multiply element-wise

    return out


def softmax(x):
    exp_x = np.exp(x - np.max(x))  # Stabilize softmax
    return exp_x / np.sum(exp_x)  # Convert scores into probabilities

def neural_network(input, weights):
    return softmax(np.dot(input, weights))  # Matrix multiplication + softmax


In [66]:
# Assuming num_inputs = 7 (features) and num_classes = 22 (crops)
num_inputs = 7
num_classes = 22

weights = np.random.randn(num_inputs, num_classes) * 0.01  # Small random weights
alpha = 0.01  # Learning rate
epochs = 1

for epoch in range(epochs):
    total_loss = 0

    for i in range(len(x)):  # Loop over dataset
        input = x[i]  # Features
        true = y_one_hot[i]  # One-hot encoded label

        # Forward pass
        pred = neural_network(input, weights)

        # Compute loss (cross-entropy)
        loss = (pred - true) ** 2  # Avoid log(0)
        total_loss += loss

        # Compute gradient (backpropagation)
        delta = pred - true  # Gradient of softmax + cross-entropy
        weight_deltas = outer_product(input, delta) # Outer product
        # Update weights
        weights -= alpha * weight_deltas

    print(f"Epoch {epoch+1}, Loss: {total_loss}")

print("Final Weights:", weights)


Epoch 1, Loss: [22.10278918 68.21769823 81.6406532  38.98094904 58.63676506 68.47564937
 66.81508742 31.69342364 81.43433857 48.4622307  78.9538973  85.74231611
 63.51649956 79.71259919 76.87102626 55.34089097 68.95283194 68.00145701
 72.45124246 76.7523029  50.88950414 70.83243408]
Final Weights: [[-2.62708745e-01  7.58858479e-01 -1.91658149e-01  2.13186162e-02
  -6.22741182e-01  1.07875332e+00  1.19209791e+00 -6.94783335e-02
   4.41119986e-01 -1.87893753e-01 -6.54002568e-01  3.27314797e-01
  -5.77894038e-01 -3.71781630e-01 -4.93989495e-01  6.05076667e-01
  -6.25004224e-01 -1.54922867e-02 -4.53965718e-01 -4.74037081e-01
   2.21428175e-01  4.34258318e-01]
 [ 8.28740887e-01  7.34210381e-01  3.98668109e-01  8.74049406e-02
  -7.34585579e-01 -5.49450126e-01 -3.05030191e-02  3.37359398e-01
   4.60011538e-04  1.76795246e-01  4.87320055e-01  3.10054905e-02
  -4.87951422e-01 -3.63745959e-02  5.88774130e-02 -5.84912173e-01
  -6.87915881e-01  2.86278357e-01  4.50364796e-01 -4.22845184e-01
   8.5

In [67]:
def predict_crop(input_features):
    input_features = scaler.transform([input_features])  # Normalize input
    probs = neural_network(input_features[0], weights)  # Get probability distribution
    predicted_index = np.argmax(probs)  # Crop with highest probability
    return le.inverse_transform([predicted_index])[0]  # Convert index to crop name

# Example prediction
test_input = [100, 48, 17, 23, 83, 7, 66]  # Example soil & climate values
predicted_crop = predict_crop(test_input)
print("Predicted Crop:", predicted_crop)


Predicted Crop: cotton


