In [15]:
!pip install pandas scikit-learn xgboost numpy joblib




In [25]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error

# Train a multi-output regressor that predicts Nitrogen / Phosphorous /
# Potassium from temperature, humidity, moisture and soil type, then report
# the mean absolute error of each target on a held-out 20% split.

# Load dataset
df = pd.read_csv('crop_dataset.csv')

# Features and targets ('Crop Type' is deliberately not used as a feature).
# The spelling 'Temparature' is the column name this notebook reads from the CSV.
features = ['Temparature', 'Humidity', 'Moisture', 'Soil Type']
target = ['Nitrogen', 'Phosphorous', 'Potassium']

# Fail early, with a readable message, if the CSV lacks an expected column.
missing_columns = [col for col in features + target if col not in df.columns]
if missing_columns:
    raise KeyError(f"crop_dataset.csv is missing expected column(s): {missing_columns}")

# Split into X and y
X = df[features]
y = df[target]

# Define categorical and numeric features
categorical_features = ['Soil Type']
numeric_features = ['Temparature', 'Humidity', 'Moisture']

# Split the raw rows BEFORE fitting any preprocessing, so the encoder only
# ever learns from training data (same test_size / random_state as before).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# One-hot encode the categorical column; the remaining (numeric) columns are
# passed through unchanged and appended after the one-hot columns.
# handle_unknown='ignore' encodes a category that was not present in the
# training rows as all zeros instead of raising during transform.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ],
    remainder='passthrough'
)

# Fit on the training rows only, then apply the fitted transform elsewhere.
X_train = preprocessor.fit_transform(X_train_raw)
X_test = preprocessor.transform(X_test_raw)

# Encoded view of the full feature frame, kept under its original name for
# any later cell that still refers to it.
X_encoded = preprocessor.transform(X)

# Model: one independent random forest per target column
model = MultiOutputRegressor(RandomForestRegressor(random_state=42))
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluate: 'raw_values' returns one MAE per output column, in the same
# column order as `target`.
per_target_mae = mean_absolute_error(y_test, y_pred, multioutput='raw_values')
mae_n, mae_p, mae_k = per_target_mae

print(f"Nitrogen MAE: {mae_n:.2f}")
print(f"Phosphorous MAE: {mae_p:.2f}")
print(f"Potassium MAE: {mae_k:.2f}")


Nitrogen MAE: 9.65
Phosphorous MAE: 10.21
Potassium MAE: 4.53


In [31]:
import joblib

# Persist the fitted preprocessor (the ColumnTransformer holding the one-hot
# encoder) so the same encoding can be re-applied outside this notebook.
joblib.dump(value=preprocessor, filename='preprocessor.pkl')

# Persist the trained model. It is written under two filenames; both are
# kept so that anything loading either name continues to work.
joblib.dump(value=model, filename='npk_predictor_model.pkl')
joblib.dump(value=model, filename='model.pkl')

['model.pkl']

In [27]:
def predict_npk(temperature, humidity, moisture, soil_type, *,
                fitted_preprocessor=None, fitted_model=None):
    """Predict, print and return Nitrogen/Phosphorous/Potassium for one sample.

    Parameters
    ----------
    temperature, humidity, moisture : numeric
        Values placed in the 'Temparature', 'Humidity' and 'Moisture'
        columns of the single-row input frame.
    soil_type : str
        Value placed in the 'Soil Type' column.
    fitted_preprocessor : object, optional (keyword-only)
        Object whose ``transform`` is applied to the input frame. Defaults to
        the notebook-level ``preprocessor``.
    fitted_model : object, optional (keyword-only)
        Object whose ``predict`` is applied to the transformed input.
        Defaults to the notebook-level ``model``.

    Returns
    -------
    tuple of float
        ``(nitrogen, phosphorous, potassium)``, unrounded. The printed
        summary shows the same values rounded to whole numbers.

    Raises
    ------
    ValueError
        If the preprocessor exposes the fitted categories of its 'cat'
        transformer and ``soil_type`` is not one of them.
    """
    # Fall back to the notebook globals only when no override is supplied,
    # so the function also works with artefacts loaded in another session.
    encoder = preprocessor if fitted_preprocessor is None else fitted_preprocessor
    regressor = model if fitted_model is None else fitted_model

    # Reject soil types the encoder was not fitted on with a readable message.
    # The check is skipped when the preprocessor does not expose categories.
    cat_transformer = getattr(encoder, 'named_transformers_', {}).get('cat')
    fitted_categories = getattr(cat_transformer, 'categories_', None)
    if fitted_categories:
        known_soil_types = list(fitted_categories[0])
        if soil_type not in known_soil_types:
            raise ValueError(
                f"Unknown soil type {soil_type!r}; expected one of {known_soil_types}"
            )

    # Input must match the column names used during training
    input_df = pd.DataFrame([{
        'Temparature': temperature,
        'Humidity': humidity,
        'Moisture': moisture,
        'Soil Type': soil_type
    }])

    # Apply the same preprocessing that was used for the training data
    input_encoded = encoder.transform(input_df)

    # Predict; the model returns one row holding the three target values
    prediction = regressor.predict(input_encoded)
    n, p, k = (float(value) for value in prediction[0])

    print("Predicted NPK for:")
    print(f"  Soil Type: {soil_type}")
    print(f"  Nitrogen: {round(n)}")
    print(f"  Phosphorous: {round(p)}")
    print(f"  Potassium: {round(k)}")

    return n, p, k

# Example: one sample with temperature 30, humidity 45, moisture 42 on "Black" soil
predict_npk(temperature=30, humidity=45, moisture=42, soil_type="Black")


Predicted NPK for:
  Soil Type: Black
  Nitrogen: 18
  Phosphorous: 16
  Potassium: 6
