In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib

# Read the base crop table; the relative path is resolved against the
# current working directory.
df = pd.read_csv(filepath_or_buffer="Crop_recommendation.csv")

# Per-crop sampling bounds for the synthetic rows.
# Layout: crop name -> feature name -> (low, high) tuple.
crop_ranges = {
    'rice': {
        'N': (90, 120),
        'P': (40, 60),
        'K': (40, 60),
        'temperature': (25, 35),
        'humidity': (70, 90),
        'ph': (5.5, 6.5),
        'rainfall': (150, 250),
    },
    'maize': {
        'N': (80, 100),
        'P': (30, 60),
        'K': (30, 60),
        'temperature': (25, 30),
        'humidity': (50, 70),
        'ph': (6.0, 7.0),
        'rainfall': (80, 120),
    },
    'jute': {
        'N': (40, 70),
        'P': (20, 40),
        'K': (20, 40),
        'temperature': (24, 30),
        'humidity': (70, 90),
        'ph': (6.0, 7.0),
        'rainfall': (150, 200),
    },
    'blackgram': {
        'N': (20, 50),
        'P': (10, 30),
        'K': (10, 30),
        'temperature': (20, 30),
        'humidity': (50, 80),
        'ph': (6.0, 7.0),
        'rainfall': (60, 100),
    },
    'coconut': {
        'N': (70, 90),
        'P': (35, 50),
        'K': (35, 50),
        'temperature': (27, 31),
        'humidity': (75, 85),
        'ph': (6.0, 6.5),
        'rainfall': (240, 270),
    },
}

# Generate synthetic rows
def generate_synthetic_crop(crop_name, n_samples, ranges, seed=42):
    """Draw ``n_samples`` synthetic feature rows for a single crop.

    Parameters
    ----------
    crop_name : str
        Value written to the ``label`` column of every generated row; it is
        also mixed into the random seed (see below).
    n_samples : int
        Number of rows to generate.
    ranges : dict
        Maps each of ``'N'``, ``'P'``, ``'K'``, ``'temperature'``,
        ``'humidity'``, ``'ph'`` and ``'rainfall'`` to a ``(low, high)`` pair.
        ``N``/``P``/``K`` are drawn as integers and the remaining features as
        floats, all from the half-open interval ``[low, high)``.
    seed : int, optional
        Non-negative base seed. The same ``(seed, crop_name)`` pair always
        reproduces the same rows.

    Returns
    -------
    pandas.DataFrame
        ``n_samples`` rows with the seven feature columns plus ``label``.
    """
    # Use a private generator instead of np.random.seed(): re-seeding the
    # global state on every call made each crop replay the *same* random
    # stream (so all crops were rescaled copies of one another) and also
    # reset the caller's global RNG. Mixing the crop name into the seed keeps
    # the output reproducible while giving every crop independent draws.
    rng = np.random.default_rng([seed, *str(crop_name).encode("utf-8")])
    return pd.DataFrame({
        'N': rng.integers(*ranges['N'], size=n_samples),
        'P': rng.integers(*ranges['P'], size=n_samples),
        'K': rng.integers(*ranges['K'], size=n_samples),
        'temperature': rng.uniform(*ranges['temperature'], size=n_samples),
        'humidity': rng.uniform(*ranges['humidity'], size=n_samples),
        'ph': rng.uniform(*ranges['ph'], size=n_samples),
        'rainfall': rng.uniform(*ranges['rainfall'], size=n_samples),
        'label': [crop_name] * n_samples
    })

# Build 50 synthetic rows per configured crop, stack them, then append the
# stacked block to the loaded dataset with a fresh 0..n-1 index.
synthetic_parts = [
    generate_synthetic_crop(name, 50, bounds)
    for name, bounds in crop_ranges.items()
]
synthetic_data = pd.concat(synthetic_parts)
df = pd.concat((df, synthetic_data), ignore_index=True)

# Encode labels: map each crop name to an integer class id
le = LabelEncoder()
df['label_encoded'] = le.fit_transform(df['label'])

# Features and labels
X = df.drop(columns=['label', 'label_encoded'])
y = df['label_encoded']

# Split data BEFORE scaling so that the held-out rows cannot influence the
# scaler's mean/variance (fitting on the full dataset leaks test statistics
# into preprocessing). Same split parameters as before: 80/20, stratified.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Feature scaling: learn the statistics from the training rows only, then
# apply the same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any later code that still refers to the fully scaled matrix
# keeps working; it uses the train-fitted scaler.
X_scaled = scaler.transform(X)

# Train model
# `use_label_encoder` is intentionally not passed: current XGBoost releases no
# longer recognise it and only emit a
# 'Parameters: { "use_label_encoder" } are not used.' warning.
# The targets are already integer class ids produced by LabelEncoder.
model = XGBClassifier(eval_metric='mlogloss', random_state=42)
model.fit(X_train, y_train)

# Score the held-out split: per-class precision/recall/F1 (labelled with the
# original crop names) and the raw confusion matrix.
y_pred = model.predict(X_test)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=le.classes_)

# Persist the fitted model together with the label encoder and scaler that
# are needed to prepare inputs and decode outputs at inference time.
for artifact, filename in (
    (model, 'model.pkl'),
    (le, 'labelencoder.pkl'),
    (scaler, 'standscaler.pkl'),
):
    joblib.dump(artifact, filename)

# Show the metrics
print("\nClassification Report:\n", report)
print("Confusion Matrix:\n", conf_matrix)


Parameters: { "use_label_encoder" } are not used.




Classification Report:
               precision    recall  f1-score   support

       apple       1.00      1.00      1.00        20
      banana       1.00      1.00      1.00        20
   blackgram       1.00      0.97      0.98        30
    chickpea       1.00      1.00      1.00        20
     coconut       1.00      1.00      1.00        30
      coffee       1.00      0.95      0.97        20
      cotton       1.00      1.00      1.00        20
      grapes       1.00      1.00      1.00        20
        jute       0.97      0.97      0.97        30
 kidneybeans       1.00      1.00      1.00        20
      lentil       1.00      1.00      1.00        20
       maize       0.97      1.00      0.98        30
       mango       1.00      0.95      0.97        20
   mothbeans       0.95      1.00      0.98        20
    mungbean       1.00      1.00      1.00        20
   muskmelon       1.00      1.00      1.00        20
      orange       1.00      1.00      1.00        20
  

In [None]:
import joblib
import numpy as np

# Restore the fitted artifacts from disk (model, then scaler, then label
# encoder). joblib files are pickles: only load files you trust.
model, scaler, le = (
    joblib.load(filename)
    for filename in ('model.pkl', 'standscaler.pkl', 'labelencoder.pkl')
)

# Hand-written feature rows to try the model on (seven values per row)
samples = [
    [90, 42, 43, 20.5, 82.0, 6.5, 200.0],
    [25, 10, 15, 30.0, 60.0, 5.5, 80.0],
    [110, 50, 60, 28.0, 75.0, 6.8, 220.0],
    [40, 60, 60, 18.0, 85.0, 6.2, 300.0],
    [15, 5, 5, 33.0, 40.0, 5.0, 20.0],
    [80, 40, 40, 29.0, 78.0, 6.2, 250.0]
]

# Apply the stored scaler, predict class ids, then map the ids back to the
# crop names known to the label encoder.
scaled_samples = scaler.transform(samples)
preds = model.predict(scaled_samples)
decoded_preds = le.inverse_transform(preds)

# Report one line per sample, numbered from 1
for sample_no, crop in enumerate(decoded_preds, start=1):
    print(f"Sample {sample_no} Prediction: {crop}")


Sample 1 Prediction: rice
Sample 2 Prediction: blackgram
Sample 3 Prediction: rice
Sample 4 Prediction: rice
Sample 5 Prediction: mothbeans
Sample 6 Prediction: coconut


