In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report
import joblib



In [2]:
# Load dataset
data = pd.read_csv("soil_quality_10000.csv")

# Encode target column (Good=1, Bad=0)
#le = LabelEncoder()
#data["soil_quality"] = le.fit_transform(data["soil_quality"])

# Features & Target
X = data.drop("soil_quality", axis=1)
y = data["soil_quality"]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)



In [3]:
# Create ML Pipeline (Scaler + Model)
pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("model", RandomForestClassifier(
        n_estimators=150,
        random_state=42,
        class_weight="balanced"
    ))
])




In [4]:
# Train model
pipeline.fit(X_train, y_train)

# Predictions
y_pred = pipeline.predict(X_test)


In [5]:

# Model Evaluation
print("Accuracy:", accuracy_score(y_test, y_pred)*100)


Accuracy: 99.95


In [6]:
print(classification_report(y_test, y_pred))

# Save trained pipeline (ONE file only)


              precision    recall  f1-score   support

         Bad       1.00      1.00      1.00      1946
        Good       1.00      0.98      0.99        54

    accuracy                           1.00      2000
   macro avg       1.00      0.99      1.00      2000
weighted avg       1.00      1.00      1.00      2000



In [7]:
joblib.dump(pipeline, "soil_quality_model.pkl")

print("Model saved as soil_quality_model.pkl")

Model saved as soil_quality_model.pkl


In [8]:
data

Unnamed: 0,N,P,K,ph,moisture,organic_carbon,soil_quality
0,61,8,14,4.72,29,0.35,Bad
1,24,36,54,5.60,57,0.62,Bad
2,81,37,59,7.47,51,0.93,Good
3,70,53,32,8.19,16,0.47,Bad
4,30,30,42,4.62,24,0.51,Bad
...,...,...,...,...,...,...,...
9995,32,28,50,5.20,40,0.37,Bad
9996,77,23,33,5.80,30,0.53,Bad
9997,23,21,39,7.33,68,1.03,Bad
9998,72,47,57,8.40,67,1.06,Bad
