In [1]:
import pandas as pd


In [11]:
# Load the soil texture table into a DataFrame.
# NOTE(review): this is an absolute path on one specific Windows machine, so the
# notebook will not run elsewhere without editing it — consider a path relative
# to the notebook or a configurable data directory.
file_path=r"C:\Users\user-pc\Desktop\figma\soil_db.csv"
df=pd.read_csv(file_path)

In [13]:
# Preview the first rows to confirm the columns loaded as expected.
df.head()

Unnamed: 0,Clay %,Sand %,Silt %,Classification
0,0,100,0,1
1,0,95,5,1
2,0,90,10,1
3,0,85,15,2
4,0,80,20,2


In [14]:
# Check column dtypes and non-null counts before modelling.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 231 entries, 0 to 230
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype
---  ------          --------------  -----
 0   Clay %          231 non-null    int64
 1   Sand %          231 non-null    int64
 2   Silt %          231 non-null    int64
 3   Classification  231 non-null    int64
dtypes: int64(4)
memory usage: 7.3 KB


In [15]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import numpy as np

In [18]:
# Features: the three texture percentages, in Clay / Sand / Silt order.
# Anything passed to the fitted model later must use this same column order.
X=df[["Clay %","Sand %","Silt %"]]
# Target: the integer soil class label.
y=df["Classification"]

In [19]:
# Hold out 20% of the rows for testing; random_state fixes the shuffle so the
# split is reproducible.
# NOTE(review): the split is not stratified on y — with many classes and few
# rows, some classes may be under-represented in the training set; confirm
# whether stratify=y is wanted.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [20]:
# Build the kNN classifier pipeline: standardize the three features, then
# classify with the 3 nearest neighbours. Scaling is part of the pipeline, so
# it is applied consistently at fit and predict time.
knn_pipeline = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=3))

In [21]:
# Train the pipeline (scaler + kNN) on the training split.
# NOTE(review): X_test / y_test are not used in the cells shown here — consider
# scoring the model on the held-out split before relying on its predictions.
knn_pipeline.fit(X_train,y_train)

In [22]:
# Function: Convert particle diameters → clay/sand/silt %
def diameters_to_composition(diameters):
    """Convert particle diameters (mm) into ``[clay %, sand %, silt %]``.

    Each particle is binned using the USDA soil particle size definitions:

    Type    Diameter Range (mm)
    Clay    < 0.002
    Silt    0.002–0.05
    Sand    0.05–2.00

    Diameters above 2.0 mm fall outside all three bins and are ignored, so the
    percentages are relative to the classified particles only. Values are not
    otherwise validated: anything below 0.002 (including 0 or negatives) is
    counted as clay.

    Parameters
    ----------
    diameters : iterable of float
        Particle diameters in millimetres. Any number of samples is accepted.

    Returns
    -------
    list of float
        ``[clay, sand, silt]`` percentages, each rounded to 2 decimals. The
        order is clay, sand, silt (not clay, silt, sand) to match the
        "Clay %", "Sand %", "Silt %" feature columns.

    Raises
    ------
    ValueError
        If no diameter falls into any bin (empty input, or every value is
        above 2.0 mm), since percentages would be undefined.
    """
    # Materialize once: the input is scanned three times below, which would
    # silently exhaust a generator after the first pass.
    diameters = list(diameters)

    clay = sum(d < 0.002 for d in diameters)
    silt = sum(0.002 <= d < 0.05 for d in diameters)
    sand = sum(0.05 <= d <= 2.0 for d in diameters)
    total = clay + silt + sand

    # Previously this case crashed with an opaque ZeroDivisionError.
    if total == 0:
        raise ValueError(
            "No particle diameters in the clay/silt/sand range (<= 2.0 mm); "
            "cannot compute soil composition."
        )

    return [
        round((clay / total) * 100, 2),
        round((sand / total) * 100, 2),
        round((silt / total) * 100, 2)
    ]


In [24]:
# Function: Predict soil type from particle diameter samples
def predict_soil_type(diameters):
    """Predict the soil class for a set of particle diameters.

    Parameters
    ----------
    diameters : iterable of float
        Particle diameters, passed straight to ``diameters_to_composition``.

    Returns
    -------
    tuple
        ``(composition, prediction)`` where ``composition`` is the
        ``[clay %, sand %, silt %]`` list and ``prediction`` is the class
        label returned by ``knn_pipeline`` for that composition.
    """
    composition = diameters_to_composition(diameters)
    # The pipeline was fitted on a DataFrame with named columns. Predicting on
    # a bare nested list makes scikit-learn warn that X has no valid feature
    # names, so pass a one-row frame with the same column names and order.
    features = pd.DataFrame([composition], columns=["Clay %", "Sand %", "Silt %"])
    prediction = knn_pipeline.predict(features)[0]
    return composition, prediction

In [26]:
# Example input: ten particle diameters spanning the clay, silt and sand ranges.
sample_diameters = [0.001, 0.0025, 0.045, 0.06, 0.08, 0.003, 0.0015, 0.09, 0.5, 1.0]
composition, predicted_class = predict_soil_type(sample_diameters)
# Display the composition and the predicted class together as the cell output.
composition, predicted_class



([20.0, 50.0, 30.0], 7)

In [27]:
# Lookup table: soil class label (as found in the "Classification" column)
# -> list of suggested crops.
# NOTE(review): the texture names in the trailing comments are not verifiable
# from the dataset shown here — confirm they match the class encoding used in
# soil_db.csv. The example composition 20/50/30 (clay/sand/silt) is predicted
# as class 7, yet looks like Loam rather than Sandy Clay Loam on the USDA
# texture triangle.
crop_recommendations = {
    1: ["Carrots", "Potatoes", "Watermelon"],              # Sand
    2: ["Cantaloupe", "Peanuts", "Onions"],                # Loamy Sand
    3: ["Tomatoes", "Corn", "Groundnut"],                  # Sandy Loam
    4: ["Wheat", "Barley", "Sugarcane"],                   # Loam
    5: ["Soybeans", "Vegetables", "Rice"],                 # Silt Loam
    6: ["Rice", "Mustard", "Peas"],                        # Silt
    7: ["Maize", "Sorghum", "Sunflower"],                  # Sandy Clay Loam
    8: ["Paddy", "Sugar beet", "Cotton"],                  # Clay Loam
    9: ["Turmeric", "Ginger", "Spinach"],                  # Silty Clay Loam
    10: ["Castor", "Tobacco", "Pulses"],                   # Sandy Clay
    11: ["Banana", "Jute", "Wheat"],                       # Silty Clay
    12: ["Paddy", "Linseed", "Tea"]                        # Clay
}

In [28]:
def analyze_soil(diameters):
    """Run the full analysis for one sample and return a summary dict.

    Predicts the soil class from the particle diameters, then looks up the
    crops listed for that class in ``crop_recommendations``. Classes without
    an entry get a single placeholder message instead of a crop list.

    Returns a dict with the composition, the predicted class and the
    recommended crops, in that key order.
    """
    fallback = ["No recommendation available"]
    mix, label = predict_soil_type(diameters)

    report = {"Soil Composition (Clay/Sand/Silt %)": mix}
    report["Predicted Soil Type (Class)"] = label
    report["Recommended Crops"] = crop_recommendations.get(label, fallback)
    return report

In [29]:
# Example: full analysis (composition, predicted class, crops) for the same
# sample diameters used above.
final_result = analyze_soil(sample_diameters)
final_result



{'Soil Composition (Clay/Sand/Silt %)': [20.0, 50.0, 30.0],
 'Predicted Soil Type (Class)': 7,
 'Recommended Crops': ['Maize', 'Sorghum', 'Sunflower']}