<a href="https://colab.research.google.com/github/Chehan2004/DSGP_GROUP_36/blob/treatments_and_recommendations/Fertilizer%26Pesticide.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
# Let pandas render up to 25 columns before it starts truncating wide frames.
pd.options.display.max_columns = 25

In [None]:
# Read the tea-leaf dataset from the Colab working directory and preview
# the first five rows.
df = pd.read_csv(filepath_or_buffer="/content/tea_leaf_dataset_enhanced.csv")
df.head(n=5)


Unnamed: 0,Sample ID,Leaf Color,Leaf Texture,Soil pH,Moisture (%),Temp (Â°C),Pest Type,Nutrient Deficiency,Recommended Fertilizer,Fertilizer Dosage (kg/ha),Recommended Pesticide,Pesticide Dosage (ml/L),Detected_Disease,Recommended_Pesticide,Recommended_Dosage,Recommendation_Notes,Recommended_Fertilizer_Notes
0,T001,Brownish,Smooth,5.3,16,31,Leaf roller,Nitrogen,Urea (46-0-0),100,Lambda-cyhalothrin 2.5% EC,0.8,,,,,
1,T002,Brownish,Curled,4.7,27,30,Tea mosquito bug,Magnesium,Epsom Salt (MgSOâ‚„),60,Imidacloprid 17.8% SL,0.5,,,,,
2,T003,Brownish,Curled,4.2,28,32,Red spider mite,Potassium,MOP (0-0-60),80,Abamectin 1.8% EC,1.5,,,,,
3,T004,Light green,Brittle,4.2,25,25,Thrips,,NPK 15-15-15 (balanced),60,Chlorpyrifos 20% EC,1.0,,,,,
4,T005,Pale green,Normal,6.0,29,20,Tea mosquito bug,Potassium,MOP (0-0-60),80,Imidacloprid 17.8% SL,0.5,,,,,


In [None]:
# Remove exact duplicate rows.
df = df.drop_duplicates()

# Turn the common "missing" placeholder strings into real NaNs.
df = df.replace(["N/A", "none", "None", "-", ""], np.nan)

# Drop columns, then rows, that are entirely empty. The final .copy() gives an
# independent frame, so the column assignments below do not write into a slice
# of an earlier frame (which can raise SettingWithCopyWarning).
new_df = df.dropna(axis=1, how='all')  # name kept in case other cells reference it
df = new_df.dropna(axis=0, how='all').copy()

# In the raw data a blank "Nutrient Deficiency" is paired with the balanced NPK
# recommendation (e.g. sample T004), so a blank is treated as "no deficiency".
# Filling it with the column mode would invent a deficiency for those samples.
# The label is deliberately not "None": that string is replaced by NaN above.
if "Nutrient Deficiency" in df.columns:
    df["Nutrient Deficiency"] = df["Nutrient Deficiency"].fillna("No deficiency")

# Numeric missing values -> column median.
num_cols = df.select_dtypes(include=['int64', 'float64']).columns
if len(num_cols) > 0:
    df[num_cols] = df[num_cols].fillna(df[num_cols].median())

# Remaining categorical missing values -> column mode.
cat_cols = df.select_dtypes(include=['object']).columns
cat_modes = df[cat_cols].mode()
# mode() has no rows when there is nothing to take a mode of; skip the fill
# instead of failing on .iloc[0].
if not cat_modes.empty:
    df[cat_cols] = df[cat_cols].fillna(cat_modes.iloc[0])


In [None]:
# Preview the cleaned frame.
# NOTE(review): the saved output under this cell already lists the engineered
# and *_Encoded columns that are only created further down, so it was
# presumably produced by out-of-order execution. Re-run the notebook top to
# bottom to refresh it.
df.head(n=5)

Unnamed: 0,Sample ID,Leaf Color,Leaf Texture,Soil pH,Moisture (%),Temp (Â°C),Pest Type,Nutrient Deficiency,Recommended Fertilizer,Fertilizer Dosage (kg/ha),Recommended Pesticide,Pesticide Dosage (ml/L),Soil_Acidic_Flag,pH_Deviation,Temp_Normalized,Moisture_Normalized,Health_Score,Moisture_Temp_Interaction,pH_Deficiency_Interaction,Leaf Color_Encoded,Leaf Texture_Encoded,Pest Type_Encoded,Nutrient Deficiency_Encoded
0,T001,Brownish,Smooth,5.3,16,31,Leaf roller,Nitrogen,Urea (46-0-0),100,Lambda-cyhalothrin 2.5% EC,0.8,1,0.2,1.307127,-1.424581,0,496,5.3,0,4,0,1
1,T002,Brownish,Curled,4.7,27,30,Tea mosquito bug,Magnesium,Epsom Salt (MgSOâ‚„),60,Imidacloprid 17.8% SL,0.5,1,0.8,1.043912,0.948283,-1,810,0.0,0,1,2,0
2,T003,Brownish,Curled,4.2,28,32,Red spider mite,Potassium,MOP (0-0-60),80,Abamectin 1.8% EC,1.5,1,1.3,1.570342,1.163998,-1,896,12.6,0,1,1,3
3,T004,Light green,Brittle,4.2,25,25,Thrips,Magnesium,NPK 15-15-15 (balanced),60,Chlorpyrifos 20% EC,1.0,1,1.3,-0.272165,0.516853,-1,625,0.0,2,0,3,0
4,T005,Pale green,Normal,6.0,29,20,Tea mosquito bug,Potassium,MOP (0-0-60),80,Imidacloprid 17.8% SL,0.5,0,0.5,-1.588241,1.379712,0,580,18.0,4,2,2,3


In [None]:
# 1 when the soil pH is below 5.5, otherwise 0.
df["Soil_Acidic_Flag"] = df["Soil pH"].lt(5.5).astype("int64")

# Absolute distance of the soil pH from 5.5.
df["pH_Deviation"] = df["Soil pH"].sub(5.5).abs()

# Z-scores of temperature and moisture, using the mean and sample standard
# deviation of the whole frame.
# NOTE(review): these statistics are taken before the train/test split, and the
# columns are standardised again by StandardScaler later on. Confirm both the
# raw and the normalised versions are really wanted as features.
df["Temp_Normalized"] = (
    df["Temp (Â°C)"].sub(df["Temp (Â°C)"].mean()).div(df["Temp (Â°C)"].std())
)
df["Moisture_Normalized"] = (
    df["Moisture (%)"].sub(df["Moisture (%)"].mean()).div(df["Moisture (%)"].std())
)

In [None]:
def health_score(row):
    """Return a heuristic plant-health score for one sample row.

    The score starts at 0 and is adjusted as follows:

    * leaf colour containing "yellow" or "brown" (case-insensitive): -2,
      any other colour: +1
    * "Moisture (%)" below 30: -1
    * "Soil pH" below 5.0: -1

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys "Leaf Color", "Moisture (%)" and "Soil pH".

    Returns
    -------
    int
        A value between -4 and +1.
    """
    score = 0

    # Leaf colour impact. Match by substring so that variants such as
    # "Brownish" are recognised; an exact comparison against "brown" would
    # score them as healthy. str() keeps a non-string value (e.g. NaN) from
    # raising; such a value matches neither keyword.
    leaf_color = str(row["Leaf Color"]).strip().lower()
    if any(keyword in leaf_color for keyword in ("yellow", "brown")):
        score -= 2
    else:
        score += 1

    # Moisture stress.
    if row["Moisture (%)"] < 30:
        score -= 1

    # Strongly acidic soil.
    if row["Soil pH"] < 5.0:
        score -= 1

    return score

# Score every sample: health_score is called once per row.
df["Health_Score"] = df.apply(func=health_score, axis="columns")

In [None]:
# Product of moisture and temperature.
df["Moisture_Temp_Interaction"] = df["Moisture (%)"].mul(df["Temp (Â°C)"])

# Soil pH multiplied by the integer category code of the nutrient deficiency.
# NOTE(review): the codes come from the category ordering, so the category with
# code 0 always yields an interaction of 0. Confirm this is the intended
# feature.
df["pH_Deficiency_Interaction"] = df["Soil pH"].mul(
    df["Nutrient Deficiency"].astype("category").cat.codes
)

In [None]:
# Keep one fitted encoder per column. Re-fitting a single LabelEncoder in the
# loop would discard the mapping of every column except the last, leaving no
# way to inverse-transform or to encode new samples for the earlier columns.
label_encoders = {}

for col in ["Leaf Color", "Leaf Texture", "Pest Type", "Nutrient Deficiency"]:
    label_enc = LabelEncoder()
    df[col + "_Encoded"] = label_enc.fit_transform(df[col])
    label_encoders[col] = label_enc

# After the loop `label_enc` is the encoder fitted on the last column
# ("Nutrient Deficiency"), the same final state the name had before.

In [None]:
# Model inputs: the three raw measurements followed by the engineered columns.
# NOTE(review): the *_Encoded columns produced by the label-encoding cell are
# not listed here. Confirm that leaving them out is intentional.
features = [
    # raw measurements
    "Soil pH",
    "Moisture (%)",
    "Temp (Â°C)",
    # engineered
    "Soil_Acidic_Flag",
    "pH_Deviation",
    "Temp_Normalized",
    "Moisture_Normalized",
    "Health_Score",
    "Moisture_Temp_Interaction",
    "pH_Deficiency_Interaction",
]

X = df[features]

# Target: the recommended pesticide for each sample.
y = df["Recommended Pesticide"]

In [None]:
# Preview the frame with the engineered and encoded columns in place.
df.head(n=5)

Unnamed: 0,Sample ID,Leaf Color,Leaf Texture,Soil pH,Moisture (%),Temp (Â°C),Pest Type,Nutrient Deficiency,Recommended Fertilizer,Fertilizer Dosage (kg/ha),Recommended Pesticide,Pesticide Dosage (ml/L),Soil_Acidic_Flag,pH_Deviation,Temp_Normalized,Moisture_Normalized,Health_Score,Moisture_Temp_Interaction,pH_Deficiency_Interaction,Leaf Color_Encoded,Leaf Texture_Encoded,Pest Type_Encoded,Nutrient Deficiency_Encoded
0,T001,Brownish,Smooth,5.3,16,31,Leaf roller,Nitrogen,Urea (46-0-0),100,Lambda-cyhalothrin 2.5% EC,0.8,1,0.2,1.307127,-1.424581,0,496,5.3,0,4,0,1
1,T002,Brownish,Curled,4.7,27,30,Tea mosquito bug,Magnesium,Epsom Salt (MgSOâ‚„),60,Imidacloprid 17.8% SL,0.5,1,0.8,1.043912,0.948283,-1,810,0.0,0,1,2,0
2,T003,Brownish,Curled,4.2,28,32,Red spider mite,Potassium,MOP (0-0-60),80,Abamectin 1.8% EC,1.5,1,1.3,1.570342,1.163998,-1,896,12.6,0,1,1,3
3,T004,Light green,Brittle,4.2,25,25,Thrips,Magnesium,NPK 15-15-15 (balanced),60,Chlorpyrifos 20% EC,1.0,1,1.3,-0.272165,0.516853,-1,625,0.0,2,0,3,0
4,T005,Pale green,Normal,6.0,29,20,Tea mosquito bug,Potassium,MOP (0-0-60),80,Imidacloprid 17.8% SL,0.5,0,0.5,-1.588241,1.379712,0,580,18.0,4,2,2,3


In [None]:
# Hold out 20% of the samples for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

In [None]:
# Fit the scaler on the training rows only, then apply that same fitted
# transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Show the first rows of each (unscaled) split under its heading.
for heading, split in (("TRAIN DATA:", X_train), ("\nTEST DATA:", X_test)):
    print(heading)
    display(split.head())


TRAIN DATA:


Unnamed: 0,Soil pH,Moisture (%),Temp (Â°C),Soil_Acidic_Flag,pH_Deviation,Temp_Normalized,Moisture_Normalized,Health_Score,Moisture_Temp_Interaction,pH_Deficiency_Interaction,Leaf Color_Encoded,Leaf Texture_Encoded,Pest Type_Encoded,Nutrient Deficiency_Encoded
249,6.4,24,29,0,0.9,0.780696,0.301138,0,696,6.4,1,0,0,1
433,4.6,22,21,1,0.9,-1.325025,-0.130292,-1,462,4.6,0,4,3,1
19,4.8,23,29,1,0.7,0.780696,0.085423,-4,667,4.8,5,3,3,1
322,4.4,20,29,1,1.1,0.780696,-0.561722,-4,580,8.8,5,0,3,2
332,5.0,28,21,1,0.5,-1.325025,1.163998,0,588,10.0,2,3,2,2



TEST DATA:


Unnamed: 0,Soil pH,Moisture (%),Temp (Â°C),Soil_Acidic_Flag,pH_Deviation,Temp_Normalized,Moisture_Normalized,Health_Score,Moisture_Temp_Interaction,pH_Deficiency_Interaction,Leaf Color_Encoded,Leaf Texture_Encoded,Pest Type_Encoded,Nutrient Deficiency_Encoded
361,4.1,26,30,1,1.4,1.043912,0.732568,-1,780,4.1,3,4,3,1
73,4.9,25,24,1,0.6,-0.53538,0.516853,-1,600,9.8,2,1,3,2
374,5.2,23,20,1,0.3,-1.588241,0.085423,0,460,0.0,0,2,0,0
155,5.4,21,28,1,0.1,0.517481,-0.346007,0,588,0.0,1,2,3,0
104,5.5,28,27,0,0.0,0.254266,1.163998,0,756,5.5,3,0,3,1
