In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import joblib


In [3]:
# Absolute path of the CSV file holding the irrigation records.
file_path = "/content/datasets - datasets.csv"

# Read the entire file into one DataFrame.
data = pd.read_csv(file_path)

# Sanity summary: frame dimensions, column names, and how many rows fall into
# each value of the "Irrigation" target column.
print("Dataset loaded:", data.shape)
print("Columns:", list(data.columns))
print("Target distribution:\n", data["Irrigation"].value_counts())


Dataset loaded: (501, 6)
Columns: ['CropType', 'CropDays', 'SoilMoisture', 'temperature', 'Humidity', 'Irrigation']
Target distribution:
 Irrigation
0    304
1    197
Name: count, dtype: int64


In [4]:
# Turn the categorical crop name into integer codes. The encoder is fitted on
# the full "CropType" column and kept around so the same mapping can be reused
# at prediction time.
label_encoder = LabelEncoder()
data["CropType_encoded"] = label_encoder.fit_transform(data["CropType"])

# Engineered features, added as new columns on `data`.
# Both ratios divide by (temperature + 1) rather than the raw temperature.
data["moisture_temp_ratio"] = data["SoilMoisture"].div(data["temperature"] + 1)
data["humidity_temp_ratio"] = data["Humidity"].div(data["temperature"] + 1)
# NOTE(review): 1000 is presumably the sensor's full-scale moisture reading;
# confirm against the data source.
data["moisture_deficit"] = data["SoilMoisture"].rsub(1000)  # 1000 - SoilMoisture

# Model inputs: the four raw measurements followed by the derived columns.
# The order here defines the column order of X.
feature_columns = [
    "CropDays",
    "SoilMoisture",
    "temperature",
    "Humidity",
    "CropType_encoded",
    "moisture_temp_ratio",
    "humidity_temp_ratio",
    "moisture_deficit",
]

# Feature matrix and target vector.
y = data["Irrigation"]
X = data[feature_columns]

print("Features selected:", feature_columns)
print("X shape:", X.shape)


Features selected: ['CropDays', 'SoilMoisture', 'temperature', 'Humidity', 'CropType_encoded', 'moisture_temp_ratio', 'humidity_temp_ratio', 'moisture_deficit']
X shape: (501, 8)


In [6]:
# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply that one
# fitted transform to both splits.
scaler = StandardScaler()
X_train_scaled = scaler.fit(X_train).transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [7]:
# Fit a 100-tree random forest (fixed seed) on the scaled training split.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_scaled, y_train
)

# Evaluate on the held-out split.
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)

print(f"Model Accuracy: {accuracy:.3f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Model Accuracy: 0.931

Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.95      0.94        61
           1       0.92      0.90      0.91        40

    accuracy                           0.93       101
   macro avg       0.93      0.93      0.93       101
weighted avg       0.93      0.93      0.93       101



In [8]:
# Pair every feature name with the importance the fitted forest assigned to it.
importance_df = pd.DataFrame(
    {"feature": feature_columns, "importance": model.feature_importances_}
)
# Most influential features first; the original positional index is kept.
importance_df = importance_df.sort_values(by="importance", ascending=False)

importance_df


Unnamed: 0,feature,importance
1,SoilMoisture,0.279403
7,moisture_deficit,0.228267
5,moisture_temp_ratio,0.148132
4,CropType_encoded,0.08392
0,CropDays,0.077368
2,temperature,0.070421
6,humidity_temp_ratio,0.065992
3,Humidity,0.046497


In [10]:
def predict_irrigation(crop_type, crop_days, soil_moisture, temperature, humidity):
    """Predict the irrigation class for a single reading.

    Rebuilds the engineered features used at training time, scales them with
    the fitted ``scaler`` and queries the fitted ``model``. Relies on the
    notebook-level ``label_encoder``, ``scaler``, ``model`` and
    ``feature_columns`` objects having been created by the earlier cells.

    Args:
        crop_type: Crop label; must be one of the values the label encoder
            was fitted on.
        crop_days: Value for the "CropDays" feature.
        soil_moisture: Value for the "SoilMoisture" feature.
        temperature: Value for the "temperature" feature.
        humidity: Value for the "Humidity" feature.

    Returns:
        A ``(prediction, probability)`` tuple: the predicted class label for
        the reading and the array of per-class probabilities (ordered as in
        ``model.classes_``).

    Raises:
        ValueError: If ``crop_type`` was not seen when the encoder was fitted.
        ZeroDivisionError: If ``temperature`` is -1 and plain Python numbers
            are passed, because both ratio features divide by
            ``temperature + 1``.
    """
    # Fail early with the list of valid labels instead of the encoder's
    # generic "unseen labels" message (same exception type).
    known_crops = list(label_encoder.classes_)
    if crop_type not in known_crops:
        raise ValueError(
            f"Unknown crop type {crop_type!r}; expected one of {known_crops}"
        )
    crop_type_encoded = label_encoder.transform([crop_type])[0]

    # Same derived features as the training cell.
    temperature_offset = temperature + 1
    row = {
        "CropDays": crop_days,
        "SoilMoisture": soil_moisture,
        "temperature": temperature,
        "Humidity": humidity,
        "CropType_encoded": crop_type_encoded,
        "moisture_temp_ratio": soil_moisture / temperature_offset,
        "humidity_temp_ratio": humidity / temperature_offset,
        "moisture_deficit": 1000 - soil_moisture,
    }

    # The scaler was fitted on a DataFrame with named columns. Passing a named
    # one-row frame ordered by `feature_columns` avoids scikit-learn's
    # "X does not have valid feature names" warning and keeps the column order
    # tied to the training definition rather than to argument position.
    features = pd.DataFrame([row])[feature_columns]
    features_scaled = scaler.transform(features)

    prediction = model.predict(features_scaled)[0]
    probability = model.predict_proba(features_scaled)[0]

    return prediction, probability


# Example call: one reading, passed by keyword so each value is self-describing.
pred, prob = predict_irrigation(
    crop_type="Wheat",
    crop_days=15,
    soil_moisture=400,
    temperature=25,
    humidity=30,
)
print("Prediction:", pred)
print("Probability:", prob)


Prediction: 0
Probability: [0.76 0.24]


