In [1]:
from pathlib import Path

import pandas as pd

# Load the dataset.
# The location is derived from the current user's home directory instead of a
# path hard-coded to one Windows account, so the notebook also runs for other
# users who keep the CSV in their Downloads folder.
file_path = Path.home() / "Downloads" / "irrigation_strategy_with_soil_type.csv"
df = pd.read_csv(file_path)

# Display basic info and the first few rows.
# `df.info()` prints its summary and returns None, so it is called as its own
# statement; combining it with `df.head()` in a tuple displayed
# `(None, <frame>)` and lost the rich table rendering of the preview.
df.info()
df.head()



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1380 entries, 0 to 1379
Data columns (total 7 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   crop                        1380 non-null   object 
 1   season                      1380 non-null   object 
 2   altitude                    1380 non-null   object 
 3   soil_type                   1380 non-null   object 
 4   water_requirement_mm_day    1380 non-null   int64  
 5   irrigation_strategy         1380 non-null   object 
 6   total_water_requirement_m3  1380 non-null   float64
dtypes: float64(1), int64(1), object(5)
memory usage: 75.6+ KB


(None,
     crop season altitude soil_type  water_requirement_mm_day  \
 0  Maize    dry      low     sandy                       850   
 1  Maize    dry      low     loamy                       850   
 2  Maize    dry      low      clay                       850   
 3  Maize    dry      low     silty                       850   
 4  Maize    dry      low     peaty                       850   
 
                              irrigation_strategy  total_water_requirement_m3  
 0                    Drip irrigation or rain-fed                   8500000.0  
 1            Sprinkler irrigation or combination                   8500000.0  
 2  Flood irrigation or high-frequency sprinklers                   8500000.0  
 3            Sprinkler irrigation or combination                   8500000.0  
 4  Flood irrigation or high-frequency sprinklers                   8500000.0  )

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Encode categorical variables.
# The integer codes are written to a copy (`df_encoded`) so that `df` keeps the
# raw string labels. The encoders are therefore always fitted on the original
# labels, and re-running this cell no longer refits them on integers produced
# by a previous run (which would replace `classes_` with numbers and break any
# later decoding back to label names).
categorical_cols = ["crop", "season", "altitude", "soil_type"]
label_encoders = {col: LabelEncoder() for col in categorical_cols}

df_encoded = df.copy()
for col in categorical_cols:
    df_encoded[col] = label_encoders[col].fit_transform(df[col])

# Encode target variable for classification (irrigation_strategy)
label_encoder_strategy = LabelEncoder()
df_encoded["irrigation_strategy"] = label_encoder_strategy.fit_transform(df["irrigation_strategy"])

# Define features and targets.
# Both water-requirement columns are dropped from the features: one of them is
# the regression target.
X = df_encoded.drop(columns=["irrigation_strategy", "total_water_requirement_m3", "water_requirement_mm_day"])
y_classification = df_encoded["irrigation_strategy"]
y_regression = df_encoded["water_requirement_mm_day"]

# Split into train and test sets (80% train, 20% test).
# Both calls receive the same X and the same random_state.
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(
    X, y_classification, test_size=0.2, random_state=42
)
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X, y_regression, test_size=0.2, random_state=42
)

# Check dataset shapes
X_train_class.shape, X_test_class.shape, y_train_class.shape, y_test_class.shape, X_train_reg.shape, X_test_reg.shape, y_train_reg.shape, y_test_reg.shape


((1104, 4), (276, 4), (1104,), (276,), (1104, 4), (276, 4), (1104,), (276,))

In [None]:
# Environment diagnostic: print the path of the Python interpreter that is
# running this kernel. Not used by any of the analysis cells shown here.
import sys
print(sys.executable)


In [3]:
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error
import numpy as np

# Fit both random forests (100 trees each, fixed seed) on their training splits.
# `fit` returns the estimator itself, so construction and training are chained.
clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train_class, y_train_class)
reg = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train_reg, y_train_reg)

# Predict on the held-out rows.
y_pred_class = clf.predict(X_test_class)
y_pred_reg = reg.predict(X_test_reg)

# Score the predictions: accuracy for the irrigation strategy classifier,
# MAE and RMSE for the water-requirement regressor.
classification_accuracy = accuracy_score(y_test_class, y_pred_class)
regression_mae = mean_absolute_error(y_test_reg, y_pred_reg)
regression_rmse = np.sqrt(mean_squared_error(y_test_reg, y_pred_reg))

# NOTE(review): the recorded output of this cell is (1.0, 0.0, 0.0), i.e.
# perfect scores on the test split. That presumably means both targets are
# fully determined by the four input features; confirm this is expected and
# not a sign of leakage.

# Return model evaluation metrics
classification_accuracy, regression_mae, regression_rmse


(1.0, 0.0, 0.0)

In [4]:
import pandas as pd

# Function to get user selection from available options
def get_user_choice(options, feature_name):
    """Prompt on stdin until the user enters one of ``options``.

    Surrounding whitespace in the answer is ignored. An exact match is
    returned as typed; otherwise the answer is compared case-insensitively
    and the matching entry from ``options`` is returned in its original
    spelling (e.g. ``"banana"`` selects ``"Banana"``).

    Parameters
    ----------
    options : sequence
        The allowed values, shown to the user before the first prompt.
    feature_name : str
        Human-readable name of the value being asked for; used in the
        messages printed to the user.

    Returns
    -------
    The selected option.

    Raises
    ------
    ValueError
        If ``options`` is empty, because no answer could ever be accepted
        and the prompt would repeat forever.
    """
    if len(options) == 0:
        raise ValueError(f"No options available for {feature_name}.")

    # Case-insensitive lookup table. If two options differ only by case, the
    # first one wins here; an exact match is still honoured before this table
    # is consulted, so every option remains selectable.
    folded_lookup = {}
    for option in options:
        folded_lookup.setdefault(str(option).casefold(), option)

    print(f"\nAvailable {feature_name}: {options}")
    prompt = f"Enter {feature_name} from the list above: "

    while True:
        choice = input(prompt).strip()
        if choice in options:
            return choice
        folded_key = choice.casefold()
        if folded_key in folded_lookup:
            return folded_lookup[folded_key]
        print("Invalid choice. Please choose from the available list.")

# Get all available options from the dataset.
# `classes_` holds the labels each encoder was fitted on.
available_crops = list(label_encoders["crop"].classes_)
available_seasons = list(label_encoders["season"].classes_)
available_altitudes = list(label_encoders["altitude"].classes_)
available_soil_types = list(label_encoders["soil_type"].classes_)

# Ask user for inputs
selected_crop = get_user_choice(available_crops, "Crop")
selected_season = get_user_choice(available_seasons, "Season")
selected_altitude = get_user_choice(available_altitudes, "Altitude")
selected_soil_type = get_user_choice(available_soil_types, "Soil Type")

# Create a one-row DataFrame with the selected inputs, each converted to its
# integer code by the matching encoder.
new_sample = pd.DataFrame({
    "crop": [label_encoders["crop"].transform([selected_crop])[0]],
    "season": [label_encoders["season"].transform([selected_season])[0]],
    "altitude": [label_encoders["altitude"].transform([selected_altitude])[0]],
    "soil_type": [label_encoders["soil_type"].transform([selected_soil_type])[0]],
})

# Predict irrigation strategy and map the predicted code back to its label
predicted_strategy = clf.predict(new_sample)
predicted_strategy_label = label_encoder_strategy.inverse_transform(predicted_strategy)

# Predict water requirement
predicted_water = reg.predict(new_sample)

# Display results
print("\n--- Prediction Results ---")
print(f"Selected Crop: {selected_crop}")
print(f"Selected Season: {selected_season}")
print(f"Selected Altitude: {selected_altitude}")
print(f"Selected Soil Type: {selected_soil_type}")
print(f"Predicted Irrigation Strategy: {predicted_strategy_label[0]}")
# `reg` is trained on the `water_requirement_mm_day` column, not on
# `total_water_requirement_m3`, so the value is labelled accordingly.
# The unit is taken from the column name - TODO confirm.
print(f"Predicted Water Requirement (mm/day): {predicted_water[0]:.2f}")



Available Crop: ['Avocado', 'Banana', 'Barley', 'Beans', 'Cabbage', 'Carrots', 'Cassava', 'Chili Peppers', 'Coffee', 'Cotton', 'Cucumber', 'Eggplant', 'Ginger', 'Green Grams', 'Green Peppers', 'Groundnuts', 'Guava', 'Irish Potatoes', 'Kale', 'Lettuce', 'Macadamia', 'Maize', 'Mango', 'Millet', 'Oats', 'Onions', 'Palm Oil', 'Papaya', 'Passion Fruit', 'Peas', 'Pineapple', 'Pyrethrum', 'Rice', 'Sorghum', 'Soybeans', 'Spinach', 'Sugarcane', 'Sunflower', 'Sweet Potatoes', 'Taro', 'Tea', 'Tomatoes', 'Tree Tomato', 'Vanilla', 'Wheat', 'Yams']


Enter Crop from the list above:  Banana



Available Season: ['dry', 'wet']


Enter Season from the list above:  dry



Available Altitude: ['high', 'low', 'mid']


Enter Altitude from the list above:  mid



Available Soil Type: ['clay', 'loamy', 'peaty', 'sandy', 'silty']


Enter Soil Type from the list above:  peaty



--- Prediction Results ---
Selected Crop: Banana
Selected Season: dry
Selected Altitude: mid
Selected Soil Type: peaty
Predicted Irrigation Strategy: Flood irrigation or high-frequency sprinklers
Predicted Total Water Requirement (m³): 900.00


In [6]:
# Environment diagnostic: print the kernel's current working directory.
# Not used by any of the analysis cells shown here.
import os
print(os.getcwd())



C:\Users\AKOS


'C:\\Users\\AKOS'