In [45]:
import pandas as pd

# Load the dataset
data = pd.read_csv('Crop_recommendationV2.csv')

# Display the first 5 rows
print("First 5 rows:")
print(data.head())

# Check for column names and basic information
print("\nColumn Names:")
print(data.columns)

print("\nDataset Info:")
print(data.info())

print("\nSummary Statistics:")
print(data.describe())


First 5 rows:
    N   P   K  temperature   humidity        ph    rainfall label  \
0  90  42  43    20.879744  82.002744  6.502985  202.935536  rice   
1  85  58  41    21.770462  80.319644  7.038096  226.655537  rice   
2  60  55  44    23.004459  82.320763  7.840207  263.964248  rice   
3  74  35  40    26.491096  80.158363  6.980401  242.864034  rice   
4  78  42  42    20.130175  81.604873  7.628473  262.717340  rice   

   soil_moisture  soil_type  ...  organic_matter  irrigation_frequency  \
0      29.446064          2  ...        3.121395                     4   
1      12.851183          3  ...        2.142021                     4   
2      29.363913          2  ...        1.474974                     1   
3      26.207732          3  ...        8.393907                     1   
4      28.236236          2  ...        5.202285                     3   

   crop_density  pest_pressure  fertilizer_usage  growth_stage  \
0     11.743910      57.607308        188.194958            

In [46]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load the dataset
data = pd.read_csv('Crop_recommendationV2.csv')

# Preview data
print(data.head())

# ✅ Handle missing values (if any)
data = data.dropna()

# ✅ Encode the 'label' column (crop name)
label_encoder = LabelEncoder()
data['crop_label'] = label_encoder.fit_transform(data['label'])  # 🔁 Save into 'crop_label' instead

# ✅ Now drop the original 'label' column
data = data.drop(columns=['label'])

# ✅ Define input features and target
X = data.drop(columns=['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall'])  # Only input features
y = data[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]  # Output values

# ✅ Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


    N   P   K  temperature   humidity        ph    rainfall label  \
0  90  42  43    20.879744  82.002744  6.502985  202.935536  rice   
1  85  58  41    21.770462  80.319644  7.038096  226.655537  rice   
2  60  55  44    23.004459  82.320763  7.840207  263.964248  rice   
3  74  35  40    26.491096  80.158363  6.980401  242.864034  rice   
4  78  42  42    20.130175  81.604873  7.628473  262.717340  rice   

   soil_moisture  soil_type  ...  organic_matter  irrigation_frequency  \
0      29.446064          2  ...        3.121395                     4   
1      12.851183          3  ...        2.142021                     4   
2      29.363913          2  ...        1.474974                     1   
3      26.207732          3  ...        8.393907                     1   
4      28.236236          2  ...        5.202285                     3   

   crop_density  pest_pressure  fertilizer_usage  growth_stage  \
0     11.743910      57.607308        188.194958             1   
1     16

In [47]:
from sklearn.model_selection import train_test_split

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

print("Training set shape:", X_train.shape)
print("Testing set shape:", X_test.shape)


Training set shape: (1760, 16)
Testing set shape: (440, 16)


In [48]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor

# Initialize and train the model
rf_model = MultiOutputRegressor(RandomForestRegressor(n_estimators=100, random_state=42))
rf_model.fit(X_train, y_train)

# Evaluate the model
print("Training Score:", rf_model.score(X_train, y_train))
print("Testing Score:", rf_model.score(X_test, y_test))  

Training Score: 0.9683738374253917
Testing Score: 0.7526591343356596


In [49]:
import joblib

joblib.dump(rf_model, "model.pkl")
joblib.dump(scaler, "scaler.pkl")
joblib.dump(label_encoder, "label_encoder.pkl")


['label_encoder.pkl']

In [50]:
# import numpy as np

# def predict_adjustments(user_input):
#     """
#     Predict adjustments for soil/environment parameters.
#     Args:
#         user_input (dict): Soil/environment parameters provided by the user.
#     """
#     # Convert user input to DataFrame
#     input_data = pd.DataFrame([user_input])
#     input_data_scaled = scaler.transform(input_data)  # Scale input data
    
#     # Predict adjustments
#     predicted_adjustments = rf_model.predict(input_data_scaled)
    
#     # Display the results
#     print("\nPredicted Adjustments Needed:")
#     print(f"N: {predicted_adjustments[0][0]:.2f}")
#     print(f"P: {predicted_adjustments[0][1]:.2f}")
#     print(f"K: {predicted_adjustments[0][2]:.2f}")
#     print(f"Temperature: {predicted_adjustments[0][3]:.2f}°C")
#     print(f"Humidity: {predicted_adjustments[0][4]:.2f}%")
#     print(f"pH: {predicted_adjustments[0][5]:.2f}")
#     print(f"Rainfall: {predicted_adjustments[0][6]:.2f} mm")
import numpy as np
import pandas as pd  # ensure this is imported at the top

def predict_adjustments(user_input):
    """
    Predict adjustments for soil/environment parameters.
    Args:
        user_input (dict): Soil/environment parameters provided by the user.
    Returns:
        list: Predicted adjustment values
    """

    try:
        # Feature order must match training
        feature_order = [
            'soil_moisture', 'soil_type', 'sunlight_exposure', 'wind_speed', 
            'co2_concentration', 'organic_matter', 'irrigation_frequency',
            'crop_density', 'pest_pressure', 'fertilizer_usage', 'growth_stage',
            'urban_area_proximity', 'water_source_type', 'frost_risk',
            'water_usage_efficiency', 'crop_label'
        ]

        # Ensure all required fields are present
        for feature in feature_order:
            if feature not in user_input:
                raise ValueError(f"Missing feature: {feature}")

        # Convert to DataFrame and enforce column order
        input_df = pd.DataFrame([user_input])[feature_order]
        input_scaled = scaler.transform(input_df)
        prediction = rf_model.predict(input_scaled)[0]

        # Output the results
        print("\n📊 Predicted Adjustments Needed:")
        print(f"N: {prediction[0]:.2f}")
        print(f"P: {prediction[1]:.2f}")
        print(f"K: {prediction[2]:.2f}")
        print(f"Temperature: {prediction[3]:.2f} °C")
        print(f"Humidity: {prediction[4]:.2f} %")
        print(f"pH: {prediction[5]:.2f}")
        print(f"Rainfall: {prediction[6]:.2f} mm")

        return prediction

    except Exception as e:
        print("❌ Error in prediction:", e)


In [51]:
user_input = {
    'soil_moisture': 30,
    'soil_type': 1,                # e.g., Sandy
    'sunlight_exposure': 5,
    'wind_speed': 8,
    'co2_concentration': 400,
    'organic_matter': 3,
    'irrigation_frequency': 2,
    'crop_density': 10,
    'pest_pressure': 0,
    'fertilizer_usage': 100,
    'growth_stage': 1,
    'urban_area_proximity': 2,
    'water_source_type': 1,        # e.g., River
    'frost_risk': 0,
    'water_usage_efficiency': 1,
    'crop_label': 4                # this is encoded value of crop like 'Maize'
}


predict_adjustments(user_input)



📊 Predicted Adjustments Needed:
N: 23.24
P: 12.62
K: 30.06
Temperature: 26.53 °C
Humidity: 93.81 %
pH: 5.76
Rainfall: 171.74 mm


array([ 23.24      ,  12.62      ,  30.06      ,  26.52583435,
        93.81398148,   5.75602729, 171.74344385])

In [52]:
print("Features used during training:")
print(data.columns)


Features used during training:
Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall',
       'soil_moisture', 'soil_type', 'sunlight_exposure', 'wind_speed',
       'co2_concentration', 'organic_matter', 'irrigation_frequency',
       'crop_density', 'pest_pressure', 'fertilizer_usage', 'growth_stage',
       'urban_area_proximity', 'water_source_type', 'frost_risk',
       'water_usage_efficiency', 'crop_label'],
      dtype='object')


In [53]:
# def predict_adjustments(user_input):
#     """
#     Predict adjustments for soil/environment parameters.
#     Args:
#         user_input (dict): Soil/environment parameters provided by the user.
#     """
#     # Define the complete list of features based on the training dataset
#     feature_order = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 
#                      'soil_moisture', 'soil_type', 'sunlight_exposure', 'wind_speed',
#                      'co2_concentration', 'organic_matter', 'irrigation_frequency',
#                      'crop_density', 'pest_pressure', 'fertilizer_usage', 'growth_stage',
#                      'urban_area_proximity', 'water_source_type', 'frost_risk', 
#                      'water_usage_efficiency']

#     # Ensure all missing features are filled with 0 or default values
#     for feature in feature_order:
#         if feature not in user_input:
#             user_input[feature] = 0  # Fill with 0 if the feature is missing
    
#     # Convert user input to DataFrame and reorder columns
#     input_data = pd.DataFrame([user_input])
#     input_data = input_data[feature_order]  # Reorder columns to match training features
    
#     # Scale the input data
#     input_data_scaled = scaler.transform(input_data)
    
#     # Predict adjustments
#     predicted_adjustments = rf_model.predict(input_data_scaled)
    
#     # Display the results
#     print("\nPredicted Adjustments Needed:")
#     print(f"N: {predicted_adjustments[0][0]:.2f}")
#     print(f"P: {predicted_adjustments[0][1]:.2f}")
#     print(f"K: {predicted_adjustments[0][2]:.2f}")
#     print(f"Temperature: {predicted_adjustments[0][3]:.2f}°C")
#     print(f"Humidity: {predicted_adjustments[0][4]:.2f}%")
#     print(f"pH: {predicted_adjustments[0][5]:.2f}")
#     print(f"Rainfall: {predicted_adjustments[0][6]:.2f} mm")


In [54]:
# predicted_adjustments = predict_adjustments(user_input)


In [55]:
# user_input = {
#     'soil_moisture': 30,
#     'soil_type': 1,                # e.g., Sandy
#     'sunlight_exposure': 5,
#     'wind_speed': 8,
#     'co2_concentration': 400,
#     'organic_matter': 3,
#     'irrigation_frequency': 2,
#     'crop_density': 10,
#     'pest_pressure': 0,
#     'fertilizer_usage': 100,
#     'growth_stage': 1,
#     'urban_area_proximity': 2,
#     'water_source_type': 1,        # e.g., River
#     'frost_risk': 0,
#     'water_usage_efficiency': 1,
#     'crop_label': 4                # this is encoded value of crop like 'Maize'
# }


In [56]:
predict_adjustments(user_input)



📊 Predicted Adjustments Needed:
N: 23.24
P: 12.62
K: 30.06
Temperature: 26.53 °C
Humidity: 93.81 %
pH: 5.76
Rainfall: 171.74 mm


array([ 23.24      ,  12.62      ,  30.06      ,  26.52583435,
        93.81398148,   5.75602729, 171.74344385])

In [57]:
def generate_guidance(actual, predicted):
    tips = []
    for i, (label, unit) in enumerate([
        ("Nitrogen", "units"), ("Phosphorus", "units"), ("Potassium", "units"),
        ("Temperature", "°C"), ("Humidity", "%"), ("pH", ""), ("Rainfall", "mm")
    ]):
        diff = predicted[i] - actual[i]
        if abs(diff) < 1:
            tips.append(f"✅ {label} is already optimal.")
        elif diff > 0:
            tips.append(f"📈 Increase {label} by ~{diff:.1f} {unit}.")
        else:
            tips.append(f"📉 Decrease {label} by ~{abs(diff):.1f} {unit}.")
    return tips


In [58]:
actual_values = [20, 10, 20, 24, 90, 5.3, 170]  # example actual values
guidance = generate_guidance(actual_values, predicted_adjustments)

for line in guidance:
    print(line)


📈 Increase Nitrogen by ~3.2 units.
📈 Increase Phosphorus by ~2.6 units.
📈 Increase Potassium by ~10.1 units.
📈 Increase Temperature by ~2.5 °C.
📈 Increase Humidity by ~3.8 %.
✅ pH is already optimal.
📈 Increase Rainfall by ~1.7 mm.


In [59]:
def recommend_fertilizer(predicted_adjustments):
    n, p, k = predicted_adjustments[:3]
    print("\nFertilizer Recommendations:")
    if n > 0:
        print(f"Add Urea: {n * 2:.2f} kg (to increase Nitrogen)")
    if p > 0:
        print(f"Add DAP: {p * 1.5:.2f} kg (to increase Phosphorus)")
    if k > 0:
        print(f"Add Muriate of Potash: {k * 2:.2f} kg (to increase Potassium)")


In [60]:
def suggest_alternative_crops(user_input, predicted_adjustments):
    if sum(abs(i) for i in predicted_adjustments[:3]) > 50:
        print("\nAlternative Crop Suggestions:")
        print("Based on current soil, you can grow: 'Wheat', 'Barley', or 'Corn'")
    else:
        print("\nThe soil is suitable for the selected crop.")


In [61]:
def recommend_irrigation(soil_moisture, rainfall):
    print("\nIrrigation Recommendations:")
    if soil_moisture < 20:
        print("Irrigate immediately using drip irrigation.")
    elif rainfall > 50:
        print("No irrigation needed; rainfall is sufficient.")
    else:
        print("Monitor soil and weather closely. Moderate irrigation may be needed.")



In [62]:
import matplotlib.pyplot as plt

def visualize_adjustments(predicted_adjustments):
    adjustments = ['Nitrogen', 'Phosphorus', 'Potassium', 'Temperature', 'Humidity', 'pH', 'Rainfall']
    
    values = predicted_adjustments  # ✅ Directly use the 1D list or array

    plt.figure(figsize=(10, 6))
    plt.bar(adjustments, values, color='skyblue')
    plt.title("Predicted Adjustments for Soil and Environment")
    plt.ylabel("Adjustment Value")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()


In [63]:
def show_all_results(user_input):
    import matplotlib.pyplot as plt

    # Correct feature order for input only
    feature_order = [
        'soil_moisture', 'soil_type', 'sunlight_exposure', 'wind_speed', 
        'co2_concentration', 'organic_matter', 'irrigation_frequency',
        'crop_density', 'pest_pressure', 'fertilizer_usage', 'growth_stage',
        'urban_area_proximity', 'water_source_type', 'frost_risk',
        'water_usage_efficiency', 'crop_label'
    ]

    # Ensure all required features are present
    for feature in feature_order:
        if feature not in user_input:
            user_input[feature] = 0  # default

    # Prepare input
    input_data = pd.DataFrame([user_input])[feature_order]
    input_data_scaled = scaler.transform(input_data)

    # Predict
    predicted_adjustments = rf_model.predict(input_data_scaled)[0]

    # Display predictions
    print("\n📊 Predicted Adjustments Needed:")
    adjustments = ['Nitrogen (N)', 'Phosphorus (P)', 'Potassium (K)', 
                   'Temperature (°C)', 'Humidity (%)', 'pH', 'Rainfall (mm)']
    
    for i, value in enumerate(predicted_adjustments):
        print(f"{adjustments[i]}: {value:.2f}")

    # Fertilizer Recommendation
    print("\n🧪 Fertilizer Recommendations:")
    n, p, k = predicted_adjustments[:3]
    if n > 0:
        print(f"Add Urea: {n * 2:.2f} kg")
    if p > 0:
        print(f"Add DAP: {p * 1.5:.2f} kg")
    if k > 0:
        print(f"Add Muriate of Potash: {k * 2:.2f} kg")
    if n <= 0 and p <= 0 and k <= 0:
        print("No additional fertilizers needed.")

    # Crop Suggestions
    if sum(abs(i) for i in predicted_adjustments[:3]) > 50:
        print("\n🌱 Alternative Crop Suggestions:")
        print("Try: 'Wheat', 'Barley', or 'Corn'")
    else:
        print("\n✅ Soil is suitable for the selected crop.")

    # Visualization
    plt.figure(figsize=(10, 6))
    plt.bar(adjustments, predicted_adjustments, color='skyblue')
    plt.title("Predicted Adjustments for Soil and Environmental Parameters")
    plt.ylabel("Adjustment Values")
    plt.xlabel("Parameters")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()


In [64]:
import joblib
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(label_encoder, 'label_encoder.pkl')
print("Scaler and label encoder saved.")



Scaler and label encoder saved.


In [65]:
model = joblib.load("model.pkl")  # Ensure 'model.pkl' is in the same folder
scaler = joblib.load("scaler.pkl")
label_encoder = joblib.load("label_encoder.pkl")


In [66]:
from sklearn.metrics import mean_absolute_error, r2_score
y_pred = model.predict(X_test)
print("R2:", r2_score(y_test, y_pred))


R2: 0.7526591343356596
