In [1]:
import pandas as pd

# Load the dataset
data = pd.read_csv('Crop_recommendationV2.csv')

# Display the first 5 rows
print("First 5 rows:")
print(data.head())

# Check for column names and basic information
print("\nColumn Names:")
print(data.columns)

print("\nDataset Info:")
print(data.info())

print("\nSummary Statistics:")
print(data.describe())


First 5 rows:
    N   P   K  temperature   humidity        ph    rainfall label  \
0  90  42  43    20.879744  82.002744  6.502985  202.935536  rice   
1  85  58  41    21.770462  80.319644  7.038096  226.655537  rice   
2  60  55  44    23.004459  82.320763  7.840207  263.964248  rice   
3  74  35  40    26.491096  80.158363  6.980401  242.864034  rice   
4  78  42  42    20.130175  81.604873  7.628473  262.717340  rice   

   soil_moisture  soil_type  ...  organic_matter  irrigation_frequency  \
0      29.446064          2  ...        3.121395                     4   
1      12.851183          3  ...        2.142021                     4   
2      29.363913          2  ...        1.474974                     1   
3      26.207732          3  ...        8.393907                     1   
4      28.236236          2  ...        5.202285                     3   

   crop_density  pest_pressure  fertilizer_usage  growth_stage  \
0     11.743910      57.607308        188.194958            

In [2]:
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Handle missing values
data = data.dropna()

# Encode the 'label' column (crop name)
label_encoder = LabelEncoder()
data['label'] = label_encoder.fit_transform(data['label'])

# Define input features (X) and target (y)
X = data.drop(columns=['label'])  # All soil/environment parameters
y = data[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]  # Adjustments needed

# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [3]:
from sklearn.model_selection import train_test_split

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

print("Training set shape:", X_train.shape)
print("Testing set shape:", X_test.shape)


Training set shape: (1760, 22)
Testing set shape: (440, 22)


In [4]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor

# Initialize and train the model
rf_model = MultiOutputRegressor(RandomForestRegressor(n_estimators=100, random_state=42))
rf_model.fit(X_train, y_train)

# Evaluate the model
print("Training Score:", rf_model.score(X_train, y_train))
print("Testing Score:", rf_model.score(X_test, y_test))


Training Score: 0.9999815718599752
Testing Score: 0.9998378668522723


In [5]:
import numpy as np

def predict_adjustments(user_input):
    """
    Predict adjustments for soil/environment parameters.
    Args:
        user_input (dict): Soil/environment parameters provided by the user.
    """
    # Convert user input to DataFrame
    input_data = pd.DataFrame([user_input])
    input_data_scaled = scaler.transform(input_data)  # Scale input data
    
    # Predict adjustments
    predicted_adjustments = rf_model.predict(input_data_scaled)
    
    # Display the results
    print("\nPredicted Adjustments Needed:")
    print(f"N: {predicted_adjustments[0][0]:.2f}")
    print(f"P: {predicted_adjustments[0][1]:.2f}")
    print(f"K: {predicted_adjustments[0][2]:.2f}")
    print(f"Temperature: {predicted_adjustments[0][3]:.2f}°C")
    print(f"Humidity: {predicted_adjustments[0][4]:.2f}%")
    print(f"pH: {predicted_adjustments[0][5]:.2f}")
    print(f"Rainfall: {predicted_adjustments[0][6]:.2f} mm")


In [6]:
user_input = {
    'N': 40, 'P': 20, 'K': 30, 'temperature': 25, 'humidity': 50, 'ph': 6.5,
    'rainfall': 100, 'soil_moisture': 30, 'sunlight_exposure': 5, 'wind_speed': 8,
    'co2_concentration': 400
}

predict_adjustments(user_input)


ValueError: The feature names should match those that were passed during fit.
Feature names seen at fit time, yet now missing:
- crop_density
- fertilizer_usage
- frost_risk
- growth_stage
- irrigation_frequency
- ...


In [7]:
print("Features used during training:")
print(data.columns)


Features used during training:
Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'label',
       'soil_moisture', 'soil_type', 'sunlight_exposure', 'wind_speed',
       'co2_concentration', 'organic_matter', 'irrigation_frequency',
       'crop_density', 'pest_pressure', 'fertilizer_usage', 'growth_stage',
       'urban_area_proximity', 'water_source_type', 'frost_risk',
       'water_usage_efficiency'],
      dtype='object')


In [8]:
def predict_adjustments(user_input):
    """
    Predict adjustments for soil/environment parameters.
    Args:
        user_input (dict): Soil/environment parameters provided by the user.
    """
    # Define the complete list of features based on the training dataset
    feature_order = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 
                     'soil_moisture', 'soil_type', 'sunlight_exposure', 'wind_speed',
                     'co2_concentration', 'organic_matter', 'irrigation_frequency',
                     'crop_density', 'pest_pressure', 'fertilizer_usage', 'growth_stage',
                     'urban_area_proximity', 'water_source_type', 'frost_risk', 
                     'water_usage_efficiency']

    # Ensure all missing features are filled with 0 or default values
    for feature in feature_order:
        if feature not in user_input:
            user_input[feature] = 0  # Fill with 0 if the feature is missing
    
    # Convert user input to DataFrame and reorder columns
    input_data = pd.DataFrame([user_input])
    input_data = input_data[feature_order]  # Reorder columns to match training features
    
    # Scale the input data
    input_data_scaled = scaler.transform(input_data)
    
    # Predict adjustments
    predicted_adjustments = rf_model.predict(input_data_scaled)
    
    # Display the results
    print("\nPredicted Adjustments Needed:")
    print(f"N: {predicted_adjustments[0][0]:.2f}")
    print(f"P: {predicted_adjustments[0][1]:.2f}")
    print(f"K: {predicted_adjustments[0][2]:.2f}")
    print(f"Temperature: {predicted_adjustments[0][3]:.2f}°C")
    print(f"Humidity: {predicted_adjustments[0][4]:.2f}%")
    print(f"pH: {predicted_adjustments[0][5]:.2f}")
    print(f"Rainfall: {predicted_adjustments[0][6]:.2f} mm")


In [9]:
user_input = {
    'N': 40, 'P': 20, 'K': 30, 'temperature': 25, 'humidity': 50, 'ph': 6.5,
    'rainfall': 100, 'soil_moisture': 30, 'soil_type': 1, 'sunlight_exposure': 5, 
    'wind_speed': 8, 'co2_concentration': 400, 'organic_matter': 3, 
    'irrigation_frequency': 2, 'crop_density': 10, 'pest_pressure': 0, 
    'fertilizer_usage': 100, 'growth_stage': 1, 'urban_area_proximity': 2, 
    'water_source_type': 1, 'frost_risk': 0, 'water_usage_efficiency': 1
}


In [10]:
predict_adjustments(user_input)



Predicted Adjustments Needed:
N: 40.00
P: 20.00
K: 30.00
Temperature: 25.00°C
Humidity: 50.05%
pH: 6.50
Rainfall: 100.03 mm


In [11]:
def recommend_fertilizer(predicted_adjustments):
    n, p, k = predicted_adjustments[0][0], predicted_adjustments[0][1], predicted_adjustments[0][2]
    print("\nFertilizer Recommendations:")
    if n > 0:
        print(f"Add Urea: {n * 2} kg (to increase Nitrogen)")
    if p > 0:
        print(f"Add DAP: {p * 1.5} kg (to increase Phosphorus)")
    if k > 0:
        print(f"Add Muriate of Potash: {k * 2} kg (to increase Potassium)")


In [12]:
def suggest_alternative_crops(user_input, predicted_adjustments):
    if sum(abs(i) for i in predicted_adjustments[0][:3]) > 50:  # If adjustments are high
        print("\nAlternative Crop Suggestions:")
        print("Based on current soil, you can grow: 'Wheat', 'Barley', or 'Corn'")


In [13]:
def recommend_irrigation(soil_moisture, rainfall):
    print("\nIrrigation Recommendations:")
    if soil_moisture < 20:
        print("Irrigate immediately using drip irrigation.")
    elif rainfall > 50:
        print("No irrigation needed; rainfall is sufficient.")


In [14]:
import matplotlib.pyplot as plt

def visualize_adjustments(predicted_adjustments):
    adjustments = ['Nitrogen', 'Phosphorus', 'Potassium', 'Temperature', 'Humidity', 'pH', 'Rainfall']
    values = predicted_adjustments[0]
    
    plt.figure(figsize=(10, 6))
    plt.bar(adjustments, values, color='skyblue')
    plt.title("Predicted Adjustments for Soil and Environment")
    plt.ylabel("Adjustment Value")
    plt.show()


In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def show_all_results(user_input):
    """
    Displays predicted adjustments, fertilizer recommendations, alternative crops,
    and visualizations for the user's input.
    Args:
        user_input (dict): Soil and environmental parameters provided by the user.
    """
    # List of features (to align input with training data)
    feature_order = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 
                     'soil_moisture', 'soil_type', 'sunlight_exposure', 'wind_speed',
                     'co2_concentration', 'organic_matter', 'irrigation_frequency',
                     'crop_density', 'pest_pressure', 'fertilizer_usage', 'growth_stage',
                     'urban_area_proximity', 'water_source_type', 'frost_risk', 
                     'water_usage_efficiency']

    # Ensure all missing features are filled with 0
    for feature in feature_order:
        if feature not in user_input:
            user_input[feature] = 0  # Default value
    
    # Convert user input to DataFrame and reorder columns
    input_data = pd.DataFrame([user_input])
    input_data = input_data[feature_order]  # Match order of features
    
    # Scale input data
    input_data_scaled = scaler.transform(input_data)
    
    # Predict adjustments
    predicted_adjustments = rf_model.predict(input_data_scaled)
    
    # Display predicted adjustments
    print("\nPredicted Adjustments Needed:")
    adjustments = ['Nitrogen (N)', 'Phosphorus (P)', 'Potassium (K)', 
                   'Temperature (°C)', 'Humidity (%)', 'pH', 'Rainfall (mm)']
    
    for i, value in enumerate(predicted_adjustments[0]):
        print(f"{adjustments[i]}: {value:.2f}")
    
    # Fertilizer Recommendations
    print("\nFertilizer Recommendations:")
    n, p, k = predicted_adjustments[0][0], predicted_adjustments[0][1], predicted_adjustments[0][2]
    if n > 0:
        print(f"Add Urea: {n * 2:.2f} kg (to increase Nitrogen)")
    if p > 0:
        print(f"Add DAP: {p * 1.5:.2f} kg (to increase Phosphorus)")
    if k > 0:
        print(f"Add Muriate of Potash: {k * 2:.2f} kg (to increase Potassium)")
    if n <= 0 and p <= 0 and k <= 0:
        print("No additional fertilizers needed for N, P, or K.")
    
    # Alternative Crop Suggestions
    if sum(abs(i) for i in predicted_adjustments[0][:3]) > 50:  # Threshold for adjustments
        print("\nAlternative Crop Suggestions:")
        print("The current soil is not ideal for this crop. You can consider growing 'Wheat', 'Barley', or 'Corn'.")
    else:
        print("\nThe soil is suitable for the selected crop.")

    # Visualization of adjustments
    plt.figure(figsize=(10, 6))
    plt.bar(adjustments, predicted_adjustments[0], color='skyblue')
    plt.title("Predicted Adjustments for Soil and Environmental Parameters")
    plt.ylabel("Adjustment Values")
    plt.xlabel("Parameters")
    plt.xticks(rotation=45)
    plt.show()


In [16]:
import joblib

# Save the trained model
joblib.dump(rf_model, 'model.pkl')  # For Random Forest
print("Model saved as model.pkl")


Model saved as model.pkl


In [17]:
model = joblib.load("model.pkl")  # Ensure 'model.pkl' is in the same folder
