### 1.1 Import all libraries

In [1]:
# Data handling
import pandas as pd
import numpy as np

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error

# Deep Learning (for disease detection)
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam

# Utilities
import os


### 1.2 Load the crop yield dataset

In [2]:
# Load your crop yield dataset
crop_df = pd.read_csv('Updated_Crop_Yield_Prediction_Data.csv')

# Display first few rows
crop_df.head()


Unnamed: 0,Year,State,District,Crop,Area (1000 ha),Production (1000 tons),Yield (Kg per ha),Latitude,Longitude,Growing Season,...,Irrigation Method,Fertilizer Used (kg/ha),Pesticide Usage (kg/ha),Average Temperature (°C),Total Rainfall (mm),Humidity (%),Solar Radiation (W/m²),Pest Incidence (%),Disease Incidence (%),Previous Year Yield (Kg per ha)
0,1966,Andhra Pradesh,Ananthapur,BARLEY,0.0,0.0,0.0,18.861663,69.931545,Rabi,...,Sprinkler,77.499244,0.339829,12.836761,1989.163404,99.954848,877.130033,78.860445,62.899684,0.0
1,1966,Andhra Pradesh,Ananthapur,CASTOR,10.9,2.1,192.66,35.570715,88.417079,Zaid,...,Drip,104.609524,5.45362,11.691893,1646.565457,47.343296,657.080388,63.587315,28.61672,226.652489
2,1966,Andhra Pradesh,Ananthapur,CHICKPEA,2.0,1.0,500.0,29.227824,81.42677,Kharif,...,Sprinkler,72.849583,6.295011,25.108588,1940.897422,94.243966,100.950962,83.084613,38.801785,472.723823
3,1966,Andhra Pradesh,Ananthapur,COTTON,42.0,2.0,47.62,25.361096,71.514013,Rabi,...,Drip,169.250142,6.178908,34.626835,2104.871721,76.118179,751.248465,81.083124,59.704964,46.798309
4,1966,Andhra Pradesh,Ananthapur,FINGER MILLET,39.0,29.0,743.59,12.524541,83.872626,Rabi,...,Sprinkler,98.442821,9.814856,33.885312,563.98092,68.152758,612.576083,34.820268,29.856022,696.308385


### 1.3 Explore basic info

In [3]:
# Check shape and info
print("Shape:", crop_df.shape)
crop_df.info()

# Check for missing values
crop_df.isnull().sum()


Shape: (452088, 26)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 452088 entries, 0 to 452087
Data columns (total 26 columns):
 #   Column                           Non-Null Count   Dtype  
---  ------                           --------------   -----  
 0   Year                             452088 non-null  int64  
 1   State                            452088 non-null  object 
 2   District                         452088 non-null  object 
 3   Crop                             452088 non-null  object 
 4   Area (1000 ha)                   452088 non-null  float64
 5   Production (1000 tons)           371358 non-null  float64
 6   Yield (Kg per ha)                371358 non-null  float64
 7   Latitude                         452088 non-null  float64
 8   Longitude                        452088 non-null  float64
 9   Growing Season                   452088 non-null  object 
 10  Soil Type                        452088 non-null  object 
 11  Soil pH                          452088 non-n

Year                                    0
State                                   0
District                                0
Crop                                    0
Area (1000 ha)                          0
Production (1000 tons)              80730
Yield (Kg per ha)                   80730
Latitude                                0
Longitude                               0
Growing Season                          0
Soil Type                               0
Soil pH                                 0
Soil Moisture (%)                       0
Nitrogen (N) (%)                        0
Phosphorus (P) (%)                      0
Potassium (K) (%)                       0
Irrigation Method                  112893
Fertilizer Used (kg/ha)                 0
Pesticide Usage (kg/ha)                 0
Average Temperature (°C)                0
Total Rainfall (mm)                     0
Humidity (%)                            0
Solar Radiation (W/m²)                  0
Pest Incidence (%)                

### 1.4 Load the PlantVillage dataset

In [4]:
# Check available image folders (diseases)
base_dir = "PlantVillage"
os.listdir(base_dir)[:10]


['Pepper__bell___Bacterial_spot',
 'Pepper__bell___healthy',
 'Potato___Early_blight',
 'Potato___healthy',
 'Potato___Late_blight',
 'Tomato_Bacterial_spot',
 'Tomato_Early_blight',
 'Tomato_healthy',
 'Tomato_Late_blight',
 'Tomato_Leaf_Mold']

# Step 2: Data Preprocessing

### 2.1 Handle Missing Values

In [5]:
# For irrigation method, fill missing with mode (most frequent)
crop_df['Irrigation Method'].fillna(crop_df['Irrigation Method'].mode()[0], inplace=True)

# For Yield and Previous Year Yield, drop rows with missing values
crop_df = crop_df.dropna(subset=['Yield (Kg per ha)', 'Previous Year Yield (Kg per ha)'])


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  crop_df['Irrigation Method'].fillna(crop_df['Irrigation Method'].mode()[0], inplace=True)


### 2.2 Convert Area to acres

In [6]:
# 1 ha = 2.47105 acres
crop_df['Area (acres)'] = crop_df['Area (1000 ha)'] * 1000 * 2.47105


### 2.3 Encode Categorical Features

In [7]:
from sklearn.preprocessing import LabelEncoder

le_state = LabelEncoder()
le_district = LabelEncoder()
le_crop = LabelEncoder()
le_irrigation = LabelEncoder()

crop_df['State_enc'] = le_state.fit_transform(crop_df['State'])
crop_df['District_enc'] = le_district.fit_transform(crop_df['District'])
crop_df['Crop_enc'] = le_crop.fit_transform(crop_df['Crop'])
crop_df['Irrigation_enc'] = le_irrigation.fit_transform(crop_df['Irrigation Method'])


### 2.4 Select Features and Target

In [8]:
features = ['Area (acres)', 'Crop_enc', 'Previous Year Yield (Kg per ha)',
            'Irrigation_enc', 'Average Temperature (°C)', 'Total Rainfall (mm)',
            'Humidity (%)', 'Soil pH', 'Soil Moisture (%)', 'Nitrogen (N) (%)',
            'Phosphorus (P) (%)', 'Potassium (K) (%)']

target = 'Yield (Kg per ha)'

X = crop_df[features]
y = crop_df[target]


### 2.5 Split Dataset for Training

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Step 3: Train Crop Yield Prediction Model

In [10]:
# Using Random Forest Regressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Initialize model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)

# Train model
rf_model.fit(X_train, y_train)

# Predict on test set
y_pred = rf_model.predict(X_test)

# Evaluate
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("RMSE:", rmse)
print("R2 Score:", r2)


RMSE: 168.24234360236522
R2 Score: 0.9824041071050899


# Step 4: Save the Model

In [11]:
import pickle

# Save model
with open('yield_model.pkl', 'wb') as f:
    pickle.dump(rf_model, f)

# Save LabelEncoders too for later use in Streamlit
with open('le_crop.pkl', 'wb') as f:
    pickle.dump(le_crop, f)
with open('le_irrigation.pkl', 'wb') as f:
    pickle.dump(le_irrigation, f)


# Step 5: Disease Detection (DL Part)

### 5.1 Import Libraries

In [12]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam


### 5.2 Define Paths

In [13]:
base_dir = 'PlantVillage'

# List some folders (disease classes)
classes = os.listdir(base_dir)
print(classes[:10])


['Pepper__bell___Bacterial_spot', 'Pepper__bell___healthy', 'Potato___Early_blight', 'Potato___healthy', 'Potato___Late_blight', 'Tomato_Bacterial_spot', 'Tomato_Early_blight', 'Tomato_healthy', 'Tomato_Late_blight', 'Tomato_Leaf_Mold']


### 5.3 Image Data Preprocessing

In [14]:
# Resize images to 224x224 (for MobileNetV2)
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,  # 20% for validation
    rotation_range=20,
    zoom_range=0.2,
    horizontal_flip=True
)

train_generator = train_datagen.flow_from_directory(
    base_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training'
)

val_generator = train_datagen.flow_from_directory(
    base_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation'
)


Found 16516 images belonging to 15 classes.
Found 4122 images belonging to 15 classes.


### 5.4 Build the CNN Model (Transfer Learning with MobileNetV2)

In [15]:
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224,224,3))
base_model.trainable = False  # Freeze base layers

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.3)(x)
predictions = Dense(len(classes), activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Compile model
model.compile(optimizer=Adam(learning_rate=0.0001), 
              loss='categorical_crossentropy', metrics=['accuracy'])


### 5.5 Train the Model

In [17]:
EPOCHS = 10

history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=EPOCHS
)


  self._warn_if_super_not_called()


Epoch 1/10
[1m517/517[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m938s[0m 2s/step - accuracy: 0.3503 - loss: 2.0937 - val_accuracy: 0.6460 - val_loss: 1.3218
Epoch 2/10
[1m517/517[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m708s[0m 1s/step - accuracy: 0.6255 - loss: 1.2497 - val_accuracy: 0.7377 - val_loss: 0.9403
Epoch 3/10
[1m517/517[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m689s[0m 1s/step - accuracy: 0.6996 - loss: 0.9801 - val_accuracy: 0.7656 - val_loss: 0.7866
Epoch 4/10
[1m517/517[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m671s[0m 1s/step - accuracy: 0.7422 - loss: 0.8372 - val_accuracy: 0.7960 - val_loss: 0.6878
Epoch 5/10
[1m517/517[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m632s[0m 1s/step - accuracy: 0.7689 - loss: 0.7512 - val_accuracy: 0.8149 - val_loss: 0.6212
Epoch 6/10
[1m517/517[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m610s[0m 1s/step - accuracy: 0.7864 - loss: 0.6816 - val_accuracy: 0.8275 - val_loss: 0.5716
Epoch 7/10
[1m517/517

In [18]:
# Save the disease detection model
model.save('plant_disease_model.h5')




### 5.6 Map Diseases to Precautions

In [19]:
# ✅ Plant Disease Precautions Dictionary
disease_precautions = {
    # 🍅 Tomato
    'Tomato___Bacterial_spot': 'Use certified seeds, apply copper-based fungicide, and avoid overhead watering.',
    'Tomato___Early_blight': 'Use fungicides containing chlorothalonil or mancozeb; remove infected leaves.',
    'Tomato___Late_blight': 'Remove infected leaves, apply metalaxyl fungicide, and improve field drainage.',
    'Tomato___Leaf_Mold': 'Increase air circulation, reduce humidity, and apply copper-based sprays.',
    'Tomato___Septoria_leaf_spot': 'Remove debris, rotate crops, and apply fungicides like mancozeb.',
    'Tomato___Spider_mites_Two_spotted_spider_mite': 'Spray neem oil, avoid dry dusty conditions, and maintain field hygiene.',
    'Tomato___Target_Spot': 'Use fungicide sprays and avoid overhead irrigation.',
    'Tomato___Tomato_Yellow_Leaf_Curl_Virus': 'Control whiteflies, remove infected plants, and use resistant varieties.',
    'Tomato___Tomato_mosaic_virus': 'Disinfect tools, wash hands, and use virus-free seeds.',
    'Tomato___Healthy': 'No disease detected. Maintain good irrigation and nutrient management.',

    # 🥔 Potato
    'Potato___Early_blight': 'Use certified seed, rotate crops, and apply fungicides containing chlorothalonil.',
    'Potato___Late_blight': 'Destroy infected plants, apply copper oxychloride spray, and avoid waterlogging.',
    'Potato___Healthy': 'No disease detected. Continue standard fertilizer and irrigation schedule.',

    # 🍌 Banana
    'Banana___Black_sigatoka': 'Prune affected leaves, improve air circulation, and spray propiconazole.',
    'Banana___Healthy': 'No disease detected. Keep the field clean and maintain soil fertility.',

    # 🍎 Apple
    'Apple___Apple_scab': 'Apply sulfur or copper fungicide before rainfall and prune infected branches.',
    'Apple___Black_rot': 'Remove mummified fruits, disinfect pruning tools, and use fungicide sprays.',
    'Apple___Cedar_apple_rust': 'Remove nearby juniper trees, spray myclobutanil fungicide, and prune infected areas.',
    'Apple___Healthy': 'No disease detected. Regular pruning and balanced fertilization recommended.',

    # 🍇 Grape
    'Grape___Black_rot': 'Remove infected leaves and berries, apply copper fungicides, and ensure air movement.',
    'Grape___Esca_Black_Measles': 'Avoid pruning during wet weather and remove infected vines.',
    'Grape___Leaf_blight_Isariopsis_Leaf_Spot': 'Use mancozeb spray and ensure adequate sunlight penetration.',
    'Grape___Healthy': 'No disease detected. Maintain pruning schedule and pest control.',

    # 🍊 Citrus
    'Citrus___Greening': 'Control psyllid insects with insecticides, remove infected trees, and use disease-free plants.',
    'Citrus___Healthy': 'No disease detected. Maintain adequate irrigation and fertilizer balance.',

    # 🌾 Corn (Maize)
    'Corn___Cercospora_leaf_spot_Gray_leaf_spot': 'Use resistant hybrids, rotate crops, and apply fungicides early.',
    'Corn___Common_rust': 'Plant resistant varieties and apply strobilurin fungicides when necessary.',
    'Corn___Northern_Leaf_Blight': 'Use resistant seeds, apply mancozeb, and avoid overhead watering.',
    'Corn___Healthy': 'No disease detected. Keep soil well-drained and avoid overcrowding.',

    # 🍓 Strawberry
    'Strawberry___Leaf_scorch': 'Remove infected leaves, improve spacing, and apply captan fungicide.',
    'Strawberry___Healthy': 'No disease detected. Maintain proper spacing and irrigation.',

    # 🌶️ Pepper (Bell)
    'Pepper_bell___Bacterial_spot': 'Apply copper fungicide weekly and avoid working in wet fields.',
    'Pepper_bell___Healthy': 'No disease detected. Keep soil nutrients balanced.',

    # 🍠 Cassava
    'Cassava___Bacterial_Blight': 'Use resistant varieties and disinfect tools before pruning.',
    'Cassava___Brown_Streak_Disease': 'Use clean planting material and remove infected plants.',
    'Cassava___Mosaic_Disease': 'Control whiteflies and use disease-free cuttings.',
    'Cassava___Healthy': 'No disease detected. Continue normal irrigation.',

    # 🍅 Other
    'Soybean___Healthy': 'No disease detected. Maintain crop rotation and weed control.',
    'Squash___Powdery_mildew': 'Apply sulfur-based fungicide and ensure proper ventilation.',
    'Blueberry___Healthy': 'No disease detected. Maintain soil pH between 4.5–5.5 and avoid waterlogging.',
    'Raspberry___Healthy': 'No disease detected. Use drip irrigation and avoid overhead watering.',
}


# Step 6: Integration (Farmer Simulation)

### 6.1 Load the Saved Models

In [21]:
import pickle
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
import numpy as np

# Load ML model and encoders
with open('yield_model.pkl', 'rb') as f:
    yield_model = pickle.load(f)

with open('le_crop.pkl', 'rb') as f:
    le_crop = pickle.load(f)
with open('le_irrigation.pkl', 'rb') as f:
    le_irrigation = pickle.load(f)

# Load DL model
disease_model = load_model('plant_disease_model.h5')




### 6.2 Define the Crop Yield Prediction Function

In [22]:
def predict_yield(crop_name, area_acres, prev_yield, irrigation_type):
    # Example: Auto-generated environmental values (in real case use APIs)
    avg_temp = 30.5
    rainfall = 110.0
    humidity = 65.0
    soil_ph = 6.5
    soil_moisture = 22.3
    nitrogen = 1.2
    phosphorus = 0.9
    potassium = 1.1

    # Encode categorical values
    crop_encoded = le_crop.transform([crop_name])[0]
    irrigation_encoded = le_irrigation.transform([irrigation_type])[0]

    # Create input array
    input_data = np.array([[area_acres, crop_encoded, prev_yield, irrigation_encoded,
                            avg_temp, rainfall, humidity, soil_ph,
                            soil_moisture, nitrogen, phosphorus, potassium]])

    # Predict yield
    predicted_yield = yield_model.predict(input_data)[0]
    print(f"🌾 Predicted Crop Yield for {crop_name}: {predicted_yield:.2f} kg/ha")


### 6.3 Define the Disease Detection Function

In [23]:
def detect_disease(img_path):
    # Load and preprocess image
    img = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)

    # Predict
    prediction = disease_model.predict(img_array)
    predicted_class = np.argmax(prediction, axis=1)[0]
    class_name = list(disease_precautions.keys())[predicted_class]
    precaution = disease_precautions[class_name]

    print(f"🍃 Detected Disease: {class_name}")
    print(f"💡 Precaution: {precaution}")


### 6.4 Test Both Functions

In [26]:
# For yield prediction:
predict_yield('COTTON', area_acres=10, prev_yield=2500, irrigation_type='Drip')


🌾 Predicted Crop Yield for COTTON: 2470.37 kg/ha




In [35]:
# For disease detection:
detect_disease(r'C:\Users\HP\Desktop\ML Projet 2\PlantVillage\Pepper__bell___Bacterial_spot\0022d6b7-d47c-4ee2-ae9a-392a53f48647___JR_B.Spot 8964.JPG')  # Replace with your test image path


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
🍃 Detected Disease: Tomato___Bacterial_spot
💡 Precaution: Use certified seeds, apply copper-based fungicide, and avoid overhead watering.


# Step 7: Streamlit App Integration

In [None]:
# see it in app.py