# **Assignment No.1**
### By BTAIMLA71

In [None]:
# Mount Google Drive at /content/drive; the dataset paths used below live under it.
# NOTE(review): google.colab is presumably only importable inside a Colab runtime — confirm before running elsewhere.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# importing required libraries
import numpy as np
import pandas as pd
import os
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import (
    Conv2D,
    MaxPooling2D,
    Flatten,
    Dense,
    BatchNormalization,
    Dropout,
)
from keras.preprocessing.image import img_to_array
import cv2
from tensorflow.keras.initializers import HeNormal

#### Step 1: Simple CNN for Crop Disease Detection

In [None]:
# Class folders of the image dataset on the mounted Drive.
# The trailing slash matters: file names are appended to these strings directly.
healthy_images_path = "/content/drive/MyDrive/crop-dataset/Healthy/"
disease_images_path = "/content/drive/MyDrive/crop-dataset/Late_Blight/"

In [None]:
# Load both image classes into one feature list (X) and one label list (Y).
def _load_images(directory, file_names, target_size=(256, 256)):
    """Read the given files from ``directory`` as float image arrays.

    Args:
        directory: Folder that contains the image files.
        file_names: Names of the files to read, relative to ``directory``.
        target_size: ``(height, width)`` every image is brought to, so that all
            samples can later be stacked into a single array for the CNN.

    Returns:
        List of arrays produced by ``img_to_array``, one per readable file.
        Files that ``cv2.imread`` cannot decode are reported and skipped.
    """
    height, width = target_size
    images = []
    for file_name in file_names:
        file_path = os.path.join(directory, file_name)
        img = cv2.imread(file_path)

        # cv2.imread signals failure by returning None instead of raising
        if img is None:
            print(f"Skipping unreadable file: {file_path}")
            continue

        # cv2.resize expects (width, height), the reverse of img.shape[:2]
        if img.shape[:2] != (height, width):
            img = cv2.resize(img, (width, height))

        images.append(img_to_array(img))
    return images


# sorted() fixes the sample order, so the seeded train/test split is repeatable
healthy = sorted(os.listdir(healthy_images_path))
late_blight = sorted(os.listdir(disease_images_path))

healthy_images = _load_images(healthy_images_path, healthy)
late_blight_images = _load_images(disease_images_path, late_blight)

# label 0 = healthy, label 1 = late blight
X = healthy_images + late_blight_images
Y = [0] * len(healthy_images) + [1] * len(late_blight_images)

In [None]:
# Hold out 20% of the images for testing; the fixed seed keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Small binary image classifier: one convolution block followed by a dense head.
cnn_model = Sequential(
    [
        Conv2D(32, (3, 3), input_shape=(256, 256, 3), activation='relu', kernel_initializer=HeNormal()),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(128, activation='relu', kernel_initializer=HeNormal()),
        Dropout(0.5),  # regularization against overfitting
        Dense(1, activation='sigmoid'),  # one sigmoid unit for the binary label
    ]
)

In [None]:
# Stack the Python lists into NumPy arrays before handing them to the model.
X_train, Y_train = np.array(X_train), np.array(Y_train)
X_test, Y_test = np.array(X_test), np.array(Y_test)

# Sanity check: print the shape of every split, training first.
for split_array in (X_train, Y_train, X_test, Y_test):
    print(split_array.shape)

(344, 256, 256, 3)
(344,)
(86, 256, 256, 3)
(86,)


In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric.
cnn_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 30 epochs in mini-batches of 10, printing progress for every epoch.
cnn_model.fit(
    X_train,
    Y_train,
    epochs=30,
    batch_size=10,
    verbose=1,
)

Epoch 1/30
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 67ms/step - accuracy: 0.8560 - loss: 20.6748
Epoch 2/30
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - accuracy: 0.9841 - loss: 2.0802
Epoch 3/30
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - accuracy: 0.9949 - loss: 0.8121
Epoch 4/30
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - accuracy: 0.9763 - loss: 4.9036
Epoch 5/30
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - accuracy: 0.9928 - loss: 0.7361
Epoch 6/30
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - accuracy: 0.9962 - loss: 1.4507
Epoch 7/30
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - accuracy: 0.9926 - loss: 1.1229
Epoch 8/30
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - accuracy: 0.9873 - loss: 1.3035
Epoch 9/30
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7b51653c1330>

In [None]:
# Score the trained CNN on the held-out images. evaluate() returns the loss
# first, followed by the metrics passed to compile(), so index 1 is the accuracy.
test_metrics = cnn_model.evaluate(X_test, Y_test)
accuracy = test_metrics[1]
print(" Accuracy - ", accuracy)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 1.0000 - loss: 0.0000e+00
 Accuracy -  1.0


#### Step 2: Random Forest for Yield Prediction (using environmental data from `yield_dataset.csv`)

In [None]:
# Read the tabular yield dataset (a CSV on the mounted Drive) into a DataFrame.
yield_data = pd.read_csv("/content/drive/MyDrive/crop-dataset/yield_dataset.csv")

In [None]:
# Preview the first rows to check the column names and typical values.
yield_data.head()

Unnamed: 0,Rain Fall (mm),Fertilizer,Temperatue,Nitrogen (N),Phosphorus (P),Potassium (K),Yeild (Q/acre)
0,1230.0,80.0,28,80.0,24.0,20.0,12.0
1,480.0,60.0,36,70.0,20.0,18.0,8.0
2,1250.0,75.0,29,78.0,22.0,19.0,11.0
3,450.0,65.0,35,70.0,19.0,18.0,9.0
4,1200.0,80.0,27,79.0,22.0,19.0,11.0


In [None]:
# Keep only complete records: a row is discarded as soon as any of its values is missing.
yield_data = yield_data.dropna(axis=0, how="any")

In [None]:
# Summarize column dtypes and non-null counts after the incomplete rows were dropped.
yield_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 99 entries, 0 to 98
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Rain Fall (mm)  99 non-null     float64
 1   Fertilizer      99 non-null     float64
 2   Temperatue      99 non-null     object 
 3   Nitrogen (N)    99 non-null     float64
 4   Phosphorus (P)  99 non-null     float64
 5   Potassium (K)   99 non-null     float64
 6   Yeild (Q/acre)  99 non-null     float64
dtypes: float64(6), object(1)
memory usage: 6.2+ KB


In [None]:
# Convert the temperature column from object dtype to float.
# Assigning the converted values through ``.loc[:, col] = ...`` writes them
# into the existing object column and leaves its dtype unchanged, so the frame
# is rebuilt with ``astype`` instead, which does change the column dtype.
yield_data = yield_data.astype({"Temperatue": float})


In [None]:
# Print the summary again to check the dtype of "Temperatue" after the conversion cell above.
yield_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 99 entries, 0 to 98
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Rain Fall (mm)  99 non-null     float64
 1   Fertilizer      99 non-null     float64
 2   Temperatue      99 non-null     object 
 3   Nitrogen (N)    99 non-null     float64
 4   Phosphorus (P)  99 non-null     float64
 5   Potassium (K)   99 non-null     float64
 6   Yeild (Q/acre)  99 non-null     float64
dtypes: float64(6), object(1)
memory usage: 6.2+ KB


In [None]:
# Select the model inputs (rainfall, temperature and N/P/K nutrient levels)
# and the yield target from the loaded dataset.
feature_columns = ['Rain Fall (mm)', 'Temperatue', 'Nitrogen (N)', "Phosphorus (P)", "Potassium (K)"]

# Cast to float explicitly: if "Temperatue" is still an object column the
# feature matrix would otherwise become an object array.
X = yield_data[feature_columns].astype(float).values

# Single-bracket selection gives a 1-D target of shape (n_samples,), the form
# scikit-learn regressors expect; a (n_samples, 1) column vector makes fit()
# emit a DataConversionWarning.
y = yield_data["Yeild (Q/acre)"].values

In [None]:
# Reserve 20% of the rows for evaluation; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Yield regressor: a forest of 100 trees with a fixed seed, fitted on the training rows.
yield_model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)
yield_model.fit(X_train, y_train)

  return fit_method(estimator, *args, **kwargs)


In [None]:
# Evaluate on the held-out rows. For a regressor, score() returns the
# coefficient of determination (R^2), not a classification accuracy, so the
# value is stored and printed under that name. This also avoids overwriting
# the CNN's `accuracy` variable.
yield_r2 = yield_model.score(X_test, y_test)
print("R^2 score - ", yield_r2)

Accuracy -  0.9090613903350687


#### Step 3: Simple Recommendation System

In [None]:
def recommend(disease_prediction, yield_prediction, disease_threshold=0.5, low_yield_threshold=50):
    """Turn the two model outputs into a single textual recommendation.

    Args:
        disease_prediction: Disease probability, given as a number or a
            one-element array (such as the ``(1, 1)`` result of ``predict``).
        yield_prediction: Predicted yield, given as a number or a one-element array.
        disease_threshold: Probability at or above which the crop is treated
            as diseased.
        low_yield_threshold: Yield below which the low-yield advice is
            returned; must use the same unit as ``yield_prediction``.

    Returns:
        The recommendation message. A detected disease takes priority over a
        low yield.

    Raises:
        ValueError: If a prediction contains more than one element.
    """
    # .item() accepts scalars and one-element arrays of any shape and rejects
    # larger arrays, so a whole batch can never be judged by accident.
    disease_score = np.asarray(disease_prediction, dtype=float).item()
    predicted_yield = np.asarray(yield_prediction, dtype=float).item()

    if disease_score >= disease_threshold:
        return "Disease detected! Recommended action: Apply pesticide."

    # NOTE(review): the yields shown in this notebook are roughly 8-12 Q/acre,
    # so the default of 50 marks every prediction as low; pass a threshold that
    # fits the data.
    if predicted_yield < low_yield_threshold:
        return "Low yield predicted! Recommended action: Improve irrigation and soil quality."

    return "Crop is healthy and yield prediction is optimal."

#### Step 4: Testing with Simulated Input Data

In [None]:
# Read one Late_Blight image from the dataset folder and add a leading batch
# axis, because predict() works on batches of images.
test_image1 = img_to_array(
    cv2.imread("/content/drive/MyDrive/crop-dataset/Late_Blight/Late_Blight (450).jpg")
)
test_image1 = test_image1[np.newaxis, ...]

# Sigmoid output of the CNN for this image (1 was the late-blight label during training).
disease_prediction1 = cnn_model.predict(test_image1)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step


In [None]:
# Read one Healthy image from the dataset folder and add a leading batch
# axis, because predict() works on batches of images.
test_image2 = img_to_array(
    cv2.imread("/content/drive/MyDrive/crop-dataset/Healthy/Healthy (191).jpg")
)
test_image2 = test_image2[np.newaxis, ...]

# Sigmoid output of the CNN for this image (0 was the healthy label during training).
disease_prediction2 = cnn_model.predict(test_image2)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step


In [None]:
# Use the first held-out row as a stand-in for new field measurements.
# Slicing (rather than indexing) keeps the 2-D (1, n_features) shape predict() needs.
test_env_data = X_test[0:1]
yield_prediction1 = yield_model.predict(test_env_data)[0]

In [None]:
# Use the sixth held-out row (index 5) as a second stand-in for new field measurements.
# Slicing (rather than indexing) keeps the 2-D (1, n_features) shape predict() needs.
test_env_data = X_test[5:6]
yield_prediction2 = yield_model.predict(test_env_data)[0]

In [None]:
# Compare the prediction for the first held-out row with its recorded yield.
sample_idx = 0
print("Test 1 - ")
print(" Test values - ", X_test[sample_idx])
print(" Yield Prediction - ", float(yield_prediction1))
print(" Ground Truth - ", y_test[sample_idx])

Test 1 - 
 Test values -  [1200.0 27.0 79.0 21.0 20.0]
 Yield Prediction -  11.045
 Ground Truth -  [11.]


In [None]:
# Compare the prediction for the held-out row at index 5 with its recorded yield.
sample_idx = 5
print("Test 2 - ")
print(" Test values - ", X_test[sample_idx])
print(" Yield Prediction - ", float(yield_prediction2))
print(" Ground Truth - ", y_test[sample_idx])

Test 2 - 
 Test values -  [1300.0 28.0 77.0 22.0 20.0]
 Yield Prediction -  10.395
 Ground Truth -  [9.]


In [None]:
# Pair each disease prediction with its yield prediction and ask recommend() for advice.
recommendation1, recommendation2 = [
    recommend(disease_pred, yield_pred)
    for disease_pred, yield_pred in (
        (disease_prediction1, yield_prediction1),
        (disease_prediction2, yield_prediction2),
    )
]

#### Step 5: Display Output

In [None]:
# Final report for the first test case: raw CNN output, predicted yield
# (rounded to two decimals) and the resulting recommendation.
print("Test 1 - ")
print(f"Disease Prediction: {disease_prediction1} (0: Healthy, 1: Diseased)")
print(f"Yield Prediction: {yield_prediction1:.2f} units")
print(f"Recommendation: {recommendation1}")

Test 1 - 
Disease Prediction: [[1.]] (0: Healthy, 1: Diseased)
Yield Prediction: 11.04 units
Recommendation: Disease detected! Recommended action: Apply pesticide.


In [None]:
# Final report for the second test case: raw CNN output, predicted yield
# (rounded to two decimals) and the resulting recommendation.
print("Test 2 - ")
print(f"Disease Prediction: {disease_prediction2} (0: Healthy, 1: Diseased)")
print(f"Yield Prediction: {yield_prediction2:.2f} units")
print(f"Recommendation: {recommendation2}")

Test 2 - 
Disease Prediction: [[0.]] (0: Healthy, 1: Diseased)
Yield Prediction: 10.39 units
Recommendation: Low yield predicted! Recommended action: Improve irrigation and soil quality.


In [None]:
"""
To build a machine-learning recommendation system for crop disease detection
and yield prediction in agriculture, follow these steps:
1. data collection
you need two types of data:
● images of crop diseases: images of healthy and diseased crops for disease detection.
● agricultural/environmental data: data on weather conditions, soil properties, water
usage, fertilizers, etc., for yield prediction.
2. data preprocessing
● image data: prepare images for disease detection using techniques like resizing,
normalization, and data augmentation.
● tabular data: clean and preprocess agricultural data (handling missing values, scaling,
and feature engineering).
3. machine learning models
● cnn (convolutional neural networks): used for disease detection.
● regression models (random forest, xgboost, etc.): used for yield prediction based on
environmental data.
4. building cnn for disease detection
● use cnn for image classification to detect diseases in crop images.
● the cnn architecture typically includes convolutional layers, max pooling, flattening, and
fully connected layers.
5. building regression model for yield prediction
● use regression models to predict crop yield based on environmental factors such as
rainfall, temperature, soil quality, and fertilizer usage.
6. training and evaluation
● train the cnn model for disease detection using labeled image data.
● train the regression model for yield prediction using the preprocessed tabular data.
7. recommendation system
● based on the disease detection and yield prediction results, provide recommendations.
● if a disease is detected, recommend treatments or preventive measures.
● if the yield is predicted to be low, suggest changes in farming practices (fertilizers,
irrigation, etc.).
8. deployment
● deploy the trained models as part of a web or mobile application to allow farmers to
upload images and input data for real-time recommendations.
"""