In [46]:
import numpy as np
import pandas as pd

# Define the ranges for each nutrient
nutrient_ranges = {
    'zinc': [5, 20],
    'boron': [0.5, 2],
    'phosphorus': [50, 75],
    'potassium': [125, 145],
    'sulphur': [12, 15]
}

# Function to generate random values within the specified range for each nutrient
def generate_cotton_yield_data(num_samples):
    data = {}
    for nutrient, (min_val, max_val) in nutrient_ranges.items():
        data[nutrient] = np.random.uniform(min_val, max_val, num_samples)
    return data

# Function to generate random values outside the specified range for each nutrient
def generate_out_of_range_data(num_samples):
    data = {}
    for nutrient, (min_val, max_val) in nutrient_ranges.items():
        data[nutrient] = np.random.uniform(min_val - 10, max_val + 10, num_samples)
    return data

# Function to label yield as 'good' or 'poor' based on whether values are within range
def label_yield(data):
    yield_labels = []
    for i in range(len(data['zinc'])):
        if all(data[nutrient][i] >= nutrient_ranges[nutrient][0] and data[nutrient][i] <= nutrient_ranges[nutrient][1] for nutrient in nutrient_ranges):
            yield_labels.append('good')
        else:
            yield_labels.append('poor')
    return yield_labels

# Generate cotton yield dataset with values within specified ranges
cotton_yield_data_within_range = generate_cotton_yield_data(2340)

# Generate cotton yield dataset with values outside specified ranges
cotton_yield_data_out_of_range = generate_out_of_range_data(2660)

# Combine the datasets
cotton_yield_data_combined = {nutrient: np.concatenate([cotton_yield_data_within_range[nutrient],
                                                       cotton_yield_data_out_of_range[nutrient]])
                              for nutrient in nutrient_ranges.keys()}

# Create a DataFrame
df_cotton_yield = pd.DataFrame(cotton_yield_data_combined)

# Label the yield
df_cotton_yield['yield_label'] = label_yield(df_cotton_yield)

# Print the first few samples to verify
print(df_cotton_yield.head())


        zinc     boron  phosphorus   potassium    sulphur yield_label
0   6.604998  0.576969   58.544376  139.312438  14.055820        good
1  11.607571  1.784768   66.087984  140.647837  13.060296        good
2  15.956863  1.788620   68.258460  134.941835  14.144668        good
3  10.319809  1.176548   72.832470  125.094218  14.476108        good
4  15.747134  0.701805   51.662035  131.693740  12.796272        good


In [47]:
df_cotton_yield3 = df_cotton_yield.sort_values(by=list(nutrient_ranges.keys()))
df_cotton_yield3.head(50)

Unnamed: 0,zinc,boron,phosphorus,potassium,sulphur,yield_label
2760,-4.98586,11.379537,60.72386,133.761055,19.020198,poor
4052,-4.97173,6.431595,52.509273,117.136176,2.87979,poor
3349,-4.968038,-3.239095,73.017138,124.459326,13.423788,poor
2380,-4.96386,-0.929906,68.522227,129.307006,10.617449,poor
2499,-4.96183,-6.430905,59.9614,133.438512,24.942793,poor
3958,-4.96074,1.556391,66.16574,152.667688,7.410583,poor
4854,-4.955586,-1.46335,67.266306,127.931917,12.149704,poor
2767,-4.952071,-9.200272,72.490705,119.633711,2.442376,poor
3571,-4.945441,4.908364,56.265653,154.721049,13.922085,poor
2444,-4.940532,7.616248,83.775447,136.755622,4.104017,poor


In [48]:
data = df_cotton_yield3.copy()

In [49]:
data.head()

Unnamed: 0,zinc,boron,phosphorus,potassium,sulphur,yield_label
2760,-4.98586,11.379537,60.72386,133.761055,19.020198,poor
4052,-4.97173,6.431595,52.509273,117.136176,2.87979,poor
3349,-4.968038,-3.239095,73.017138,124.459326,13.423788,poor
2380,-4.96386,-0.929906,68.522227,129.307006,10.617449,poor
2499,-4.96183,-6.430905,59.9614,133.438512,24.942793,poor


In [50]:
data['yield_label'] = data['yield_label'].map({'good': 1, 'poor': 2})

In [51]:
data['yield_label'].value_counts()

yield_label
2    2659
1    2341
Name: count, dtype: int64

In [52]:
data.head()


Unnamed: 0,zinc,boron,phosphorus,potassium,sulphur,yield_label
2760,-4.98586,11.379537,60.72386,133.761055,19.020198,2
4052,-4.97173,6.431595,52.509273,117.136176,2.87979,2
3349,-4.968038,-3.239095,73.017138,124.459326,13.423788,2
2380,-4.96386,-0.929906,68.522227,129.307006,10.617449,2
2499,-4.96183,-6.430905,59.9614,133.438512,24.942793,2


In [53]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


In [54]:
import joblib as joblib
x = data.drop('yield_label', axis=1)
y = data['yield_label']

scaler = StandardScaler()

x = scaler.fit_transform(x)
joblib.dump(scaler, 'D:/Crop-Recommendation/Model/scaler.pkl')

xtrain , xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=42)

In [55]:
model = Sequential()
model.add(Dense(64, input_shape=(5,), activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))


model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [56]:
history = model.fit(xtrain, ytrain, epochs=20, batch_size=32, validation_data=(xtest, ytest))

Epoch 1/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.4597 - loss: -3.4324 - val_accuracy: 0.4750 - val_loss: -59.5315
Epoch 2/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.4635 - loss: -190.6192 - val_accuracy: 0.4750 - val_loss: -1107.3004
Epoch 3/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4630 - loss: -2034.9237 - val_accuracy: 0.4750 - val_loss: -6212.7915
Epoch 4/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4576 - loss: -9356.7246 - val_accuracy: 0.4750 - val_loss: -20443.8047
Epoch 5/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4728 - loss: -27148.2461 - val_accuracy: 0.4750 - val_loss: -50677.6758
Epoch 6/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4655 - loss: -64178.0703 - val_accuracy: 0.4750 - val

In [57]:
test_loss, test_acc = model.evaluate(xtest, ytest)
print('Test accuracy:', test_acc)

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.4628 - loss: -8530489.0000 
Test accuracy: 0.4749999940395355


In [58]:
model.save("recommended_model.h5")



In [59]:
def recommend_soil(zinc, boron, phosphorus, potassium, sulphur):
    """
    Function to predict whether the soil is recommended (good) or not recommended (poor).
    
    Parameters:
    zinc (float): Zinc content in the soil.
    boron (float): Boron content in the soil.
    phosphorus (float): Phosphorus content in the soil.
    potassium (float): Potassium content in the soil.
    sulphur (float): Sulphur content in the soil.

    Returns:
    str: 'Recommended' if the soil is good, 'Not Recommended' if the soil is poor.
    """
    # Create an array with the input data
    input_data = np.array([zinc, boron, phosphorus, potassium, sulphur]).reshape(1, -1)
    
    # Normalize the input data using the same scaler as during training
    input_data_normalized = scaler.transform(input_data)
    
    # Make a prediction using the trained model
    prediction = model.predict(input_data_normalized)
    
    # Interpret the prediction (threshold of 0.5)
    if prediction[0] >= 0.5:
        return 'Recommended'
    else:
        return 'Not Recommended'

# Example usage
zinc = 168
boron = 2
phosphorus = 10
potassium = 73.316610
sulphur = 118.717791

result = recommend_soil(zinc, boron, phosphorus, potassium, sulphur)
print(f"The soil is {result}.")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
The soil is Recommended.




In [60]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

for i in [SVC, RandomForestClassifier, LogisticRegression,KNeighborsClassifier]:
    model = i()
    model.fit(xtrain, ytrain)
    print(f'{i.__name__} train accuracy: {model.score(xtrain, ytrain)}')
    print(f'{i.__name__} test accuracy: {model.score(xtest, ytest)}')
    print('')

SVC train accuracy: 0.99025
SVC test accuracy: 0.992

RandomForestClassifier train accuracy: 1.0
RandomForestClassifier test accuracy: 0.998

LogisticRegression train accuracy: 0.445
LogisticRegression test accuracy: 0.423

KNeighborsClassifier train accuracy: 0.98375
KNeighborsClassifier test accuracy: 0.979

