In [1]:
import numpy as np
import pandas as pd

# Define the ranges for each nutrient
nutrient_ranges = {
    'zinc': [5, 20],
    'boron': [0.5, 2],
    'phosphorus': [50, 75],
    'potassium': [125, 145],
    'sulphur': [12, 15]
}

# Function to generate random values within the specified range for each nutrient
def generate_cotton_yield_data(num_samples):
    data = {}
    for nutrient, (min_val, max_val) in nutrient_ranges.items():
        data[nutrient] = np.random.uniform(min_val, max_val, num_samples)
    return data

# Function to generate random values outside the specified range for each nutrient
def generate_out_of_range_data(num_samples):
    data = {}
    for nutrient, (min_val, max_val) in nutrient_ranges.items():
        data[nutrient] = np.random.uniform(min_val - 10, max_val + 10, num_samples)
    return data

# Function to label yield as 'good' or 'poor' based on whether values are within range
def label_yield(data):
    yield_labels = []
    for i in range(len(data['zinc'])):
        if all(data[nutrient][i] >= nutrient_ranges[nutrient][0] and data[nutrient][i] <= nutrient_ranges[nutrient][1] for nutrient in nutrient_ranges):
            yield_labels.append('good')
        else:
            yield_labels.append('poor')
    return yield_labels

# Generate cotton yield dataset with values within specified ranges
cotton_yield_data_within_range = generate_cotton_yield_data(2340)

# Generate cotton yield dataset with values outside specified ranges
cotton_yield_data_out_of_range = generate_out_of_range_data(2660)

# Combine the datasets
cotton_yield_data_combined = {nutrient: np.concatenate([cotton_yield_data_within_range[nutrient],
                                                       cotton_yield_data_out_of_range[nutrient]])
                              for nutrient in nutrient_ranges.keys()}

# Create a DataFrame
df_cotton_yield = pd.DataFrame(cotton_yield_data_combined)

# Label the yield
df_cotton_yield['yield_label'] = label_yield(df_cotton_yield)

# Print the first few samples to verify
print(df_cotton_yield.head())


        zinc     boron  phosphorus   potassium    sulphur yield_label
0   6.104667  1.562884   73.023836  143.642551  14.477351        good
1   9.934274  1.164183   68.699167  126.641033  14.136038        good
2  16.342038  1.724810   52.263720  137.369137  12.465458        good
3  16.347888  1.942708   57.259750  140.266277  12.063764        good
4  13.215537  1.912307   57.216662  142.231544  13.401403        good


In [2]:
df_cotton_yield3 = df_cotton_yield.sort_values(by=list(nutrient_ranges.keys()))
df_cotton_yield3.head(50)

Unnamed: 0,zinc,boron,phosphorus,potassium,sulphur,yield_label
4078,-4.994465,11.17997,55.896314,135.11357,12.91937,poor
4395,-4.968449,9.129133,81.52116,154.938878,3.665471,poor
3238,-4.929686,9.484937,57.1505,129.290561,2.5367,poor
4890,-4.912708,-7.641177,56.224869,131.414311,9.996816,poor
4597,-4.910558,0.883368,77.56732,143.246765,6.360394,poor
3284,-4.903195,-7.759311,70.055182,154.468599,15.008608,poor
4171,-4.886248,-5.473225,59.645558,121.707274,20.978212,poor
2346,-4.866095,0.281604,40.002357,145.083142,13.923054,poor
2410,-4.844153,-2.378632,69.779202,123.77776,8.311208,poor
3921,-4.827561,-2.048502,83.153325,128.515181,5.435845,poor


In [3]:
data = df_cotton_yield3.copy()

In [4]:
data.head()

Unnamed: 0,zinc,boron,phosphorus,potassium,sulphur,yield_label
4078,-4.994465,11.17997,55.896314,135.11357,12.91937,poor
4395,-4.968449,9.129133,81.52116,154.938878,3.665471,poor
3238,-4.929686,9.484937,57.1505,129.290561,2.5367,poor
4890,-4.912708,-7.641177,56.224869,131.414311,9.996816,poor
4597,-4.910558,0.883368,77.56732,143.246765,6.360394,poor


In [5]:
data['yield_label'] = data['yield_label'].map({'good': 1, 'poor': 0})

In [6]:
data['yield_label'].value_counts()

yield_label
0    2659
1    2341
Name: count, dtype: int64

In [7]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5000 entries, 4078 to 4561
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   zinc         5000 non-null   float64
 1   boron        5000 non-null   float64
 2   phosphorus   5000 non-null   float64
 3   potassium    5000 non-null   float64
 4   sulphur      5000 non-null   float64
 5   yield_label  5000 non-null   int64  
dtypes: float64(5), int64(1)
memory usage: 273.4 KB


In [8]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


In [10]:
import joblib as joblib
x = data.drop('yield_label', axis=1)
y = data['yield_label']

scaler = StandardScaler()

x = scaler.fit_transform(x)
joblib.dump(scaler, 'D:/Crop-Recommendation/Model/scaler.pkl')

xtrain , xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=42)

In [10]:
model = Sequential()
model.add(Dense(64, input_shape=(5,), activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))


model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [11]:
history = model.fit(xtrain, ytrain, epochs=20, batch_size=32, validation_data=(xtest, ytest))

Epoch 1/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.5998 - loss: 0.5213 - val_accuracy: 0.9910 - val_loss: 0.1758
Epoch 2/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9814 - loss: 0.1242 - val_accuracy: 0.9940 - val_loss: 0.0331
Epoch 3/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9884 - loss: 0.0475 - val_accuracy: 0.9950 - val_loss: 0.0188
Epoch 4/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9933 - loss: 0.0275 - val_accuracy: 0.9980 - val_loss: 0.0144
Epoch 5/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9937 - loss: 0.0250 - val_accuracy: 0.9960 - val_loss: 0.0112
Epoch 6/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9965 - loss: 0.0149 - val_accuracy: 0.9980 - val_loss: 0.0149
Epoch 7/20
[1m125/125[0m 

In [12]:
test_loss, test_acc = model.evaluate(xtest, ytest)
print('Test accuracy:', test_acc)

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9998 - loss: 0.0074     
Test accuracy: 0.9990000128746033


In [13]:
model.save("recommended_model.h5")



In [14]:
def recommend_soil(zinc, boron, phosphorus, potassium, sulphur):
    """
    Function to predict whether the soil is recommended (good) or not recommended (poor).
    
    Parameters:
    zinc (float): Zinc content in the soil.
    boron (float): Boron content in the soil.
    phosphorus (float): Phosphorus content in the soil.
    potassium (float): Potassium content in the soil.
    sulphur (float): Sulphur content in the soil.

    Returns:
    str: 'Recommended' if the soil is good, 'Not Recommended' if the soil is poor.
    """
    # Create an array with the input data
    input_data = np.array([zinc, boron, phosphorus, potassium, sulphur]).reshape(1, -1)
    
    # Normalize the input data using the same scaler as during training
    input_data_normalized = scaler.transform(input_data)
    
    # Make a prediction using the trained model
    prediction = model.predict(input_data_normalized)
    
    # Interpret the prediction (threshold of 0.5)
    if prediction[0] >= 0.5:
        return 'Recommended'
    else:
        return 'Not Recommended'

# Example usage
zinc = 168
boron = -4.831847
phosphorus = -9.034729
potassium = 73.316610
sulphur = 118.717791

result = recommend_soil(zinc, boron, phosphorus, potassium, sulphur)
print(f"The soil is {result}.")




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
The soil is Not Recommended.


In [15]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

for i in [SVC, RandomForestClassifier, LogisticRegression,KNeighborsClassifier]:
    model = i()
    model.fit(xtrain, ytrain)
    print(f'{i.__name__} train accuracy: {model.score(xtrain, ytrain)}')
    print(f'{i.__name__} test accuracy: {model.score(xtest, ytest)}')
    print('')

SVC train accuracy: 0.98675
SVC test accuracy: 0.99

RandomForestClassifier train accuracy: 1.0
RandomForestClassifier test accuracy: 1.0

LogisticRegression train accuracy: 0.47975
LogisticRegression test accuracy: 0.478

KNeighborsClassifier train accuracy: 0.98025
KNeighborsClassifier test accuracy: 0.973

