In [2]:
import numpy as np
import pandas as pd

# Define the ranges for each nutrient
nutrient_ranges = {
    'zinc': [5, 20],
    'boron': [0.5, 2],
    'phosphorus': [50, 75],
    'potassium': [125, 145],
    'sulphur': [12, 15]
}

# Function to generate random values within the specified range for each nutrient
def generate_cotton_yield_data(num_samples):
    data = {}
    for nutrient, (min_val, max_val) in nutrient_ranges.items():
        data[nutrient] = np.random.uniform(min_val, max_val, num_samples)
    return data

# Function to generate random values outside the specified range for each nutrient
def generate_out_of_range_data(num_samples):
    data = {}
    for nutrient, (min_val, max_val) in nutrient_ranges.items():
        data[nutrient] = np.random.uniform(min_val - 10, max_val + 10, num_samples)
    return data

# Function to label yield as 'good' or 'poor' based on whether values are within range
def label_yield(data):
    yield_labels = []
    for i in range(len(data['zinc'])):
        if all(data[nutrient][i] >= nutrient_ranges[nutrient][0] and data[nutrient][i] <= nutrient_ranges[nutrient][1] for nutrient in nutrient_ranges):
            yield_labels.append('good')
        else:
            yield_labels.append('poor')
    return yield_labels

# Generate cotton yield dataset with values within specified ranges
cotton_yield_data_within_range = generate_cotton_yield_data(2340)

# Generate cotton yield dataset with values outside specified ranges
cotton_yield_data_out_of_range = generate_out_of_range_data(2660)

# Combine the datasets
cotton_yield_data_combined = {nutrient: np.concatenate([cotton_yield_data_within_range[nutrient],
                                                       cotton_yield_data_out_of_range[nutrient]])
                              for nutrient in nutrient_ranges.keys()}

# Create a DataFrame
df_cotton_yield = pd.DataFrame(cotton_yield_data_combined)

# Label the yield
df_cotton_yield['yield_label'] = label_yield(df_cotton_yield)

# Print the first few samples to verify
print(df_cotton_yield.head())


        zinc     boron  phosphorus   potassium    sulphur yield_label
0  13.893667  0.685419   62.117980  138.846576  14.192728        good
1   5.254960  0.996996   53.041392  139.837035  13.023081        good
2  16.361049  1.875285   71.912473  133.537919  14.005643        good
3   8.689981  1.351350   61.088935  129.914856  14.608763        good
4   6.238827  1.483556   51.409592  128.724133  12.727594        good


In [3]:
df_cotton_yield3 = df_cotton_yield.sort_values(by=list(nutrient_ranges.keys()))
df_cotton_yield3.head(50)

Unnamed: 0,zinc,boron,phosphorus,potassium,sulphur,yield_label
2417,-4.999336,-5.689999,76.168344,123.133702,7.412071,poor
4169,-4.992952,-2.837441,55.921583,144.266919,22.060134,poor
3109,-4.98746,1.681238,79.481148,127.068581,8.099472,poor
4707,-4.973438,-4.116689,52.470107,125.41644,7.156126,poor
2931,-4.932205,11.022315,67.451141,137.521051,19.223715,poor
4657,-4.927769,-8.970751,49.287392,135.859145,4.286952,poor
2872,-4.924147,1.274128,77.363785,149.482621,23.864386,poor
4448,-4.893938,-9.314031,54.183764,129.967923,5.776235,poor
3126,-4.892061,10.425517,53.770812,131.928231,20.086099,poor
3330,-4.890602,-5.83183,51.927449,117.468311,18.451727,poor


In [4]:
data = df_cotton_yield3.copy()

In [5]:
data.head()

Unnamed: 0,zinc,boron,phosphorus,potassium,sulphur,yield_label
2417,-4.999336,-5.689999,76.168344,123.133702,7.412071,poor
4169,-4.992952,-2.837441,55.921583,144.266919,22.060134,poor
3109,-4.98746,1.681238,79.481148,127.068581,8.099472,poor
4707,-4.973438,-4.116689,52.470107,125.41644,7.156126,poor
2931,-4.932205,11.022315,67.451141,137.521051,19.223715,poor


In [6]:
data['yield_label'] = data['yield_label'].map({'good': 1, 'poor': 0})

In [7]:
data['yield_label'].value_counts()

yield_label
0    2660
1    2340
Name: count, dtype: int64

In [8]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5000 entries, 2417 to 4117
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   zinc         5000 non-null   float64
 1   boron        5000 non-null   float64
 2   phosphorus   5000 non-null   float64
 3   potassium    5000 non-null   float64
 4   sulphur      5000 non-null   float64
 5   yield_label  5000 non-null   int64  
dtypes: float64(5), int64(1)
memory usage: 273.4 KB


In [9]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


In [10]:
x = data.drop('yield_label', axis=1)
y = data['yield_label']

scaler = StandardScaler()

x = scaler.fit_transform(x)

xtrain , xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=42)

In [11]:
model = Sequential()
model.add(Dense(64, input_shape=(5,), activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))


model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [12]:
history = model.fit(xtrain, ytrain, epochs=20, batch_size=32, validation_data=(xtest, ytest))

Epoch 1/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5646 - loss: 0.5788 - val_accuracy: 0.9930 - val_loss: 0.2802
Epoch 2/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9878 - loss: 0.1713 - val_accuracy: 0.9940 - val_loss: 0.0392
Epoch 3/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9935 - loss: 0.0324 - val_accuracy: 0.9910 - val_loss: 0.0243
Epoch 4/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9939 - loss: 0.0264 - val_accuracy: 0.9940 - val_loss: 0.0179
Epoch 5/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9961 - loss: 0.0157 - val_accuracy: 0.9960 - val_loss: 0.0168
Epoch 6/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9968 - loss: 0.0142 - val_accuracy: 0.9900 - val_loss: 0.0219
Epoch 7/20
[1m125/125[0m 

In [13]:
test_loss, test_acc = model.evaluate(xtest, ytest)
print('Test accuracy:', test_acc)

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 775us/step - accuracy: 0.9953 - loss: 0.0187
Test accuracy: 0.9950000047683716


In [14]:
def recommend_soil(zinc, boron, phosphorus, potassium, sulphur):
    """
    Function to predict whether the soil is recommended (good) or not recommended (poor).
    
    Parameters:
    zinc (float): Zinc content in the soil.
    boron (float): Boron content in the soil.
    phosphorus (float): Phosphorus content in the soil.
    potassium (float): Potassium content in the soil.
    sulphur (float): Sulphur content in the soil.

    Returns:
    str: 'Recommended' if the soil is good, 'Not Recommended' if the soil is poor.
    """
    # Create an array with the input data
    input_data = np.array([zinc, boron, phosphorus, potassium, sulphur]).reshape(1, -1)
    
    # Normalize the input data using the same scaler as during training
    input_data_normalized = scaler.transform(input_data)
    
    # Make a prediction using the trained model
    prediction = model.predict(input_data_normalized)
    
    # Interpret the prediction (threshold of 0.5)
    if prediction[0] >= 0.5:
        return 'Recommended'
    else:
        return 'Not Recommended'

# Example usage
zinc = 168
boron = -4.831847
phosphorus = -9.034729
potassium = 73.316610
sulphur = 118.717791

result = recommend_soil(zinc, boron, phosphorus, potassium, sulphur)
print(f"The soil is {result}.")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
The soil is Not Recommended.




In [16]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

for i in [SVC, RandomForestClassifier, LogisticRegression,KNeighborsClassifier]:
    model = i()
    model.fit(xtrain, ytrain)
    print(f'{i.__name__} train accuracy: {model.score(xtrain, ytrain)}')
    print(f'{i.__name__} test accuracy: {model.score(xtest, ytest)}')
    print('')

SVC train accuracy: 0.9865
SVC test accuracy: 0.996

RandomForestClassifier train accuracy: 1.0
RandomForestClassifier test accuracy: 1.0

LogisticRegression train accuracy: 0.4835
LogisticRegression test accuracy: 0.485

KNeighborsClassifier train accuracy: 0.981
KNeighborsClassifier test accuracy: 0.977

