In [42]:
import numpy as np
import pandas as pd

# Define the ranges for each nutrient
nutrient_ranges = {
    'zinc': [5, 20],
    'boron': [0.5, 2],
    'phosphorus': [50, 75],
    'potassium': [125, 145],
    'sulphur': [12, 15]
}

# Function to generate random values within the specified range for each nutrient
def generate_cotton_yield_data(num_samples):
    data = {}
    for nutrient, (min_val, max_val) in nutrient_ranges.items():
        data[nutrient] = np.random.uniform(min_val, max_val, num_samples)
    return data

# Function to generate random values outside the specified range for each nutrient
def generate_out_of_range_data(num_samples):
    data = {}
    for nutrient, (min_val, max_val) in nutrient_ranges.items():
        data[nutrient] = np.random.uniform(min_val - 10, max_val + 10, num_samples)
    return data

# Function to label yield as 'good' or 'poor' based on whether values are within range
def label_yield(data):
    yield_labels = []
    for i in range(len(data['zinc'])):
        if all(data[nutrient][i] >= nutrient_ranges[nutrient][0] and data[nutrient][i] <= nutrient_ranges[nutrient][1] for nutrient in nutrient_ranges):
            yield_labels.append('good')
        else:
            yield_labels.append('poor')
    return yield_labels

# Generate cotton yield dataset with values within specified ranges
cotton_yield_data_within_range = generate_cotton_yield_data(2340)

# Generate cotton yield dataset with values outside specified ranges
cotton_yield_data_out_of_range = generate_out_of_range_data(2660)

# Combine the datasets
cotton_yield_data_combined = {nutrient: np.concatenate([cotton_yield_data_within_range[nutrient],
                                                       cotton_yield_data_out_of_range[nutrient]])
                              for nutrient in nutrient_ranges.keys()}

# Create a DataFrame
df_cotton_yield = pd.DataFrame(cotton_yield_data_combined)

# Label the yield
df_cotton_yield['yield_label'] = label_yield(df_cotton_yield)

# Print the first few samples to verify
print(df_cotton_yield.head())


        zinc     boron  phosphorus   potassium    sulphur yield_label
0  16.429288  1.740906   66.859624  126.992698  12.872744        good
1  13.010484  1.014565   73.229348  130.074248  13.040616        good
2   7.564418  1.460823   67.627394  131.590016  12.447020        good
3   7.438058  1.808909   71.564786  132.721591  14.687987        good
4   5.696002  0.806392   51.664437  132.271883  13.768165        good


In [43]:
df_cotton_yield3 = df_cotton_yield.sort_values(by=list(nutrient_ranges.keys()))
df_cotton_yield3.head(50)

Unnamed: 0,zinc,boron,phosphorus,potassium,sulphur,yield_label
3763,-4.990822,5.572063,58.248232,138.601352,12.54222,poor
3583,-4.985146,7.8362,84.823692,140.959703,9.173206,poor
2581,-4.974225,-5.870092,65.812968,148.06444,10.569075,poor
4805,-4.943934,-2.601539,43.040225,152.05805,8.950357,poor
4144,-4.932253,10.725233,49.454902,145.01294,16.515846,poor
3716,-4.918337,-7.362598,63.262721,135.004173,18.306716,poor
3335,-4.909139,11.057832,56.053338,122.28994,22.66776,poor
2839,-4.898615,-5.398387,74.399482,127.86514,24.222623,poor
4346,-4.883809,0.444491,65.156315,143.932141,3.018672,poor
3054,-4.879993,-0.370981,70.915603,152.294592,7.86004,poor


In [44]:
data = df_cotton_yield3.copy()

In [45]:
data.head()

Unnamed: 0,zinc,boron,phosphorus,potassium,sulphur,yield_label
3763,-4.990822,5.572063,58.248232,138.601352,12.54222,poor
3583,-4.985146,7.8362,84.823692,140.959703,9.173206,poor
2581,-4.974225,-5.870092,65.812968,148.06444,10.569075,poor
4805,-4.943934,-2.601539,43.040225,152.05805,8.950357,poor
4144,-4.932253,10.725233,49.454902,145.01294,16.515846,poor


In [46]:
data['yield_label'] = data['yield_label'].map({'good': 1, 'poor': 0})

In [47]:
data['yield_label'].value_counts()

yield_label
0    2660
1    2340
Name: count, dtype: int64

In [48]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5000 entries, 3763 to 3915
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   zinc         5000 non-null   float64
 1   boron        5000 non-null   float64
 2   phosphorus   5000 non-null   float64
 3   potassium    5000 non-null   float64
 4   sulphur      5000 non-null   float64
 5   yield_label  5000 non-null   int64  
dtypes: float64(5), int64(1)
memory usage: 273.4 KB


In [49]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


In [50]:
x = data.drop('yield_label', axis=1)
y = data['yield_label']

scaler = StandardScaler()

x = scaler.fit_transform(x)

xtrain , xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=42)

In [51]:
model = Sequential()
model.add(Dense(64, input_shape=(5,), activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))


model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [52]:
history = model.fit(xtrain, ytrain, epochs=20, batch_size=32, validation_data=(xtest, ytest))

Epoch 1/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.6014 - loss: 0.5648 - val_accuracy: 0.9940 - val_loss: 0.2370
Epoch 2/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9882 - loss: 0.1492 - val_accuracy: 0.9940 - val_loss: 0.0446
Epoch 3/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9915 - loss: 0.0391 - val_accuracy: 0.9930 - val_loss: 0.0226
Epoch 4/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9952 - loss: 0.0204 - val_accuracy: 0.9950 - val_loss: 0.0162
Epoch 5/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9943 - loss: 0.0212 - val_accuracy: 0.9950 - val_loss: 0.0151
Epoch 6/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9975 - loss: 0.0113 - val_accuracy: 0.9980 - val_loss: 0.0140
Epoch 7/20
[1m125/125[0m 

In [56]:
test_loss, test_acc = model.evaluate(xtest, ytest)
print('Test accuracy:', test_acc)

AttributeError: 'LogisticRegression' object has no attribute 'evaluate'

In [None]:
def recommend_soil(zinc, boron, phosphorus, potassium, sulphur):
    """
    Function to predict whether the soil is recommended (good) or not recommended (poor).
    
    Parameters:
    zinc (float): Zinc content in the soil.
    boron (float): Boron content in the soil.
    phosphorus (float): Phosphorus content in the soil.
    potassium (float): Potassium content in the soil.
    sulphur (float): Sulphur content in the soil.

    Returns:
    str: 'Recommended' if the soil is good, 'Not Recommended' if the soil is poor.
    """
    # Create an array with the input data
    input_data = np.array([zinc, boron, phosphorus, potassium, sulphur]).reshape(1, -1)
    
    # Normalize the input data using the same scaler as during training
    input_data_normalized = scaler.transform(input_data)
    
    # Make a prediction using the trained model
    prediction = model.predict(input_data_normalized)
    
    # Interpret the prediction (threshold of 0.5)
    if prediction[0] >= 0.5:
        return 'Recommended'
    else:
        return 'Not Recommended'

# Example usage
zinc = 168
boron = -4.831847
phosphorus = -9.034729
potassium = 73.316610
sulphur = 118.717791

result = recommend_soil(zinc, boron, phosphorus, potassium, sulphur)
print(f"The soil is {result}.")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 137ms/step



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 138ms/step
The soil is Not Recommended.


In [57]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

for i in [SVC, RandomForestClassifier, LogisticRegression]:
    model = i()
    model.fit(xtrain, ytrain)
    print(f'{i.__name__} train accuracy: {model.score(xtrain, ytrain)}')
    print(f'{i.__name__} test accuracy: {model.score(xtest, ytest)}')
    print('')

SVC train accuracy: 0.98825
SVC test accuracy: 0.987

RandomForestClassifier train accuracy: 1.0
RandomForestClassifier test accuracy: 1.0

LogisticRegression train accuracy: 0.45875
LogisticRegression test accuracy: 0.45

