In [6]:
import numpy as np
import pandas as pd

# Define the ranges for each nutrient
nutrient_ranges = {
    'zinc': [5, 20],
    'boron': [0.5, 2],
    'phosphorus': [50, 75],
    'potassium': [125, 145],
    'sulphur': [12, 15]
}

# Function to generate random values within the specified range for each nutrient
def generate_cotton_yield_data(num_samples):
    data = {}
    for nutrient, (min_val, max_val) in nutrient_ranges.items():
        data[nutrient] = np.random.uniform(min_val, max_val, num_samples)
    return data

# Function to generate random values outside the specified range for each nutrient
def generate_out_of_range_data(num_samples):
    data = {}
    for nutrient, (min_val, max_val) in nutrient_ranges.items():
        data[nutrient] = np.random.uniform(min_val - 10, max_val + 10, num_samples)
    return data

# Function to label yield as 'good' or 'poor' based on whether values are within range
def label_yield(data):
    yield_labels = []
    for i in range(len(data['zinc'])):
        if all(data[nutrient][i] >= nutrient_ranges[nutrient][0] and data[nutrient][i] <= nutrient_ranges[nutrient][1] for nutrient in nutrient_ranges):
            yield_labels.append('good')
        else:
            yield_labels.append('poor')
    return yield_labels

# Generate cotton yield dataset with values within specified ranges
cotton_yield_data_within_range = generate_cotton_yield_data(100)

# Generate cotton yield dataset with values outside specified ranges
cotton_yield_data_out_of_range = generate_out_of_range_data(100)

# Combine the datasets
cotton_yield_data_combined = {nutrient: np.concatenate([cotton_yield_data_within_range[nutrient],
                                                       cotton_yield_data_out_of_range[nutrient]])
                              for nutrient in nutrient_ranges.keys()}

# Create a DataFrame
df_cotton_yield = pd.DataFrame(cotton_yield_data_combined)

# Label the yield
df_cotton_yield['yield_label'] = label_yield(df_cotton_yield)

# Print the first few samples to verify
print(df_cotton_yield.head())


        zinc     boron  phosphorus   potassium    sulphur yield_label
0   8.256040  1.707044   69.975057  130.847861  14.866647        good
1  12.439720  1.039039   59.282875  138.018124  14.032467        good
2  15.745483  0.730231   67.196957  143.609279  13.782998        good
3  13.233898  1.206110   66.256138  128.840917  14.160800        good
4  13.110764  1.670500   63.353857  127.395619  13.631187        good


In [11]:
df_cotton_yield3 = df_cotton_yield.sort_values(by=list(nutrient_ranges.keys()))
df_cotton_yield3.head(50)

Unnamed: 0,zinc,boron,phosphorus,potassium,sulphur,yield_label
153,-4.130677,5.165448,43.781654,139.336226,9.746862,poor
181,-3.861896,0.337674,47.981681,144.384844,7.007542,poor
192,-2.354698,6.096986,67.544832,147.796502,16.475601,poor
195,-2.209589,6.00502,66.791942,120.834743,24.637615,poor
150,-1.866986,5.037412,55.293046,139.155873,5.884379,poor
128,-1.499164,-7.129606,53.480716,128.82759,9.379855,poor
120,-1.235524,-5.090331,49.188759,137.803568,16.309045,poor
184,-0.911229,-4.803721,42.220227,153.366836,9.201067,poor
148,-0.007132,5.957406,50.66541,143.86205,21.393057,poor
198,0.2769,7.958066,48.699309,128.980941,16.021427,poor
