In [1]:
import numpy as np
import pandas as pd

# Define the ranges for each nutrient
nutrient_ranges = {
    'zinc': [5, 20],
    'boron': [0.5, 2],
    'phosphorus': [50, 75],
    'potassium': [125, 145],
    'sulphur': [12, 15],
    'Nitrogen':[40.36,112.10],
    'Temperature':[25,45]
    }

# Function to generate random values within the specified range for each nutrient
def generate_cotton_yield_data(num_samples):
    data = {}
    for nutrient, (min_val, max_val) in nutrient_ranges.items():
        data[nutrient] = np.random.uniform(min_val, max_val, num_samples)
    return data

# Function to generate random values outside the specified range for each nutrient
def generate_out_of_range_data(num_samples):
    data = {}
    for nutrient, (min_val, max_val) in nutrient_ranges.items():
        data[nutrient] = np.random.uniform(min_val - 10, max_val + 10, num_samples)
    return data

# Function to label yield as 'good' or 'poor' based on whether values are within range
def label_yield(data):
    yield_labels = []
    for i in range(len(data['zinc'])):
        if all(data[nutrient][i] >= nutrient_ranges[nutrient][0] and data[nutrient][i] <= nutrient_ranges[nutrient][1] for nutrient in nutrient_ranges):
            yield_labels.append('good')
        else:
            yield_labels.append('poor')
    return yield_labels

# Generate cotton yield dataset with values within specified ranges
cotton_yield_data_within_range = generate_cotton_yield_data(100)

# Generate cotton yield dataset with values outside specified ranges
cotton_yield_data_out_of_range = generate_out_of_range_data(100)

# Combine the datasets
cotton_yield_data_combined = {nutrient: np.concatenate([cotton_yield_data_within_range[nutrient],
                                                       cotton_yield_data_out_of_range[nutrient]])
                              for nutrient in nutrient_ranges.keys()}

# Create a DataFrame
df_cotton_yield = pd.DataFrame(cotton_yield_data_combined)

# Label the yield
df_cotton_yield['yield_label'] = label_yield(df_cotton_yield)

# Print the first few samples to verify
print(df_cotton_yield.head())


        zinc     boron  phosphorus   potassium    sulphur   Nitrogen  \
0   9.386883  1.289138   51.566593  139.362686  14.695893  99.114263   
1  17.305192  1.143281   68.147560  144.233567  12.538503  84.646272   
2   8.410237  0.761249   53.470965  140.768131  12.417373  64.135177   
3  19.189774  1.996488   69.027961  128.872403  12.785331  83.303761   
4  17.363652  0.950934   65.920946  133.848309  14.797143  76.124316   

   Temperature yield_label  
0    42.015150        good  
1    40.725207        good  
2    43.239144        good  
3    44.175606        good  
4    36.106403        good  


In [2]:
df_cotton_yield3 = df_cotton_yield.sort_values(by=list(nutrient_ranges.keys()))
df_cotton_yield3.head(50)

Unnamed: 0,zinc,boron,phosphorus,potassium,sulphur,Nitrogen,Temperature,yield_label
155,-4.008313,1.85082,41.140167,130.090852,16.535475,56.998854,27.986338,poor
127,-3.602991,-2.664874,51.741896,122.328534,9.649472,53.216731,38.837629,poor
187,-3.185302,-9.067488,46.93679,147.051321,24.836882,42.602221,44.279983,poor
192,-1.628382,-8.321522,59.265835,124.100352,15.044932,103.88534,50.769776,poor
143,-1.335524,-1.943361,74.656686,122.663251,16.802752,39.16611,41.405688,poor
165,-1.152659,5.918019,75.605107,130.172008,14.13941,70.678928,36.513306,poor
142,-0.418465,1.222975,43.204769,134.618281,2.955677,55.25139,54.880664,poor
178,0.579392,3.631023,74.959542,122.763756,13.449928,101.052973,39.018137,poor
141,0.706977,7.418256,55.579251,123.445495,2.365737,76.270544,54.384068,poor
151,0.887437,-4.485162,73.20491,147.460967,22.758601,34.367926,26.138227,poor


In [6]:
df_cotton_yield3['yield_label'].unique()

array(['poor', 'good'], dtype=object)

In [8]:
df_cotton_yield3.isna().sum()

zinc           0
boron          0
phosphorus     0
potassium      0
sulphur        0
Nitrogen       0
Temperature    0
yield_label    0
dtype: int64

In [10]:
X = df_cotton_yield3.drop('yield_label',axis=1)
y = df_cotton_yield3['yield_label']
X

Unnamed: 0,zinc,boron,phosphorus,potassium,sulphur,Nitrogen,Temperature
155,-4.008313,1.850820,41.140167,130.090852,16.535475,56.998854,27.986338
127,-3.602991,-2.664874,51.741896,122.328534,9.649472,53.216731,38.837629
187,-3.185302,-9.067488,46.936790,147.051321,24.836882,42.602221,44.279983
192,-1.628382,-8.321522,59.265835,124.100352,15.044932,103.885340,50.769776
143,-1.335524,-1.943361,74.656686,122.663251,16.802752,39.166110,41.405688
...,...,...,...,...,...,...,...
120,29.144455,8.384851,75.551352,142.067366,16.254274,54.942174,24.778250
153,29.300930,-6.177205,65.862292,146.591751,16.364957,91.471286,39.090472
148,29.388026,7.244039,72.823408,151.056196,23.162352,54.165780,16.105132
138,29.421729,5.308305,54.558599,154.077660,17.529371,63.443398,24.074818


In [15]:
y.value_counts()

yield_label
poor    100
good    100
Name: count, dtype: int64

In [None]:
one_hot_class = one_hot_encoder.transform(np.array([[data_class]]).reshape(-1, 1))

In [None]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam



X = df_cotton_yield3.drop('yield_label',axis=1)
y = df_cotton_yield3['yield_label']


scaler = MinMaxScaler()
X = scaler.fit_transform(X)


real_data = pd.DataFrame(X, columns=['zinc', 'boron', 'phosphorus', 'potassium','sulphur','Nitrogen','Temperature'])

real_labels = y


one_hot_encoder = OneHotEncoder(sparse=False)
one_hot_labels = one_hot_encoder.fit_transform(np.array(real_labels).reshape(-1, 1))
#one_hot_labels = one_hot_encoder.fit_transform(real_labels)
# Constants
NOISE_DIM = 100
NUM_CLASSES = 2
NUM_FEATURES = 7
BATCH_SIZE = 64
TRAINING_STEPS = 1000

# Generator
def create_generator():
    noise_input = Input(shape=(NOISE_DIM,))
    class_input = Input(shape=(NUM_CLASSES,))
    merged_input = Concatenate()([noise_input, class_input])
    hidden = Dense(128, activation='relu')(merged_input)
    output = Dense(NUM_FEATURES, activation='linear')(hidden)
    model = Model(inputs=[noise_input, class_input], outputs=output)
    return model

# Discriminator
def create_discriminator():
    data_input = Input(shape=(NUM_FEATURES,))
    class_input = Input(shape=(NUM_CLASSES,))
    merged_input = Concatenate()([data_input, class_input])
    hidden = Dense(128, activation='relu')(merged_input)
    output = Dense(1, activation='sigmoid')(hidden)
    model = Model(inputs=[data_input, class_input], outputs=output)
    return model

# cGAN
def create_cgan(generator, discriminator):
    noise_input = Input(shape=(NOISE_DIM,))
    class_input = Input(shape=(NUM_CLASSES,))
    generated_data = generator([noise_input, class_input])
    validity = discriminator([generated_data, class_input])
    model = Model(inputs=[noise_input, class_input], outputs=validity)
    return model

# Create and compile the Discriminator
discriminator = create_discriminator()
discriminator.compile(loss='binary_crossentropy', optimizer=Adam())

# Create the Generator
generator = create_generator()

# Create the GAN
gan = create_cgan(generator, discriminator)

# Ensure that only the generator is trained
discriminator.trainable = False

gan.compile(loss='binary_crossentropy', optimizer=Adam())

# Train GAN
for step in range(TRAINING_STEPS):
    # Select a random batch of real data with labels
    idx = np.random.randint(0, real_data.shape[0], BATCH_SIZE)
    real_batch = real_data.iloc[idx].values
    labels_batch = one_hot_labels[idx]

    # Generate a batch of new data
    noise = np.random.normal(0, 1, (BATCH_SIZE, NOISE_DIM))
    generated_batch = generator.predict([noise, labels_batch])

    # Train the discriminator
    real_loss = discriminator.train_on_batch([real_batch, labels_batch], np.ones((BATCH_SIZE, 1)))
    fake_loss = discriminator.train_on_batch([generated_batch, labels_batch], np.zeros((BATCH_SIZE, 1)))
    discriminator_loss = 0.5 * np.add(real_loss, fake_loss)

    # Train the generator
    generator_loss = gan.train_on_batch([noise, labels_batch], np.ones((BATCH_SIZE, 1)))

    if step % 500 == 0:
        print(f"Step: {step}, Discriminator Loss: {discriminator_loss}, Generator Loss: {generator_loss}")

# Generate instances for a given class
def generate_data(generator, data_class, num_instances):
    #one_hot_class = one_hot_encoder.transform(np.array([[data_class]]))
  #  one_hot_class = one_hot_encoder.transform(np.array([[data_class]]).reshape(-1, 1))
    one_hot_class=data_class
    noise = np.random.normal(0, 1, (num_instances, NOISE_DIM))
    generated_data = generator.predict([noise, np.repeat(one_hot_class, num_instances, axis=0)])
    return pd.DataFrame(generated_data, columns=['zinc', 'boron', 'phosphorus', 'potassium','sulphur','Nitrogen','Temperature'])

# Generate instances of class here 0 :poor and good : good

# no errors all set


In [21]:
generated_poor = generate_data(generator, [[1,0]], 50)



In [22]:
generated_poor

Unnamed: 0,zinc,boron,phosphorus,potassium,sulphur,Nitrogen,Temperature
0,0.404322,0.39535,0.408068,0.594296,0.499188,0.348378,0.481283
1,0.569289,0.470879,0.352825,0.546183,0.558559,0.515962,0.40894
2,0.434,0.554543,0.474116,0.646109,0.516875,0.540338,0.614155
3,0.550997,0.550775,0.580221,0.7231,0.595984,0.800852,0.462921
4,0.709618,0.712012,0.57153,0.816391,0.597345,0.815619,0.869157
5,0.665248,0.412316,0.739725,0.760866,0.603066,0.878434,0.643543
6,0.558688,0.417628,0.743002,0.853822,0.48302,0.498157,0.526857
7,0.525072,0.558897,0.764286,0.972646,0.477884,0.628996,0.556257
8,0.563592,0.390934,0.487174,0.725248,0.527025,0.533795,0.49974
9,0.609625,0.682477,0.351272,0.629131,0.705845,0.781724,0.807309


In [24]:
generated_good = generate_data(generator, [[0,1]], 50)



In [25]:
generated_good

Unnamed: 0,zinc,boron,phosphorus,potassium,sulphur,Nitrogen,Temperature
0,0.466889,0.445833,0.618731,0.519911,0.414353,0.720578,0.562401
1,0.494989,0.320191,0.699831,0.460067,0.458721,0.78206,0.340835
2,0.584385,0.442802,0.490338,0.734035,0.479273,0.475084,0.728406
3,0.515967,0.449148,0.47133,0.601785,0.468695,0.668068,0.627634
4,0.585718,0.386327,0.464826,0.6438,0.400739,0.37823,0.611062
5,0.643955,0.455473,0.616033,0.958037,0.389136,0.487288,0.322443
6,0.660703,0.562029,0.448755,0.752358,0.732656,0.647054,0.597089
7,0.543054,0.353844,0.368309,0.625832,0.465236,0.490277,0.759703
8,0.704384,0.4782,0.694476,0.620408,0.521821,0.375769,0.507446
9,0.351579,0.267588,0.413767,0.584072,0.364435,0.397443,0.588324


In [26]:
synthetic_data = pd.concat([generated_poor, generated_good], ignore_index=True)
synthetic_data = pd.DataFrame(scaler.inverse_transform(synthetic_data), columns=['zinc', 'boron', 'phosphorus', 'potassium','sulphur','Nitrogen','Temperature'])

In [27]:
synthetic_data

Unnamed: 0,zinc,boron,phosphorus,potassium,sulphur,Nitrogen,Temperature
0,9.606263,-0.903942,57.784443,138.759201,13.638749,62.144588,34.247986
1,15.161116,0.695910,55.441891,136.836945,14.979509,77.066223,31.369473
2,10.605600,2.468094,60.585190,140.829269,14.038166,79.236671,39.534882
3,14.545186,2.388283,65.084549,143.905258,15.824662,102.432777,33.517372
4,19.886360,5.803608,64.716042,147.632538,15.855391,103.747536,49.681263
...,...,...,...,...,...,...,...
95,18.316696,0.629737,64.528900,152.677200,16.509245,74.396744,37.633030
96,15.641368,1.257488,73.205215,141.232040,11.846260,88.029877,37.134926
97,17.562988,2.495688,62.877777,143.655853,16.048128,104.583054,40.942913
98,15.816070,2.012760,60.442490,153.416016,22.609171,79.132195,36.850502


In [28]:
synthetic_labels = [0]*50 + [1]*50
synthetic_data['yield'] = synthetic_labels

# Save synthetic data as a CSV file
#synthetic_data.to_csv('synthetic_iris_data.csv', index=False)

In [29]:
synthetic_data

Unnamed: 0,zinc,boron,phosphorus,potassium,sulphur,Nitrogen,Temperature,yield
0,9.606263,-0.903942,57.784443,138.759201,13.638749,62.144588,34.247986,0
1,15.161116,0.695910,55.441891,136.836945,14.979509,77.066223,31.369473,0
2,10.605600,2.468094,60.585190,140.829269,14.038166,79.236671,39.534882,0
3,14.545186,2.388283,65.084549,143.905258,15.824662,102.432777,33.517372,0
4,19.886360,5.803608,64.716042,147.632538,15.855391,103.747536,49.681263,0
...,...,...,...,...,...,...,...,...
95,18.316696,0.629737,64.528900,152.677200,16.509245,74.396744,37.633030,1
96,15.641368,1.257488,73.205215,141.232040,11.846260,88.029877,37.134926,1
97,17.562988,2.495688,62.877777,143.655853,16.048128,104.583054,40.942913,1
98,15.816070,2.012760,60.442490,153.416016,22.609171,79.132195,36.850502,1
