In [None]:
import numpy as np
import pandas as pd

# Inclusive [lower, upper] bounds a sample must satisfy, per feature, to be
# labelled 'good'. The keys are reused verbatim as DataFrame column names, so
# their spelling and capitalisation must not change. Note that 'Temperature'
# is an environmental feature rather than a nutrient.
nutrient_ranges = {
    'zinc':        [5, 20],
    'boron':       [0.5, 2],
    'phosphorus':  [50, 75],
    'potassium':   [125, 145],
    'sulphur':     [12, 15],
    'Nitrogen':    [40.36, 112.10],
    'Temperature': [25, 45],
}

# Function to generate random values within the specified range for each nutrient
def generate_cotton_yield_data(num_samples, ranges=None, rng=None):
    """Draw uniform random values inside the allowed range of every feature.

    Args:
        num_samples: Number of values to draw for each feature.
        ranges: Optional mapping ``feature name -> (min, max)``. Defaults to
            the module-level ``nutrient_ranges``.
        rng: Optional object exposing ``uniform(low, high, size)`` (for
            example ``np.random.default_rng(seed)``) so the draw can be made
            reproducible. Defaults to NumPy's global random state.

    Returns:
        dict mapping each feature name to a 1-D array of ``num_samples``
        values drawn uniformly between that feature's min and max.
    """
    if ranges is None:
        ranges = nutrient_ranges
    # `np.random` (the module) and a `Generator` both provide `.uniform`.
    sampler = np.random if rng is None else rng
    return {
        feature: sampler.uniform(min_val, max_val, num_samples)
        for feature, (min_val, max_val) in ranges.items()
    }

# Function to generate random values from a widened range for each nutrient
def generate_out_of_range_data(num_samples, margin=10, ranges=None, rng=None):
    """Draw uniform random values from each feature's range widened by ``margin``.

    Despite the function name, values are NOT guaranteed to lie outside the
    allowed range: each feature is sampled uniformly from
    ``[min - margin, max + margin]``, so a value may still fall inside
    ``[min, max]``. The lower bound can also become negative when ``margin``
    exceeds ``min``.

    Args:
        num_samples: Number of values to draw for each feature.
        margin: Amount subtracted from the minimum and added to the maximum of
            every range before sampling. Defaults to 10, the value that was
            previously hard-coded.
        ranges: Optional mapping ``feature name -> (min, max)``. Defaults to
            the module-level ``nutrient_ranges``.
        rng: Optional object exposing ``uniform(low, high, size)`` (for
            example ``np.random.default_rng(seed)``). Defaults to NumPy's
            global random state.

    Returns:
        dict mapping each feature name to a 1-D array of ``num_samples``
        values.
    """
    if ranges is None:
        ranges = nutrient_ranges
    sampler = np.random if rng is None else rng
    return {
        feature: sampler.uniform(min_val - margin, max_val + margin, num_samples)
        for feature, (min_val, max_val) in ranges.items()
    }

# Function to label yield as 'good' or 'poor' based on whether values are within range
def label_yield(data, ranges=None):
    """Label every row 'good' if all features are within range, else 'poor'.

    The check is positional and vectorized, so it works for a dict of
    arrays/lists as well as for a DataFrame whose index is not 0..n-1 (the
    previous element-wise ``data[col][i]`` lookup was label-based on a Series
    and took the row count from a hard-coded ``'zinc'`` column).

    Args:
        data: Mapping or DataFrame providing, for every feature in ``ranges``,
            a 1-D sequence of numeric values of equal length.
        ranges: Optional mapping ``feature name -> (min, max)``; both bounds
            are inclusive. Defaults to the module-level ``nutrient_ranges``.

    Returns:
        list of str, one entry per row, each either 'good' or 'poor'.

    Raises:
        ValueError: If ``ranges`` is empty (there is nothing to check and no
            way to infer the number of rows).
    """
    if ranges is None:
        ranges = nutrient_ranges
    if not ranges:
        raise ValueError("ranges must contain at least one feature")

    within_all = None
    for feature, (min_val, max_val) in ranges.items():
        values = np.asarray(data[feature], dtype=float)
        # NaN compares False on both sides, so missing values end up 'poor'.
        within = (values >= min_val) & (values <= max_val)
        within_all = within if within_all is None else (within_all & within)

    return ['good' if ok else 'poor' for ok in within_all]

# Seed NumPy's global random state so that Restart & Run All regenerates the
# same synthetic dataset (both generators draw from np.random by default).
SEED = 42
np.random.seed(SEED)

# Number of rows produced by each of the two generator calls below.
SAMPLES_PER_GENERATOR = 1500

# Generate cotton yield dataset with values within specified ranges
cotton_yield_data_within_range = generate_cotton_yield_data(SAMPLES_PER_GENERATOR)

# Generate the second dataset from the "out of range" generator
cotton_yield_data_out_of_range = generate_out_of_range_data(SAMPLES_PER_GENERATOR)

# Combine the datasets: per feature, the in-range values come first, followed
# by the values from the second generator.
cotton_yield_data_combined = {nutrient: np.concatenate([cotton_yield_data_within_range[nutrient],
                                                       cotton_yield_data_out_of_range[nutrient]])
                              for nutrient in nutrient_ranges.keys()}

# Create a DataFrame
df_cotton_yield = pd.DataFrame(cotton_yield_data_combined)

# Label the yield
df_cotton_yield['yield_label'] = label_yield(df_cotton_yield)

# Print the first few samples to verify
print(df_cotton_yield.head())


        zinc     boron  phosphorus   potassium    sulphur    Nitrogen  \
0   8.672217  0.990192   54.126532  134.248669  14.546511   85.141959   
1   9.967715  0.703237   66.166235  131.591588  14.781143   82.467475   
2  17.987525  1.287637   57.616329  132.860546  14.716638   76.687011   
3  17.788675  1.506513   56.157076  127.818043  13.840693  105.836338   
4  18.935967  0.861901   50.986611  135.492269  13.210089   68.225460   

   Temperature yield_label  
0    32.590864        good  
1    38.506309        good  
2    30.597493        good  
3    38.214346        good  
4    33.655774        good  


In [None]:
# Shuffle every row with a fixed random_state, then renumber the rows from 0
# so the old (pre-shuffle) index is discarded.
shuffled_rows = df_cotton_yield.sample(frac=1, random_state=42)
df_cotton_yield3 = shuffled_rows.reset_index(drop=True)
df_cotton_yield3.head(50)

Unnamed: 0,zinc,boron,phosphorus,potassium,sulphur,Nitrogen,Temperature,yield_label
0,8.616335,7.620756,81.784734,140.817239,23.351596,74.081761,32.652417,poor
1,6.486142,0.855687,55.697974,136.03435,12.341266,45.088763,37.551394,good
2,-3.050063,11.158807,59.495178,122.218101,13.002437,120.940035,15.650607,poor
3,13.71741,1.216561,62.221481,135.28253,14.137695,53.456172,43.60151,good
4,24.642487,-3.122935,67.836295,147.088947,19.535948,110.906299,50.025609,poor
5,17.267411,1.28985,53.425026,136.531953,13.156125,53.389282,25.215392,good
6,17.355577,1.210733,71.074816,129.96484,12.958101,73.296903,30.202598,good
7,7.112326,-0.453922,46.128409,153.247208,14.767251,49.55896,33.272076,poor
8,18.072321,0.909541,72.165387,132.722039,13.24629,65.856998,44.191422,good
9,17.462777,-2.018848,83.898204,123.060211,13.514425,108.196654,25.489488,poor


In [None]:
# Write the shuffled dataset to 'synthetic_yield2.csv' (row index not included).
df_cotton_yield3.to_csv('synthetic_yield2.csv',index=False)

In [None]:
# Count how many rows carry each yield label.
df_cotton_yield3['yield_label'].value_counts()

yield_label
poor    1499
good    1499
Name: count, dtype: int64

In [None]:
# Balance the classes by dropping the surplus of 'good' rows (the first ones in
# row order). The surplus is computed from the current label counts instead of
# being fixed at 2, so the cell stays correct for any random draw and becomes a
# no-op when re-run on already balanced data.
# NOTE(review): assumes the original hard-coded `2` was meant to equalise the
# 'good' and 'poor' counts — confirm.
label_counts = df_cotton_yield3['yield_label'].value_counts()
surplus_good = max(int(label_counts.get('good', 0)) - int(label_counts.get('poor', 0)), 0)
indices_to_drop = df_cotton_yield3[df_cotton_yield3['yield_label'] == 'good'].head(surplus_good).index

# Drop the rows using the indices
df_cotton_yield3 = df_cotton_yield3.drop(indices_to_drop)

In [None]:
# Count missing values per column.
df_cotton_yield3.isna().sum()

zinc           0
boron          0
phosphorus     0
potassium      0
sulphur        0
Nitrogen       0
Temperature    0
yield_label    0
dtype: int64

In [None]:
# Separate the label column from the feature columns, then display the features.
y = df_cotton_yield3['yield_label']
X = df_cotton_yield3.drop(columns='yield_label')
X

Unnamed: 0,zinc,boron,phosphorus,potassium,sulphur,Nitrogen,Temperature
2934,-4.984012,-6.392182,56.250959,151.243972,18.114128,43.276447,18.267161
2533,-4.980979,9.068551,80.365096,131.296510,18.219619,72.199865,43.305149
2679,-4.974066,-5.162278,80.855254,132.834931,18.152781,39.492637,51.130021
1789,-4.967354,-4.333978,81.546237,131.381773,15.127985,37.717202,53.667639
2750,-4.966512,-3.508563,72.839644,127.914555,11.408858,115.767250,41.293378
...,...,...,...,...,...,...,...
2827,29.919956,4.834775,60.102429,131.731233,17.273270,105.077036,26.871446
2114,29.938667,3.877026,58.114842,141.921636,7.317923,114.084127,24.970464
2324,29.943639,1.770575,43.181444,149.909070,3.888395,101.003298,45.080299
1510,29.957757,-1.723425,67.032428,128.475093,4.771978,36.453005,48.069685


In [None]:
# Class distribution of the label vector.
y.value_counts()

yield_label
poor    1499
good    1499
Name: count, dtype: int64

In [None]:
#one_hot_class = one_hot_encoder.transform(np.array([[data_class]]).reshape(-1, 1))

In [None]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam



# Separate the feature columns from the label column.
X = df_cotton_yield3.drop('yield_label',axis=1)
y = df_cotton_yield3['yield_label']


# Min-max scale the features. `scaler` is kept so that generated samples can
# later be mapped back to the original units with `scaler.inverse_transform`.
scaler = MinMaxScaler()
X = scaler.fit_transform(X)


real_data = pd.DataFrame(X, columns=['zinc', 'boron', 'phosphorus', 'potassium','sulphur','Nitrogen','Temperature'])

real_labels = y


# One-hot encode the labels with an EXPLICIT column order. By default
# OneHotEncoder sorts string categories, which would put 'good' in column 0,
# whereas the generation cells further down pass [[1, 0]] for 'poor' and
# [[0, 1]] for 'good'. Pinning the order keeps both sides consistent.
LABEL_ORDER = ['poor', 'good']
try:
    # scikit-learn >= 1.2 spells the dense-output switch `sparse_output`.
    one_hot_encoder = OneHotEncoder(categories=[LABEL_ORDER], sparse_output=False)
except TypeError:
    # Older scikit-learn releases only know the `sparse` keyword.
    one_hot_encoder = OneHotEncoder(categories=[LABEL_ORDER], sparse=False)
one_hot_labels = one_hot_encoder.fit_transform(np.array(real_labels).reshape(-1, 1))

# Constants
NOISE_DIM = 100        # length of the random noise vector fed to the generator
NUM_CLASSES = 2        # width of the one-hot class vector
NUM_FEATURES = 7       # number of feature columns in `real_data`
BATCH_SIZE = 64        # rows per training batch
TRAINING_STEPS = 1000  # iterations of the training loop

# Generator
def create_generator():
    """Build the conditional generator.

    Inputs are a noise vector of length NOISE_DIM and a class vector of length
    NUM_CLASSES; they are concatenated, passed through one 128-unit ReLU layer
    and projected linearly to NUM_FEATURES outputs.

    Returns:
        An uncompiled Keras ``Model`` taking ``[noise, class]`` inputs.
    """
    latent_in = Input(shape=(NOISE_DIM,))
    condition_in = Input(shape=(NUM_CLASSES,))
    joined = Concatenate()([latent_in, condition_in])
    features = Dense(128, activation='relu')(joined)
    synthetic_row = Dense(NUM_FEATURES, activation='linear')(features)
    return Model(inputs=[latent_in, condition_in], outputs=synthetic_row)

# Discriminator
def create_discriminator():
    """Build the conditional discriminator.

    Inputs are a data row of length NUM_FEATURES and a class vector of length
    NUM_CLASSES; they are concatenated, passed through one 128-unit ReLU layer
    and reduced to a single sigmoid output.

    Returns:
        An uncompiled Keras ``Model`` taking ``[data, class]`` inputs.
    """
    sample_in = Input(shape=(NUM_FEATURES,))
    condition_in = Input(shape=(NUM_CLASSES,))
    joined = Concatenate()([sample_in, condition_in])
    features = Dense(128, activation='relu')(joined)
    score = Dense(1, activation='sigmoid')(features)
    return Model(inputs=[sample_in, condition_in], outputs=score)

# cGAN
def create_cgan(generator, discriminator):
    """Chain a generator and a discriminator into one combined model.

    The combined model maps ``[noise, class]`` to the discriminator's output
    for the generator's sample; the same class vector is fed to both parts.

    Args:
        generator: Model taking ``[noise, class]`` and returning a data row.
        discriminator: Model taking ``[data, class]`` and returning a score.

    Returns:
        An uncompiled Keras ``Model`` taking ``[noise, class]`` inputs.
    """
    latent_in = Input(shape=(NOISE_DIM,))
    condition_in = Input(shape=(NUM_CLASSES,))
    fake_rows = generator([latent_in, condition_in])
    score = discriminator([fake_rows, condition_in])
    return Model(inputs=[latent_in, condition_in], outputs=score)

# Build the three models. Statement order matters in this section:
#   1. the discriminator is compiled first, before its `trainable` flag is touched;
#   2. `gan` wraps the very same generator and discriminator objects;
#   3. `discriminator.trainable` is set to False only afterwards, and `gan`
#      is compiled last.
# NOTE(review): this relies on Keras taking the `trainable` flag into account
# at compile time, so that `discriminator.train_on_batch` still updates the
# discriminator while `gan.train_on_batch` only updates the generator —
# confirm for the installed Keras version.

# Create and compile the Discriminator
discriminator = create_discriminator()
discriminator.compile(loss='binary_crossentropy', optimizer=Adam())

# Create the Generator
generator = create_generator()

# Create the GAN
gan = create_cgan(generator, discriminator)

# Ensure that only the generator is trained
discriminator.trainable = False

gan.compile(loss='binary_crossentropy', optimizer=Adam())

# Train GAN
# Target vectors are identical for every step, so build them once.
real_targets = np.ones((BATCH_SIZE, 1))
fake_targets = np.zeros((BATCH_SIZE, 1))

for step in range(TRAINING_STEPS):
    # Select a random batch of real data with labels (sampled with replacement)
    idx = np.random.randint(0, real_data.shape[0], BATCH_SIZE)
    real_batch = real_data.iloc[idx].values
    labels_batch = one_hot_labels[idx]

    # Generate a batch of new data. The model is called directly instead of
    # via `predict`: for a single small batch this avoids predict's per-call
    # overhead and the progress bar it would print on every step.
    noise = np.random.normal(0, 1, (BATCH_SIZE, NOISE_DIM))
    generated_batch = np.asarray(generator([noise, labels_batch], training=False))

    # Train the discriminator: real rows are labelled 1, generated rows 0
    real_loss = discriminator.train_on_batch([real_batch, labels_batch], real_targets)
    fake_loss = discriminator.train_on_batch([generated_batch, labels_batch], fake_targets)
    discriminator_loss = 0.5 * np.add(real_loss, fake_loss)

    # Train the generator through the combined model, asking it to make the
    # discriminator output 1 for generated rows
    generator_loss = gan.train_on_batch([noise, labels_batch], real_targets)

    if step % 500 == 0:
        print(f"Step: {step}, Discriminator Loss: {discriminator_loss}, Generator Loss: {generator_loss}")

# Generate instances for a given class
def generate_data(generator, data_class, num_instances):
    """Generate ``num_instances`` synthetic rows conditioned on one class.

    Args:
        generator: Model exposing ``predict([noise, class_batch])``.
        data_class: Either a class name (``str``), which is encoded with the
            fitted module-level ``one_hot_encoder``, or an already one-hot
            encoded vector such as ``[[1, 0]]`` / ``[1, 0]``, which is passed
            through unchanged. Prefer the class name: it cannot get out of
            sync with the encoder's column order.
        num_instances: Number of rows to generate.

    Returns:
        DataFrame with one row per generated instance and the seven feature
        columns. Values are raw generator outputs, i.e. still in the scaled
        feature space the generator was trained on.
    """
    if isinstance(data_class, str):
        # Encode the label name -> array of shape (1, n_classes).
        one_hot_class = one_hot_encoder.transform(np.array([[data_class]], dtype=object))
    else:
        # Accept both [[1, 0]] and [1, 0]; always end up with shape (1, n_classes).
        one_hot_class = np.atleast_2d(np.asarray(data_class, dtype=float))
    noise = np.random.normal(0, 1, (num_instances, NOISE_DIM))
    # Repeat the single class row so that every noise vector gets the same condition.
    class_batch = np.repeat(one_hot_class, num_instances, axis=0)
    generated_data = generator.predict([noise, class_batch])
    return pd.DataFrame(generated_data, columns=['zinc', 'boron', 'phosphorus', 'potassium','sulphur','Nitrogen','Temperature'])

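# Generate instances of a given class below (intended encoding: index 0 = 'poor', index 1 = 'good'; check one_hot_encoder.categories_ for the actual column order)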

# no errors all set




Step: 0, Discriminator Loss: 0.7224731147289276, Generator Loss: 0.5602298974990845
Step: 500, Discriminator Loss: 0.7023459374904633, Generator Loss: 0.6865008473396301


In [None]:
# Generate 1500 rows conditioned on the one-hot vector [1, 0].
# NOTE(review): which class column 0 stands for is decided by the fitted
# encoder. A default-fitted OneHotEncoder sorts string categories, which would
# make column 0 'good' rather than 'poor' — verify with one_hot_encoder.categories_.
generated_poor = generate_data(generator, [[1,0]], 1500)



In [None]:
# Display the rows generated for the [1, 0] class vector.
generated_poor

Unnamed: 0,zinc,boron,phosphorus,potassium,sulphur,Nitrogen,Temperature
0,0.565528,0.593020,0.314459,0.620714,0.676832,0.501169,0.620355
1,0.445927,0.546164,0.409683,0.749025,0.617971,0.738168,0.564779
2,0.266940,0.489574,0.338505,0.608673,0.296532,0.610967,0.483543
3,0.443830,0.605433,0.247563,0.608295,0.462217,0.693120,0.459500
4,0.191721,0.487402,0.361905,0.438565,0.376171,0.515772,0.421766
...,...,...,...,...,...,...,...
1495,0.398364,0.577088,0.306508,0.712318,0.524324,0.514550,0.441795
1496,0.383598,0.541954,0.207969,0.713728,0.453239,0.664385,0.274993
1497,0.397370,0.611998,0.480082,0.364021,0.672940,0.910585,0.437419
1498,0.103915,0.413600,0.444648,0.608646,0.416799,0.643286,0.438530


In [None]:
# Generate 1500 rows conditioned on the one-hot vector [0, 1].
# NOTE(review): which class column 1 stands for is decided by the fitted
# encoder. A default-fitted OneHotEncoder sorts string categories, which would
# make column 1 'poor' rather than 'good' — verify with one_hot_encoder.categories_.
generated_good = generate_data(generator, [[0,1]], 1500)



In [None]:
# Display the rows generated for the [0, 1] class vector.
generated_good

Unnamed: 0,zinc,boron,phosphorus,potassium,sulphur,Nitrogen,Temperature
0,0.264820,0.460789,0.367033,0.563310,0.568272,0.570182,0.774524
1,0.044564,0.397549,0.407862,0.487211,0.466890,0.535798,0.703461
2,0.290963,0.359053,0.316765,0.398015,0.356381,0.484295,0.437559
3,0.449611,0.589038,0.492661,0.726287,0.491592,0.701120,0.278957
4,0.371418,0.527941,0.391064,0.511060,0.577551,0.623926,0.444280
...,...,...,...,...,...,...,...
1495,0.447167,0.640462,0.285183,0.766745,0.433778,0.842089,0.800858
1496,0.602066,0.493477,0.413231,0.693864,0.520157,0.564198,0.443327
1497,0.530611,0.361483,0.556469,0.650393,0.568971,0.416319,0.294244
1498,0.396512,0.537275,0.225273,0.496702,0.435289,0.458345,0.377699


In [None]:
# Stack both generated sets (the `generated_poor` rows first), then undo the
# scaling with the fitted `scaler`.
synthetic_scaled = pd.concat([generated_poor, generated_good], ignore_index=True)
synthetic_data = pd.DataFrame(
    scaler.inverse_transform(synthetic_scaled),
    columns=['zinc', 'boron', 'phosphorus', 'potassium','sulphur','Nitrogen','Temperature'],
)

In [None]:
# Display the synthetic rows after the inverse transform.
synthetic_data

Unnamed: 0,zinc,boron,phosphorus,potassium,sulphur,Nitrogen,Temperature
0,14.796683,3.212119,54.157795,139.839783,17.569214,76.268448,39.813412
1,10.613351,2.207841,58.441887,144.967438,16.216242,97.919624,37.595707
2,4.352852,0.994931,55.239624,139.358582,8.827771,86.299110,34.354069
3,10.539989,3.478163,51.148193,139.343491,12.636147,93.804283,33.394653
4,1.721885,0.948384,56.292385,132.560577,10.658338,77.602554,31.888893
...,...,...,...,...,...,...,...
2995,10.656706,4.228961,52.840683,145.675568,11.982450,107.413429,47.016212
2996,16.074692,1.078590,58.601509,142.763046,13.967940,82.026474,32.749287
2997,13.575354,-1.750484,65.045731,141.025833,15.089958,68.516899,26.800262
2998,8.884944,2.017320,50.145382,134.883926,12.017194,72.356232,30.130428


In [None]:

# One label per synthetic row, in the same order the rows were stacked:
# the `generated_poor` rows first (0), then the `generated_good` rows (1).
# The counts are taken from the generated frames instead of being hard-coded,
# so changing the number of generated instances cannot cause a length mismatch.
synthetic_labels = [0] * len(generated_poor) + [1] * len(generated_good)

# Assigning synthetic labels to the DataFrame
synthetic_data['yield_label'] = synthetic_labels

# Renaming label values
synthetic_data['yield_label'] = synthetic_data['yield_label'].map({0: 'poor', 1: 'good'})

In [None]:
# Display the synthetic rows together with their assigned labels.
synthetic_data

Unnamed: 0,zinc,boron,phosphorus,potassium,sulphur,Nitrogen,Temperature,yield_label
0,14.796683,3.212119,54.157795,139.839783,17.569214,76.268448,39.813412,poor
1,10.613351,2.207841,58.441887,144.967438,16.216242,97.919624,37.595707,poor
2,4.352852,0.994931,55.239624,139.358582,8.827771,86.299110,34.354069,poor
3,10.539989,3.478163,51.148193,139.343491,12.636147,93.804283,33.394653,poor
4,1.721885,0.948384,56.292385,132.560577,10.658338,77.602554,31.888893,poor
...,...,...,...,...,...,...,...,...
2995,10.656706,4.228961,52.840683,145.675568,11.982450,107.413429,47.016212,good
2996,16.074692,1.078590,58.601509,142.763046,13.967940,82.026474,32.749287,good
2997,13.575354,-1.750484,65.045731,141.025833,15.089958,68.516899,26.800262,good
2998,8.884944,2.017320,50.145382,134.883926,12.017194,72.356232,30.130428,good


In [None]:
# Append the synthetic rows to the original dataset (index renumbered from 0)
# and write the combined table to 'gan_data.csv' without the index column.
concatenated_df = pd.concat([df_cotton_yield3, synthetic_data], ignore_index=True)
concatenated_df.to_csv('gan_data.csv', index=False)

In [None]:
# Preview the first 10 rows of the combined dataset.
concatenated_df.head(10)

Unnamed: 0,zinc,boron,phosphorus,potassium,sulphur,Nitrogen,Temperature,yield_label
0,-4.984012,-6.392182,56.250959,151.243972,18.114128,43.276447,18.267161,poor
1,-4.980979,9.068551,80.365096,131.29651,18.219619,72.199865,43.305149,poor
2,-4.974066,-5.162278,80.855254,132.834931,18.152781,39.492637,51.130021,poor
3,-4.967354,-4.333978,81.546237,131.381773,15.127985,37.717202,53.667639,poor
4,-4.966512,-3.508563,72.839644,127.914555,11.408858,115.76725,41.293378,poor
5,-4.862194,1.186946,57.030415,130.281083,5.380411,54.630728,16.744061,poor
6,-4.859968,-5.902179,80.756192,145.476272,24.610168,33.835642,33.484098,poor
7,-4.855913,-2.279267,69.626885,123.474621,16.508529,105.745629,40.243335,poor
8,-4.854486,6.30834,64.752639,135.136414,17.286773,91.503842,54.446363,poor
9,-4.845912,-8.306382,79.355396,132.215052,17.477161,49.853151,52.69427,poor
