In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [11]:
crop_data = pd.read_csv("rule_based_data.csv",index_col=False)
if "Unnamed: 0" in crop_data.columns:
    crop_data = crop_data.drop(columns=["Unnamed: 0"])
# crop_data.drop(['WATER_SOURCE'],axis=1)
crop_data = crop_data[:20]

In [12]:
crop_data

Unnamed: 0,CROPS,TYPE_OF_CROP,SOIL,SOIL_PH,CROP_DURATION,TEMP,WATER_SOURCE,WATER_REQUIRED,RELATIVE_HUMIDITY
0,rice,cereals,Alluvia or loamy and clayey soil,6.4,150,39.91,"irrigated,rainfall",2242,79.09
1,rice,cereals,Alluvia or loamy and clayey soil,5.0,150,31.95,"irrigated,rainfall",906,79.17
2,rice,cereals,Alluvia or loamy and clayey soil,7.5,150,26.04,"irrigated,rainfall",2200,69.15
3,rice,cereals,Alluvia or loamy and clayey soil,6.4,150,20.87,"irrigated,rainfall",2390,75.78
4,rice,cereals,Alluvia or loamy and clayey soil,5.4,150,22.85,"irrigated,rainfall",1644,77.28
5,rice,cereals,Alluvia or loamy and clayey soil,7.9,150,23.65,"irrigated,rainfall",1672,60.24
6,rice,cereals,Alluvia or loamy and clayey soil,5.6,150,39.68,"irrigated,rainfall",1509,76.46
7,rice,cereals,Alluvia or loamy and clayey soil,5.8,150,27.71,"irrigated,rainfall",1458,74.27
8,rice,cereals,Alluvia or loamy and clayey soil,5.3,150,34.65,"irrigated,rainfall",2039,73.64
9,rice,cereals,Alluvia or loamy and clayey soil,5.8,150,25.33,"irrigated,rainfall",1782,76.83


In [13]:
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

# Extract numerical columns
numerical_columns = ["SOIL_PH", "CROP_DURATION", "TEMP", "WATER_REQUIRED", "RELATIVE_HUMIDITY"]
categorical_columns = ["CROPS", "WATER_SOURCE", "SOIL", "TYPE_OF_CROP",]

# Create a MinMaxScaler
scaler = MinMaxScaler()
# Normalize only numerical columns
crop_data[numerical_columns] = scaler.fit_transform(crop_data[numerical_columns])

# Perform one-hot encoding for categorical columns
encoder = OneHotEncoder(sparse=False, drop='first')
categorical_encoded = encoder.fit_transform(crop_data[categorical_columns])
categorical_encoded_df = pd.DataFrame(categorical_encoded, columns=encoder.get_feature_names(categorical_columns))
# Concatenate the encoded categorical columns with the normalized numerical columns
normalized_data = pd.concat([crop_data[numerical_columns], categorical_encoded_df], axis=1)
print(normalized_data)



     SOIL_PH  CROP_DURATION      TEMP  WATER_REQUIRED  RELATIVE_HUMIDITY  \
0   0.482759       1.000000  1.000000        0.923316           0.997046   
1   0.000000       1.000000  0.581933        0.231088           1.000000   
2   0.862069       1.000000  0.271534        0.901554           0.629985   
3   0.482759       1.000000  0.000000        1.000000           0.874815   
4   0.137931       1.000000  0.103992        0.613472           0.930207   
5   1.000000       1.000000  0.146008        0.627979           0.300960   
6   0.206897       1.000000  0.987920        0.543523           0.899926   
7   0.275862       1.000000  0.359244        0.517098           0.819055   
8   0.103448       1.000000  0.723739        0.818135           0.795790   
9   0.275862       1.000000  0.234244        0.684974           0.913589   
10  0.413793       0.057971  0.162290        0.046632           0.000000   
11  0.379310       0.028986  0.039391        0.072021           0.067578   
12  0.689655

In [None]:
# GAN parameters
latent_dim = 100
num_samples = len(normalized_data)
epochs = 100
batch_size = 64

In [None]:
# Generator model
generator = keras.Sequential([
    layers.Input(shape=(latent_dim,)),
    layers.Dense(256, activation="relu"),
    layers.Dense(512, activation="relu"),
    layers.Dense(len(normalized_data.columns), activation="sigmoid")  # Output layer with same dimensions as input
])

In [None]:
# Discriminator model
discriminator = keras.Sequential([
    layers.Input(shape=(len(normalized_data.columns),)),
    layers.Dense(512, activation="relu"),
    layers.Dense(256, activation="relu"),
    layers.Dense(1, activation="sigmoid")  # Binary classification output
])

In [None]:
# GAN model (combining generator and discriminator)
discriminator.compile(loss="binary_crossentropy", optimizer="adam")
discriminator.trainable = False
gan_input = keras.Input(shape=(latent_dim,))
gan_output = discriminator(generator(gan_input))
gan = keras.Model(gan_input, gan_output)
gan.compile(loss="binary_crossentropy", optimizer="adam")


In [None]:
# Training loop
for i in range(2):
    if i >= 29:
        crop_data = org_data[30 * ((i) % 30):900]
    else:
        crop_data = org_data[30 * ((i) % 30):30 * ((i + 1) % 30)]

    # Choose a crop value for this iteration based on org_data
    constant_crop_value = org_data['CROPS'][30 * ((i) % 30)]
    for epoch in range(epochs):
        noise = np.random.normal(0, 1, size=(batch_size, latent_dim))
        generated_data = generator.predict(noise)

        real_data_indices = np.random.choice(len(crop_data), batch_size)
        real_data = crop_data.drop(columns=["CROPS","TYPE_OF_CROP","WATER_SOURCE","SOIL"]).iloc[real_data_indices].values.astype(np.float32)
        
        real_data_label = np.ones((batch_size, 1), dtype=np.float32)
        fake_data_label = np.zeros((batch_size, 1), dtype=np.float32)

        # Train discriminator
        d_loss_real = discriminator.train_on_batch(real_data, real_data_label)
        d_loss_fake = discriminator.train_on_batch(generated_data, fake_data_label)

        # Train generator (via GAN model)
        noise = np.random.normal(0, 1, size=(batch_size, latent_dim)).astype(np.float32)
        g_loss = gan.train_on_batch(noise, real_data_label)

        if epoch % 100 == 0:
            print(f"Epoch {epoch}: D Loss Real: {d_loss_real:.4f}, D Loss Fake: {d_loss_fake:.4f}, G Loss: {g_loss:.4f}")




ValueError: in user code:

    File "c:\Users\lalit\anaconda3\lib\site-packages\keras\engine\training.py", line 1249, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\lalit\anaconda3\lib\site-packages\keras\engine\training.py", line 1233, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\lalit\anaconda3\lib\site-packages\keras\engine\training.py", line 1222, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\lalit\anaconda3\lib\site-packages\keras\engine\training.py", line 1023, in train_step
        y_pred = self(x, training=True)
    File "c:\Users\lalit\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "c:\Users\lalit\anaconda3\lib\site-packages\keras\engine\input_spec.py", line 295, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_1" is incompatible with the layer: expected shape=(None, 69), found shape=(64, 5)


In [None]:
# Generate synthetic crop data
num_synthetic_samples = 50
noise = np.random.normal(0, 1, size=(num_synthetic_samples, latent_dim))
synthetic_data = generator.predict(noise)



In [None]:
synthetic_data

array([[6.38511324e-08, 5.40746839e-07, 4.66276084e-08, 1.06590456e-07,
        1.90399005e-04],
       [2.37441213e-07, 1.12554972e-06, 5.45028023e-08, 3.17165018e-07,
        4.35979746e-04],
       [2.12324352e-07, 1.20159120e-06, 1.77832248e-07, 3.77697717e-07,
        4.30264132e-04],
       [1.45453271e-06, 8.15512431e-06, 9.74481736e-07, 1.79603546e-06,
        1.72385084e-03],
       [1.05864865e-06, 5.69867780e-06, 3.03316796e-07, 1.26570160e-06,
        1.06418121e-03],
       [1.38833627e-07, 1.34786956e-06, 7.54645413e-08, 1.91786881e-07,
        6.08074246e-04],
       [5.44944533e-06, 9.87878502e-06, 1.92133029e-06, 5.99117493e-06,
        1.35883328e-03],
       [3.29854606e-06, 1.05219851e-05, 1.74408240e-06, 4.87435591e-06,
        2.14578095e-03],
       [2.53604782e-07, 1.42587851e-06, 1.13460622e-07, 4.55123967e-07,
        6.38896658e-04],
       [9.13699068e-07, 5.22389064e-06, 3.77664236e-07, 1.21441337e-06,
        6.88746804e-04],
       [2.69480893e-06, 5.8897

In [None]:
# Denormalize synthetic data
synthetic_data_denormalized = (synthetic_data * (scaler.data_max_ - scaler.data_min_)) + scaler.data_min_

In [None]:
decimal_places = {"SOIL_PH": 1, "CROP_DURATION": 0, "TEMP": 2, "WATER_REQUIRED": 0, "RELATIVE_HUMIDITY": 2}
synthetic_data_denormalized_rounded = synthetic_data_denormalized.copy()

for column, places in decimal_places.items():
    synthetic_data_denormalized_rounded[:, normalized_data.columns.get_loc(column)] = np.round(synthetic_data_denormalized[:, normalized_data.columns.get_loc(column)], places)


In [None]:
# Create a DataFrame from synthetic data
synthetic_df = pd.DataFrame(synthetic_data_denormalized_rounded, columns=normalized_data.columns)
synthetic_df.insert(0, 'CROPS', "rice")
synthetic_df.insert(1, 'TYPE_OF_CROP', crop_data["TYPE_OF_CROP"][0])
synthetic_df.insert(2, 'SOIL', crop_data["SOIL"][0])
synthetic_df.insert(5, 'WATERSOURCE', "irrigated,rainfall")

In [None]:
# Save synthetic data to CSV
synthetic_df.to_csv("synthetic_crop_data.csv", index=False)