In [4]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [5]:
# Load the rule-based crop table; index_col=False keeps pandas from using the
# first CSV column as the index.
crop_data = pd.read_csv("rule_based_data.csv", index_col=False)

# Remove the unnamed leftover index column when the file contains one.
if "Unnamed: 0" in crop_data.columns:
    crop_data = crop_data.drop(columns=["Unnamed: 0"])

# Keep the first 30 rows only. The explicit copy makes `crop_data` an
# independent frame, so the later in-place column assignment (scaling) does not
# write into a slice of the loaded frame and raise SettingWithCopyWarning.
crop_data = crop_data.iloc[:30].copy()

In [6]:
# Show the loaded crop table for a visual check.
crop_data

Unnamed: 0,CROPS,SOIL,SOIL_PH,CROP_DURATION,TEMP,WATER_SOURCE,WATER_REQUIRED,RELATIVE_HUMIDITY
0,rice,Alluvia or loamy and clayey soil,5.2,150,22.23,"irrigated,rainfall",1457,65.03
1,rice,Alluvia or loamy and clayey soil,8.0,150,22.41,"irrigated,rainfall",926,62.29
2,rice,Alluvia or loamy and clayey soil,6.0,150,34.61,"irrigated,rainfall",2448,68.13
3,rice,Alluvia or loamy and clayey soil,5.0,150,21.14,"irrigated,rainfall",2433,62.59
4,rice,Alluvia or loamy and clayey soil,5.7,150,24.2,"irrigated,rainfall",1754,78.22
5,rice,Alluvia or loamy and clayey soil,6.4,150,39.38,"irrigated,rainfall",1268,78.07
6,rice,Alluvia or loamy and clayey soil,7.1,150,34.59,"irrigated,rainfall",1796,62.11
7,rice,Alluvia or loamy and clayey soil,6.4,150,24.45,"irrigated,rainfall",1101,63.59
8,rice,Alluvia or loamy and clayey soil,7.4,150,38.04,"irrigated,rainfall",917,63.16
9,rice,Alluvia or loamy and clayey soil,5.9,150,30.48,"irrigated,rainfall",1460,74.67


In [7]:
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

# Columns that are min-max scaled versus columns that are one-hot encoded.
numerical_columns = ["SOIL_PH", "CROP_DURATION", "TEMP", "WATER_REQUIRED", "RELATIVE_HUMIDITY"]
categorical_columns = ["CROPS", "WATER_SOURCE", "SOIL"]

# Fit the scaler on the numerical columns and overwrite them with the scaled
# values. `scaler` is kept because its data_min_/data_max_ are used later to
# map generated values back to the original units.
# NOTE: this mutates `crop_data`; re-running this cell without reloading the
# CSV would refit the scaler on already-scaled values.
scaler = MinMaxScaler()
crop_data[numerical_columns] = scaler.fit_transform(crop_data[numerical_columns])

# One-hot encode the categorical columns. drop='first' removes the first level
# of every column, so a column with a single distinct value adds no columns.
# The dense-output keyword was renamed from `sparse` to `sparse_output` in
# scikit-learn 1.2 and the old name was removed in 1.4, so try the new one first.
try:
    encoder = OneHotEncoder(sparse_output=False, drop='first')
except TypeError:
    encoder = OneHotEncoder(sparse=False, drop='first')
categorical_encoded = encoder.fit_transform(crop_data[categorical_columns])

# get_feature_names() was removed in scikit-learn 1.2 in favour of
# get_feature_names_out(); fall back only on releases that lack the new method.
if hasattr(encoder, "get_feature_names_out"):
    encoded_column_names = encoder.get_feature_names_out(categorical_columns)
else:
    encoded_column_names = encoder.get_feature_names(categorical_columns)

# Reuse crop_data's index so the column-wise concat below aligns row by row.
categorical_encoded_df = pd.DataFrame(
    categorical_encoded,
    columns=encoded_column_names,
    index=crop_data.index,
)

# Concatenate the encoded categorical columns with the normalized numerical columns
normalized_data = pd.concat([crop_data[numerical_columns], categorical_encoded_df], axis=1)




In [8]:
# GAN parameters
seed = 42            # fixed seed so weight init, noise and batch sampling repeat across runs
latent_dim = 100     # length of the noise vector fed to the generator
num_samples = len(normalized_data)  # number of real rows available for training
epochs = 1000        # number of training iterations (one batch each)
batch_size = 64      # rows per batch; real rows are sampled with replacement

# Seed both random sources used by this notebook: NumPy (noise and batch
# indices) and TensorFlow (layer weight initialisation).
np.random.seed(seed)
tf.random.set_seed(seed)

In [9]:
# Generator: turns a latent noise vector into one row with as many values as
# `normalized_data` has columns. The sigmoid output keeps every value in (0, 1).
generator_output_dim = len(normalized_data.columns)

generator = keras.Sequential()
generator.add(layers.Input(shape=(latent_dim,)))
generator.add(layers.Dense(256, activation="relu"))
generator.add(layers.Dense(512, activation="relu"))
generator.add(layers.Dense(generator_output_dim, activation="sigmoid"))

In [10]:
# Discriminator: takes one row with as many values as `normalized_data` has
# columns and outputs a single sigmoid score.
discriminator_input_dim = len(normalized_data.columns)

discriminator = keras.Sequential()
discriminator.add(layers.Input(shape=(discriminator_input_dim,)))
discriminator.add(layers.Dense(512, activation="relu"))
discriminator.add(layers.Dense(256, activation="relu"))
discriminator.add(layers.Dense(1, activation="sigmoid"))

In [11]:
# GAN model (combining generator and discriminator)
# The discriminator is compiled first, on its own, with binary cross-entropy.
discriminator.compile(loss="binary_crossentropy", optimizer="adam")
# Freeze the discriminator before it is wired into the combined model.
# NOTE(review): the order (compile, then freeze, then build/compile `gan`)
# looks intentional - presumably so that training `gan` only updates the
# generator while the standalone discriminator still trains; confirm against
# the installed Keras version before reordering these lines.
discriminator.trainable = False
# Combined model: latent noise -> generator -> discriminator score.
gan_input = keras.Input(shape=(latent_dim,))
gan_output = discriminator(generator(gan_input))
gan = keras.Model(gan_input, gan_output)
gan.compile(loss="binary_crossentropy", optimizer="adam")


In [12]:
# Training loop
# Real rows come from `normalized_data`, the same frame whose column count sized
# the generator output and discriminator input, so batch width always matches
# the networks (including any one-hot columns). The float32 matrix and the
# label vectors do not change between iterations, so they are built once.
real_samples = normalized_data.values.astype(np.float32)
real_data_label = np.ones((batch_size, 1), dtype=np.float32)
fake_data_label = np.zeros((batch_size, 1), dtype=np.float32)

for epoch in range(epochs):
    # Generate a batch of fake rows; verbose=0 stops predict() from printing a
    # progress bar on every iteration.
    noise = np.random.normal(0, 1, size=(batch_size, latent_dim))
    generated_data = generator.predict(noise, verbose=0)

    # Draw a batch of real rows (np.random.choice samples with replacement).
    real_data_indices = np.random.choice(len(real_samples), batch_size)
    real_data = real_samples[real_data_indices]

    # Train discriminator
    d_loss_real = discriminator.train_on_batch(real_data, real_data_label)
    d_loss_fake = discriminator.train_on_batch(generated_data, fake_data_label)

    # Train generator (via GAN model): fresh noise is labelled as "real" so the
    # loss is low when the discriminator is fooled.
    noise = np.random.normal(0, 1, size=(batch_size, latent_dim)).astype(np.float32)
    g_loss = gan.train_on_batch(noise, real_data_label)

    if epoch % 100 == 0:
        print(f"Epoch {epoch}: D Loss Real: {d_loss_real:.4f}, D Loss Fake: {d_loss_fake:.4f}, G Loss: {g_loss:.4f}")


Epoch 0: D Loss Real: 0.7210, D Loss Fake: 0.7514, G Loss: 0.6565
Epoch 100: D Loss Real: 0.0225, D Loss Fake: 0.0403, G Loss: 3.2771
Epoch 200: D Loss Real: 0.1087, D Loss Fake: 0.0293, G Loss: 3.6017
Epoch 300: D Loss Real: 0.0287, D Loss Fake: 0.0166, G Loss: 4.1326
Epoch 400: D Loss Real: 0.0046, D Loss Fake: 0.0043, G Loss: 5.4671
Epoch 500: D Loss Real: 0.0553, D Loss Fake: 0.6659, G Loss: 0.9053
Epoch 600: D Loss Real: 0.0493, D Loss Fake: 0.0436, G Loss: 4.1268
Epoch 700: D Loss Real: 0.1834, D Loss Fake: 0.1104, G Loss: 2.2874
Epoch 800: D Loss Real: 0.2274, D Loss Fake: 0.2861, G Loss: 3.7956
Epoch 900: D Loss Real: 0.6373, D Loss Fake: 0.7096, G Loss: 1.3101


In [13]:
# Draw standard-normal latent vectors, one per requested row, and run them
# through the generator to obtain the synthetic rows.
num_synthetic_samples = 1000
noise = np.random.normal(
    loc=0,
    scale=1,
    size=(num_synthetic_samples, latent_dim),
)
synthetic_data = generator.predict(noise)



In [14]:
# Inspect the raw generator output (values are still on the scaled 0-1 range).
synthetic_data

array([[3.8539553e-01, 5.1981770e-04, 5.8513695e-01, 3.7930655e-01,
        3.3433476e-01],
       [3.2638755e-01, 2.9831464e-05, 7.3803818e-01, 6.4183784e-01,
        2.4600095e-01],
       [1.0000000e+00, 0.0000000e+00, 1.0000000e+00, 2.4484393e-06,
        6.2222902e-08],
       ...,
       [9.9999636e-01, 0.0000000e+00, 1.0000000e+00, 9.7483760e-05,
        8.9683192e-05],
       [3.0130306e-01, 2.6501118e-07, 5.8048457e-01, 8.2154739e-01,
        3.6086056e-02],
       [2.4094924e-01, 2.4430678e-06, 7.7973288e-01, 8.1499207e-01,
        1.4521939e-01]], dtype=float32)

In [15]:
# Denormalize synthetic data
# Invert the min-max scaling: x * (max - min) + min, using the statistics the
# scaler learned. Only the leading columns that the scaler was fitted on are
# rescaled; any further columns (one-hot features) are carried over unchanged,
# so the result keeps the same width as the generator output.
scaled_column_count = len(scaler.data_min_)
synthetic_data_denormalized = synthetic_data.astype(np.float64)  # astype returns a copy
synthetic_data_denormalized[:, :scaled_column_count] = (
    synthetic_data[:, :scaled_column_count] * (scaler.data_max_ - scaler.data_min_)
) + scaler.data_min_

In [16]:
# Number of digits kept after the decimal point for each numerical column.
decimal_places = {
    "SOIL_PH": 1,
    "CROP_DURATION": 0,
    "TEMP": 2,
    "WATER_REQUIRED": 0,
    "RELATIVE_HUMIDITY": 2,
}

# Round on a copy so the unrounded array stays available.
synthetic_data_denormalized_rounded = synthetic_data_denormalized.copy()

for column, places in decimal_places.items():
    # Position of this column in the array, taken from the training frame layout.
    column_position = normalized_data.columns.get_loc(column)
    unrounded_values = synthetic_data_denormalized[:, column_position]
    synthetic_data_denormalized_rounded[:, column_position] = np.round(unrounded_values, places)


In [18]:
# Create a DataFrame from synthetic data
synthetic_df = pd.DataFrame(synthetic_data_denormalized_rounded, columns=normalized_data.columns)

# CROP_DURATION and WATER_REQUIRED are whole numbers in the source table; store
# them as integers so the CSV shows 150 rather than 150.0.
synthetic_df = synthetic_df.round({"CROP_DURATION": 0, "WATER_REQUIRED": 0}).astype(
    {"CROP_DURATION": "int64", "WATER_REQUIRED": "int64"}
)

# Re-attach the categorical columns as constants, using the same names and
# positions as the source table:
# CROPS, SOIL, SOIL_PH, CROP_DURATION, TEMP, WATER_SOURCE, WATER_REQUIRED, ...
synthetic_df.insert(0, 'CROPS', "rice")
# .iloc[0] takes the first row by position instead of looking up index label 0.
synthetic_df.insert(1, 'SOIL', crop_data["SOIL"].iloc[0])
synthetic_df.insert(5, 'WATER_SOURCE', "irrigated,rainfall")

In [None]:
# Write the synthetic rows to disk; index=False leaves the DataFrame index out
# of the file.
synthetic_csv_path = "synthetic_crop_data.csv"
synthetic_df.to_csv(synthetic_csv_path, index=False)