In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
# Load the rule-based crop table and keep its first 30 rows as the real
# training sample for the GAN.
crop_data = pd.read_csv("rule_based_data.csv", index_col=False)
# Drop the unnamed leading column when present (presumably an index saved by
# an earlier export -- it carries no feature information).
if "Unnamed: 0" in crop_data.columns:
    crop_data = crop_data.drop(columns=["Unnamed: 0"])
# Take an explicit copy of the slice: a later cell assigns scaled values back
# into `crop_data`, and writing into a bare slice of another frame can raise
# pandas' SettingWithCopyWarning.
crop_data = crop_data.iloc[:30].copy()

In [3]:
# Display the loaded frame as a visual sanity check of columns and values.
crop_data

Unnamed: 0,CROPS,TYPE_OF_CROP,SOIL,SOIL_PH,CROP_DURATION,TEMP,WATER_SOURCE,WATER_REQUIRED,RELATIVE_HUMIDITY
0,rice,cereals,Alluvia or loamy and clayey soil,6.2,150,21.18,"irrigated,rainfall",2059,62.05
1,rice,cereals,Alluvia or loamy and clayey soil,7.9,150,29.56,"irrigated,rainfall",1364,69.54
2,rice,cereals,Alluvia or loamy and clayey soil,7.0,150,26.33,"irrigated,rainfall",2370,72.13
3,rice,cereals,Alluvia or loamy and clayey soil,5.7,150,31.65,"irrigated,rainfall",1771,72.77
4,rice,cereals,Alluvia or loamy and clayey soil,7.7,150,22.47,"irrigated,rainfall",1745,63.48
5,rice,cereals,Alluvia or loamy and clayey soil,6.6,150,21.58,"irrigated,rainfall",1093,77.56
6,rice,cereals,Alluvia or loamy and clayey soil,6.5,150,29.43,"irrigated,rainfall",1970,65.89
7,rice,cereals,Alluvia or loamy and clayey soil,6.6,150,22.69,"irrigated,rainfall",1131,68.0
8,rice,cereals,Alluvia or loamy and clayey soil,5.7,150,25.29,"irrigated,rainfall",1926,61.96
9,rice,cereals,Alluvia or loamy and clayey soil,5.4,150,31.38,"irrigated,rainfall",2413,66.53


In [4]:
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

# Column groups: numeric features are min-max scaled, categorical features are
# one-hot encoded.
numerical_columns = ["SOIL_PH", "CROP_DURATION", "TEMP", "WATER_REQUIRED", "RELATIVE_HUMIDITY"]
categorical_columns = ["CROPS", "WATER_SOURCE", "SOIL", "TYPE_OF_CROP"]

# Scale every numeric column into [0, 1]. The fitted `scaler` is reused later
# to map generated rows back to the original units.
# NOTE: this overwrites the raw numeric values inside `crop_data`; re-running
# this cell without reloading the CSV would fit the scaler on already-scaled
# data.
scaler = MinMaxScaler()
crop_data[numerical_columns] = scaler.fit_transform(crop_data[numerical_columns])

# One-hot encode the categorical columns. drop='first' removes the first
# category of each column, so a column holding a single distinct value
# contributes no output columns at all.
encoder = OneHotEncoder(drop="first")
categorical_encoded = encoder.fit_transform(crop_data[categorical_columns])
# The `sparse` constructor keyword was renamed to `sparse_output` in newer
# scikit-learn releases; densifying the result here works with either name.
if hasattr(categorical_encoded, "toarray"):
    categorical_encoded = categorical_encoded.toarray()
# `get_feature_names` was removed from newer scikit-learn; prefer
# `get_feature_names_out` and fall back only on old installations.
feature_names_fn = getattr(encoder, "get_feature_names_out", None) or encoder.get_feature_names
categorical_encoded_df = pd.DataFrame(
    categorical_encoded,
    columns=feature_names_fn(categorical_columns),
    index=crop_data.index,  # align rows with crop_data so concat cannot introduce NaNs
)

# Final feature matrix: scaled numeric columns first, one-hot columns after.
normalized_data = pd.concat([crop_data[numerical_columns], categorical_encoded_df], axis=1)




In [5]:
# GAN parameters
SEED = 42  # fixed so weight initialisation and noise sampling repeat on Restart & Run All
np.random.seed(SEED)
tf.random.set_seed(SEED)

latent_dim = 100                    # length of the noise vector fed to the generator
num_samples = len(normalized_data)  # number of real rows available
epochs = 100                        # number of training iterations
batch_size = 64                     # rows per real / generated batch

In [6]:
# Generator: turns a latent noise vector into one synthetic feature row.
# The sigmoid on the last layer keeps every output inside (0, 1), and the
# output width equals the number of columns in `normalized_data`.
generator_layers = [
    keras.Input(shape=(latent_dim,)),
    layers.Dense(units=256, activation="relu"),
    layers.Dense(units=512, activation="relu"),
    layers.Dense(units=len(normalized_data.columns), activation="sigmoid"),
]
generator = keras.Sequential(generator_layers)

In [7]:
# Discriminator: takes one feature row (as wide as `normalized_data`) and
# emits a single sigmoid score used as a binary real-vs-generated output.
discriminator_layers = [
    keras.Input(shape=(len(normalized_data.columns),)),
    layers.Dense(units=512, activation="relu"),
    layers.Dense(units=256, activation="relu"),
    layers.Dense(units=1, activation="sigmoid"),
]
discriminator = keras.Sequential(discriminator_layers)

In [8]:
# Combined GAN: the generator feeding straight into the discriminator.
# Statement order matters here: the discriminator is compiled while it is
# still trainable, and only then frozen before the combined model is
# compiled -- intended so that training `gan` adjusts the generator only.
discriminator.compile(optimizer="adam", loss="binary_crossentropy")
discriminator.trainable = False

gan_input = keras.Input(shape=(latent_dim,))
gan_output = discriminator(generator(gan_input))
gan = keras.Model(inputs=gan_input, outputs=gan_output)
gan.compile(optimizer="adam", loss="binary_crossentropy")


In [9]:
# Training loop: every iteration performs one discriminator update on a real
# batch, one on a generated batch, and one generator update through `gan`.

# Real rows are taken from `normalized_data`, the same feature matrix both
# networks were sized from, so the batch width always matches the
# discriminator input (including any one-hot columns). Built once because it
# does not change between iterations.
real_features = normalized_data.values.astype(np.float32)

# Target labels are constant: 1 for real rows, 0 for generated rows.
real_data_label = np.ones((batch_size, 1), dtype=np.float32)
fake_data_label = np.zeros((batch_size, 1), dtype=np.float32)

for epoch in range(epochs):
    # Generated batch for the discriminator step; verbose=0 suppresses the
    # per-call progress bar that predict() would otherwise print each epoch.
    noise = np.random.normal(0, 1, size=(batch_size, latent_dim))
    generated_data = generator.predict(noise, verbose=0)

    # Sample real rows with replacement, so batch_size may exceed the number
    # of available rows.
    real_data_indices = np.random.choice(len(real_features), batch_size)
    real_data = real_features[real_data_indices]

    # Train discriminator
    d_loss_real = discriminator.train_on_batch(real_data, real_data_label)
    d_loss_fake = discriminator.train_on_batch(generated_data, fake_data_label)

    # Train generator (via GAN model): fresh noise is labelled as "real" so
    # the loss rewards samples the discriminator scores as real.
    noise = np.random.normal(0, 1, size=(batch_size, latent_dim)).astype(np.float32)
    g_loss = gan.train_on_batch(noise, real_data_label)

    # Log every 100th epoch and always the final one, so short runs still
    # report where training ended.
    if epoch % 100 == 0 or epoch == epochs - 1:
        print(f"Epoch {epoch}: D Loss Real: {d_loss_real:.4f}, D Loss Fake: {d_loss_fake:.4f}, G Loss: {g_loss:.4f}")




Epoch 0: D Loss Real: 0.6850, D Loss Fake: 0.8019, G Loss: 0.6217


In [10]:
# Draw fresh latent vectors from a standard normal distribution and run them
# through the generator to obtain the synthetic rows.
num_synthetic_samples = 50
noise = np.random.normal(
    loc=0,
    scale=1,
    size=(num_synthetic_samples, latent_dim),
)
synthetic_data = generator.predict(noise)



In [11]:
# Inspect the raw generator output (one row per sample), before it is
# denormalized in the next step.
synthetic_data

array([[6.38511324e-08, 5.40746839e-07, 4.66276084e-08, 1.06590456e-07,
        1.90399005e-04],
       [2.37441213e-07, 1.12554972e-06, 5.45028023e-08, 3.17165018e-07,
        4.35979746e-04],
       [2.12324352e-07, 1.20159120e-06, 1.77832248e-07, 3.77697717e-07,
        4.30264132e-04],
       [1.45453271e-06, 8.15512431e-06, 9.74481736e-07, 1.79603546e-06,
        1.72385084e-03],
       [1.05864865e-06, 5.69867780e-06, 3.03316796e-07, 1.26570160e-06,
        1.06418121e-03],
       [1.38833627e-07, 1.34786956e-06, 7.54645413e-08, 1.91786881e-07,
        6.08074246e-04],
       [5.44944533e-06, 9.87878502e-06, 1.92133029e-06, 5.99117493e-06,
        1.35883328e-03],
       [3.29854606e-06, 1.05219851e-05, 1.74408240e-06, 4.87435591e-06,
        2.14578095e-03],
       [2.53604782e-07, 1.42587851e-06, 1.13460622e-07, 4.55123967e-07,
        6.38896658e-04],
       [9.13699068e-07, 5.22389064e-06, 3.77664236e-07, 1.21441337e-06,
        6.88746804e-04],
       [2.69480893e-06, 5.8897

In [12]:
# Denormalize synthetic data
# Only the leading columns were min-max scaled (the numeric columns are
# concatenated first in `normalized_data`); any one-hot columns after them are
# copied through unchanged. Broadcasting the scaler's ranges over the whole
# array would fail as soon as such extra columns exist.
num_scaled_columns = scaler.data_min_.shape[0]
scaled_range = scaler.data_max_ - scaler.data_min_
synthetic_data_denormalized = synthetic_data.astype(np.float64)  # astype returns a new array
synthetic_data_denormalized[:, :num_scaled_columns] = (
    synthetic_data[:, :num_scaled_columns] * scaled_range
) + scaler.data_min_

In [13]:
# Decimal precision to keep for each numeric column of the synthetic rows.
decimal_places = {
    "SOIL_PH": 1,
    "CROP_DURATION": 0,
    "TEMP": 2,
    "WATER_REQUIRED": 0,
    "RELATIVE_HUMIDITY": 2,
}

# Round on a copy so the unrounded array stays available.
synthetic_data_denormalized_rounded = synthetic_data_denormalized.copy()

for column, places in decimal_places.items():
    # Position of this column inside the feature matrix.
    column_index = normalized_data.columns.get_loc(column)
    synthetic_data_denormalized_rounded[:, column_index] = np.round(
        synthetic_data_denormalized[:, column_index], places
    )


In [14]:
# Create a DataFrame from synthetic data
synthetic_df = pd.DataFrame(synthetic_data_denormalized_rounded, columns=normalized_data.columns)

# Re-attach the categorical columns as constants, using the same names and
# positions as the source table:
# CROPS, TYPE_OF_CROP, SOIL, SOIL_PH, CROP_DURATION, TEMP, WATER_SOURCE, ...
synthetic_df.insert(0, "CROPS", "rice")
# .iloc[0] reads the first row by position, independent of the index labels.
synthetic_df.insert(1, "TYPE_OF_CROP", crop_data["TYPE_OF_CROP"].iloc[0])
synthetic_df.insert(2, "SOIL", crop_data["SOIL"].iloc[0])
# Position 6 places WATER_SOURCE right after TEMP, matching the source schema.
synthetic_df.insert(6, "WATER_SOURCE", "irrigated,rainfall")

In [16]:
# Write the synthetic rows to disk; the DataFrame index is left out of the file.
synthetic_csv_path = "synthetic_crop_data.csv"
synthetic_df.to_csv(synthetic_csv_path, index=False)