In [68]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [69]:
# Load the rule-based crop table. `index_col=False` keeps pandas from using
# the first CSV column as the row index, so a stray "Unnamed: 0" column
# (presumably an index written by an earlier `to_csv` -- TODO confirm) shows
# up as a regular column and is removed here when present.
org_data = pd.read_csv("rule_based_data.csv", index_col=False).drop(
    columns=["Unnamed: 0"], errors="ignore"
)

In [70]:
# Display the loaded crop table (pandas truncates long frames in the output).
org_data

Unnamed: 0,CROPS,TYPE_OF_CROP,SOIL,SEASON,SOWN,HARVESTED,SOIL_PH,CROP_DURATION,TEMP,WATER_SOURCE,WATER_REQUIRED,RELATIVE_HUMIDITY
0,rice,cereals,Alluvia or loamy and clayey soil,kharif,Jun-Jul,Sep-Oct,6.8,150,23.36,"irrigated,rainfall",1276,72.14
1,rice,cereals,Alluvia or loamy and clayey soil,kharif,Jun-Jul,Sep-Oct,7.5,150,30.47,"irrigated,rainfall",2126,77.16
2,rice,cereals,Alluvia or loamy and clayey soil,kharif,Jun-Jul,Sep-Oct,5.8,150,30.37,"irrigated,rainfall",1970,68.62
3,rice,cereals,Alluvia or loamy and clayey soil,kharif,Jun-Jul,Sep-Oct,7.2,150,38.34,"irrigated,rainfall",1849,74.92
4,rice,cereals,Alluvia or loamy and clayey soil,kharif,Jun-Jul,Sep-Oct,5.8,150,38.18,"irrigated,rainfall",1685,72.60
...,...,...,...,...,...,...,...,...,...,...,...,...
2845,small onion,bulbvegetables,Sandy loam,Zaid,Mar-Jul,Mar-Jul,6.9,89,19.96,"irrigated,rainfall",661,67.01
2846,small onion,bulbvegetables,Sandy loam,Zaid,Mar-Jul,Mar-Jul,6.4,73,20.46,"irrigated,rainfall",693,72.78
2847,small onion,bulbvegetables,Sandy loam,Zaid,Mar-Jul,Mar-Jul,6.7,87,18.54,"irrigated,rainfall",671,67.43
2848,small onion,bulbvegetables,Sandy loam,Zaid,Mar-Jul,Mar-Jul,6.8,83,21.54,"irrigated,rainfall",722,74.13


In [71]:
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

# Train one small GAN per crop on that crop's numerical features and sample
# synthetic rows from it. The result is collected in `synthetic_df`.
#
# Only the numerical columns are modelled. The categorical columns are copied
# from a single reference row of the crop, so they are constant per crop in
# the output. (The previous one-hot branch produced zero columns for a
# single-crop slice and would have broken the generator/real-batch shape
# match if it ever produced any, so it has been removed.)

# --- Configuration -----------------------------------------------------------
# Rows per crop in `org_data`; crop `i` is assumed to occupy one contiguous
# block of `offset` rows -- TODO confirm against rule_based_data.csv.
offset = 50
crop_indices = range(25, 26)   # which crop blocks to process in this run
latent_dim = 100               # size of the generator's noise vector
epochs = 1000                  # training iterations (one batch per iteration)
batch_size = 64
num_synthetic_samples = 100    # synthetic rows generated per crop
log_every = 100                # print losses every N iterations

numerical_columns = ["SOIL_PH", "CROP_DURATION", "TEMP", "WATER_REQUIRED", "RELATIVE_HUMIDITY"]
categorical_columns = ["CROPS", "WATER_SOURCE", "SOIL", "TYPE_OF_CROP", "SEASON", "SOWN", "HARVESTED"]
decimal_places = {"SOIL_PH": 1, "CROP_DURATION": 0, "TEMP": 2, "WATER_REQUIRED": 0, "RELATIVE_HUMIDITY": 2}

# (insert position, output column name, source column in org_data)
# NOTE(review): 'WATERSOURCE' (name) and position 8 (before TEMP) differ from
# org_data's 'WATER_SOURCE' (after TEMP). Kept unchanged because the frame is
# later appended header-less to a CSV; confirm the target file expects this.
constant_column_layout = [
    (0, "CROPS", "CROPS"),
    (1, "TYPE_OF_CROP", "TYPE_OF_CROP"),
    (2, "SOIL", "SOIL"),
    (3, "SEASON", "SEASON"),
    (4, "SOWN", "SOWN"),
    (5, "HARVESTED", "HARVESTED"),
    (8, "WATERSOURCE", "WATER_SOURCE"),
]


def _build_gan(n_features, latent_dim):
    """Build and compile the generator, discriminator and combined GAN.

    Args:
        n_features: Width of one data row (generator output / discriminator input).
        latent_dim: Length of the noise vector fed to the generator.

    Returns:
        Tuple ``(generator, discriminator, gan)``. ``gan`` maps noise to the
        discriminator's verdict on the generated row.
    """
    generator = keras.Sequential([
        layers.Input(shape=(latent_dim,)),
        layers.Dense(256, activation="relu"),
        layers.Dense(512, activation="relu"),
        # Sigmoid keeps outputs in [0, 1], matching the MinMax-scaled real data.
        layers.Dense(n_features, activation="sigmoid"),
    ])
    discriminator = keras.Sequential([
        layers.Input(shape=(n_features,)),
        layers.Dense(512, activation="relu"),
        layers.Dense(256, activation="relu"),
        layers.Dense(1, activation="sigmoid"),  # probability that the row is real
    ])
    discriminator.compile(loss="binary_crossentropy", optimizer="adam")
    # Freeze the discriminator before compiling the combined model so that
    # generator updates (through `gan`) are not meant to change its weights.
    discriminator.trainable = False
    gan_input = keras.Input(shape=(latent_dim,))
    gan = keras.Model(gan_input, discriminator(generator(gan_input)))
    gan.compile(loss="binary_crossentropy", optimizer="adam")
    return generator, discriminator, gan


def _train_gan(generator, discriminator, gan, real_features, epochs, batch_size, latent_dim, log_every):
    """Run the alternating discriminator/generator updates.

    Args:
        generator, discriminator, gan: Models returned by ``_build_gan``.
        real_features: float32 array of scaled real rows, shape (n_rows, n_features).
        epochs: Number of iterations; each trains on one real and one fake batch.
        batch_size: Rows per batch (real rows are sampled with replacement).
        latent_dim: Length of the generator's noise vector.
        log_every: Print the three losses every this many iterations.
    """
    real_label = np.ones((batch_size, 1), dtype=np.float32)
    fake_label = np.zeros((batch_size, 1), dtype=np.float32)

    for epoch in range(epochs):
        noise = np.random.normal(0, 1, size=(batch_size, latent_dim)).astype(np.float32)
        # Direct call instead of `predict`: avoids per-call overhead on small batches.
        generated_batch = generator(noise, training=False).numpy()

        batch_rows = np.random.choice(len(real_features), batch_size)
        real_batch = real_features[batch_rows]

        # Train discriminator on one real and one generated batch.
        d_loss_real = discriminator.train_on_batch(real_batch, real_label)
        d_loss_fake = discriminator.train_on_batch(generated_batch, fake_label)

        # Train generator via the combined model: it is rewarded when the
        # discriminator labels its output as real.
        noise = np.random.normal(0, 1, size=(batch_size, latent_dim)).astype(np.float32)
        g_loss = gan.train_on_batch(noise, real_label)

        if epoch % log_every == 0:
            print(f"Epoch {epoch}: D Loss Real: {float(d_loss_real):.4f}, D Loss Fake: {float(d_loss_fake):.4f}, G Loss: {float(g_loss):.4f}")


synthetic_frames = []
for i in crop_indices:
    print(i)
    # NOTE(review): for i != 0 the block starts one row late (i*offset + 1), so
    # those crops train on offset-1 rows. Preserved as-is; confirm whether the
    # crop boundaries in the CSV really sit at i*offset + 1.
    start = i * offset if i == 0 else i * offset + 1
    stop = (i + 1) * offset
    # `.copy()` gives an independent frame (no SettingWithCopyWarning).
    crop_data = org_data.iloc[start:stop].copy()
    if crop_data.empty:
        raise ValueError(f"No rows found for crop index {i} (rows {start}:{stop}).")

    # Scale the numerical features to [0, 1]; `crop_data` itself stays unscaled.
    scaler = MinMaxScaler()
    real_features = scaler.fit_transform(crop_data[numerical_columns]).astype(np.float32)

    generator, discriminator, gan = _build_gan(len(numerical_columns), latent_dim)
    _train_gan(generator, discriminator, gan, real_features, epochs, batch_size, latent_dim, log_every)

    # Generate synthetic crop data and map it back to the original units.
    noise = np.random.normal(0, 1, size=(num_synthetic_samples, latent_dim)).astype(np.float32)
    synthetic_data = generator(noise, training=False).numpy()
    # Manual inverse of the MinMax scaling; a constant column (max == min)
    # maps back to exactly that constant.
    synthetic_data_denormalized = (synthetic_data * (scaler.data_max_ - scaler.data_min_)) + scaler.data_min_
    # Guard against floating-point drift outside the range observed for this crop.
    synthetic_data_denormalized = np.clip(synthetic_data_denormalized, scaler.data_min_, scaler.data_max_)

    # Round each column to the precision used in the source table.
    epoch_synthetic_df = pd.DataFrame(synthetic_data_denormalized, columns=numerical_columns).round(decimal_places)

    # Categorical values are copied from one reference row of this crop.
    label_row = org_data.iloc[(offset * i) + 1]
    for position, out_name, src_name in constant_column_layout:
        epoch_synthetic_df.insert(position, out_name, label_row[src_name])

    synthetic_frames.append(epoch_synthetic_df)

# Concatenate once at the end; `DataFrame.append` no longer exists in pandas 2.x.
synthetic_df = pd.concat(synthetic_frames, ignore_index=True) if synthetic_frames else pd.DataFrame()
    

25


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)


Epoch 0: D Loss Real: 0.6865, D Loss Fake: 0.8092, G Loss: 0.6055
Epoch 100: D Loss Real: 0.0181, D Loss Fake: 0.0395, G Loss: 3.2808
Epoch 200: D Loss Real: 0.0022, D Loss Fake: 0.0015, G Loss: 6.5416
Epoch 300: D Loss Real: 0.9811, D Loss Fake: 0.2136, G Loss: 1.6458
Epoch 400: D Loss Real: 0.2094, D Loss Fake: 0.0229, G Loss: 3.7744
Epoch 500: D Loss Real: 0.7275, D Loss Fake: 0.0049, G Loss: 5.3176
Epoch 600: D Loss Real: 0.0593, D Loss Fake: 0.0178, G Loss: 4.0316
Epoch 700: D Loss Real: 0.7519, D Loss Fake: 0.7005, G Loss: 0.9441
Epoch 800: D Loss Real: 1.0357, D Loss Fake: 0.7478, G Loss: 1.5742
Epoch 900: D Loss Real: 0.3369, D Loss Fake: 0.3443, G Loss: 1.5207


In [72]:
# Display the generated synthetic rows (pandas truncates long frames in the output).
synthetic_df

Unnamed: 0,CROPS,TYPE_OF_CROP,SOIL,SEASON,SOWN,HARVESTED,SOIL_PH,CROP_DURATION,WATERSOURCE,TEMP,WATER_REQUIRED,RELATIVE_HUMIDITY
0,sugarbeet,sugar crops,Well drained sandy loam and clayey loam soils,kharif,Jun-Jul,Sep-Oct,6.0,180.0,irrigated,24.57,601.0,72.32
1,sugarbeet,sugar crops,Well drained sandy loam and clayey loam soils,kharif,Jun-Jul,Sep-Oct,7.8,162.0,irrigated,15.20,779.0,69.79
2,sugarbeet,sugar crops,Well drained sandy loam and clayey loam soils,kharif,Jun-Jul,Sep-Oct,7.3,170.0,irrigated,17.51,706.0,69.10
3,sugarbeet,sugar crops,Well drained sandy loam and clayey loam soils,kharif,Jun-Jul,Sep-Oct,7.0,171.0,irrigated,17.46,703.0,67.73
4,sugarbeet,sugar crops,Well drained sandy loam and clayey loam soils,kharif,Jun-Jul,Sep-Oct,7.3,170.0,irrigated,17.16,712.0,68.81
...,...,...,...,...,...,...,...,...,...,...,...,...
95,sugarbeet,sugar crops,Well drained sandy loam and clayey loam soils,kharif,Jun-Jul,Sep-Oct,6.6,179.0,irrigated,15.81,603.0,77.53
96,sugarbeet,sugar crops,Well drained sandy loam and clayey loam soils,kharif,Jun-Jul,Sep-Oct,7.3,170.0,irrigated,17.51,706.0,69.10
97,sugarbeet,sugar crops,Well drained sandy loam and clayey loam soils,kharif,Jun-Jul,Sep-Oct,6.0,180.0,irrigated,21.81,601.0,71.30
98,sugarbeet,sugar crops,Well drained sandy loam and clayey loam soils,kharif,Jun-Jul,Sep-Oct,7.3,170.0,irrigated,17.52,705.0,69.12


In [73]:
# Save synthetic data to CSV
# synthetic_df.to_csv("synthetic_crop_data.csv")

In [75]:

# Append the synthetic rows to the accumulated CSV without a header line.
# The DataFrame index is written as the first column (pandas default
# `index=True`). Re-running this cell appends the same rows again.
synthetic_df.to_csv("crop_data2.csv", header=False, mode="a")