In [2]:
import numpy as np
import pandas as pd

def generate_initial_soil(soil_type):
    """Draw random starting soil properties for one soil type.

    Each property is sampled independently with ``np.random.uniform`` between
    the (low, high) bounds stored in ``INITIAL_SOIL_RANGES[soil_type]``.

    Args:
        soil_type: Key into ``INITIAL_SOIL_RANGES``.

    Returns:
        dict with the keys ``N_initial``, ``P_initial``, ``K_initial``,
        ``OM_initial`` and ``pH_initial``, in that order.

    Raises:
        KeyError: if ``soil_type`` or one of the property keys is missing
            from ``INITIAL_SOIL_RANGES``.
    """
    bounds = INITIAL_SOIL_RANGES[soil_type]

    # (output column, range key). The tuple order is the order in which
    # random numbers are consumed, so it must stay N, P, K, OM, pH.
    column_sources = (
        ('N_initial', 'N'),
        ('P_initial', 'P'),
        ('K_initial', 'K'),
        ('OM_initial', 'OM'),
        ('pH_initial', 'pH'),
    )
    return {
        column: np.random.uniform(*bounds[key])
        for column, key in column_sources
    }

def apply_amendment(initial_props, amendment):
    """Return post-amendment soil properties.

    For every property, a random additive change is drawn with
    ``np.random.uniform`` from the (low, high) bounds in
    ``AMENDMENT_EFFECTS[amendment]`` and added to the matching initial value.
    ``initial_props`` itself is not modified.

    Args:
        initial_props: Mapping holding ``N_initial``, ``P_initial``,
            ``K_initial``, ``OM_initial`` and ``pH_initial``.
        amendment: Key into ``AMENDMENT_EFFECTS``.

    Returns:
        dict with the keys ``N_after``, ``P_after``, ``K_after``,
        ``OM_after`` and ``pH_after``, in that order.

    Raises:
        KeyError: if ``amendment`` is not in ``AMENDMENT_EFFECTS`` or a
            required key is missing from either mapping.
    """
    deltas = AMENDMENT_EFFECTS[amendment]

    # (result column, starting column, effect key). Order matters: it fixes
    # the sequence of random draws (N, P, K, OM, pH).
    update_plan = (
        ('N_after', 'N_initial', 'N'),
        ('P_after', 'P_initial', 'P'),
        ('K_after', 'K_initial', 'K'),
        ('OM_after', 'OM_initial', 'OM'),
        ('pH_after', 'pH_initial', 'pH'),
    )
    return {
        after: initial_props[before] + np.random.uniform(*deltas[key])
        for after, before, key in update_plan
    }

def calculate_yield(crop, soil_type, amendment):
    """Simulate a crop yield, boosted by the amendment when one is applied.

    A base yield is drawn uniformly from ``BASE_YIELDS[crop][soil_type]``.
    Unless ``amendment`` is the literal ``'None'``, a relative increase is
    drawn uniformly from ``YIELD_INCREASE_FACTORS[amendment]`` and the base
    yield is multiplied by ``1 + increase``.

    Args:
        crop: Key into ``BASE_YIELDS``.
        soil_type: Key into ``BASE_YIELDS[crop]``.
        amendment: Amendment label; ``'None'`` means no boost is applied.

    Returns:
        The simulated yield as a float.

    Raises:
        KeyError: if any of the lookups fails.
    """
    low, high = BASE_YIELDS[crop][soil_type]
    unamended_yield = np.random.uniform(low, high)

    # Untreated samples return early and consume no second random draw.
    if amendment == 'None':
        return unamended_yield

    boost = np.random.uniform(*YIELD_INCREASE_FACTORS[amendment])
    return unamended_yield * (1 + boost)

def generate_sample(num_samples=1000, seed=None):
    """Generate a dataset of soil amendment samples.

    Each sample picks a random soil type, crop and amendment, draws initial
    soil properties, applies the amendment effects and simulates a yield.

    Args:
        num_samples: Number of rows to generate. Values <= 0 produce an
            empty DataFrame.
        seed: Optional seed for reproducible output. When given, the global
            legacy NumPy RNG is re-seeded via ``np.random.seed(seed)`` before
            sampling, which also affects later ``np.random`` calls made
            elsewhere. When ``None`` (the default) the current RNG state is
            used unchanged.

    Returns:
        pandas.DataFrame with one row per sample and the columns
        ``Soil_Type``, ``Crop``, ``Amendment``, the keys returned by
        ``generate_initial_soil`` and ``apply_amendment``, and ``Yield``.
    """
    if seed is not None:
        # The helpers all draw from the global np.random state, so seeding it
        # once here makes the whole dataset repeatable.
        np.random.seed(seed)

    data = []
    for _ in range(num_samples):
        # The order of these draws defines the random stream; keep it stable
        # so a given seed always yields the same dataset.
        soil_type = np.random.choice(SOIL_TYPES)
        crop = np.random.choice(CROPS)
        amendment = np.random.choice(AMENDMENTS)

        # Generate initial soil properties
        initial_props = generate_initial_soil(soil_type)

        # Apply amendment
        amended_props = apply_amendment(initial_props, amendment)

        # Calculate yield
        yield_value = calculate_yield(crop, soil_type, amendment)

        # Combine all data into one flat record per sample
        data.append({
            'Soil_Type': soil_type,
            'Crop': crop,
            'Amendment': amendment,
            **initial_props,
            **amended_props,
            'Yield': yield_value
        })

    # Build the frame once from the list of records instead of growing it.
    return pd.DataFrame(data)

def save_dataset(df, filename='soil_amendment_data.csv'):
    """Write ``df`` to ``filename`` as CSV and print a confirmation line.

    The DataFrame index is not written to the file.

    Args:
        df: DataFrame to export.
        filename: Destination passed straight to ``DataFrame.to_csv``.

    Returns:
        None.
    """
    df.to_csv(path_or_buf=filename, index=False)
    confirmation = f"Dataset saved to {filename} with {len(df)} records"
    print(confirmation)



# Soil Properties Configuration
SOIL_TYPES = [
    'Sandy',
    'Loamy',
    'Clay',
]
CROPS = [
    'Corn',
    'Wheat',
    'Soybean',
    'Rice',
]
# 'None' is the label for an untreated sample: its effects are all (0, 0)
# and calculate_yield applies no boost for it.
# NOTE(review): recent pandas versions appear to parse the literal text
# "None" as a missing value in read_csv by default — confirm how the saved
# CSV is loaded (e.g. keep_default_na=False) before relying on this label.
AMENDMENTS = [
    'Compost',
    'Manure',
    'Biochar',
    'NPK_Fertilizer',
    'Green_Manure',
    'None',
]

# Initial Soil Property Ranges by Soil Type (N:P:K in kg/ha, OM in %)
# Every entry is a (low, high) pair passed to np.random.uniform.
INITIAL_SOIL_RANGES = {
    'Sandy': {
        'N': (50, 100),
        'P': (10, 30),
        'K': (50, 150),
        'OM': (0.5, 2),
        'pH': (4.5, 6.5),
    },
    'Loamy': {
        'N': (100, 200),
        'P': (20, 50),
        'K': (150, 250),
        'OM': (2, 5),
        'pH': (5.5, 7.0),
    },
    'Clay': {
        'N': (150, 250),
        'P': (30, 70),
        'K': (200, 350),
        'OM': (3, 8),
        'pH': (6.0, 8.0),
    },
}

# Amendment Impact Adjustments (Additive changes)
# (low, high) bounds of the amount added to each initial property.
AMENDMENT_EFFECTS = {
    'Compost': {
        'N': (20, 50),
        'P': (10, 30),
        'K': (20, 50),
        'OM': (1.0, 3.0),
        'pH': (0.1, 0.5),
    },
    'Manure': {
        'N': (30, 70),
        'P': (20, 40),
        'K': (30, 70),
        'OM': (1.0, 2.0),
        'pH': (0.1, 0.3),
    },
    'Biochar': {
        'N': (5, 15),
        'P': (5, 15),
        'K': (10, 30),
        'OM': (0.5, 2.0),
        'pH': (0.5, 1.0),
    },
    'NPK_Fertilizer': {
        'N': (100, 200),
        'P': (50, 100),
        'K': (100, 200),
        'OM': (0, 0),
        'pH': (-0.3, -0.1),
    },
    'Green_Manure': {
        'N': (40, 80),
        'P': (10, 20),
        'K': (20, 40),
        'OM': (0.5, 1.5),
        'pH': (0, 0),
    },
    'None': {
        'N': (0, 0),
        'P': (0, 0),
        'K': (0, 0),
        'OM': (0, 0),
        'pH': (0, 0),
    },
}

# Yield Parameters (ton/ha)
# (low, high) base-yield bounds per crop and soil type.
BASE_YIELDS = {
    'Corn': {
        'Sandy': (3, 5),
        'Loamy': (5, 8),
        'Clay': (4, 7),
    },
    'Wheat': {
        'Sandy': (2, 4),
        'Loamy': (4, 6),
        'Clay': (3, 5),
    },
    'Soybean': {
        'Sandy': (1.5, 3),
        'Loamy': (2.5, 4),
        'Clay': (2, 3.5),
    },
    'Rice': {
        'Sandy': (2, 3.5),
        'Loamy': (3.5, 6),
        'Clay': (4, 7),
    },
}

# (low, high) bounds of the relative yield increase per amendment; the
# yield is multiplied by 1 + increase.
YIELD_INCREASE_FACTORS = {
    'Compost': (0.10, 0.30),
    'Manure': (0.15, 0.35),
    'Biochar': (0.05, 0.20),
    'NPK_Fertilizer': (0.40, 0.60),
    'Green_Manure': (0.10, 0.25),
    'None': (0, 0),
}

# Seed the global NumPy RNG so that Restart & Run All regenerates the same
# dataset; every helper above draws from np.random's global state.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Show floats with two decimals in rendered DataFrames.
pd.set_option('display.float_format', '{:.2f}'.format)

# Generate the synthetic dataset and preview the first rows.
df = generate_sample(num_samples=1000)
display(df.head(10))

Unnamed: 0,Soil_Type,Crop,Amendment,N_initial,P_initial,K_initial,OM_initial,pH_initial,N_after,P_after,K_after,OM_after,pH_after,Yield
0,Loamy,Wheat,Green_Manure,163.6,20.83,196.63,3.9,6.55,218.5,39.96,217.29,5.13,6.55,6.15
1,Clay,Rice,,161.78,44.7,307.25,5.3,7.54,161.78,44.7,307.25,5.3,7.54,5.68
2,Sandy,Corn,Green_Manure,57.77,22.87,58.01,1.2,5.52,118.86,33.8,96.93,2.27,5.52,4.4
3,Clay,Soybean,NPK_Fertilizer,204.57,39.54,295.39,5.78,7.33,340.72,98.08,486.89,5.78,7.03,3.31
4,Loamy,Wheat,Biochar,131.96,39.49,170.77,2.82,5.54,145.62,47.08,186.64,4.8,6.39,5.86
5,Clay,Soybean,Green_Manure,222.37,33.23,256.71,3.6,7.25,264.82,48.72,285.17,4.32,7.25,3.97
6,Clay,Corn,NPK_Fertilizer,210.46,58.74,209.77,5.29,7.67,356.17,148.92,402.63,5.29,7.49,7.29
7,Clay,Corn,NPK_Fertilizer,161.58,45.54,241.39,3.73,7.68,332.9,121.44,366.84,3.73,7.4,9.67
8,Loamy,Wheat,NPK_Fertilizer,147.39,34.51,209.33,4.53,6.02,258.03,120.37,408.16,4.53,5.73,9.29
9,Clay,Rice,Manure,243.97,30.14,309.53,6.13,7.35,286.6,65.58,366.3,8.04,7.51,5.69


In [3]:
# Export the generated samples to CSV under an explicit file name.
save_dataset(df, filename='soil_amendment_impact.csv')

Dataset saved to soil_amendment_impact.csv with 1000 records
