In [3]:
import pandas as pd
import numpy as np

np.random.seed(42)  # seed NumPy's global RNG so each run of this cell draws the same random numbers

def _clipped_normal(rng, loc, scale, low, high, size):
    """Draw ``size`` normal samples from ``rng`` and clamp them to [low, high]."""
    return np.clip(rng.normal(loc=loc, scale=scale, size=size), low, high)


def generate_soil_crop_data(num_samples: int = 100, random_state=None) -> pd.DataFrame:
    """Generate a synthetic soil-health / crop-yield dataset.

    Every feature is sampled independently; crop yield is then computed as a
    linear combination of nitrogen, moisture, organic matter and rainfall,
    penalised by the distance of the temperature from 25 °C, plus Gaussian
    noise. Soil pH, phosphorus, potassium and location are generated but do
    not feed into the yield.

    Parameters
    ----------
    num_samples : int, default 100
        Number of rows to generate.
    random_state : None, int, numpy.random.RandomState or numpy.random.Generator
        Source of randomness. ``None`` (the default) draws from NumPy's
        global RNG, so callers that seed it with ``np.random.seed`` get the
        same numbers as before this parameter existed. An int is used as the
        seed of a private ``RandomState``, which yields the same stream as
        seeding the global RNG with that value but leaves global state
        untouched. An existing ``RandomState``/``Generator`` is used as is.

    Returns
    -------
    pandas.DataFrame
        ``num_samples`` rows and ten columns: ``Location`` followed by nine
        rounded numeric columns, the last being ``Crop Yield (tons/ha)``.
    """
    if random_state is None:
        # The np.random module exposes the same choice/normal/uniform
        # functions backed by the global RNG, preserving legacy behaviour.
        rng = np.random
    elif isinstance(random_state, (np.random.RandomState, np.random.Generator)):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    # NOTE: the order of the draws below is significant -- changing it would
    # change the dataset produced for a given seed.

    # Locations (categorical), sampled uniformly with replacement.
    locations = ['North', 'South', 'East', 'West', 'Central']
    location_data = rng.choice(locations, size=num_samples)

    # Soil pH: centred on 6.5, clamped to [4.5, 8.5].
    soil_pH = _clipped_normal(rng, 6.5, 0.7, 4.5, 8.5, num_samples)

    # Soil moisture (%): uniform over [10, 50).
    soil_moisture_percentage = rng.uniform(10, 50, size=num_samples)

    # Soil organic matter (%): centred on 4, clamped to [1, 15].
    soil_organic_matter_percentage = _clipped_normal(rng, 4, 1.5, 1, 15, num_samples)

    # Soil nitrogen (mg/kg): centred on 1500, clamped to [300, 3500].
    soil_nitrogen_content = _clipped_normal(rng, 1500, 500, 300, 3500, num_samples)

    # Soil phosphorus (mg/kg): centred on 40, clamped to [5, 100].
    soil_phosphorus_content = _clipped_normal(rng, 40, 15, 5, 100, num_samples)

    # Soil potassium (mg/kg): centred on 300, clamped to [50, 700].
    soil_potassium_content = _clipped_normal(rng, 300, 100, 50, 700, num_samples)

    # Average daily growing-season temperature (°C): centred on 25,
    # clamped to [18, 32].
    average_daily_temperature = _clipped_normal(rng, 25, 4, 18, 32, num_samples)

    # Total growing-season rainfall (mm): centred on 800, clamped to
    # [400, 1200].
    total_rainfall = _clipped_normal(rng, 800, 250, 400, 1200, num_samples)

    # Crop yield (tons per hectare): basic linear model with noise. The term
    # order is kept as in the original so floating-point results are
    # bit-for-bit identical.
    crop_yield = (
        0.002 * soil_nitrogen_content +
        0.05 * soil_moisture_percentage +
        0.1 * soil_organic_matter_percentage +
        0.0015 * total_rainfall -
        0.03 * np.abs(average_daily_temperature - 25) +  # optimum at 25 °C
        rng.normal(0, 0.3, num_samples)  # random noise
    )
    # Enforce a floor of 0.5 t/ha so the yield can never be negative.
    crop_yield = np.clip(crop_yield, 0.5, None)

    # Assemble the frame, rounding each column to its reporting precision.
    df = pd.DataFrame({
        'Location': location_data,
        'Soil pH': soil_pH.round(2),
        'Soil Moisture (%)': soil_moisture_percentage.round(1),
        'Soil Organic Matter (%)': soil_organic_matter_percentage.round(2),
        'Soil Nitrogen Content (mg/kg)': soil_nitrogen_content.round(0),
        'Soil Phosphorus Content (mg/kg)': soil_phosphorus_content.round(1),
        'Soil Potassium Content (mg/kg)': soil_potassium_content.round(0),
        'Average Daily Temperature (⁰C)': average_daily_temperature.round(1),
        'Total Rainfall (mm)': total_rainfall.round(0),
        'Crop Yield (tons/ha)': crop_yield.round(2),
    })

    return df

# Build the 100-row synthetic dataset, save it to disk without the index
# column, and preview the first ten rows in the notebook.
soil_crop_data = generate_soil_crop_data(num_samples=100)
soil_crop_data.to_csv('Soil Health.csv', index=False)
display(soil_crop_data.head(n=10))


Unnamed: 0,Location,Soil pH,Soil Moisture (%),Soil Organic Matter (%),Soil Nitrogen Content (mg/kg),Soil Phosphorus Content (mg/kg),Soil Potassium Content (mg/kg),Average Daily Temperature (⁰C),Total Rainfall (mm),Crop Yield (tons/ha)
0,West,5.87,38.0,3.19,2407.0,31.6,480.0,26.7,1200.0,8.72
1,Central,6.42,43.9,5.17,1702.0,78.1,293.0,23.4,822.0,7.6
2,East,7.53,44.3,3.93,1545.0,61.0,357.0,20.1,601.0,6.53
3,Central,5.71,26.2,4.78,1541.0,34.2,383.0,20.6,1200.0,6.53
4,Central,7.17,45.5,3.41,1371.0,57.2,125.0,27.2,954.0,6.92
5,South,7.71,44.0,4.66,2092.0,47.9,562.0,19.6,500.0,7.39
6,East,6.82,47.4,6.5,1247.0,43.5,257.0,30.6,539.0,6.15
7,East,5.32,41.4,2.89,822.0,52.0,349.0,22.2,744.0,4.87
8,East,6.73,36.8,3.84,709.0,40.8,364.0,27.5,874.0,4.4
9,Central,6.44,33.2,3.27,1540.0,43.6,219.0,22.5,956.0,6.6
