In [2]:
import numpy as np
import pandas as pd

In [5]:
N = 1000   # Number of samples
SEED = 42  # Fixed seed so a fresh Restart & Run All regenerates the same dataset

# Seed NumPy's global RNG once, before the first draw. Every later
# np.random.* call in the notebook then follows the same reproducible stream.
np.random.seed(SEED)

# Continuous features, each drawn uniformly from [low, high).
rainfall = np.random.uniform(800, 2000, N)            # mm/year
avg_temp = np.random.uniform(18, 28, N)               # °C
elevation = np.random.uniform(800, 2000, N)           # meters
fertilizer = np.random.uniform(30, 150, N)            # kg/ha
tree_age = np.random.uniform(2, 15, N)                # years

# Categorical features, drawn with the given class probabilities.
irrigation = np.random.choice([0, 1], N, p=[0.6, 0.4])  # binary, P(1) = 0.4
soil_quality = np.random.choice([0, 1, 2], N, p=[0.3, 0.5, 0.2])  # 0=poor, 1=moderate, 2=rich

In [12]:
# Deterministic part of the simulated yield (kg/ha): one additive term per feature.
yield_base = (
    0.005 * rainfall                  # more rain helps
    - 0.1 * (avg_temp - 22) ** 2      # quadratic penalty away from the ~22 °C optimum
    + 0.03 * fertilizer               # more fertilizer helps
    + 0.5 * tree_age                  # older trees yield more (linear; no saturation term is modelled)
    + 100 * irrigation                # irrigation boost
    + 200 * soil_quality              # +200 per soil-quality level
)

# Constant 1000 baseline minus a 0.003-per-metre elevation penalty.
# This is exactly what `yield_base -= (0.003 * elevation - 1000)` evaluates to,
# because `*` binds tighter than `-`.
# NOTE(review): if `0.003 * (elevation - 1000)` was intended instead, the 1000
# baseline disappears and many samples would be clipped to 0 below — confirm.
yield_base += 1000 - 0.003 * elevation

# Gaussian measurement noise with standard deviation 200.
noise = np.random.normal(0, 200, N)  # kg/ha

# Yields cannot be negative: clamp the lower bound at 0, no upper bound.
yield_kg_per_ha = np.clip(yield_base + noise, 0, None)

In [10]:
# Collect the sampled features and the simulated target as (column, values)
# pairs; the resulting frame keeps this column order.
_named_columns = (
    ("rainfall_mm", rainfall),
    ("avg_temp", avg_temp),
    ("elevation_m", elevation),
    ("fertilizer_kg", fertilizer),
    ("tree_age_yrs", tree_age),
    ("irrigation", irrigation),
    ("soil_quality", soil_quality),
    ("yield_kg_per_ha", yield_kg_per_ha),
)
df = pd.DataFrame(dict(_named_columns))

# Persist the dataset next to the notebook, without the integer row index.
df.to_csv("data.csv", index=False)