In [25]:
import pandas as pd
import numpy as np
from random import choice, randint
from datetime import datetime

np.random.seed(42)

Crop charactristics

In [26]:
crop_characteristics = {
    "Wheat": {
        "N": (100, 130), "P": (40, 60), "K": (60, 90),
        "temp": (15, 20), "humidity": (50, 70), "ph": (6.0, 7.0), "rainfall": (50, 90),
        "duration": 6,
        "avoid_after": ["Wheat", "Barley", "Oat"],       # монокультура и близкородственные
        "recommend_after": ["Corn", "Sunflower", "Soybean"],
        "min_gap_months": 24,
        "prob": 0.25
    },
    "Barley": {
        "N": (90, 120), "P": (35, 55), "K": (50, 80),
        "temp": (14, 19), "humidity": (50, 70), "ph": (6.0, 7.0), "rainfall": (50, 80),
        "duration": 5,
        "avoid_after": ["Barley", "Oat", "Wheat"],
        "recommend_after": ["Wheat", "Pea", "Rapeseed"],
        "min_gap_months": 24,
        "prob": 0.10
    },
    "Oat": {
        "N": (80, 110), "P": (30, 50), "K": (50, 70),
        "temp": (12, 18), "humidity": (55, 75), "ph": (5.8, 6.8), "rainfall": (50, 80),
        "duration": 5,
        "avoid_after": ["Oat", "Barley"],
        "recommend_after": ["Wheat", "Pea"],
        "min_gap_months": 24,
        "prob": 0.05
    },
    "Corn": {
        "N": (140, 170), "P": (50, 70), "K": (100, 140),
        "temp": (18, 25), "humidity": (60, 70), "ph": (6.0, 6.8), "rainfall": (70, 120),
        "duration": 7,
        "avoid_after": ["Corn"],
        "recommend_after": ["Wheat", "Soybean", "Pea"],
        "min_gap_months": 36,
        "prob": 0.08
    },
    "Sunflower": {
        "N": (110, 140), "P": (40, 60), "K": (90, 120),
        "temp": (18, 24), "humidity": (55, 65), "ph": (6.0, 7.0), "rainfall": (60, 100),
        "duration": 7,
        "avoid_after": ["Sunflower"],
        "recommend_after": ["Wheat", "Barley", "Corn"],
        "min_gap_months": 36,
        "prob": 0.15
    },
    "Soybean": {
        "N": (130, 150), "P": (50, 70), "K": (80, 110),
        "temp": (18, 22), "humidity": (60, 70), "ph": (6.0, 6.8), "rainfall": (60, 100),
        "duration": 6,
        "avoid_after": ["Soybean"],
        "recommend_after": ["Corn", "Wheat"],
        "min_gap_months": 24,
        "prob": 0.05
    },
    "Rapeseed": {
        "N": (90, 120), "P": (40, 60), "K": (70, 100),
        "temp": (15, 20), "humidity": (55, 65), "ph": (6.0, 6.8), "rainfall": (50, 90),
        "duration": 6,
        "avoid_after": ["Rapeseed"],
        "recommend_after": ["Wheat", "Barley"],
        "min_gap_months": 36,
        "prob": 0.05
    },
    "SugarBeet": {
        "N": (150, 170), "P": (60, 80), "K": (120, 150),
        "temp": (18, 22), "humidity": (60, 70), "ph": (6.0, 6.8), "rainfall": (70, 120),
        "duration": 7,
        "avoid_after": ["SugarBeet"],
        "recommend_after": ["Wheat", "Barley", "Rapeseed"],
        "min_gap_months": 36,
        "prob": 0.05
    },
    "Pea": {
        "N": (80, 110), "P": (30, 50), "K": (50, 70),
        "temp": (15, 20), "humidity": (55, 70), "ph": (6.0, 6.8), "rainfall": (50, 90),
        "duration": 5,
        "avoid_after": ["Pea"],
        "recommend_after": ["Wheat", "Barley"],
        "min_gap_months": 24,
        "prob": 0.05
    },
    "Potato": {
        "N": (120, 150), "P": (50, 70), "K": (100, 130),
        "temp": (16, 20), "humidity": (60, 70), "ph": (5.8, 6.5), "rainfall": (70, 120),
        "duration": 6,
        "avoid_after": ["Potato"],
        "recommend_after": ["Wheat", "Barley", "Rapeseed"],
        "min_gap_months": 36,
        "prob": 0.17
    }
}

Parameters

In [27]:
fields_count = 10000
fields = [f"field_{i}" for i in range(1, fields_count)]
start_year = 2015
end_year = 2025

data = []

Generation

In [28]:
for field in fields:
    current_month = pd.Timestamp(start_year, 3, 1)
    last_crop_info = {"crop": None, "end_month": None, "duration": 0}

    while current_month.year <= end_year:
        available_crops = []
        for crop, char in crop_characteristics.items():
            if last_crop_info["crop"] is None:
                available_crops.append(crop)
            else:
                months_since_last = (current_month - last_crop_info["end_month"]).days / 30 if last_crop_info["end_month"] else 100
                if crop != last_crop_info["crop"] and months_since_last >= char["min_gap_months"] and last_crop_info["crop"] not in char["avoid_after"]:
                    available_crops.append(crop)
        if not available_crops:
            available_crops = list(crop_characteristics.keys())

        crops = [c for c in available_crops]
        probs = [crop_characteristics[c]["prob"] for c in crops]
        probs = np.array(probs) / sum(probs)
        crop = np.random.choice(crops, p=probs)
        char = crop_characteristics[crop]
        duration = char["duration"]
        end_month = current_month + pd.DateOffset(months=duration-1)

        n = randint(*char["N"])
        p = randint(*char["P"])
        k = randint(*char["K"])
        temperature = round(np.random.uniform(*char["temp"]), 1)
        humidity = round(np.random.uniform(*char["humidity"]), 1)
        ph = round(np.random.uniform(*char["ph"]), 1)
        rainfall = randint(*char["rainfall"])

        data.append([
            field, crop, current_month.strftime("%Y-%m"), end_month.strftime("%Y-%m"), duration,
            n, p, k, temperature, humidity, ph, rainfall,
            last_crop_info["crop"], last_crop_info["duration"]
        ])

        # обновляем историю
        last_crop_info = {"crop": crop, "end_month": end_month, "duration": duration}
        current_month = end_month + pd.DateOffset(months=1)

In [29]:
df = pd.DataFrame(data, columns=[
    "field_id","crop","start_month","end_month","duration_months",
    "N","P","K","temperature","humidity","pH","rainfall",
    "last_crop","last_crop_duration"
])

In [30]:
df[df['crop'] == 'Wheat'].head(10)

Unnamed: 0,field_id,crop,start_month,end_month,duration_months,N,P,K,temperature,humidity,pH,rainfall,last_crop,last_crop_duration
1,field_1,Wheat,2015-08,2016-01,6,120,46,73,15.8,51.2,6.9,90,Oat,5
8,field_1,Wheat,2019-05,2019-10,6,123,46,60,19.7,69.3,6.8,74,Sunflower,7
10,field_1,Wheat,2020-04,2020-09,6,127,59,64,17.5,50.7,6.9,70,Barley,5
14,field_1,Wheat,2022-04,2022-09,6,123,56,76,16.0,50.9,6.3,78,Potato,6
17,field_1,Wheat,2023-08,2024-01,6,122,59,69,19.9,65.4,6.2,53,Barley,5
18,field_1,Wheat,2024-02,2024-07,6,119,43,88,19.1,64.1,6.7,71,Wheat,6
25,field_2,Wheat,2016-11,2017-04,6,126,45,84,18.2,56.3,6.5,71,Sunflower,7
27,field_2,Wheat,2017-11,2018-04,6,119,57,70,15.4,55.8,6.2,69,Potato,6
31,field_2,Wheat,2019-09,2020-02,6,112,50,90,17.1,66.4,6.9,60,Pea,5
32,field_2,Wheat,2020-03,2020-08,6,114,49,75,17.6,58.3,6.2,61,Wheat,6


In [31]:
df[df['field_id'] == 'field_1'].head(10)

Unnamed: 0,field_id,crop,start_month,end_month,duration_months,N,P,K,temperature,humidity,pH,rainfall,last_crop,last_crop_duration
0,field_1,Oat,2015-03,2015-07,5,100,44,56,17.7,69.6,6.4,70,,0
1,field_1,Wheat,2015-08,2016-01,6,120,46,73,15.8,51.2,6.9,90,Oat,5
2,field_1,Sunflower,2016-02,2016-08,7,134,52,110,22.2,55.2,7.0,79,Wheat,6
3,field_1,Potato,2016-09,2017-02,6,140,54,130,16.8,61.8,5.9,83,Sunflower,7
4,field_1,Barley,2017-03,2017-07,5,91,53,77,16.6,58.6,6.3,50,Potato,6
5,field_1,Sunflower,2017-08,2018-02,7,116,60,111,18.8,57.9,6.4,89,Barley,5
6,field_1,Corn,2018-03,2018-09,7,153,54,121,23.5,62.0,6.4,105,Sunflower,7
7,field_1,Sunflower,2018-10,2019-04,7,128,46,112,18.3,61.1,6.2,86,Corn,7
8,field_1,Wheat,2019-05,2019-10,6,123,46,60,19.7,69.3,6.8,74,Sunflower,7
9,field_1,Barley,2019-11,2020-03,5,117,35,68,14.5,63.7,6.4,65,Wheat,6


In [32]:
df.head(10)

Unnamed: 0,field_id,crop,start_month,end_month,duration_months,N,P,K,temperature,humidity,pH,rainfall,last_crop,last_crop_duration
0,field_1,Oat,2015-03,2015-07,5,100,44,56,17.7,69.6,6.4,70,,0
1,field_1,Wheat,2015-08,2016-01,6,120,46,73,15.8,51.2,6.9,90,Oat,5
2,field_1,Sunflower,2016-02,2016-08,7,134,52,110,22.2,55.2,7.0,79,Wheat,6
3,field_1,Potato,2016-09,2017-02,6,140,54,130,16.8,61.8,5.9,83,Sunflower,7
4,field_1,Barley,2017-03,2017-07,5,91,53,77,16.6,58.6,6.3,50,Potato,6
5,field_1,Sunflower,2017-08,2018-02,7,116,60,111,18.8,57.9,6.4,89,Barley,5
6,field_1,Corn,2018-03,2018-09,7,153,54,121,23.5,62.0,6.4,105,Sunflower,7
7,field_1,Sunflower,2018-10,2019-04,7,128,46,112,18.3,61.1,6.2,86,Corn,7
8,field_1,Wheat,2019-05,2019-10,6,123,46,60,19.7,69.3,6.8,74,Sunflower,7
9,field_1,Barley,2019-11,2020-03,5,117,35,68,14.5,63.7,6.4,65,Wheat,6


In [33]:
df.to_csv("agro_dataset_with_rules.csv", index=False)