In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.decomposition import PCA, KernelPCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.manifold import Isomap, MDS
from sklearn.metrics import accuracy_score, precision_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.tree import DecisionTreeClassifier

# Load data

In [None]:

# For every crop: "x" is the weather table, "y" the crop's observation table
# and "d" the field-distance file. Only the observation file differs by crop.
DATASETS_FILE_NAMES = {
    crop: {
        "x": "combined_daily_meteo.csv",
        "y": observation_file,
        "d": "field_distance.txt",
    }
    for crop, observation_file in (
        ("Carotte", "carrot_no_sensitive_data.csv"),
        ("Laitue", "lettuce_no_sensitive_data.csv"),
        ("Oignon", "onion_no_sensitive_data.csv"),
    )
}

# Read every file listed in DATASETS_FILE_NAMES into DATASETS[crop][key].
DATASETS = {}
for crop_name, file_names in DATASETS_FILE_NAMES.items():
    crop_frames = {}
    for key, file_name in file_names.items():
        # The "d" file is read without a header row; the others use their first
        # line as column names.
        read_options = {"header": None} if key == "d" else {}
        crop_frames[key] = pd.read_csv(f"data/{crop_name}/{file_name}", **read_options)
    DATASETS[crop_name] = crop_frames

# Classification

In [None]:
from sklearn import preprocessing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score, make_scorer, precision_score
import numpy as np


# NOTE(review): this repeats, statement for statement, the load performed in
# the "Load data" cell; it rebuilds DATASETS from disk a second time.
DATASETS = {}
for name, file_names in DATASETS_FILE_NAMES.items():
    DATASETS[name] = {
        key: (
            pd.read_csv(f"data/{name}/{file_name}", header=None)  # "d": no header row
            if key == "d"
            else pd.read_csv(f"data/{name}/{file_name}")
        )
        for key, file_name in file_names.items()
    }


def normalize(sub_df):
    """Rescale the columns of ``sub_df`` with a default ``MinMaxScaler``.

    Args:
        sub_df: DataFrame whose columns are to be scaled.

    Returns:
        A new DataFrame holding the scaled values, with the same index and
        column labels as ``sub_df``.
    """
    scaled_values = preprocessing.MinMaxScaler().fit_transform(sub_df)
    return pd.DataFrame(scaled_values, columns=sub_df.columns, index=sub_df.index)


def preprocess_data_classification(crop, obs_df, meteo_df):
    """Join disease observations with weather data and extract a binary label.

    The observation column ``SampleDate`` is renamed to ``Date``, every disease
    column of the crop is binarised (any value >= 1 becomes 1, other values are
    left as they are), and the observations are inner-joined with the weather
    table on ``FarmID`` and ``Date``. The first disease column of the crop is
    returned as the label; all disease columns (and the crop-specific date
    column, when there is one) are removed from the features.

    Neither ``obs_df`` nor ``meteo_df`` is modified.

    Args:
        crop: One of ``'Oignon'``, ``'Laitue'`` or ``'Carotte'``.
        obs_df: Observation table for the crop.
        meteo_df: Weather table containing ``FarmID`` and ``Date`` columns.

    Returns:
        ``(features, label)``: the merged DataFrame without the disease
        columns, and the label Series aligned with it.

    Raises:
        ValueError: If ``crop`` is not one of the supported crops.
    """
    # Per crop: disease columns (the first one is the label) and extra columns
    # that must not end up in the feature table.
    crop_columns = {
        'Oignon': (
            ['cote_b_squamosa', 'cote_p_destructor', 'cote_s_vesicarium'],
            ['Bulb_onions_date'],
        ),
        'Laitue': (
            ['cote_b_lactucae', 'incidence_sclerotinia', 'incidence_b_cinerea'],
            ['Pommaison_lettuce_date'],
        ),
        'Carotte': (
            ['cote_c_carotae', 'incidence_s_sclerotiorum', 'incidence_a_dauci'],
            [],
        ),
    }
    if crop not in crop_columns:
        raise ValueError(
            f"Unknown crop {crop!r}; expected one of {sorted(crop_columns)}"
        )
    disease_cols, extra_drop_cols = crop_columns[crop]
    label_col = disease_cols[0]

    # rename() without inplace returns a new frame, so the caller's data (the
    # shared DATASETS entry) stays untouched however often this is called.
    obs = obs_df.rename(columns={'SampleDate': 'Date'})

    if crop == 'Carotte':
        # Carrot observations recorded under FarmID 0 are discarded.
        obs = obs.loc[obs['FarmID'] != 0].copy()

    # Turn severity / incidence values into presence (1) vs. the original
    # sub-1 value (0 for absent).
    for col in disease_cols:
        obs.loc[obs[col] >= 1, col] = 1

    combined_df = obs.merge(meteo_df, on=['FarmID', 'Date'])
    label_df = combined_df[label_col]
    combined_df = combined_df.drop(columns=disease_cols + extra_drop_cols)
    return combined_df, label_df

In [None]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor


def DecisionTreeModel(x, y):
    """Tune and evaluate a decision-tree classifier on a random 80/20 split.

    A grid search (5-fold cross-validation, refit on accuracy) is run on the
    training part; the best tree is then scored on the held-out part.

    The original version fitted a ``DecisionTreeRegressor`` with the
    regression-only ``'poisson'`` criterion and returned ``r2_score`` (a name
    that is never imported), while the evaluation loop unpacks three
    classification metrics from each model.

    Args:
        x: Feature matrix, one row per sample.
        y: Class labels. They must be binary, because precision and F1 are
            computed with their default (binary) averaging.

    Returns:
        ``(accuracy, precision, f1)`` measured on the test split.
    """
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, shuffle=True)
    param_grid = {
        # Classification split criteria ('poisson' only exists for regressors).
        'criterion': ['gini', 'entropy'],
        'max_depth': [1, 2, 3, 4, 5],
        'min_samples_split': [5, 10, 20],
        'splitter': ['best']
    }
    scoring = {
        'accuracy': make_scorer(accuracy_score),
        'roc_auc': make_scorer(roc_auc_score),
        'f1': make_scorer(f1_score)
    }
    grid = GridSearchCV(
        DecisionTreeClassifier(),
        param_grid=param_grid,
        n_jobs=-1,
        cv=5,
        scoring=scoring,
        refit='accuracy',  # the model selected is the one with the best CV accuracy
    )
    grid.fit(x_train, y_train)
    y_pred = grid.best_estimator_.predict(x_test)
    return (
        accuracy_score(y_test, y_pred),
        precision_score(y_test, y_pred),
        f1_score(y_test, y_pred),
    )


def kNNModel(x, y):
    """Fit and evaluate a k-nearest-neighbours classifier on a random 80/20 split.

    The original version fitted a ``KNeighborsRegressor`` and returned
    ``r2_score`` (a name that is never imported), while the evaluation loop
    unpacks three classification metrics from each model.

    Args:
        x: Feature matrix, one row per sample.
        y: Class labels. They must be binary, because precision and F1 are
            computed with their default (binary) averaging.

    Returns:
        ``(accuracy, precision, f1)`` measured on the test split.
    """
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, shuffle=True)
    clf = KNeighborsClassifier()
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    return (
        accuracy_score(y_test, y_pred),
        precision_score(y_test, y_pred),
        f1_score(y_test, y_pred),
    )


def RandomForestModel(x, y):
    """Fit and evaluate a random-forest classifier on a random 80/20 split.

    The original version fitted a ``RandomForestRegressor`` and returned
    ``r2_score`` (a name that is never imported), while the evaluation loop
    unpacks three classification metrics from each model.

    Args:
        x: Feature matrix, one row per sample.
        y: Class labels. They must be binary, because precision and F1 are
            computed with their default (binary) averaging.

    Returns:
        ``(accuracy, precision, f1)`` measured on the test split.
    """
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, shuffle=True)
    clf = RandomForestClassifier()
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    return (
        accuracy_score(y_test, y_pred),
        precision_score(y_test, y_pred),
        f1_score(y_test, y_pred),
    )

In [None]:
# Number of independent random train/test splits averaged for each model.
N_RUNS = 5
# (display name, evaluation function); each function takes (x, y) and is
# expected to return (accuracy, precision, f1).
models = (
    ("DT", DecisionTreeModel),
    ("k-NN", kNNModel),
    ("RF", RandomForestModel),
)

for crop in DATASETS:
    print(f"\n🌾 Culture : {crop}")
    x, y = preprocess_data_classification(crop, DATASETS[crop]["y"], DATASETS[crop]["x"])

    # NOTE(review): x still contains every merged column that preprocessing
    # did not drop (including the join keys 'FarmID' and 'Date');
    # StandardScaler needs numeric input - confirm these columns are numeric.
    scaler = StandardScaler()
    x_scaled = scaler.fit_transform(x)

    # Project the standardised features onto 22 kernel-PCA components.
    kpca = KernelPCA(n_components=22)
    x_kpca = kpca.fit_transform(x_scaled)

    # Fixed: this line referenced the undefined name `x_pca`.
    print(f"📉 KernelPCA : {x.shape[1]} ➝ {x_kpca.shape[1]} dimensions pour {crop}")

    # Preprocessing already caps the label at 1, so the former `y / max(y)`
    # rescaling was a no-op when a positive sample exists and produced NaN
    # (division by zero) when none does. Keep the labels as a plain array.
    y = np.asarray(y)

    acc_scores = {model_name: [] for model_name, _ in models}
    prec_scores = {model_name: [] for model_name, _ in models}
    f_scores = {model_name: [] for model_name, _ in models}

    for run in range(N_RUNS):
        print(f"   ▶️ Run {run + 1}/{N_RUNS}")
        for model_name, model in models:
            # Every call draws its own random train/test split.
            a, p, f = model(x_kpca, y)
            acc_scores[model_name].append(a)
            prec_scores[model_name].append(p)
            f_scores[model_name].append(f)

            print(f"    {model_name}: accuracy= {a:.4}, precision={p:.4}, f1={f:.4}")

    print(f"\n📊 Moyennes des {N_RUNS} runs pour {crop} :")
    for model_name in acc_scores:
        acc_mean = np.mean(acc_scores[model_name])
        prec_mean = np.mean(prec_scores[model_name])
        f1_mean = np.mean(f_scores[model_name])
        print(f"    ▶️ {model_name} : Accuracy = {acc_mean:.4f}, Precision = {prec_mean:.4f}, F1-score = {f1_mean:.4f}")