In [None]:
import json
import pandas as pd
import numpy as np


from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestClassifier 

from typing import Union, Tuple 

import joblib

import preprocessing as pp

In [None]:
def get_data(path: str) -> Tuple[list, list, list, list]:
    """Load parcel records from a json file and split them into train/test sets.

    The json document is indexed with the string keys "0", "1", ... up to
    ``len(document) - 1``. For each record the label is ``int(record['prairie'])``
    and the feature vector is
    ``pp.correction(pp.month_ndvi(record['data'])[0])``.

    Loading is best-effort: a record whose label or features cannot be built
    is skipped entirely, and the number of skipped records is printed.

    Args:
        path (str): json path

    Returns:
        Tuple[list, list, list, list]: X_train, X_test, y_train, y_test
        (70% / 30% split, fixed random_state for reproducibility)
    """
    # The context manager closes the file; no explicit close() is needed.
    with open(path, 'r') as f:
        parcelles = json.load(f)

    X = []
    y = []
    skipped = 0
    for i in range(len(parcelles)):
        try:
            record = parcelles[str(i)]
            label = int(record['prairie'])
            features = pp.correction(pp.month_ndvi(record['data'])[0])
        except Exception:
            # Deliberate best-effort: drop the record, but keep track of it.
            skipped += 1
            continue
        # Append only after BOTH values were built, so X and y stay aligned
        # even when the preprocessing of a record fails.
        X.append(features)
        y.append(label)

    if skipped:
        print(f"get_data: skipped {skipped} of {len(parcelles)} records that could not be processed")

    # A single call splits features and labels with the same shuffling,
    # and raises if their lengths ever disagree.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    return X_train, X_test, y_train, y_test

In [None]:
def print_score(clf: RandomForestClassifier, X_test: list, y_test: list) -> None:
    """Print evaluation metrics of a fitted classifier on the test set.

    Predicts on ``X_test`` and prints, in order: the accuracy as a percentage,
    the classification report (as a DataFrame) and the confusion matrix.

    Args:
        clf (RandomForestClassifier): fitted classifier
        X_test (list): test features
        y_test (list): test labels
    """
    divider = "_______________________________________________"

    predictions = clf.predict(X_test)
    report_dict = classification_report(y_test, predictions, output_dict=True)
    report_frame = pd.DataFrame(report_dict)

    print("Test Result:\n================================================")

    accuracy_pct = accuracy_score(y_test, predictions) * 100
    print(f"Accuracy Score: {accuracy_pct:.2f}%")
    print(divider)

    print(f"CLASSIFICATION REPORT:\n{report_frame}")
    print(divider)

    matrix = confusion_matrix(y_test, predictions)
    print(f"Confusion Matrix: \n {matrix}\n")

In [None]:
def training(X_train: list, X_test: list, y_train: list, y_test: list) -> RandomForestClassifier:
    """Fit a random forest on the training set and report its test scores.

    Args:
        X_train (list): training features
        X_test (list): test features, used only for the printed report
        y_train (list): training labels
        y_test (list): test labels, used only for the printed report

    Returns:
        RandomForestClassifier: the fitted classifier
    """
    # Default hyper-parameters; fit on the training split only.
    model = RandomForestClassifier()
    model.fit(X_train, y_train)

    # Evaluation on the held-out split is printed, not returned.
    print_score(model, X_test, y_test)

    return model

In [27]:
def __main__(
    path: str = "C:/Users/ltuesta/Desktop/Files/Dev/local_python/data/data_region.json",
    filename: str = "models/RandomForestPrairie.joblib",
) -> None:
    """Run the whole pipeline: load data, train, report scores, save the model.

    Args:
        path (str): json file with the parcel records. Defaults to the
            original hard-coded location, so ``__main__()`` keeps working.
        filename (str): where the fitted classifier is dumped with joblib.
            Missing parent directories are created.
    """
    # Local import keeps this block self-contained.
    from pathlib import Path

    X_train, X_test, y_train, y_test = get_data(path)

    rfc = training(X_train, X_test, y_train, y_test)

    # save model
    # Create the target directory first: otherwise joblib.dump fails with
    # FileNotFoundError after training has already been done.
    Path(filename).parent.mkdir(parents=True, exist_ok=True)
    joblib.dump(rfc, filename)

In [None]:
# Run the pipeline: load the data, train and evaluate the classifier, then save the model.
__main__()