In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import jaccard_score, classification_report, confusion_matrix
import pandas as pd
import geopandas as gpd
import numpy as np

from main import CRLoader as Loader, FExtractorB as FExtractor

# One mesh attribute table per tile, read from the shapefiles 1_clean.shp .. 4_clean.shp.
new_features = [
    gpd.read_file(f'/home/stagiaire/D/D/mesh/{tile}_clean.shp')
    for tile in range(1, 5)
]

# Columns retained in every mesh table; everything else read from the
# shapefile is dropped. 'id' is kept so rows can be matched to samples later.
to_keep = ['p_vegeta', 'len_roads', 'profile_co', 'nb_connect', 'min_connec', 'slope', 'id']
for position, table in enumerate(new_features):
    unwanted = set(table.columns) - set(to_keep)
    new_features[position] = table.drop(columns=unwanted)

# Arguments forwarded to Loader.load_data for every split.
sample_size = 120
batch_size = 16

# Number of repeated runs, and the per-run results collected across them.
it = 5
metrics = []
# One row per model feature and, initially, zero columns; each run appends a
# column. 774 is presumably 768 extracted features plus the six mesh
# attributes above -- TODO confirm against the feature extractor.
feature_importance = np.empty((774, 0))

def _load_split(patch_tag, mesh_table, mesh_columns):
    """Extract patch features for one split and append each sample's mesh attributes.

    Reads the notebook-level ``sample_size`` and ``batch_size`` settings.

    Parameters
    ----------
    patch_tag : int or str
        Suffix of the patch directories: optical patches are read from
        ``patchs/<patch_tag>n`` and SAR patches from ``patchs/<patch_tag>Rn``.
    mesh_table : pandas.DataFrame
        Table with an ``'id'`` column and every column listed in ``mesh_columns``.
    mesh_columns : list of str
        Mesh attribute columns to append to each feature vector, in this order.

    Returns
    -------
    tuple of (list, list, list)
        Feature vectors (extracted features followed by the mesh attributes),
        labels and sample ids, aligned by position.

    Raises
    ------
    ValueError
        If a sample id has no row, or more than one row, in ``mesh_table``.
        Either case would otherwise produce feature vectors of unequal length.
    """
    processor = Loader(
        opt_root_dir=f"/home/stagiaire/D/D/patchs/{patch_tag}n",
        sar_root_dir=f"/home/stagiaire/D/D/patchs/{patch_tag}Rn",
        num_folds=1,
    )
    processor.load_data(sample_size=sample_size, batch_size=batch_size)
    extractor = FExtractor(dataloader=processor.loaders[0], use_8_bit=True)
    features, labels, ids = extractor.extract_features()

    # Keyed by sample id: if an id occurs twice, the last occurrence wins.
    samples = {
        sample_id: [vector.tolist(), label]
        for sample_id, vector, label in zip(ids, features, labels)
    }

    matched = mesh_table[mesh_table['id'].isin(ids)]
    if matched['id'].duplicated().any():
        raise ValueError(
            f"split {patch_tag!r}: some sample ids match more than one mesh row"
        )
    mesh_by_id = dict(zip(matched['id'], matched[mesh_columns].to_numpy().tolist()))

    missing = [sample_id for sample_id in samples if sample_id not in mesh_by_id]
    if missing:
        raise ValueError(
            f"split {patch_tag!r}: {len(missing)} sample id(s) have no mesh row, "
            f"e.g. {missing[:5]}"
        )

    for sample_id, entry in samples.items():
        entry[0] = entry[0] + mesh_by_id[sample_id]

    split_ids = list(samples)
    split_features = [entry[0] for entry in samples.values()]
    split_labels = [entry[1] for entry in samples.values()]
    return split_features, split_labels, split_ids


# The combined mesh table does not change between runs, so build it once.
mesh_all = pd.concat(new_features, ignore_index=True)
# A single column order shared by the train and test splits, so that the
# appended mesh attributes land at the same feature positions in both.
mesh_columns = [column for column in mesh_all.columns if column != 'id']

for run in range(it):
    # Training samples come from the "412" patch directories and may match
    # rows of any tile; test samples come from the "3" directories and are
    # matched against the third mesh table only (3_clean.shp).
    train_features, train_labels, train_ids = _load_split(412, mesh_all, mesh_columns)
    test_features, test_labels, test_ids = _load_split(3, new_features[2], mesh_columns)

    rf_model = RandomForestClassifier(n_estimators=400)
    rf_model.fit(np.array(train_features), np.array(train_labels))

    predictions = rf_model.predict(np.array(test_features))
    # Swap the two labels so that the original class 0 becomes label 1, which
    # is the class scored by jaccard_score in its default binary mode.
    predictions_inv = [1 if p == 0 else 0 for p in predictions]
    test_labels_inv = [1 if l == 0 else 0 for l in test_labels]

    # Append this run's importances as a new column (one row per feature).
    feature_importance = np.hstack(
        (feature_importance, np.array(rf_model.feature_importances_).reshape(-1, 1))
    )

    IoU = jaccard_score(test_labels_inv, predictions_inv)
    # labels=[0, 1] keeps the matrix 2x2 even when a run contains one class only.
    cm = confusion_matrix(test_labels_inv, predictions_inv, labels=[0, 1])
    df_cm = pd.DataFrame(
        cm,
        index=['Actual Class 0', 'Actual Class 1'],
        columns=['Predicted Class 0', 'Predicted Class 1'],
    )
    cr = classification_report(test_labels_inv, predictions_inv)

    print(f"\nJaccard index: {IoU*100: 0.1f}%\n")
    print(f"\n{df_cm}\n")
    print(f"\n{cr}\n")
    metrics.append([cm, cr, IoU])

In [None]:
from R import RGenerator

# Hand the collected per-run [confusion matrix, classification report, IoU]
# entries to the report generator and keep what it returns.
report_builder = RGenerator(metrics)
report = report_builder.report()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Collapse the per-run columns into one total per feature. The values are
# summed, not averaged, over the runs. Re-running this cell is harmless:
# summing an already collapsed value returns it unchanged.
feature_importance = np.array([np.sum(f) for f in feature_importance])
# Derive the bar count from the data instead of repeating a literal.
n_features = len(feature_importance)

fig, ax = plt.subplots(figsize=(6, 10))
ax.barh(range(n_features), feature_importance, align='center', color='black', edgecolor='blue')
ax.set_yticks(range(0, n_features, 100))
ax.set_xlabel('')
ax.set_title('')
# Put feature 0 at the top of the chart.
ax.invert_yaxis()
plt.show()

In [None]:
# Split the per-feature totals at index 768: the leading part is presumably
# the extracted patch features and the tail the six appended mesh
# attributes -- TODO confirm against the feature extractor's output size.
# Expects the one-dimensional totals produced by the plotting cell.
patch_part = feature_importance[:768]
mesh_part = feature_importance[768:]

# Average importance per feature in each group.
print(sum(patch_part / 768))
print(sum(mesh_part / 6))

# Total importance carried by each group.
print(sum(patch_part))
print(sum(mesh_part))