In [2]:
import pickle
import warnings
from collections import Counter

import matplotlib.image as mpimg
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

warnings.filterwarnings('ignore')

In [2]:
# Function to extract the average color of the elytra
def extract_avg_color(image_seg_path, img_paths):
    """Return the average colour of the elytra region as a string.

    The segmentation image is read and its pixel values are ranked by
    frequency: the most frequent value is treated as background and the
    second most frequent one as the elytra label. The resulting binary mask
    (scaled by 255) is multiplied with the original image and the product is
    averaged per channel.

    Parameters
    ----------
    image_seg_path : str
        Path of the segmentation image.
        Assumes a single-channel (2-D) image, since a channel axis is added
        to the mask below -- TODO confirm against the dataset.
    img_paths : str
        Path of the original image (a single file, despite the plural name).
        If it has more than three channels, only the first three are used.

    Returns
    -------
    str
        The three channel averages joined with ``', '``.
        Note: the average runs over *every* pixel of the masked image
        (pixels outside the mask count as 0), so the value also scales with
        the area of the elytra region, not only with its colour.

    Raises
    ------
    ValueError
        If the segmentation image holds fewer than two distinct values, in
        which case no elytra label can be identified.
    """
    seg = mpimg.imread(image_seg_path)

    # Count the occurrences of each pixel value, most frequent first.
    ranked_values = [value for value, _ in Counter(seg.ravel().tolist()).most_common()]
    if len(ranked_values) < 2:
        raise ValueError(
            f"segmentation image {image_seg_path!r} contains a single pixel "
            "value; cannot separate the elytra from the background"
        )
    elytra_value = ranked_values[1]

    # Build the mask by comparing against the untouched segmentation. Zeroing
    # pixels in place first (as was done previously) produced an empty mask
    # whenever the elytra label itself was 0.
    mask = (seg == elytra_value).astype(seg.dtype) * 255
    mask = np.expand_dims(mask, axis=2)
    mask = np.repeat(mask, 3, axis=2)

    image = mpimg.imread(img_paths)
    if image.ndim == 3:
        # Drop any alpha channel so the 3-channel mask broadcasts cleanly.
        image = image[:, :, :3]

    # Apply the mask to the original image.
    masked = np.array(mask * image)
    avg_color_per_row = np.average(masked, axis=0)
    avg_color = np.average(avg_color_per_row, axis=0)[:3]
    return ', '.join(str(c) for c in avg_color)

In [3]:
# Read the label table and keep the image identifiers used to locate
# the image files.
df = pd.read_csv('training_labels_200.csv')
image_ids = df.loc[:, 'Image_id'].values

In [4]:
# Directory holding, for each image id, the original image ("<id>_im.png")
# and its segmentation ("<id>_seg.png").
# NOTE(review): machine-specific absolute path -- adjust before running elsewhere.
img_paths = '/Users/alexis/Programmation/ai-project/data/training/im_seg/'

# Not used by any cell visible in this notebook.
list_spot = []
list_mean_radius = []

# Compute the colour feature for every labelled image, then store the whole
# column in one assignment. `image_ids` comes from df['Image_id'], so the
# list is already in row order; this avoids using a loop counter as an index
# label and growing the column one row at a time.
avg_colors = []
for image_id in image_ids:
    image_seg_path = f'{img_paths}{image_id}_seg.png'
    image_path = f'{img_paths}{image_id}_im.png'
    avg_colors.append(extract_avg_color(image_seg_path, image_path))

df['avg_color'] = avg_colors

In [5]:
# Train a model to predict the colour of the elytra.
# Fit an encoder on the colour labels and store the encoded values
# alongside the originals.
le = LabelEncoder()
df['Color_Encoded'] = le.fit_transform(df['Color'])

# Parse every comma-separated colour string back into a numeric vector;
# stacking them gives one feature row per image.
average_colors = np.array(
    [np.fromstring(color_text, sep=',') for color_text in df['avg_color'].values]
)

# The targets are the raw values of the 'Color' column, not 'Color_Encoded'.
color_labels = df['Color'].values

In [6]:
# Hold out 20 % of the samples for evaluation; the split is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    average_colors, color_labels, test_size=0.2, random_state=42
)

# Create and train the classifier. The forest is seeded as well: without a
# random_state each run builds different trees, so the reported metrics
# could not be reproduced.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predict the colour labels of the test data.
y_pred = clf.predict(X_test)

# Compute the accuracy once and report it.
accuracy = accuracy_score(y_test, y_pred)
print('accuracy : ', accuracy)

accuracy :  0.8


In [7]:
# Per-class precision, recall and F1 on the held-out test split.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           B       1.00      0.75      0.86         4
           O       0.67      0.86      0.75        14
           R       0.89      0.77      0.83        22

    accuracy                           0.80        40
   macro avg       0.85      0.79      0.81        40
weighted avg       0.83      0.80      0.80        40



In [8]:
# Persist the trained model and the label encoder, one pickle file each.
for artifact, filename in (
    (clf, 'finalized_color_model.sav'),
    (le, 'label_encoder.pkl'),
):
    with open(filename, 'wb') as f:
        pickle.dump(artifact, f)