# Importation des données

In [1]:
import os.path

import pandas as pd
import numpy as np

from PIL import Image

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier

from skimage.feature import hog
from skimage.color import rgb2gray

from collections import Counter

In [2]:
data=pd.read_csv(r'./images/batiments.csv', delimiter=';')
data=data.drop(data.columns[1], axis=1)
data.columns=['ID','Label']
data

Unnamed: 0,ID,Label
0,1,Tuiles
1,2,Zinc Aluminium
2,3,Tuiles
3,4,Tuiles
4,5,Tuiles
...,...,...
1994,1995,Ardoises
1995,1996,Ardoises
1996,1997,Ardoises
1997,1998,Ardoises


In [3]:
#Method to load an image and return it as an array with pixels
def get_image(image_id, root=r'./images/'):
    file='{}.jpg'.format(image_id)
    image_path=os.path.join(root, file)
    image=Image.open(image_path)
    image_array=np.array(image)
    #print(image_path)
    return image_array

In [4]:
def create_features(img):
    color_features = img.flatten()
    flat_features = np.hstack(color_features)
    return flat_features

In [5]:
def create_feature_matrix(label_dataframe):
    features_list = []
    
    for img_id in label_dataframe.index:
        # load image
        img = get_image(img_id)
        # get features for image
        image_features = create_features(img)
        features_list.append(image_features)
        
    # convert list of arrays into a matrix
    feature_matrix = np.array(features_list)
    return feature_matrix

In [6]:
feature_matrix = create_feature_matrix(data)

KeyboardInterrupt: 

In [None]:
X=pd.DataFrame(feature_matrix)
y=pd.DataFrame(data.Label)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25,
                                                    random_state=1234123)

In [8]:
svm = SVC(kernel='linear', probability=True, random_state=400)
svm.fit(X_train, y_train)

  return f(*args, **kwargs)


SVC(kernel='linear', probability=True, random_state=400)

In [9]:
param_grid = {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.01, 0.1, 1]}

In [11]:
from sklearn.model_selection import GridSearchCV

In [12]:
grid_search = GridSearchCV(svm, param_grid, cv=5)

In [None]:
grid_search.fit(X_train, y_train)

  return f(*args, **kwargs)


In [10]:
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print('Model accuracy is: ', accuracy)

Model accuracy is:  0.582


In [42]:
y_test_list=y_test['Label'].tolist()

In [43]:
y_pred_list=y_pred.tolist()

In [44]:
cpt=len(y_pred_list)
#len(y_pred_list), len(y_test_list)

In [18]:
def calculate_weighted_f1_score(confusion_matrix, weights):
    # calculate precision for each class
    precision = np.diag(confusion_matrix) / np.sum(confusion_matrix, axis = 0)
    # calculate recall for each class
    recall = np.diag(confusion_matrix) / np.sum(confusion_matrix, axis = 1)
    # calculate F1-score for each class
    f1_score = 2 * (precision * recall) / (precision + recall)
    # weight the F1-score for each class
    print(f1_score)
    weighted_f1_score = f1_score * weights
    # return the average weighted F1-score
    return np.sum(weighted_f1_score)

In [45]:
frequency_actual = dict(Counter(y_test_list))
weight = [frequency_actual["Ardoises"],frequency_actual["Beton"],frequency_actual["Tuiles"],frequency_actual["Zinc Aluminium"]]
#print(pop,dim_zone,k,e,"/5")
weight_norm = [wg/cpt for wg in weight]
cm = confusion_matrix(y_test_list, y_pred_list)
#print(metrics.classification_report(actual, predicted,digits=3))
print(calculate_weighted_f1_score(cm,weight_norm))

[0.59722222 0.2345679  0.80916031 0.34394904]
0.581327723393642


In [53]:
clf = RandomForestClassifier(n_estimators=600,max_depth=6)
clf.fit(X_train, y_train)

  clf.fit(X_train, y_train)


RandomForestClassifier(max_depth=6, n_estimators=600)

In [54]:
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print('Model accuracy is: ', accuracy)

Model accuracy is:  0.632


In [56]:
y_test_list=y_test['Label'].tolist()

In [57]:
y_pred_list=y_pred.tolist()

In [58]:
frequency_actual = dict(Counter(y_test_list))
weight = [frequency_actual["Ardoises"],frequency_actual["Beton"],frequency_actual["Tuiles"],frequency_actual["Zinc Aluminium"]]
#print(pop,dim_zone,k,e,"/5")
weight_norm = [wg/cpt for wg in weight]
cm = confusion_matrix(y_test_list, y_pred_list)
#print(metrics.classification_report(actual, predicted,digits=3))
print(calculate_weighted_f1_score(cm,weight_norm))

[0.66865672 0.09638554 0.7845805  0.38297872]
0.5779788362298565
