In [3]:
from cookie_clusters import *
from selection_pixels_dtw import *
import os
import cv2
from PIL import Image
import rasterio as rio
import re
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import pandas as pd
from tsfresh import extract_features
import numpy as np

In [4]:
# Select the pixels we wanna work with, for controled analyses
pixels_de_interet = pix_danone + pix_agri + pix_ensta + pix_apt + pix_lac + pix_foret

In [5]:
# list of images
dir = "../../ressources/images"
images_list = os.listdir(dir)

# all images in a list, ready to be read
images_2A = list()
images_2B = list()
prog = re.compile(r'\w+2A')
for image in images_list:
    if prog.match(image):
        images_2A.append(image)
    else:
        images_2B.append(image)
        
images_2A.sort()
images_2B.sort()
all_images = images_2A + images_2B
all_images = sorted(all_images, key=lambda date: date[16:24])  

In [6]:
# Initiate lists for data frame
date = []
pixel_r = []
pixel_g = []
pixel_b = []
pixel_ir = []
x_coord = []
y_coord = []

# read each image --> select interesting pixels --> add to list
for temp, img in zip(range(len(all_images)), all_images):
    img = dir + "/" + img
    raster = rio.open(img)
    band_ir = raster.read(1)
    band_r = raster.read(2)
    band_g = raster.read(3)
    band_b = raster.read(4)

    for px_x, px_y in pixels_de_interet:
        ir = band_ir[px_x, px_y]
        r = band_r[px_x, px_y]
        g = band_g[px_x, px_y]
        b = band_b[px_x, px_y]
        
        date.append(temp)
        x_coord.append(px_x)
        y_coord.append(px_y)
        pixel_ir.append(ir)
        pixel_r.append(r)
        pixel_g.append(g)
        pixel_b.append(b)

In [7]:
# the final DF has the values of pixels of interest for all the chanels and for all period of time
dic = {'date': date,
       'x_coord': x_coord,
       'y_coord': y_coord,
       'pixel_ir': pixel_ir,
       'pixel_r': pixel_r,
       'pixel_g': pixel_g,
       'pixel_b': pixel_b}

df = pd.DataFrame(dic)

In [8]:
df

Unnamed: 0,date,x_coord,y_coord,pixel_ir,pixel_r,pixel_g,pixel_b
0,0,383,497,2765,2285,2347,1937
1,0,383,500,2398,2390,2143,2097
2,0,387,501,3014,2731,2313,1973
3,0,383,504,1213,1289,1115,1135
4,0,387,505,2704,2456,2195,1858
...,...,...,...,...,...,...,...
7609,140,480,570,3739,184,405,142
7610,140,482,570,3416,71,261,81
7611,140,484,570,3415,131,326,112
7612,140,486,570,2936,49,163,34


In [9]:
df_transforme = df.copy()
df_transforme['id'] = df_transforme['x_coord'].astype(str) + df_transforme['y_coord'].astype(str)

df_transforme = df_transforme.loc[:, ~df_transforme.columns.isin(['x_coord', 'y_coord'])]

df_transforme

Unnamed: 0,date,pixel_ir,pixel_r,pixel_g,pixel_b,id
0,0,2765,2285,2347,1937,383497
1,0,2398,2390,2143,2097,383500
2,0,3014,2731,2313,1973,387501
3,0,1213,1289,1115,1135,383504
4,0,2704,2456,2195,1858,387505
...,...,...,...,...,...,...
7609,140,3739,184,405,142,480570
7610,140,3416,71,261,81,482570
7611,140,3415,131,326,112,484570
7612,140,2936,49,163,34,486570


In [10]:
extracted_features = extract_features(df_transforme, column_id="id", column_sort="date")

Feature Extraction: 100%|██████████| 27/27 [00:03<00:00,  7.16it/s]


In [11]:
print(extracted_features.shape)
extracted_features.head()

(54, 3132)


Unnamed: 0,pixel_ir__variance_larger_than_standard_deviation,pixel_ir__has_duplicate_max,pixel_ir__has_duplicate_min,pixel_ir__has_duplicate,pixel_ir__sum_values,pixel_ir__abs_energy,pixel_ir__mean_abs_change,pixel_ir__mean_change,pixel_ir__mean_second_derivative_central,pixel_ir__median,...,pixel_b__fourier_entropy__bins_5,pixel_b__fourier_entropy__bins_10,pixel_b__fourier_entropy__bins_100,pixel_b__permutation_entropy__dimension_3__tau_1,pixel_b__permutation_entropy__dimension_4__tau_1,pixel_b__permutation_entropy__dimension_5__tau_1,pixel_b__permutation_entropy__dimension_6__tau_1,pixel_b__permutation_entropy__dimension_7__tau_1,pixel_b__query_similarity_count__query_None__threshold_0.0,pixel_b__mean_n_absolute_max__number_of_maxima_7
318438,1.0,0.0,0.0,1.0,433801.0,-29213.0,618.057143,16.885714,4.251799,2856.0,...,0.416201,0.807031,2.533341,1.75096,3.005995,4.040824,4.653974,4.850055,,2476.571429
322435,1.0,0.0,0.0,1.0,440503.0,31845.0,620.064286,16.478571,3.744604,2885.0,...,0.375112,0.814575,2.68514,1.730201,2.942467,3.997035,4.615699,4.812855,,2604.142857
324433,1.0,0.0,0.0,1.0,446175.0,-12887.0,609.628571,17.114286,3.586331,2890.0,...,0.416201,0.838551,2.553483,1.740177,2.944195,4.027603,4.62974,4.833393,,2647.0
329429,1.0,0.0,0.0,1.0,433640.0,-10322.0,614.842857,17.957143,4.794964,2773.0,...,0.375112,0.795049,2.654744,1.734601,2.93633,4.011184,4.666666,4.874468,,2571.285714
333426,1.0,0.0,0.0,1.0,439841.0,-3027.0,607.871429,17.1,4.100719,2711.0,...,0.375112,0.734323,2.545974,1.753395,2.986447,4.039991,4.670513,4.874468,,2608.0


In [12]:
extrac_features_NOna = extracted_features.dropna(axis=1)
extrac_features_NOna.shape

(54, 2646)

In [13]:
km = KMeans(n_clusters=6)
classes = km.fit_predict(extrac_features_NOna)
classes
# a, b = np.unique(classes, return_counts=True)
# print(a)
# print(b)


  super()._check_params_vs_input(X, default_n_init=10)


array([3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 5, 0, 0, 5, 5, 5, 4, 1, 5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int32)

In [14]:
km_results = pd.DataFrame({'index': extrac_features_NOna.index, 'cluster': classes})
km_results.head()

Unnamed: 0,index,cluster
0,318438,3
1,322435,3
2,324433,3
3,329429,3
4,333426,3


In [15]:
test = [str(sublista[0])+str(sublista[1]) for sublista in pix_foret]

pix_id = {'pix_foret': [str(sublista[0])+str(sublista[1]) for sublista in pix_foret],
          'pix_danone': [str(sublista[0])+str(sublista[1]) for sublista in pix_danone],
          'pix_agri': [str(sublista[0])+str(sublista[1]) for sublista in pix_agri],
          'pix_ensta': [str(sublista[0])+str(sublista[1]) for sublista in pix_ensta],
          'pix_apt': [str(sublista[0])+str(sublista[1]) for sublista in pix_apt],
          'pix_lac': [str(sublista[0])+str(sublista[1]) for sublista in pix_lac]}

In [25]:
lista = []
for row in km_results.iterrows():
    for pix in pix_id.keys():
        if row[1][0] in pix_id[pix]:
            lista.append(pix)

km_results['lieu'] = lista
km_results

Unnamed: 0,index,cluster,lieu
0,318438,3,pix_agri
1,322435,3,pix_agri
2,324433,3,pix_agri
3,329429,3,pix_agri
4,333426,3,pix_agri
5,337424,3,pix_agri
6,339422,3,pix_agri
7,344418,3,pix_agri
8,350414,3,pix_agri
9,383497,0,pix_danone
