# Recommandation de plantes : algo simple KNN

In [None]:
import pandas as pd

from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import RobustScaler, OneHotEncoder, OrdinalEncoder
from sklearn.compose import make_column_transformer

## 1. Préparation des données

In [90]:
# Load the cleaned plant catalogue and preview its first rows.
plants_dataset = pd.read_csv(filepath_or_buffer='../data/processed/plants_clean_dataset.csv')
plants_dataset.head(n=5)

Unnamed: 0,common_name,scientific_name,type,maintenance,sunlight,drought_tolerant,salt_tolerant,thorny,poisonous_to_humans,poisonous_to_pets,invasive,edible_fruit,medicinal,hardiness_min,hardiness_max,is_perennial,attracts_birds,attracts_butterflies
0,European Silver Fir,['Abies alba'],arbres,moderate,full_sun,False,False,False,False,False,False,False,True,7.0,7.0,True,False,False
1,Fraser Fir,['Abies fraseri'],arbres,moderate,full_sun,False,False,False,False,False,False,False,False,6.0,6.0,True,False,False
2,Golden Korean Fir,"[""Abies koreana 'Aurea'""]",arbres,moderate,full_sun,False,False,False,False,False,False,False,False,6.0,6.0,True,False,False
3,Blue Spanish Fir,"[""Abies pinsapo 'Glauca'""]",arbres,moderate,full_sun,True,True,False,False,False,False,False,False,7.0,7.0,True,False,False
4,Noble Fir,['Abies procera'],arbres,moderate,full_sun,False,False,False,False,False,False,False,True,8.0,8.0,True,True,False


In [91]:
# Keep only the plants flagged as non-poisonous for both humans and pets.
original_shape = plants_dataset.shape  # kept to report how many rows are removed
mask_safe_humans = plants_dataset['poisonous_to_humans'].eq(False)
mask_safe_pets = plants_dataset['poisonous_to_pets'].eq(False)

# A row survives only when both masks are True.
plants_dataset = plants_dataset[mask_safe_humans & mask_safe_pets]
print(f'{original_shape[0] - plants_dataset.shape[0]} lignes supprimées')

203 lignes supprimées


In [92]:
# Drop the columns that are not used for the recommendation. The two toxicity
# flags were just used to filter the rows; 'invasive' is dropped as well.
# 15 columns remain: 2 numeric (hardiness_min, hardiness_max) and 13
# non-numeric ones (5 object columns and 8 booleans).
plants_dataset = plants_dataset.drop(
    ['poisonous_to_humans', 'poisonous_to_pets', 'invasive'],
    axis='columns',
)
plants_dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1636 entries, 0 to 1838
Data columns (total 15 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   common_name           1636 non-null   object 
 1   scientific_name       1636 non-null   object 
 2   type                  1636 non-null   object 
 3   maintenance           1636 non-null   object 
 4   sunlight              1636 non-null   object 
 5   drought_tolerant      1636 non-null   bool   
 6   salt_tolerant         1636 non-null   bool   
 7   thorny                1636 non-null   bool   
 8   edible_fruit          1636 non-null   bool   
 9   medicinal             1636 non-null   bool   
 10  hardiness_min         1636 non-null   float64
 11  hardiness_max         1636 non-null   float64
 12  is_perennial          1636 non-null   bool   
 13  attracts_birds        1636 non-null   bool   
 14  attracts_butterflies  1636 non-null   bool   
dtypes: bool(8), float64(2), object(5)

In [109]:
# Feature matrix given to the algorithm: the two name columns and
# hardiness_min are left out.
X = plants_dataset.drop(
    ['common_name', 'scientific_name', 'hardiness_min'],
    axis='columns',
)
X.head(n=5)

Unnamed: 0,type,maintenance,sunlight,drought_tolerant,salt_tolerant,thorny,edible_fruit,medicinal,hardiness_max,is_perennial,attracts_birds,attracts_butterflies
0,arbres,moderate,full_sun,False,False,False,False,True,7.0,True,False,False
1,arbres,moderate,full_sun,False,False,False,False,False,6.0,True,False,False
2,arbres,moderate,full_sun,False,False,False,False,False,6.0,True,False,False
3,arbres,moderate,full_sun,True,True,False,False,False,7.0,True,False,False
4,arbres,moderate,full_sun,False,False,False,False,True,8.0,True,True,False


In [125]:
# Choice of per-column transformations:
#   - type          -> one-hot encoded
#   - hardiness_max -> robust-scaled
#   - maintenance   -> ordinal, ordered low < moderate < high
#   - sunlight      -> ordinal, ordered full_shade < part_shade < full_sun
#   - every other column is passed through unchanged.
# hardiness_min has no transformer here because it was removed from X.
preprocessor = make_column_transformer(
    (OneHotEncoder(), ['type']),
    (RobustScaler(), ['hardiness_max']),
    (OrdinalEncoder(categories=[['low', 'moderate', 'high']]), ['maintenance']),
    (OrdinalEncoder(categories=[['full_shade', 'part_shade', 'full_sun']]), ['sunlight']),
    remainder="passthrough",
    force_int_remainder_cols=False,
)

# Fit on X, then wrap the encoded matrix in a DataFrame labelled with the
# generated feature names so it can be inspected.
X_transformed = pd.DataFrame(
    data=preprocessor.fit_transform(X),
    columns=preprocessor.get_feature_names_out(),
)
X_transformed.head(n=5)

Unnamed: 0,onehotencoder__type_arbres,onehotencoder__type_arbustes,onehotencoder__type_fleurs,onehotencoder__type_herbes,onehotencoder__type_plantes_grimpantes,onehotencoder__type_potager,onehotencoder__type_succulentes,robustscaler__hardiness_max,ordinalencoder-1__maintenance,ordinalencoder-2__sunlight,remainder__drought_tolerant,remainder__salt_tolerant,remainder__thorny,remainder__edible_fruit,remainder__medicinal,remainder__is_perennial,remainder__attracts_birds,remainder__attracts_butterflies
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,-2.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,-2.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,1.0,2.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0


## 2. Implémentation de NearestNeighbors

In [126]:
# Nearest-neighbour index over the encoded catalogue; 7 neighbours are
# returned per query by default.
nn = NearestNeighbors(n_neighbors=7)
nn.fit(X=preprocessor.transform(X))

In [127]:
# Mock user data, used as queries against the fitted model.
user_data = pd.read_csv(filepath_or_buffer='../data/raw/fausses_donnees_utilisateur.csv')
user_data.head(n=5)

Unnamed: 0,type,maintenance,sunlight,drought_tolerant,salt_tolerant,thorny,edible_fruit,medicinal,hardiness_min,hardiness_max,is_perennial,attracts_birds,attracts_butterflies
0,fleurs,moderate,part_shade,False,False,True,False,False,8.0,8.0,True,True,True
1,potager,low,full_sun,False,False,False,True,False,8.0,8.0,True,False,False
2,herbes,low,part_shade,True,False,False,True,True,7.0,7.0,True,False,False
3,arbres,moderate,full_sun,True,False,False,True,False,8.0,8.0,True,False,True


In [128]:
# Encode the user rows with the already-fitted preprocessor (hardiness_min is
# dropped, as it was for X) and query the nearest plants for every user row.
distances, indices = nn.kneighbors(
    X=preprocessor.transform(user_data.drop('hardiness_min', axis='columns')),
)

In [129]:
# User row 0: flowers, moderate maintenance, part_shade.
# NOTE(review): the original note said "not thorny", but the previewed
# user_data row 0 has thorny=True — confirm which one is intended.
# iloc is used because kneighbors returns row positions in the fitted matrix,
# while the index labels of plants_dataset have gaps after the filtering.
similar_plants = plants_dataset.iloc[indices[0]].assign(_distance=distances[0])

similar_plants.sort_values('_distance')

Unnamed: 0,common_name,scientific_name,type,maintenance,sunlight,drought_tolerant,salt_tolerant,thorny,edible_fruit,medicinal,hardiness_min,hardiness_max,is_perennial,attracts_birds,attracts_butterflies,_distance
1222,sweet William,['Dianthus barbatus'],fleurs,moderate,full_sun,False,False,False,False,False,3.0,9.0,True,True,True,1.732051
967,kousa dogwood,"[""Cornus kousa 'Fireworks'""]",arbres,low,part_shade,False,False,False,False,False,4.0,8.0,True,True,True,2.0
898,fringeleaf tickseed,['Coreopsis integrifolia'],fleurs,moderate,full_sun,False,True,True,False,False,5.0,9.0,True,False,True,2.0
356,aster,"[""Aster frikartii 'Flora's Delight'""]",fleurs,moderate,full_sun,False,True,False,False,False,5.0,8.0,True,False,True,2.0
990,red twig dogwood,"[""Cornus sericea 'Hedgerows Gold'""]",arbustes,moderate,full_sun,False,False,False,False,False,3.0,8.0,True,True,True,2.0
357,aster,"[""Aster frikartii 'Jungfrau'""]",fleurs,moderate,full_sun,False,True,False,False,False,5.0,8.0,True,False,True,2.0
218,glossy abelia,"[""Abelia grandiflora 'MINDUO1' SUNNY ANNIVERSA...",arbustes,moderate,full_sun,False,False,False,False,False,6.0,8.0,True,True,True,2.0


In [130]:
# User row 1: vegetable garden ("potager").
# NOTE(review): the original note said "moderate maintenance, part_shade", but
# the previewed user_data row 1 is low maintenance / full_sun — confirm.
# Author's observation: the recommendation does not work here, attributed to
# hardiness min/max (the "potager" plants of the dataset have a wide hardiness
# range, e.g. 3.0-11.0, whereas the user has a single value, 8.0-8.0).
# NOTE(review): hardiness_min is not passed to the model, so only
# hardiness_max can play a role — hypothesis to be confirmed.
similar_plants = plants_dataset.iloc[indices[1]].assign(_distance=distances[1])

similar_plants.sort_values('_distance')

Unnamed: 0,common_name,scientific_name,type,maintenance,sunlight,drought_tolerant,salt_tolerant,thorny,edible_fruit,medicinal,hardiness_min,hardiness_max,is_perennial,attracts_birds,attracts_butterflies,_distance
1066,snow crocus,"[""Crocus chrysanthus 'Goldilocks'""]",fleurs,low,full_sun,False,False,False,False,False,3.0,8.0,True,False,False,1.732051
1002,winter hazel,['Corylopsis glabrescens var. gotoana'],arbustes,low,full_sun,False,False,False,False,False,5.0,8.0,True,False,False,1.732051
1064,snow crocus,['Crocus chrysanthus'],fleurs,low,full_sun,False,False,False,False,False,3.0,8.0,True,False,False,1.732051
975,cornelian cherry dogwood,"[""Cornus mas 'Golden Glory'""]",arbres,low,full_sun,False,False,False,True,False,4.0,8.0,True,True,False,1.732051
1067,snow crocus,"[""Crocus chrysanthus 'Snowbunting'""]",fleurs,low,full_sun,False,False,False,False,False,3.0,8.0,True,False,False,1.732051
234,kiwifruit,['Actinidia deliciosa'],potager,moderate,full_sun,False,False,False,True,True,8.0,9.0,True,False,False,1.732051
747,beautyberry,"[""Callicarpa bodinieri var. giraldii 'Profusio...",arbustes,low,full_sun,False,True,False,True,False,6.0,8.0,True,False,False,1.732051


In [131]:
# User row 2: medicinal herbs, low maintenance, part_shade.
# Rows are picked by position (iloc) since kneighbors returns positions in
# the fitted matrix, then each row is tagged with its distance to the user.
similar_plants = plants_dataset.iloc[indices[2]].assign(_distance=distances[2])

similar_plants.sort_values('_distance')

Unnamed: 0,common_name,scientific_name,type,maintenance,sunlight,drought_tolerant,salt_tolerant,thorny,edible_fruit,medicinal,hardiness_min,hardiness_max,is_perennial,attracts_birds,attracts_butterflies,_distance
326,prairie milkweed,['Asclepias sullivantii'],herbes,low,full_sun,True,False,False,False,True,3.0,7.0,True,False,True,1.732051
1387,wood fern,['Dryopteris crassirhizoma'],herbes,low,full_shade,True,False,False,False,True,5.0,8.0,True,False,False,1.732051
998,corydalis,['Corydalis lutea'],herbes,low,full_shade,False,False,False,False,True,5.0,7.0,True,False,False,1.732051
1200,crinkled hair grass,['Deschampsia flexuosa'],herbes,low,part_shade,True,True,False,False,False,4.0,7.0,True,False,False,1.732051
852,horse balm,['Collinsonia canadensis'],herbes,low,part_shade,True,True,False,False,True,4.0,8.0,True,False,False,1.732051
1397,goldie's shield fern,['Dryopteris goldiana'],herbes,low,full_shade,True,False,False,False,False,3.0,7.0,True,False,False,1.732051
571,pigsqueak,"[""Bergenia cordifolia 'Winterglut' WINTER GLOW""]",herbes,low,full_shade,True,False,False,False,True,4.0,8.0,True,False,False,1.732051
