In [5]:

import pandas as pd
import numpy as np
import keras
from tensorflow.keras.optimizers import RMSprop,Adam
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
import random
import time
import os

In [6]:
df=pd.read_csv("dataset.csv", sep=",")

In [8]:
Marque = df.loc[:,"Marque"]
Ref = df.loc[:,"Reference"]

In [10]:
Marque

0      Etat Libre dOrange
1             Ted Lapidus
2                 Lancome
3                   Guess
4                 Pitbull
              ...        
501                 Kenzo
502       Lolita Lempicka
503         Salvador Dali
504             Halloween
505              Oriflame
Name: Marque, Length: 506, dtype: object

In [69]:
def clean(string):
    a1=string.replace("Eau de Parfum","")
    a2=a1.replace("mixte","")
    a3=a2.replace("femme","")
    a4=a3.replace("homme","")
    a5=a4.replace("Eau de Toilette","")
    a6=a5.replace("pour","")
    return a6.rstrip()

In [72]:
Marque_input=[i for i in Marque]
Ref_input=[clean(i) for i in Ref]
Ref_input

['Hermann a Mes Cotes Me Paraissait Une Ombre',
 'Oud Blanc',
 'La Nuit Tresor Intense',
 'Seductive Noir',
 'Pitubull Woman',
 'Fairy Love',
 'Hot Couture',
 'Tag Him huile parfumee',
 'Sweet Sixteen Coral',
 'Icon Collection 90 Pure White',
 'Indonesian Oud',
 'Italian Bergamot',
 'Portrayal',
 'Incense Wood',
 'Ciel',
 'Splendida Patchouli Tentation',
 'Sheer Beauty',
 'Glam Jasmine',
 'Wanted By Night',
 'Marwah',
 'Adwaa Al Sharq huile parfumee',
 'Donna',
 'Amor Amor',
 'Pour Homme',
 'Charlie Blue Eau Fraiche',
 'Yellow Diamond',
 'Thalasso Therapy',
 'Remarquables Tinhare',
 'Inara White',
 'Jontue eau de cologne',
 'Promise Me Intense',
 'Kitten Fur eau de cologne',
 'EXOTIC 74',
 'Ambre',
 'Shalimar',
 'Miss Ap',
 'Pour Elle',
 'Posh On The Green',
 'Black for Her',
 'Omnia Coral',
 'Meliora',
 'La Yuqawam',
 'Green Tea eau de cologne',
 "Katy Perry's Mad Love",
 'Mora Bella',
 "L'Initial",
 'Eros',
 'Paris Oud',
 'Mon Guerlain Bloom of Rose',
 'Fever',
 'REPLICA Sailing Day'

In [73]:
concat_marque = '\n'.join(Marque_input).lower()
concat_Ref = '\n'.join(Ref_input).lower()

In [74]:
chars_marque,chars_Ref = sorted(list(set(concat_marque))),sorted(list(set(concat_Ref)))

In [75]:
num_char_marque,num_char_ref=len(chars_marque),len(chars_Ref)

In [76]:
char2idx_marque = dict((c, i) for i, c in enumerate(chars_marque))
idx2char_marque = dict((i, c) for i, c in enumerate(chars_marque))

char2idx_ref = dict((c, i) for i, c in enumerate(chars_Ref))
idx2char_ref = dict((i, c) for i, c in enumerate(chars_Ref))

In [77]:
max_sequence_length_marque = max([len(name) for name in Marque_input])
max_sequence_length_ref = max([len(name) for name in Ref_input])


In [78]:
print('--------------------------Marque---------------------------------------')
print('Total chars: {}'.format(num_char_marque))
print('Corpus length:', len(concat_marque))
print('Number of names: ', len(Marque_input))
print('Longest name: ', max_sequence_length_marque)
print('--------------------------Reference---------------------------------------')
print('Total chars: {}'.format(num_char_marque))
print('Corpus length:', len(concat_Ref))
print('Number of names: ', len(Ref_input))
print('Longest name: ', max_sequence_length_ref)

--------------------------Marque---------------------------------------
Total chars: 36
Corpus length: 6109
Number of names:  506
Longest name:  27
--------------------------Reference---------------------------------------
Total chars: 36
Corpus length: 8363
Number of names:  506
Longest name:  59


In [79]:
step_length = 1   
epochs = 50       
batch_size = 64    
latent_dim = 128   
dropout_rate = 0.2 
verbosity = 0     
gen_amount = 10 

In [32]:
sequences = []
next_chars = []
for i in range(0, len(concat_marque) -max_sequence_length_marque, step_length):
    sequences.append(concat_marque[i: i + max_sequence_length_marque])
    next_chars.append(concat_marque[i + max_sequence_length_marque])

num_sequences = len(sequences)

for i in range(20):
    print('X=[{}]   y=[{}]'.replace('\n', ' ').format(sequences[i], next_chars[i]).replace('\n', ' '))

X=[etat libre dorange ted lapi]   y=[d]
X=[tat libre dorange ted lapid]   y=[u]
X=[at libre dorange ted lapidu]   y=[s]
X=[t libre dorange ted lapidus]   y=[ ]
X=[ libre dorange ted lapidus ]   y=[l]
X=[libre dorange ted lapidus l]   y=[a]
X=[ibre dorange ted lapidus la]   y=[n]
X=[bre dorange ted lapidus lan]   y=[c]
X=[re dorange ted lapidus lanc]   y=[o]
X=[e dorange ted lapidus lanco]   y=[m]
X=[ dorange ted lapidus lancom]   y=[e]
X=[dorange ted lapidus lancome]   y=[ ]
X=[orange ted lapidus lancome ]   y=[g]
X=[range ted lapidus lancome g]   y=[u]
X=[ange ted lapidus lancome gu]   y=[e]
X=[nge ted lapidus lancome gue]   y=[s]
X=[ge ted lapidus lancome gues]   y=[s]
X=[e ted lapidus lancome guess]   y=[ ]
X=[ ted lapidus lancome guess ]   y=[p]
X=[ted lapidus lancome guess p]   y=[i]


In [35]:
X = np.zeros((num_sequences, max_sequence_length_marque, num_char_marque), dtype=np.bool)
Y = np.zeros((num_sequences, num_char_marque), dtype=np.bool)

for i, sequence in enumerate(sequences):
    for j, char in enumerate(sequence):
        X[i, j, char2idx_marque[char]] = 1
    Y[i, char2idx_marque[next_chars[i]]] = 1
    
print('X shape: {}'.format(X.shape))
print('Y shape: {}'.format(Y.shape))
#print(X[0])
#print(Y[0])

X shape: (6082, 27, 36)
Y shape: (6082, 36)


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  X = np.zeros((num_sequences, max_sequence_length_marque, num_char_marque), dtype=np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  Y = np.zeros((num_sequences, num_char_marque), dtype=np.bool)


In [36]:
model = Sequential()
model.add(LSTM(latent_dim, 
               input_shape=(max_sequence_length_marque, num_char_marque),  
               recurrent_dropout=dropout_rate))
model.add(Dense(units=num_char_marque, activation='softmax'))

optimizer = Adam(lr=0.01)
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer)

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 128)               84480     
                                                                 
 dense (Dense)               (None, 36)                4644      
                                                                 
Total params: 89,124
Trainable params: 89,124
Non-trainable params: 0
_________________________________________________________________


  super(Adam, self).__init__(name, **kwargs)


In [37]:
start = time.time()
print('Start training for {} epochs'.format(epochs))
history = model.fit(X, Y, epochs=epochs, batch_size=batch_size, verbose=verbosity)
end = time.time()
print('Finished training - time elapsed:', (end - start)/60, 'min')

Start training for 50 epochs
Finished training - time elapsed: 2.8524643182754517 min


In [42]:
def generate_marque():
    r = np.random.choice(len(concat_marque)-1)
    r2 = r-max_sequence_length_marque

    sequence = concat_marque[r2:r-1] + '\n'

    new_names = []
    #print(sequence)
    while len(new_names) < 1:

        x = np.zeros((1, max_sequence_length_marque, num_char_marque))
        for i, char in enumerate(sequence):
            x[0, i, char2idx_marque[char]] = 1

        probs = model.predict(x, verbose=0)[0]
        probs /= probs.sum()
        next_idx = np.random.choice(len(probs), p=probs)   
        next_char = idx2char_marque[next_idx]   
        sequence = sequence[1:] + next_char

        if next_char == '\n':

            gen_name = [name for name in sequence.split('\n')][1]

            if len(gen_name) > 4 and gen_name[0] == gen_name[1]:
                gen_name = gen_name[1:]

            if len(gen_name) > 4 and len(gen_name) <= 7:

                if gen_name not in Marque_input + new_names:
                    new_names.append(gen_name.capitalize())
                    return gen_name.capitalize()

<p> Generate nom d'une marque </p>

In [45]:
#for _ in range(200):
#    if generate_marque() in Marque_input:
#        pass
#    else:
 #       print(generate_marque())
#------------------------------------Nom intéressant à choisir-----------------------------------------------
#Camuto
#Gabred
#Licobs
#Revlon

In [80]:
sequences = []
next_chars = []
for i in range(0, len(concat_Ref) -max_sequence_length_ref, step_length):
    sequences.append(concat_Ref[i: i + max_sequence_length_ref])
    next_chars.append(concat_Ref[i + max_sequence_length_ref])

num_sequences = len(sequences)

for i in range(20):
    print('X=[{}]   y=[{}]'.replace('\n', ' ').format(sequences[i], next_chars[i]).replace('\n', ' '))

X=[hermann a mes cotes me paraissait une ombre oud blanc la nu]   y=[i]
X=[ermann a mes cotes me paraissait une ombre oud blanc la nui]   y=[t]
X=[rmann a mes cotes me paraissait une ombre oud blanc la nuit]   y=[ ]
X=[mann a mes cotes me paraissait une ombre oud blanc la nuit ]   y=[t]
X=[ann a mes cotes me paraissait une ombre oud blanc la nuit t]   y=[r]
X=[nn a mes cotes me paraissait une ombre oud blanc la nuit tr]   y=[e]
X=[n a mes cotes me paraissait une ombre oud blanc la nuit tre]   y=[s]
X=[ a mes cotes me paraissait une ombre oud blanc la nuit tres]   y=[o]
X=[a mes cotes me paraissait une ombre oud blanc la nuit treso]   y=[r]
X=[ mes cotes me paraissait une ombre oud blanc la nuit tresor]   y=[ ]
X=[mes cotes me paraissait une ombre oud blanc la nuit tresor ]   y=[i]
X=[es cotes me paraissait une ombre oud blanc la nuit tresor i]   y=[n]
X=[s cotes me paraissait une ombre oud blanc la nuit tresor in]   y=[t]
X=[ cotes me paraissait une ombre oud blanc la nuit tresor int] 

In [81]:
X = np.zeros((num_sequences, max_sequence_length_ref, num_char_ref), dtype=np.bool)
Y = np.zeros((num_sequences, num_char_ref), dtype=np.bool)

for i, sequence in enumerate(sequences):
    for j, char in enumerate(sequence):
        X[i, j, char2idx_ref[char]] = 1
    Y[i, char2idx_ref[next_chars[i]]] = 1
    
print('X shape: {}'.format(X.shape))
print('Y shape: {}'.format(Y.shape))
#print(X[0])
#print(Y[0])

X shape: (8304, 59, 39)
Y shape: (8304, 39)


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  X = np.zeros((num_sequences, max_sequence_length_ref, num_char_ref), dtype=np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  Y = np.zeros((num_sequences, num_char_ref), dtype=np.bool)


In [82]:
model2 = Sequential()
model2.add(LSTM(latent_dim, 
               input_shape=(max_sequence_length_ref, num_char_ref),  
               recurrent_dropout=dropout_rate))
model2.add(Dense(units=num_char_ref, activation='softmax'))

optimizer = Adam(lr=0.01)
model2.compile(loss='categorical_crossentropy',
              optimizer=optimizer)

model2.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_5 (LSTM)               (None, 128)               86016     
                                                                 
 dense_5 (Dense)             (None, 39)                5031      
                                                                 
Total params: 91,047
Trainable params: 91,047
Non-trainable params: 0
_________________________________________________________________


In [83]:
start = time.time()
print('Start training for {} epochs'.format(epochs))
history = model2.fit(X, Y, epochs=epochs, batch_size=batch_size, verbose=verbosity)
end = time.time()
print('Finished training - time elapsed:', (end - start)/60, 'min')

Start training for 50 epochs
Finished training - time elapsed: 12.147922281424204 min


In [86]:
def generate_Ref():
    r = np.random.choice(len(concat_Ref )-1)
    r2 = r-max_sequence_length_ref

    sequence = concat_Ref[r2:r-1] + '\n'

    new_names = []
    #print(sequence)
    while len(new_names) < 1:

        x = np.zeros((1, max_sequence_length_ref, num_char_ref))
        for i, char in enumerate(sequence):
            x[0, i, char2idx_ref[char]] = 1

        probs = model2.predict(x, verbose=0)[0]
        probs /= probs.sum()
        next_idx = np.random.choice(len(probs), p=probs)   
        next_char = idx2char_ref[next_idx]   
        sequence = sequence[1:] + next_char

        if next_char == '\n':

            gen_name = [name for name in sequence.split('\n')][1]

            if len(gen_name) > 4 and gen_name[0] == gen_name[1]:
                gen_name = gen_name[1:]

            if len(gen_name) > 4 and len(gen_name) <= 7:

                if gen_name not in Ref_input + new_names:
                    new_names.append(gen_name.capitalize())
                    return gen_name.capitalize()

In [95]:
import random
for _ in range(500):
    word=""
    for i in range(random.randint(1,4)):
        word+=generate_Ref()+" "
    print(word)
#Forever Suprême
#Vences Faith Ambre Bouquet 
#J'adore Ambre 
#Intense Flowers 
#Miris Paris 
#Elixir Rebella 
#Celline Man x Olyfet 
#Flowers La vie 
#Vanilla Legend

Dione 
Tanger Oudh 36 Petiver Shaghaf 
Fever Dnine 
Rique Kashaf Arburu 
Fantasy Mirit Lavance 
Dazzle  In yve Rebelle 
Lover 
Arcipel 
Nythe 
Flowers La vie 
Haybeti Meliora Iscasha Citrus 
Rebelle 
Valique Scherle Faith Daore 
Y oud 
J'adore On girl Encant 
Hayasia Pegaso Cinema 
Lilac Seduc Sport 
Erosi Vanilla Degend 
Bruth Shaghaf Catina Incense 
Shantal Lomar 
J'adore Flowers 
Rebelle Lilac Wante 
Jonuer 
Dione Miancas Incense 
Forever Cones Knthe 
Vanill Vanilla Coupe Intense 
Sport 
Hayba Mignod Bouche 
Imone Hayapi Prick 
In red Pegaso Lonuois Guess 
Hayba 
Women Hayba Celline Debut 
Gress Femme Shaghaf 
Achas Journey Night Hayba 
Man x Blush 
Giorgio Green Swhet 


KeyboardInterrupt: 