In [1]:
import joblib
from sklearn import decomposition

# Parameter Selection for NMF 1

Now we look at the more advanced task of parameter selection for NMF topic modelling - namely, selecting a useful value for the number of topics *k*.

First, let's load the TF-IDF normalised document-term matrix and list of terms that we stored earlier using *Joblib*:

In [2]:
# Restore the objects saved by the earlier preprocessing step.
# NOTE: joblib.load unpickles the file, so only load files from a trusted source.
df, data_samples, tfidf, tfidf_feature_names = joblib.load("df-data_samples-tfidf-tfidf_names.pkl")
n_docs, n_terms = tfidf.shape[0], tfidf.shape[1]
print("Loaded %d X %d document-term matrix" % (n_docs, n_terms))

Loaded 4691 X 1292 document-term matrix


### Create the Topic Models

A common approach for parameter selection is to measure and compare the topic coherence of models generated for different values of *k*.

We need to start by pre-specifying an initial range of "sensible" values:

In [3]:
# Inclusive bounds of the range of topic counts k to try.
kmin = 2
kmax = 40

In [4]:
def list_pas_iter(pas, iterat, decimals=2):
    """Build an evenly spaced grid of values starting at 0.

    Parameters
    ----------
    pas : float
        Step between two consecutive values.
    iterat : int
        Number of steps to take. The result holds ``iterat + 1`` values;
        a non-positive ``iterat`` yields just ``[0]``.
    decimals : int, optional
        Number of decimal places each value is rounded to. Defaults to 2,
        which suits steps such as 0.1 or 0.01; pass a larger value for
        finer steps (e.g. ``decimals=3`` for ``pas=0.001``), otherwise
        neighbouring values collapse onto the same rounded number.

    Returns
    -------
    list
        ``[0, round(pas, decimals), ..., round(iterat * pas, decimals)]``.
    """
    # The starting point is kept as the integer 0, as in the original grid.
    values = [0]
    # Compute each point as step * pas instead of repeatedly adding pas:
    # repeated floating-point addition accumulates error from one value
    # to the next, whereas a single multiplication does not.
    values.extend(round(step * pas, decimals) for step in range(1, iterat + 1))
    return values

In [5]:
# Regularisation strengths to try: a fine grid from 0 to 0.31 in steps
# of 0.01, followed by a coarser tail from 0.4 up to 1.
alph_list = list_pas_iter(0.01, 31)
alph_list.extend([0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])

In [6]:
# Show the full grid of alpha values as a sanity check.
alph_list

[0,
 0.01,
 0.02,
 0.03,
 0.04,
 0.05,
 0.06,
 0.07,
 0.08,
 0.09,
 0.1,
 0.11,
 0.12,
 0.13,
 0.14,
 0.15,
 0.16,
 0.17,
 0.18,
 0.19,
 0.2,
 0.21,
 0.22,
 0.23,
 0.24,
 0.25,
 0.26,
 0.27,
 0.28,
 0.29,
 0.3,
 0.31,
 0.4,
 0.5,
 0.6,
 0.7,
 0.8,
 0.9,
 1]

In [7]:
# L1/L2 mixing ratios to try: 0 to 1 inclusive, in steps of 0.1.
l1_list = list_pas_iter(pas=0.1, iterat=10)

Apply NMF for every combination of *k*, `alpha` and `l1_ratio` in these grids:

In [8]:
# Fit one NMF model for every (k, alpha, l1_ratio) combination and keep the
# factors and reconstruction error of each fit in `topic_models`.
# NOTE(review): every W and H matrix is kept in memory for the whole grid
# (len(range(kmin, kmax+1)) * len(alph_list) * len(l1_list) fits); this may
# be large - confirm it fits in RAM before running the full sweep.
# NOTE(review): the `alpha` argument of NMF was deprecated in scikit-learn 1.0
# in favour of `alpha_W` / `alpha_H` - confirm against the installed version.
topic_models = []
# try each value of k
for k in range(kmin,kmax+1):
    # ... combined with each regularisation strength
    for alph in alph_list:
        # ... and each L1/L2 mixing ratio
        for l1 in l1_list:
            print("Applying NMF for k=%d, alpha =%.2f, l1 =%.1f..." % (k, alph,l1) )
            # run NMF; the fixed random_state keeps runs repeatable
            model = decomposition.NMF( init="nndsvd", n_components=k, alpha=alph, l1_ratio= l1, random_state = 8220)
            # W is the result of fit_transform on the document-term matrix
            W = model.fit_transform( tfidf )
            # H holds the fitted components of the model
            H = model.components_
            err = model.reconstruction_err_
            # store for later as (k, alpha, l1_ratio, W, H, reconstruction error)
            topic_models.append( (k,alph,l1,W,H,err) )

Applying NMF for k=2, alpha =0.00, l1 = =0.0...
Applying NMF for k=2, alpha =0.00, l1 = =0.1...
Applying NMF for k=2, alpha =0.00, l1 = =0.2...
Applying NMF for k=2, alpha =0.00, l1 = =0.3...
Applying NMF for k=2, alpha =0.00, l1 = =0.4...
Applying NMF for k=2, alpha =0.00, l1 = =0.5...
Applying NMF for k=2, alpha =0.00, l1 = =0.6...
Applying NMF for k=2, alpha =0.00, l1 = =0.7...
Applying NMF for k=2, alpha =0.00, l1 = =0.8...
Applying NMF for k=2, alpha =0.00, l1 = =0.9...
Applying NMF for k=2, alpha =0.00, l1 = =1.0...
Applying NMF for k=2, alpha =0.01, l1 = =0.0...




Applying NMF for k=2, alpha =0.01, l1 = =0.1...




Applying NMF for k=2, alpha =0.01, l1 = =0.2...




Applying NMF for k=2, alpha =0.01, l1 = =0.3...




Applying NMF for k=2, alpha =0.01, l1 = =0.4...




Applying NMF for k=2, alpha =0.01, l1 = =0.5...




Applying NMF for k=2, alpha =0.01, l1 = =0.6...




Applying NMF for k=2, alpha =0.01, l1 = =0.7...




Applying NMF for k=2, alpha =0.01, l1 = =0.8...




Applying NMF for k=2, alpha =0.01, l1 = =0.9...




Applying NMF for k=2, alpha =0.01, l1 = =1.0...




Applying NMF for k=2, alpha =0.02, l1 = =0.0...




Applying NMF for k=2, alpha =0.02, l1 = =0.1...




Applying NMF for k=2, alpha =0.02, l1 = =0.2...




Applying NMF for k=2, alpha =0.02, l1 = =0.3...




Applying NMF for k=2, alpha =0.02, l1 = =0.4...




Applying NMF for k=2, alpha =0.02, l1 = =0.5...




Applying NMF for k=2, alpha =0.02, l1 = =0.6...




Applying NMF for k=2, alpha =0.02, l1 = =0.7...




Applying NMF for k=2, alpha =0.02, l1 = =0.8...




Applying NMF for k=2, alpha =0.02, l1 = =0.9...




Applying NMF for k=2, alpha =0.02, l1 = =1.0...




Applying NMF for k=2, alpha =0.03, l1 = =0.0...




Applying NMF for k=2, alpha =0.03, l1 = =0.1...




Applying NMF for k=2, alpha =0.03, l1 = =0.2...




Applying NMF for k=2, alpha =0.03, l1 = =0.3...




Applying NMF for k=2, alpha =0.03, l1 = =0.4...




Applying NMF for k=2, alpha =0.03, l1 = =0.5...




Applying NMF for k=2, alpha =0.03, l1 = =0.6...




Applying NMF for k=2, alpha =0.03, l1 = =0.7...




Applying NMF for k=2, alpha =0.03, l1 = =0.8...




Applying NMF for k=2, alpha =0.03, l1 = =0.9...




Applying NMF for k=2, alpha =0.03, l1 = =1.0...




Applying NMF for k=2, alpha =0.04, l1 = =0.0...




Applying NMF for k=2, alpha =0.04, l1 = =0.1...




Applying NMF for k=2, alpha =0.04, l1 = =0.2...




Applying NMF for k=2, alpha =0.04, l1 = =0.3...




Applying NMF for k=2, alpha =0.04, l1 = =0.4...




Applying NMF for k=2, alpha =0.04, l1 = =0.5...




Applying NMF for k=2, alpha =0.04, l1 = =0.6...




Applying NMF for k=2, alpha =0.04, l1 = =0.7...




Applying NMF for k=2, alpha =0.04, l1 = =0.8...




Applying NMF for k=2, alpha =0.04, l1 = =0.9...
Applying NMF for k=2, alpha =0.04, l1 = =1.0...
Applying NMF for k=2, alpha =0.05, l1 = =0.0...




Applying NMF for k=2, alpha =0.05, l1 = =0.1...




Applying NMF for k=2, alpha =0.05, l1 = =0.2...




Applying NMF for k=2, alpha =0.05, l1 = =0.3...




Applying NMF for k=2, alpha =0.05, l1 = =0.4...




Applying NMF for k=2, alpha =0.05, l1 = =0.5...




Applying NMF for k=2, alpha =0.05, l1 = =0.6...
Applying NMF for k=2, alpha =0.05, l1 = =0.7...
Applying NMF for k=2, alpha =0.05, l1 = =0.8...
Applying NMF for k=2, alpha =0.05, l1 = =0.9...
Applying NMF for k=2, alpha =0.05, l1 = =1.0...
Applying NMF for k=2, alpha =0.06, l1 = =0.0...




Applying NMF for k=2, alpha =0.06, l1 = =0.1...




Applying NMF for k=2, alpha =0.06, l1 = =0.2...




Applying NMF for k=2, alpha =0.06, l1 = =0.3...




Applying NMF for k=2, alpha =0.06, l1 = =0.4...
Applying NMF for k=2, alpha =0.06, l1 = =0.5...
Applying NMF for k=2, alpha =0.06, l1 = =0.6...
Applying NMF for k=2, alpha =0.06, l1 = =0.7...
Applying NMF for k=2, alpha =0.06, l1 = =0.8...
Applying NMF for k=2, alpha =0.06, l1 = =0.9...
Applying NMF for k=2, alpha =0.06, l1 = =1.0...
Applying NMF for k=2, alpha =0.07, l1 = =0.0...




Applying NMF for k=2, alpha =0.07, l1 = =0.1...




Applying NMF for k=2, alpha =0.07, l1 = =0.2...
Applying NMF for k=2, alpha =0.07, l1 = =0.3...
Applying NMF for k=2, alpha =0.07, l1 = =0.4...
Applying NMF for k=2, alpha =0.07, l1 = =0.5...
Applying NMF for k=2, alpha =0.07, l1 = =0.6...
Applying NMF for k=2, alpha =0.07, l1 = =0.7...
Applying NMF for k=2, alpha =0.07, l1 = =0.8...
Applying NMF for k=2, alpha =0.07, l1 = =0.9...
Applying NMF for k=2, alpha =0.07, l1 = =1.0...
Applying NMF for k=2, alpha =0.08, l1 = =0.0...




Applying NMF for k=2, alpha =0.08, l1 = =0.1...
Applying NMF for k=2, alpha =0.08, l1 = =0.2...
Applying NMF for k=2, alpha =0.08, l1 = =0.3...
Applying NMF for k=2, alpha =0.08, l1 = =0.4...
Applying NMF for k=2, alpha =0.08, l1 = =0.5...
Applying NMF for k=2, alpha =0.08, l1 = =0.6...
Applying NMF for k=2, alpha =0.08, l1 = =0.7...
Applying NMF for k=2, alpha =0.08, l1 = =0.8...
Applying NMF for k=2, alpha =0.08, l1 = =0.9...
Applying NMF for k=2, alpha =0.08, l1 = =1.0...
Applying NMF for k=2, alpha =0.09, l1 = =0.0...
Applying NMF for k=2, alpha =0.09, l1 = =0.1...
Applying NMF for k=2, alpha =0.09, l1 = =0.2...
Applying NMF for k=2, alpha =0.09, l1 = =0.3...
Applying NMF for k=2, alpha =0.09, l1 = =0.4...
Applying NMF for k=2, alpha =0.09, l1 = =0.5...
Applying NMF for k=2, alpha =0.09, l1 = =0.6...
Applying NMF for k=2, alpha =0.09, l1 = =0.7...
Applying NMF for k=2, alpha =0.09, l1 = =0.8...
Applying NMF for k=2, alpha =0.09, l1 = =0.9...
Applying NMF for k=2, alpha =0.09, l1 = 

Applying NMF for k=2, alpha =0.23, l1 = =0.7...
Applying NMF for k=2, alpha =0.23, l1 = =0.8...
Applying NMF for k=2, alpha =0.23, l1 = =0.9...
Applying NMF for k=2, alpha =0.23, l1 = =1.0...
Applying NMF for k=2, alpha =0.24, l1 = =0.0...
Applying NMF for k=2, alpha =0.24, l1 = =0.1...
Applying NMF for k=2, alpha =0.24, l1 = =0.2...
Applying NMF for k=2, alpha =0.24, l1 = =0.3...
Applying NMF for k=2, alpha =0.24, l1 = =0.4...
Applying NMF for k=2, alpha =0.24, l1 = =0.5...
Applying NMF for k=2, alpha =0.24, l1 = =0.6...
Applying NMF for k=2, alpha =0.24, l1 = =0.7...
Applying NMF for k=2, alpha =0.24, l1 = =0.8...
Applying NMF for k=2, alpha =0.24, l1 = =0.9...
Applying NMF for k=2, alpha =0.24, l1 = =1.0...
Applying NMF for k=2, alpha =0.25, l1 = =0.0...
Applying NMF for k=2, alpha =0.25, l1 = =0.1...
Applying NMF for k=2, alpha =0.25, l1 = =0.2...
Applying NMF for k=2, alpha =0.25, l1 = =0.3...
Applying NMF for k=2, alpha =0.25, l1 = =0.4...
Applying NMF for k=2, alpha =0.25, l1 = 



Applying NMF for k=2, alpha =1.00, l1 = =0.0...
Applying NMF for k=2, alpha =1.00, l1 = =0.1...
Applying NMF for k=2, alpha =1.00, l1 = =0.2...
Applying NMF for k=2, alpha =1.00, l1 = =0.3...
Applying NMF for k=2, alpha =1.00, l1 = =0.4...
Applying NMF for k=2, alpha =1.00, l1 = =0.5...
Applying NMF for k=2, alpha =1.00, l1 = =0.6...
Applying NMF for k=2, alpha =1.00, l1 = =0.7...
Applying NMF for k=2, alpha =1.00, l1 = =0.8...
Applying NMF for k=2, alpha =1.00, l1 = =0.9...
Applying NMF for k=2, alpha =1.00, l1 = =1.0...




Applying NMF for k=3, alpha =0.00, l1 = =0.0...
Applying NMF for k=3, alpha =0.00, l1 = =0.1...
Applying NMF for k=3, alpha =0.00, l1 = =0.2...
Applying NMF for k=3, alpha =0.00, l1 = =0.3...
Applying NMF for k=3, alpha =0.00, l1 = =0.4...
Applying NMF for k=3, alpha =0.00, l1 = =0.5...
Applying NMF for k=3, alpha =0.00, l1 = =0.6...
Applying NMF for k=3, alpha =0.00, l1 = =0.7...
Applying NMF for k=3, alpha =0.00, l1 = =0.8...
Applying NMF for k=3, alpha =0.00, l1 = =0.9...
Applying NMF for k=3, alpha =0.00, l1 = =1.0...
Applying NMF for k=3, alpha =0.01, l1 = =0.0...




Applying NMF for k=3, alpha =0.01, l1 = =0.1...




Applying NMF for k=3, alpha =0.01, l1 = =0.2...




Applying NMF for k=3, alpha =0.01, l1 = =0.3...




Applying NMF for k=3, alpha =0.01, l1 = =0.4...




Applying NMF for k=3, alpha =0.01, l1 = =0.5...




Applying NMF for k=3, alpha =0.01, l1 = =0.6...




Applying NMF for k=3, alpha =0.01, l1 = =0.7...




Applying NMF for k=3, alpha =0.01, l1 = =0.8...




Applying NMF for k=3, alpha =0.01, l1 = =0.9...




Applying NMF for k=3, alpha =0.01, l1 = =1.0...




Applying NMF for k=3, alpha =0.02, l1 = =0.0...




Applying NMF for k=3, alpha =0.02, l1 = =0.1...




Applying NMF for k=3, alpha =0.02, l1 = =0.2...




Applying NMF for k=3, alpha =0.02, l1 = =0.3...




Applying NMF for k=3, alpha =0.02, l1 = =0.4...




Applying NMF for k=3, alpha =0.02, l1 = =0.5...




Applying NMF for k=3, alpha =0.02, l1 = =0.6...




Applying NMF for k=3, alpha =0.02, l1 = =0.7...




Applying NMF for k=3, alpha =0.02, l1 = =0.8...




Applying NMF for k=3, alpha =0.02, l1 = =0.9...




Applying NMF for k=3, alpha =0.02, l1 = =1.0...




Applying NMF for k=3, alpha =0.03, l1 = =0.0...




Applying NMF for k=3, alpha =0.03, l1 = =0.1...




Applying NMF for k=3, alpha =0.03, l1 = =0.2...




Applying NMF for k=3, alpha =0.03, l1 = =0.3...




Applying NMF for k=3, alpha =0.03, l1 = =0.4...




Applying NMF for k=3, alpha =0.03, l1 = =0.5...




Applying NMF for k=3, alpha =0.03, l1 = =0.6...




Applying NMF for k=3, alpha =0.03, l1 = =0.7...




Applying NMF for k=3, alpha =0.03, l1 = =0.8...
Applying NMF for k=3, alpha =0.03, l1 = =0.9...
Applying NMF for k=3, alpha =0.03, l1 = =1.0...
Applying NMF for k=3, alpha =0.04, l1 = =0.0...




Applying NMF for k=3, alpha =0.04, l1 = =0.1...




Applying NMF for k=3, alpha =0.04, l1 = =0.2...




Applying NMF for k=3, alpha =0.04, l1 = =0.3...




Applying NMF for k=3, alpha =0.04, l1 = =0.4...
Applying NMF for k=3, alpha =0.04, l1 = =0.5...
Applying NMF for k=3, alpha =0.04, l1 = =0.6...
Applying NMF for k=3, alpha =0.04, l1 = =0.7...
Applying NMF for k=3, alpha =0.04, l1 = =0.8...
Applying NMF for k=3, alpha =0.04, l1 = =0.9...
Applying NMF for k=3, alpha =0.04, l1 = =1.0...
Applying NMF for k=3, alpha =0.05, l1 = =0.0...




Applying NMF for k=3, alpha =0.05, l1 = =0.1...




Applying NMF for k=3, alpha =0.05, l1 = =0.2...




Applying NMF for k=3, alpha =0.05, l1 = =0.3...
Applying NMF for k=3, alpha =0.05, l1 = =0.4...
Applying NMF for k=3, alpha =0.05, l1 = =0.5...
Applying NMF for k=3, alpha =0.05, l1 = =0.6...
Applying NMF for k=3, alpha =0.05, l1 = =0.7...
Applying NMF for k=3, alpha =0.05, l1 = =0.8...
Applying NMF for k=3, alpha =0.05, l1 = =0.9...
Applying NMF for k=3, alpha =0.05, l1 = =1.0...
Applying NMF for k=3, alpha =0.06, l1 = =0.0...




Applying NMF for k=3, alpha =0.06, l1 = =0.1...
Applying NMF for k=3, alpha =0.06, l1 = =0.2...
Applying NMF for k=3, alpha =0.06, l1 = =0.3...
Applying NMF for k=3, alpha =0.06, l1 = =0.4...
Applying NMF for k=3, alpha =0.06, l1 = =0.5...
Applying NMF for k=3, alpha =0.06, l1 = =0.6...
Applying NMF for k=3, alpha =0.06, l1 = =0.7...
Applying NMF for k=3, alpha =0.06, l1 = =0.8...
Applying NMF for k=3, alpha =0.06, l1 = =0.9...
Applying NMF for k=3, alpha =0.06, l1 = =1.0...
Applying NMF for k=3, alpha =0.07, l1 = =0.0...




Applying NMF for k=3, alpha =0.07, l1 = =0.1...
Applying NMF for k=3, alpha =0.07, l1 = =0.2...
Applying NMF for k=3, alpha =0.07, l1 = =0.3...
Applying NMF for k=3, alpha =0.07, l1 = =0.4...
Applying NMF for k=3, alpha =0.07, l1 = =0.5...
Applying NMF for k=3, alpha =0.07, l1 = =0.6...
Applying NMF for k=3, alpha =0.07, l1 = =0.7...
Applying NMF for k=3, alpha =0.07, l1 = =0.8...
Applying NMF for k=3, alpha =0.07, l1 = =0.9...
Applying NMF for k=3, alpha =0.07, l1 = =1.0...
Applying NMF for k=3, alpha =0.08, l1 = =0.0...
Applying NMF for k=3, alpha =0.08, l1 = =0.1...
Applying NMF for k=3, alpha =0.08, l1 = =0.2...
Applying NMF for k=3, alpha =0.08, l1 = =0.3...
Applying NMF for k=3, alpha =0.08, l1 = =0.4...
Applying NMF for k=3, alpha =0.08, l1 = =0.5...
Applying NMF for k=3, alpha =0.08, l1 = =0.6...
Applying NMF for k=3, alpha =0.08, l1 = =0.7...
Applying NMF for k=3, alpha =0.08, l1 = =0.8...
Applying NMF for k=3, alpha =0.08, l1 = =0.9...
Applying NMF for k=3, alpha =0.08, l1 = 

Applying NMF for k=3, alpha =0.22, l1 = =0.7...
Applying NMF for k=3, alpha =0.22, l1 = =0.8...
Applying NMF for k=3, alpha =0.22, l1 = =0.9...
Applying NMF for k=3, alpha =0.22, l1 = =1.0...
Applying NMF for k=3, alpha =0.23, l1 = =0.0...
Applying NMF for k=3, alpha =0.23, l1 = =0.1...
Applying NMF for k=3, alpha =0.23, l1 = =0.2...
Applying NMF for k=3, alpha =0.23, l1 = =0.3...
Applying NMF for k=3, alpha =0.23, l1 = =0.4...
Applying NMF for k=3, alpha =0.23, l1 = =0.5...
Applying NMF for k=3, alpha =0.23, l1 = =0.6...
Applying NMF for k=3, alpha =0.23, l1 = =0.7...
Applying NMF for k=3, alpha =0.23, l1 = =0.8...
Applying NMF for k=3, alpha =0.23, l1 = =0.9...
Applying NMF for k=3, alpha =0.23, l1 = =1.0...
Applying NMF for k=3, alpha =0.24, l1 = =0.0...
Applying NMF for k=3, alpha =0.24, l1 = =0.1...
Applying NMF for k=3, alpha =0.24, l1 = =0.2...
Applying NMF for k=3, alpha =0.24, l1 = =0.3...
Applying NMF for k=3, alpha =0.24, l1 = =0.4...
Applying NMF for k=3, alpha =0.24, l1 = 



Applying NMF for k=3, alpha =1.00, l1 = =0.0...
Applying NMF for k=3, alpha =1.00, l1 = =0.1...
Applying NMF for k=3, alpha =1.00, l1 = =0.2...
Applying NMF for k=3, alpha =1.00, l1 = =0.3...
Applying NMF for k=3, alpha =1.00, l1 = =0.4...
Applying NMF for k=3, alpha =1.00, l1 = =0.5...
Applying NMF for k=3, alpha =1.00, l1 = =0.6...
Applying NMF for k=3, alpha =1.00, l1 = =0.7...
Applying NMF for k=3, alpha =1.00, l1 = =0.8...
Applying NMF for k=3, alpha =1.00, l1 = =0.9...
Applying NMF for k=3, alpha =1.00, l1 = =1.0...




In [9]:
# Persist every fitted model tuple so later notebooks can reload them
# without re-running the grid search.
joblib.dump(topic_models, "topic_models.pkl")

['topic_models.pkl']