In [1]:
# Install scikit-learn for the interpreter that is running this Jupyter kernel.
# pip is invoked as a module of sys.executable (the kernel's own Python binary)
# instead of relying on whichever `pip` command is first on PATH.
# NOTE(review): no version is pinned, so the installed release may differ between runs.
import sys
!{sys.executable} -m pip install scikit-learn



In [1]:
%%writefile ./libs/util_modelo_referencial.py
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.svm import SVC
import util_caracteristicas, util_fasta

def crear_modelo_referencial(identificador, tuned_parameters, scores, n_jobs=None):
    """Grid-search an SVC that separates lncRNA from PCT sequences and print a report.

    The sequences are read from ``./data/<identificador>.lncRNA.fasta`` and
    ``./data/<identificador>.PCT.fasta``, converted to feature vectors with
    ``util_caracteristicas.generar_caracteristicas`` and split 90/10 into a
    development set and an evaluation set (fixed ``random_state=0``). For every
    entry of ``scores`` a ``GridSearchCV`` over ``SVC()`` is fitted on the
    development set with ``cv=10``; the best parameters, the per-candidate CV
    scores and a classification report on the evaluation set are printed.

    Args:
        identificador: Prefix of the FASTA file names under ``./data/``; it is
            also forwarded to ``generar_caracteristicas``.
        tuned_parameters: Parameter grid passed unchanged to ``GridSearchCV``.
        scores: Iterable of scoring specifications; one grid search is run
            per entry.
        n_jobs: Forwarded to ``GridSearchCV`` (``None`` is that class's own
            default; ``-1`` uses all processors).

    Returns:
        dict: Maps each entry of ``scores`` to its fitted ``GridSearchCV``
        object, so the caller can reuse the tuned model instead of only
        reading the printed report. Empty when ``scores`` is empty.
    """
    print("lectura de archivos fasta...")

    ruta_base = "./data/" + identificador
    codigos_lncRNA = util_fasta.leer_fasta(ruta_base + ".lncRNA.fasta")
    codigos_PCT = util_fasta.leer_fasta(ruta_base + ".PCT.fasta")

    print("levantamiento de features...")

    dict_features_lncRNA = util_caracteristicas.generar_caracteristicas(identificador, codigos_lncRNA)
    dict_features_PCT = util_caracteristicas.generar_caracteristicas(identificador, codigos_PCT)

    # Each outer value is itself a mapping of feature name -> value; only the
    # values are kept, so the column order is the insertion order of the inner
    # mappings.
    # NOTE(review): assumes every inner mapping lists its features in the same
    # order -- confirm against generar_caracteristicas.
    features_lncRNA = [list(x.values()) for x in dict_features_lncRNA.values()]
    features_PCT = [list(x.values()) for x in dict_features_PCT.values()]

    print("inicio generación del modelo...")

    # Label convention: 1 = lncRNA, 0 = PCT.
    X = features_lncRNA + features_PCT
    y = ([1] * len(features_lncRNA)) + ([0] * len(features_PCT))
    # 10% of the samples are held out for the final report; the fixed seed
    # keeps the split reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)

    modelos = {}
    for score in scores:
        print("# Tuning hyper-parameters for %s" % score)
        print()

        clf = GridSearchCV(SVC(), tuned_parameters, cv=10,
                           scoring=score, n_jobs=n_jobs)
        clf.fit(X_train, y_train)

        print("Best parameters set found on development set:")
        print()
        print(clf.best_params_)
        print()
        print("Grid scores on development set:")
        print()
        means = clf.cv_results_['mean_test_score']
        stds = clf.cv_results_['std_test_score']
        for mean, std, params in zip(means, stds, clf.cv_results_['params']):
            # The spread is reported as two standard deviations of the fold scores.
            print("%0.3f (+/-%0.03f) for %r"
                  % (mean, std * 2, params))
        print()

        print("Detailed classification report:")
        print()
        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.")
        print()
        # predict() on the search object delegates to the estimator refitted
        # with the best parameters found.
        y_true, y_pred = y_test, clf.predict(X_test)
        print(classification_report(y_true, y_pred))
        print()

        # Keep the fitted search so the tuned model is not lost after printing.
        modelos[score] = clf

    return modelos

Overwriting ./libs/util_modelo_referencial.py


In [5]:
%%time
import sys
sys.path.append('./libs')  # make the helper modules written to ./libs importable
import util_modelo_referencial
import warnings
# Suppresses every warning raised from this point on in the kernel.
warnings.filterwarnings("ignore")

print("Iniciando proceso...")
identificador = "Especie2"

# Experiment log.
# The grid first planned for this run was:
#   rbf kernel,    gamma in {1e-3, 1e-4}, C in {1, 10, 100, 1000}
#   linear kernel,                        C in {1, 10, 100, 1000}
#   scored with precision_macro, recall_macro and accuracy.
# It was replaced by the narrower grid below. Timings noted for smaller grids
# (number as recorded by the author; unit not stated in the notebook):
#   2.43   rbf, gamma=1e-3, C=[1]
#   3.40   rbf, gamma=1e-3, C=[10]
#   13.00  rbf, gamma=1e-3, C=[100]
#   8.34   rbf, gamma=1e-3, C=[100]
#   4.18   rbf, gamma=1e-3, C=[1, 10]   (n_jobs=None)
#   3.7    rbf, gamma=1e-3, C=[1, 10]   (n_jobs=-1)
#   did not finish: rbf, gamma=1e-3, C=[1000]
# Grids listed without a recorded timing:
#   rbf, gamma=1e-4, C=[1, 10, 100]
#   linear, C=[1, 10, 100]
#   linear, C=[1000]

# Grid actually searched: RBF kernel, gamma fixed at 1e-3, C swept up to 100.
tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-3], 'C': [1,10,20,30,40,50,60,70,80,90,100]}]
scores = ['accuracy']
util_modelo_referencial.crear_modelo_referencial(identificador, tuned_parameters, scores, n_jobs=-1)

print("Proceso terminado...")

Iniciando proceso...
lectura de archivos fasta...
levantamiento de features...
inicio generación del modelo...
# Tuning hyper-parameters for accuracy



KeyboardInterrupt: 