In [1]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
# import my functions
from model_tuning.random_grid_tunning import random_model_tuning, opt_model
from Artist.prep import lyrics_dataframe, split_transform, artist_dict

### Read data files:

In [2]:
# Build the working DataFrame with the project helper `lyrics_dataframe`,
# passing the `artist_dict` imported from Artist.prep.
df = lyrics_dataframe(artist_dict)

### Split dataframe

In [3]:
# split_transform (project helper) returns the four train/test pieces
# unpacked here; they feed every tuning and scoring cell below.
X_train, X_test, y_train, y_test = split_transform(df)

### Define models to test with a random grid:

In [4]:
# Estimators handed to the randomized search; the commented-out
# classifiers are currently excluded from the run.
models = [
    # RandomForestClassifier(),
    # DecisionTreeClassifier(),
    LogisticRegression(),
    # KNeighborsClassifier(),
    MultinomialNB(),
]

### Run random grid search

In [5]:
# Run the randomized search over the candidate models on the training split.
model_tuning_report = random_model_tuning(models, X_train, y_train, n_iter=3)

# Persist the report. The path is a plain literal (the original f-string had
# no placeholders), and the encoding is explicit so the file is written the
# same way regardless of the platform's default locale encoding.
with open('model_tuning/model_tuning_report.txt', 'w', encoding='utf-8') as my_file:
    # Entries are written verbatim; no separator is added between them.
    my_file.writelines(model_tuning_report)

### Define the parameter grids for grid-search hyperparameter tuning:

In [6]:
# Search space for LogisticRegression, passed to opt_model further down.
LR_grid = {
    'penalty': ['l2'],
    # 10 evenly spaced values from 0.1 to 1.1 inclusive (spacing of 1/9, ~0.111).
    # NOTE(review): num=11 would give 0.1-sized steps, if that was the intent — confirm.
    'C': np.linspace(0.1, 1.1, 10),
    # Alternative solvers left disabled: 'sag', 'liblinear', 'lbfgs', 'newton-cg'
    'solver': ['saga'],
    'max_iter': [100],
    'tol': [0.005],
    'random_state': [100],
}

In [7]:
# Search space for MultinomialNB, passed to opt_model further down.
NB_grid = {
    # 20 logarithmically spaced values from 1e-4 to 1e4 inclusive.
    'alpha': np.logspace(-4, 4, 20),
    'class_prior': [None],
    'fit_prior': [True],
}

In [8]:
# Tune LogisticRegression over LR_grid on the training split using the
# project helper opt_model, then score the returned model on the test split.
m_lr = opt_model(
    LogisticRegression(),
    LR_grid,
    X_train,
    y_train,
)
# Bare last expression so the notebook displays the test-set score.
m_lr.score(X_test, y_test)

Chosen Hyperparameters: LogisticRegression(C=0.9888888888888888, random_state=100, solver='saga',
                   tol=0.005). The score is 0.9126102580855929 


0.6099951004409603

In [9]:
# Tune MultinomialNB over NB_grid on the training split using the
# project helper opt_model, then score the returned model on the test split.
m_nb = opt_model(
    MultinomialNB(),
    NB_grid,
    X_train,
    y_train,
)
# Bare last expression so the notebook displays the test-set score.
m_nb.score(X_test, y_test)

Chosen Hyperparameters: MultinomialNB(alpha=1.623776739188721). The score is 0.8696504410323423 


0.6148946594806467