In [148]:
import pandas as pd

# Load the semicolon-separated dataset and show summary statistics per column.
data = pd.read_csv("../resources/data/dataset.csv", sep=';')
data.describe()

Unnamed: 0,altura,peso,IMC,objetivos
count,999.0,999.0,999.0,999.0
mean,166.456456,91.738739,36.999389,2.059059
std,30.999093,33.170097,20.486089,1.179108
min,115.0,35.0,7.36,0.0
25%,139.0,63.0,21.955,1.0
50%,166.0,91.0,32.31,3.0
75%,193.0,120.0,46.82,3.0
max,219.0,149.0,110.73,3.0


In [149]:
from numpy import mean, std
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression

# Every column except the last is a feature; the last column is the class label.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Multinomial logistic regression, scored by accuracy over 20 folds.
model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000)
n_scores = cross_val_score(
    model, X, y,
    scoring='accuracy',
    cv=KFold(n_splits=20),
    n_jobs=-1,
)

# Mean and standard deviation of the per-fold accuracies.
print('Mean Accuracy: %.3f (%.3f)' % (n_scores.mean(), n_scores.std()))

Mean Accuracy: 0.982 (0.012)


In [None]:
# Fit on the whole dataset so the model can be spot-checked on hand-made samples.
model.fit(X, y)

# One value per feature column of X, in column order.
row_1 = [170, 70, 24.2]
row_0 = [185, 57, 16.7]

# The model was fitted on a DataFrame, so the samples are wrapped in a
# DataFrame carrying the same column names; passing bare lists makes
# scikit-learn warn that the input has no valid feature names.
p1 = model.predict(pd.DataFrame([row_1], columns=X.columns))
p0 = model.predict(pd.DataFrame([row_0], columns=X.columns))

print('Expected class 1 -> Predicted Class: %d' % p1[0])
print('Expected class 0 -> Predicted Class: %d' % p0[0])

In [None]:
from matplotlib import pyplot

def get_models():
    """Build one multinomial logistic regression per regularisation setting.

    Returns:
        dict: maps the setting formatted as '%.4f' to an unfitted
        ``LogisticRegression``. The 0.0 entry is built without a penalty;
        every other entry uses an L2 penalty with ``C`` set to that value.
    """
    # Arguments shared by every candidate estimator.
    shared = dict(multi_class='multinomial', solver='lbfgs', max_iter=1000)

    catalogue = {}
    for strength in (0.0, 0.0001, 0.001, 0.01, 0.1, 1.0):
        if strength == 0.0:
            # NOTE(review): newer scikit-learn releases spell this option
            # `penalty=None`; the string form is kept to match the version the
            # notebook was written against - confirm before upgrading.
            estimator = LogisticRegression(penalty='none', **shared)
        else:
            estimator = LogisticRegression(penalty='l2', C=strength, **shared)
        catalogue['%.4f' % strength] = estimator
    return catalogue

def evaluate_model(model, X, y):
    """Cross-validate ``model`` on (X, y) for several fold counts.

    Args:
        model: estimator handed to ``cross_val_score``.
        X: feature matrix.
        y: target values.

    Returns:
        dict: maps each fold count (10, 15, 20, 25) to the accuracy scores
        returned by ``cross_val_score`` for a ``KFold`` with that many splits.
    """
    fold_counts = (10, 15, 20, 25)
    return {
        folds: cross_val_score(model, X, y, scoring='accuracy', cv=KFold(n_splits=folds), n_jobs=-1)
        for folds in fold_counts
    }

# Evaluate every candidate model at every fold count, collecting one score
# array and one label per (model, fold count) pair for the box plot below.
models = get_models()
results = []
names = []

for name, model in models.items():
    scores = evaluate_model(model, X, y)
    for score_name, score in scores.items():
        key = f'{name} -> {score_name}'
        results.append(score)
        names.append(key)

        # Mean and standard deviation of the fold accuracies for this pair.
        print('-> %s %.3f (%.3f)' % (key, score.mean(), score.std()))


# One box per (model, fold count) pair; the mean is marked on each box.
pyplot.boxplot(results, labels=names, showmeans=True)
pyplot.show()

In [None]:
# Same estimator as before, scored by accuracy over 10 folds.
model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000)
n_scores = cross_val_score(
    model, X, y,
    scoring='accuracy',
    cv=KFold(n_splits=10),
    n_jobs=-1,
)

# Mean and standard deviation of the per-fold accuracies.
print('Mean Accuracy: %.3f (%.3f)' % (n_scores.mean(), n_scores.std()))

In [None]:
from sklearn.model_selection import cross_val_predict
import sklearn.metrics as metrics

model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000)

# Out-of-fold predictions for each fold count, keyed as 'cv->N'.
y_pred = {
    f'cv->{ns}': cross_val_predict(model, X, y, cv=KFold(n_splits=ns), n_jobs=-1)
    for ns in (10, 15, 20, 25)
}

In [None]:
# Print the classification metrics for each set of cross-validated predictions.
# The loop variable is deliberately NOT called `y_pred`: reusing that name
# rebinds the dict being iterated to its last array, so the dict is lost and
# the cell cannot be run a second time.
for name, fold_pred in y_pred.items():
    print(f'----------------{name}----------------\n')
    print("Exactitud: %.3f\n" % (metrics.accuracy_score(y, fold_pred)))
    print("Precisión: %.3f\n" % (metrics.precision_score(y, fold_pred, average="micro")))
    print("Sensibilidad: %.3f\n" % (metrics.recall_score(y, fold_pred, average="micro")))
    print("F1: %.3f\n" % (metrics.f1_score(y, fold_pred, average="micro")))
    print("Matriz de confusión:\n", metrics.confusion_matrix(y, fold_pred))
    print("Tabla de métricas:\n", metrics.classification_report(y, fold_pred))
    print('---------------------------------------\n')

In [None]:
import pickle

# Final version of the model: refit on every row, then serialise it to disk.
# NOTE(review): a later cell in this notebook writes a different estimator to
# the same path, replacing this file.
model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000)
model.fit(X, y)

pkl_filename = "../models/pickle_model.pkl"
with open(pkl_filename, mode='wb') as handle:
    pickle.dump(model, handle)

In [None]:
# Selección de atributos.
import numpy as np
from sklearn.feature_selection import f_regression, mutual_info_regression

# Linear dependence between each feature and the target (univariate F-test).
f_test, _ = f_regression(X, y)
# Scale the scores so the strongest feature gets 1.0.
f_test /= np.max(f_test)

# Report the scaled score of each feature; labels follow X's column order.
columns = ['Altura', 'Peso', 'IMC']
for idx, column in enumerate(columns):
    print(f'{column}: El valor de su F-Test es {round(f_test[idx], 2)}')

In [152]:
# Keep only the first two feature columns of X.
# NOTE(review): X is rebound in place, so cells above that were written for the
# wider feature set see the narrowed frame if they are re-run after this one.
X = X.iloc[:, :2]
print(X)

     altura  peso
0       187   104
1       147    35
2       176   141
3       139   125
4       178    47
..      ...   ...
994     163   113
995     116    67
996     117   133
997     180   140
998     217    39

[999 rows x 2 columns]


In [None]:
from sklearn import preprocessing

# Fit a StandardScaler on X and transform X with it in a single step.
standardizer = preprocessing.StandardScaler()
standardized_data = standardizer.fit_transform(X)

# Show the standardized values.
print(standardized_data)

In [None]:
from scipy import stats
import matplotlib.pyplot as plt
from sklearn.covariance import EllipticEnvelope
from limpieza_funciones import grafico_outliers

# Wrap the standardized values in a DataFrame for the steps below.
standardized_data = pd.DataFrame(data=standardized_data)

# Data cleaning: outlier detection.
outlier_method = EllipticEnvelope()
outlier_method.fit(standardized_data)
scores_pred = outlier_method.decision_function(standardized_data)
# Score below which the lowest 25% of the samples fall.
threshold = stats.scoreatpercentile(scores_pred, 25)

# Percentage of samples scoring strictly below the threshold.
n_below = (scores_pred < threshold).sum()
print("%.3f %%" % (100 * n_below / len(scores_pred)))

# Draw the outlier plot.
grafico_outliers(standardized_data, outlier_method, 150, threshold, -7, 7)

In [None]:
from sklearn.covariance import EllipticEnvelope

# Data cleaning: outlier detection.
# A fresh EllipticEnvelope is fitted here and used to label every row.
outlier_method = EllipticEnvelope().fit(standardized_data)
predicted_outliers = outlier_method.predict(standardized_data)

# Keep only the rows with a positive prediction (scikit-learn labels inliers +1).
# NOTE(review): standardized_data is rebound to the filtered frame, so running
# this cell again fits and filters the already-filtered data.
standardized_data = standardized_data[predicted_outliers > 0]
len(standardized_data)

#Output
899

In [None]:
# Keep only the targets of the rows that were not flagged as outliers.
# The mask is applied to the target column of `data` rather than to `y`
# itself: `y = y[mask]` shortens `y`, so a second run would apply the
# full-length mask to the already-shortened series and fail.
y = data.iloc[:, -1][predicted_outliers > 0]
print(y)

In [None]:
from sklearn.pipeline import Pipeline
from sklearn import linear_model, metrics
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import KFold
import numpy as np

# k is used both as the KNN neighbour count and as the number of CV folds.
k = 10

# Estimators to compare, keyed by a short display name.
algoritmos = {
    'OLS': linear_model.LinearRegression(),
    'KNN': KNeighborsRegressor(n_neighbors=k),
    'LOGR': LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000),
}

# Metric name -> callable taking (y, y_pred).
# RMSE uses np.sqrt (numpy is imported in this cell) instead of math.sqrt,
# which is only imported by a later cell; the lambda therefore no longer
# depends on that cell having been run first.
metricas = {
    'MAE': metrics.mean_absolute_error,
    'RMSE': lambda y, y_pred: np.sqrt(metrics.mean_squared_error(y, y_pred)),
    'R2': metrics.r2_score,
}

# Out-of-fold predictions of every estimator on the cleaned, standardized data.
y_pred = {}
for nombre, alg in algoritmos.items():
    y_pred[nombre] = cross_val_predict(alg, standardized_data, y,
                                       cv=KFold(n_splits=k))

In [None]:
from evaluacion_funciones import *
from math import sqrt

# Evaluation and presentation of results: score each algorithm's
# cross-validated predictions, then plot real vs. predicted values.
for nombre in algoritmos:
    predicciones = y_pred[nombre]
    result = evaluacion(y, predicciones, metricas)
    grafica_real_vs_pred(y, predicciones, result, nombre)

In [153]:
from sklearn.neighbors import KNeighborsRegressor
import pickle

# Final version of the model: 10-nearest-neighbours regressor fitted on X.
model = KNeighborsRegressor(n_neighbors = 10)
# An earlier cell drops the outlier rows from y while X keeps every row, so
# fitting on X directly fails on mismatched sample counts when the notebook is
# run top to bottom. Selecting X's rows by y's index keeps the two aligned, and
# selects all of X when y has not been filtered.
model.fit(X.loc[y.index], y)

# NOTE(review): same path as the logistic-regression pickle written by an
# earlier cell, so this file replaces it.
pkl_filename = "../models/pickle_model.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(model, file)


In [93]:
# Load the diet catalogue, keep the rows whose target is 'hipo', drop the last
# column and rename the remaining columns.
food = pd.read_csv('../resources/data/diets.csv', sep=';')
diet = (
    food.loc[food['Target'] == 'hipo']
    .iloc[:, :-1]
    .rename(columns={'Day': 'Dia', 'Time': 'Hora', 'Food': 'Comida'})
)

# Disabled: ordered categoricals for day and meal time plus a pivot by both.
# diet['Dia'] = pd.Categorical(diet['Dia'], ordered=True,
#                              categories=['Lunes', 'Martes', 'Miercoles', 'Jueves', 'Viernes', 'Sabado',
#                                          'Domingo'])
#
# diet['Hora'] = pd.Categorical(diet['Hora'], ordered=True,
#                              categories=['Desayuno', 'Almuerzo', 'Comida', 'Merienda', 'Cena'])
#
# diet = pd.pivot_table(diet, index=['Dia', 'Hora'], values='Comida', aggfunc='last')

display(diet)

Unnamed: 0,Comida,Dia,Hora
0,Te verde. Yogurt natural sin azucar con copos ...,Lunes,Desayuno
1,Infusion. Uvas,Lunes,Almuerzo
2,Pure de calabacin y calabaza. Pollo al horno c...,Lunes,Comida
3,Infusion. Dos mandarinas,Lunes,Merienda
4,Ensalada de tomate con tortilla de champiñones...,Lunes,Cena
5,Cafe con leche desnatada. Tostada de pan integ...,Martes,Desayuno
6,Infusion. Dos kiwis,Martes,Almuerzo
7,Ensalada de judias verdes con espirales integr...,Martes,Comida
8,Infusion. Caqui,Martes,Merienda
9,"Parrilladad e tomate, cabolla y pimiento rojo....",Martes,Cena


In [147]:
import re

def __remove_allergies(food, allergies):
    """Strip allergen names out of a sequence of meal descriptions.

    Each entry of ``food`` is split on '.' into individual meals. Every
    allergen found in a meal (case-insensitively) is deleted from it, and the
    '  y' sequence (two spaces followed by 'y') that the deletion can leave
    behind is removed as well. The meals are then joined back with '.'.

    Args:
        food: iterable of strings, one per entry, with meals separated by '.'.
        allergies: iterable of allergen names to remove. ``None`` or an empty
            iterable leaves every entry unchanged.

    Returns:
        list[str]: the cleaned entries, in the same order as ``food``.
    """
    if allergies is None:
        allergies = ()

    # Allergen names are matched literally: escaping keeps characters such as
    # '(' or '+' from being read as regular-expression syntax.
    patterns = [re.compile(re.escape(allergy), re.IGNORECASE) for allergy in allergies]

    food_without_allergies = []
    for eats in food:
        meals = []
        for meal in eats.split('.'):
            for pattern in patterns:
                if pattern.search(meal):
                    meal = pattern.sub('', meal).replace('  y', '')
            # Stored outside the allergen loop so a meal is kept as-is when
            # there are no allergens at all.
            meals.append(meal)

        food_without_allergies.append('.'.join(meals))

    return food_without_allergies

# Quick check of the helper on the first two diet entries.
__remove_allergies(diet['Comida'][:2], ['Uvas', 'calabacin'])

['Te verde. Yogurt natural sin azucar con copos de espelta. Kiwi',
 'Infusion. ']

In [None]:
from jinja2 import Environment, FileSystemLoader
from weasyprint import HTML

# Load the HTML template from the template directory.
env = Environment(loader=FileSystemLoader('../resources/template'))
template = env.get_template('diet.html')

# Fill the template with the title and the diet table rendered as HTML.
template_vars = dict(title="DIETA SEMANAL", meal=diet.to_html())
html_out = template.render(template_vars)

# Convert the rendered HTML to a PDF file.
path_output_file = '../resources/template/test.pdf'
HTML(string=html_out).write_pdf(path_output_file)