# Modelos de Regresión. End to End
## Predecir la calidad del vino
## data/winequality-red


In [None]:
# Decision Tree, Bagging, Pasting, RandomForest, ExtraTrees and AdaBoost
import pandas as pd
# Load the red-wine quality dataset into a DataFrame (path is relative to the
# notebook's working directory).
# NOTE(review): read_csv uses its default comma delimiter here; the original UCI
# distribution of this file is semicolon-delimited — confirm the local copy is
# comma-separated, otherwise sep=";" is needed.
vinos = pd.read_csv("data/winequality-red.csv")

In [None]:
# Number of rows (samples) in the dataset.
len(vinos)

In [None]:
# Preview the first rows of the dataset.
vinos.head()

In [None]:
# Print a concise summary of the DataFrame: columns, non-null counts and dtypes.
vinos.info()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) per column.
vinos.describe()

In [None]:
# Relative frequency of each quality score in the full dataset.
vinos["quality"].value_counts(normalize=True)

In [None]:
from sklearn.model_selection import train_test_split

# Plain random hold-out split: 80% train / 20% test, seeded for reproducibility.
# "quality" is the target; every other column is a predictor.
X_train, X_test, y_train, y_test = train_test_split(
    vinos.drop("quality", axis=1),
    vinos["quality"],
    test_size=0.2,
    random_state=1743,
)


In [None]:
# Relative frequency of each quality score in the randomly split training labels.
y_train.value_counts(normalize=True)

In [None]:
# Relative frequency of each quality score in the randomly split test labels.
y_test.value_counts(normalize=True)

In [None]:
from sklearn.model_selection import StratifiedShuffleSplit

# Separate predictors from the target column.
X = vinos.drop(columns=["quality"])
y = vinos["quality"]

# Stratified 80/20 split on the quality score, seeded for reproducibility.
split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=1743)

# With n_splits=1 the generator yields a single (train, test) pair of
# positional index arrays, so take it directly instead of looping.
train_index, test_index = next(split.split(X, y))

train_set, test_set = X.iloc[train_index], X.iloc[test_index]
train_y_set, test_y_set = y.iloc[train_index], y.iloc[test_index]

In [None]:
# Relative frequency of each quality score in the stratified training labels.
train_y_set.value_counts(normalize=True)

In [None]:
# Relative frequency of each quality score in the stratified test labels.
test_y_set.value_counts(normalize=True)

In [None]:
import matplotlib.pyplot as plt
from pathlib import Path

# Global plot styling: 14pt for general text, axis labels, titles and legends;
# 10pt for tick labels.
plt.rcParams.update(
    {
        "font.size": 14,
        "axes.labelsize": 14,
        "axes.titlesize": 14,
        "legend.fontsize": 14,
        "xtick.labelsize": 10,
        "ytick.labelsize": 10,
    }
)

# Directory (relative to the working directory) where this notebook's figures
# and exported files are written. Being a Path object, it can also perform
# filesystem operations such as creating the directory itself.
IMAGES_PATH = Path("images", "regression_trees")
# parents=True also creates any missing parent directories;
# exist_ok=True makes the call a no-op when the directory already exists.
IMAGES_PATH.mkdir(parents=True, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure inside IMAGES_PATH.

    Args:
        fig_id: Base file name (without extension) for the saved figure.
        tight_layout: When true, call ``plt.tight_layout()`` before saving.
        fig_extension: File extension, also passed to savefig as the format.
        resolution: Value passed to savefig as ``dpi``.
    """
    if tight_layout:
        plt.tight_layout()
    destination = IMAGES_PATH.joinpath(f"{fig_id}.{fig_extension}")
    plt.savefig(destination, format=fig_extension, dpi=resolution)

In [None]:
from graphviz import Source
from sklearn.tree import DecisionTreeRegressor, export_graphviz

# No earlier cell creates `tree_reg`, so running the notebook top to bottom
# failed here with a NameError. Fit the regression tree on the stratified
# training split before exporting it.
# max_depth=3 is a choice made to keep the rendered graph readable (adjust as
# needed); random_state reuses the seed used by the splits in this notebook.
tree_reg = DecisionTreeRegressor(max_depth=3, random_state=1743)
tree_reg.fit(train_set, train_y_set)

# Export the fitted tree as a Graphviz DOT file under IMAGES_PATH, labelling
# the split nodes with the training-set column names.
dot_path = IMAGES_PATH / "regression_tree.dot"
export_graphviz(
    tree_reg,
    out_file=str(dot_path),
    feature_names=train_set.columns,
    rounded=True,
    filled=True,
)
# Load the DOT file back; as the cell's last expression it is displayed inline.
Source.from_file(dot_path)