In [61]:
import os
from pathlib import Path

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [62]:
# Directory holding the two wine-quality CSV files. The default is the original
# author's local folder; set the WINE_DATA_DIR environment variable to run the
# notebook on another machine without editing this cell.
DATA_DIR = Path(
    os.environ.get('WINE_DATA_DIR')
    or r'C:\Users\angel\OneDrive\Documentos\DATA_SCIENCE\Bootcamp\ML_WineQuality\data'
)


def load_wine_csv(filename):
    """Read one semicolon-delimited CSV from DATA_DIR and clean its header.

    Parameters
    ----------
    filename : str
        File name relative to ``DATA_DIR``.

    Returns
    -------
    pandas.DataFrame
        The parsed table, with double quotes and surrounding whitespace
        removed from every column label.
    """
    frame = pd.read_csv(DATA_DIR / filename, delimiter=';')
    # Clean the labels before anything is displayed so the preview below shows
    # exactly the column names used by the following cells.
    frame.columns = frame.columns.str.replace('"', '').str.strip()
    return frame


red_wine = load_wine_csv('winequality-red.csv')
white_wine = load_wine_csv('winequality-white.csv')

print("Vino Tinto (red wine) - Primeras filas:")
print(red_wine.head())
print("\nVino Blanco (white wine) - Primeras filas:")
print(white_wine.head())

print("Columnas vino tinto:")
print(red_wine.columns)
print("Columnas vino blanco:")
print(white_wine.columns)

Vino Tinto (red wine) - Primeras filas:
   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 11.0                  34.0   0.9978  3.51       0.56   
1                 25.0                  67.0   0.9968  3.20       0.68   
2                 15.0                  54.0   0.9970  3.26       0.65   
3                 17.0                  60.0   0.9980  3.16       0.58   
4                 11.0                  34.0   0.9978  3.51       0.56   

   alcohol  quality  
0      9.4        5  
1 

In [63]:
# Red wine: every column except 'quality' is a feature; 'quality' is the target.
X_red = red_wine.drop(columns='quality')
y_red = red_wine['quality']

# White wine: same feature/target separation.
X_white = white_wine.drop(columns='quality')
y_white = white_wine['quality']

# Hold out 20% of each dataset for testing; the fixed seed keeps the split repeatable.
X_train_red, X_test_red, y_train_red, y_test_red = train_test_split(
    X_red,
    y_red,
    test_size=0.2,
    random_state=42,
)
X_train_white, X_test_white, y_train_white, y_test_white = train_test_split(
    X_white,
    y_white,
    test_size=0.2,
    random_state=42,
)

In [64]:
# One scaler per dataset. Re-fitting a single shared StandardScaler on the
# white-wine data would overwrite the statistics learned from the red-wine data,
# so new red-wine samples could no longer be scaled the way the red-wine model
# was trained.
scaler_red = StandardScaler()
X_train_red = scaler_red.fit_transform(X_train_red)
# The test split is transformed with the training statistics only.
X_test_red = scaler_red.transform(X_test_red)

scaler_white = StandardScaler()
X_train_white = scaler_white.fit_transform(X_train_white)
X_test_white = scaler_white.transform(X_test_white)

# Backward compatibility: the original cell finished with `scaler` fitted on the
# white-wine training data, so keep that name bound to the same statistics.
scaler = scaler_white

In [65]:
def fit_and_evaluate(X_train, y_train, X_test, y_test, random_state=42):
    """Fit a RandomForestRegressor and score it on a held-out split.

    Parameters
    ----------
    X_train, y_train
        Training features and targets passed to ``fit``.
    X_test, y_test
        Held-out features and targets used for prediction and scoring.
    random_state : int, default 42
        Seed forwarded to ``RandomForestRegressor`` so the fit is repeatable.

    Returns
    -------
    tuple
        ``(model, y_pred, mse, r2)``: the fitted estimator, its predictions on
        ``X_test``, and the mean squared error and R2 of those predictions
        against ``y_test``.
    """
    model = RandomForestRegressor(random_state=random_state)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    return model, y_pred, mse, r2


# Red wine: train, predict on the test split, report metrics.
model_red, y_pred_red, mse_red, r2_red = fit_and_evaluate(
    X_train_red, y_train_red, X_test_red, y_test_red
)

print("Vino Tinto - MSE:", mse_red)
print("Vino Tinto - R2:", r2_red)

# White wine: same pipeline on the white-wine split.
model_white, y_pred_white, mse_white, r2_white = fit_and_evaluate(
    X_train_white, y_train_white, X_test_white, y_test_white
)

print("Vino Blanco - MSE:", mse_white)
print("Vino Blanco - R2:", r2_white)

Vino Tinto - MSE: 0.3006603124999999
Vino Tinto - R2: 0.5399271357910311
Vino Blanco - MSE: 0.3482283673469388
Vino Blanco - R2: 0.5503674032406387


In [66]:
# Output directory for the serialized models. The default is the original
# author's local folder; set the WINE_MODELS_DIR environment variable to save
# somewhere else without editing this cell.
MODELS_DIR = Path(
    os.environ.get('WINE_MODELS_DIR')
    or r'C:\Users\angel\OneDrive\Documentos\DATA_SCIENCE\Bootcamp\ML_WineQuality\src\models'
)
# Create the directory if it is missing so the dump does not fail on a fresh checkout.
MODELS_DIR.mkdir(parents=True, exist_ok=True)

# NOTE(review): only the estimators are saved here. They were fitted on
# standardized features, so the matching scaler would be needed to score raw
# data later -- confirm whether it is persisted elsewhere.
joblib.dump(model_red, MODELS_DIR / 'model_red_wine.pkl')
joblib.dump(model_white, MODELS_DIR / 'model_white_wine.pkl')

['C:\\Users\\angel\\OneDrive\\Documentos\\DATA_SCIENCE\\Bootcamp\\ML_WineQuality\\src\\models\\model_white_wine.pkl']

In [67]:
# Collect the test-set scores of both models in one nested dict keyed by wine type.
metrics = {
    label: {"MSE": mse, "R2": r2}
    for label, mse, r2 in (
        ("Vino Tinto", mse_red, r2_red),
        ("Vino Blanco", mse_white, r2_white),
    )
}
print(metrics)

{'Vino Tinto': {'MSE': 0.3006603124999999, 'R2': 0.5399271357910311}, 'Vino Blanco': {'MSE': 0.3482283673469388, 'R2': 0.5503674032406387}}


In [68]:
# Re-fit both forests and predict on the test splits.
# random_state=42 matches the seed used when the models were first trained and
# evaluated. Without it, this cell would replace `model_red` / `model_white`
# with differently seeded forests, so the predictions printed here would change
# on every run and would no longer correspond to the reported metrics.
model_red = RandomForestRegressor(random_state=42)
model_red.fit(X_train_red, y_train_red)
model_white = RandomForestRegressor(random_state=42)
model_white.fit(X_train_white, y_train_white)

# Predicted quality scores for every row of each test split.
prediction_red_test = model_red.predict(X_test_red)
prediction_white_test = model_white.predict(X_test_white)
print(f"Predicción calidad vino tinto (test): {prediction_red_test}")
print(f"Predicción calidad vino blanco (test): {prediction_white_test}")

Predicción calidad vino tinto (test): [5.29 5.18 5.48 5.19 5.97 5.08 5.08 4.94 6.13 5.97 6.83 5.22 5.83 5.13
 5.47 6.34 5.43 5.76 6.87 5.1  4.98 5.88 5.25 5.85 5.57 5.97 6.3  5.3
 5.34 5.94 5.37 5.44 5.85 5.47 5.79 5.16 6.34 6.06 5.41 6.06 5.23 5.22
 6.37 5.04 5.57 5.66 6.4  5.56 5.21 5.56 5.04 5.29 5.45 7.06 5.17 5.71
 6.02 6.03 5.6  5.07 5.64 6.16 5.56 5.25 6.68 5.39 6.79 5.59 6.71 5.54
 6.1  5.26 5.79 5.66 6.07 5.06 6.52 5.16 5.95 6.63 5.17 6.84 5.06 5.79
 5.77 6.57 4.99 6.   6.5  5.39 6.32 5.5  4.95 5.13 5.23 5.46 5.12 5.86
 4.5  5.53 5.13 5.06 5.8  6.56 5.46 6.72 5.86 5.2  5.23 5.24 6.63 5.03
 6.25 5.03 5.15 6.26 5.35 5.26 5.18 5.71 6.12 5.78 5.9  5.36 5.72 5.3
 6.3  5.46 5.32 5.69 5.79 5.44 5.06 6.39 5.56 5.06 4.9  5.35 5.11 5.91
 6.68 6.18 6.53 5.34 5.52 5.14 5.52 5.65 5.51 5.08 5.85 6.09 5.46 5.17
 5.79 5.47 5.58 6.6  5.26 5.89 6.04 5.5  6.28 5.09 5.47 5.75 5.71 5.03
 4.81 5.23 5.06 4.94 6.45 5.34 6.23 5.95 6.25 5.08 5.31 5.08 4.59 5.97
 5.36 6.28 4.93 6.6  5.86 5.77 6.77 5.34 