# Supervised Learning Model Evaluation Lab

Complete the exercises below to solidify your knowledge and understanding of supervised learning model evaluation.

In [189]:
import pandas as pd
import numpy as np
import warnings as wa
import sklearn.datasets as sk

In [190]:
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()
housing1 = pd.DataFrame(data=housing.data, columns=housing.feature_names)
housing1['target'] = housing.target

# Visualizar el DataFrame
housing1.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,target
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


## Regression Model Evaluation

## 1. Split this data set into training (80%) and testing (20%) sets.

The `MEDV` field represents the median value of owner-occupied homes (in $1000's) and is the target variable that we will want to predict.

In [191]:
from sklearn.model_selection import train_test_split
tts = train_test_split

# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Verificar las formas de los conjuntos de datos
print("Forma de X_train:", X_train.shape)
print("Forma de X_test:", X_test.shape)
print("Forma de y_train:", y_train.shape)
print("Forma de y_test:", y_test.shape)

Forma de X_train: (120, 4)
Forma de X_test: (30, 4)
Forma de y_train: (120,)
Forma de y_test: (30,)


## 2. Train a `LinearRegression` model on this data set and generate predictions on both the training and the testing set.

In [192]:
from sklearn.linear_model import LinearRegression

# Inicializar el modelo de regresión lineal
regression_model = LinearRegression()

# Entrenar el modelo en el conjunto de entrenamiento
regression_model.fit(X_train, y_train)

# Generar predicciones en el conjunto de entrenamiento y prueba
y_train_pred = regression_model.predict(X_train)
y_test_pred = regression_model.predict(X_test)

# Imprimir las primeras 5 predicciones en el conjunto de entrenamiento
print("Predicciones en el conjunto de entrenamiento:")
print(y_train_pred[:5])

# Imprimir las primeras 5 predicciones en el conjunto de prueba
print("\nPredicciones en el conjunto de prueba:")
print(y_test_pred[:5])


Predicciones en el conjunto de entrenamiento:
[-0.13337979 -0.07219427  1.17602799  0.00825707 -0.00972756]

Predicciones en el conjunto de prueba:
[ 1.23071715 -0.04010441  2.21970287  1.34966889  1.28429336]


## 3. Calculate and print R-squared for both the training and the testing set.

In [193]:
from sklearn.metrics import r2_score

# Calcular R-cuadrado para el conjunto de entrenamiento
r2_train = r2_score(y_train, y_train_pred)

# Calcular R-cuadrado para el conjunto de prueba
r2_test = r2_score(y_test, y_test_pred)

# Imprimir R-cuadrado para el conjunto de entrenamiento y prueba
print("R-cuadrado (conjunto de entrenamiento):", r2_train)
print("R-cuadrado (conjunto de prueba):", r2_test)


R-cuadrado (conjunto de entrenamiento): 0.9254199044989622
R-cuadrado (conjunto de prueba): 0.9468960016420045


## 4. Calculate and print mean squared error for both the training and the testing set.

In [194]:
from sklearn.metrics import mean_squared_error

# Calcular MSE para el conjunto de entrenamiento
mse_train = mean_squared_error(y_train, y_train_pred)

# Calcular MSE para el conjunto de prueba
mse_test = mean_squared_error(y_test, y_test_pred)

# Imprimir MSE para el conjunto de entrenamiento y prueba
print("MSE (conjunto de entrenamiento):", mse_train)
print("MSE (conjunto de prueba):", mse_test)


MSE (conjunto de entrenamiento): 0.049093383698217904
MSE (conjunto de prueba): 0.037113794407976894


## 5. Calculate and print mean absolute error for both the training and the testing set.

In [195]:
from sklearn.metrics import mean_absolute_error

# Calcular MAE para el conjunto de entrenamiento
mae_train = mean_absolute_error(y_train, y_train_pred)

# Calcular MAE para el conjunto de prueba
mae_test = mean_absolute_error(y_test, y_test_pred)

# Imprimir MAE para el conjunto de entrenamiento y prueba
print("MAE (conjunto de entrenamiento):", mae_train)
print("MAE (conjunto de prueba):", mae_test)


MAE (conjunto de entrenamiento): 0.17139700580706987
MAE (conjunto de prueba): 0.14637694965308531


## Classification Model Evaluation

In [196]:
from sklearn.datasets import load_iris

data = load_iris()

X = pd.DataFrame(data["data"], columns=data["feature_names"])
y = pd.DataFrame(data["target"], columns=["class"])

data = pd.concat([X, y], axis=1)
data

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


## 6. Split this data set into training (80%) and testing (20%) sets.

The `class` field represents the type of flower and is the target variable that we will want to predict.

In [197]:
from sklearn.model_selection import train_test_split

# Dividir los datos en características (X) y etiquetas de clase (y)
X = data.drop("class", axis=1)
y = data["class"]

# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Imprimir la forma de los conjuntos de entrenamiento y prueba
print("Forma del conjunto de entrenamiento:", X_train.shape)
print("Forma del conjunto de prueba:", X_test.shape)


Forma del conjunto de entrenamiento: (120, 4)
Forma del conjunto de prueba: (30, 4)


## 7. Train a `LogisticRegression` model on this data set and generate predictions on both the training and the testing set.

In [198]:

from sklearn.linear_model import LogisticRegression as LogReg
import warnings
warnings.filterwarnings('ignore')



from sklearn.ensemble import RandomForestRegressor as RFR    

  

from sklearn.datasets import load_diabetes   


In [199]:
from sklearn.linear_model import LogisticRegression

# Crear una instancia del modelo de regresión logística
logreg = LogisticRegression()

# Entrenar el modelo de regresión logística
logreg.fit(X_train, y_train)

# Generar predicciones en el conjunto de entrenamiento y prueba
y_train_pred = logreg.predict(X_train)
y_test_pred = logreg.predict(X_test)





## 8. Calculate and print the accuracy score for both the training and the testing set.

In [200]:
from sklearn.metrics import accuracy_score

# Calcular el puntaje de precisión para el conjunto de entrenamiento
accuracy_train = accuracy_score(y_train, y_train_pred)

# Calcular el puntaje de precisión para el conjunto de prueba
accuracy_test = accuracy_score(y_test, y_test_pred)

# Imprimir el puntaje de precisión para el conjunto de entrenamiento y prueba
print("Accuracy Score (Training Set):", accuracy_train)
print("Accuracy Score (Testing Set):", accuracy_test)


Accuracy Score (Training Set): 0.975
Accuracy Score (Testing Set): 1.0


## 9. Calculate and print the balanced accuracy score for both the training and the testing set.

In [201]:
from sklearn.metrics import balanced_accuracy_score

# balanced accuracytraining set
balanced_accuracy_train = balanced_accuracy_score(y_train, y_train_pred)

# balanced accuracy testing set
balanced_accuracy_test = balanced_accuracy_score(y_test, y_test_pred)

#balanced accuracy  traning  and test
print("Balanced Accuracy Score (Training Set):", balanced_accuracy_train)
print("Balanced Accuracy Score (Testing Set):", balanced_accuracy_test)


Balanced Accuracy Score (Training Set): 0.975609756097561
Balanced Accuracy Score (Testing Set): 1.0


## 10. Calculate and print the precision score for both the training and the testing set.

In [202]:
from sklearn.metrics import precision_score

# Calculate the precision score for the training set
precision_train = precision_score(y_train, y_train_pred, average='weighted')

# Calculate the precision score for the testing set
precision_test = precision_score(y_test, y_test_pred, average='weighted')

# Print the precision score for the training and testing sets
print("Precision Score (Training Set):", precision_train)
print("Precision Score (Testing Set):", precision_test)


Precision Score (Training Set): 0.9767857142857144
Precision Score (Testing Set): 1.0


## 11. Calculate and print the recall score for both the training and the testing set.

In [203]:
from sklearn.metrics import recall_score

# Calculate the recall score for the training set
recall_train = recall_score(y_train, y_train_pred, average='weighted')

# Calculate the recall score for the testing set
recall_test = recall_score(y_test, y_test_pred, average='weighted')

# Print the recall score for the training and testing sets
print("Recall Score (Training Set):", recall_train)
print("Recall Score (Testing Set):", recall_test)
 

Recall Score (Training Set): 0.975
Recall Score (Testing Set): 1.0


## 12. Calculate and print the F1 score for both the training and the testing set.

In [204]:
from sklearn.metrics import f1_score

# Calculate the F1 score for the training set
f1_train = f1_score(y_train, y_train_pred, average='weighted')

# Calculate the F1 score for the testing set
f1_test = f1_score(y_test, y_test_pred, average='weighted')

# Print the F1 score for the training and testing sets
print("F1 Score (Training Set):", f1_train)
print("F1 Score (Testing Set):", f1_test)


F1 Score (Training Set): 0.9749882794186592
F1 Score (Testing Set): 1.0


## 13. Generate confusion matrices for both the training and the testing set.

In [205]:
from sklearn.metrics import confusion_matrix

# Generate the confusion matrix for the training set
cm_train = confusion_matrix(y_train, y_train_pred)

# Generate the confusion matrix for the testing set
cm_test = confusion_matrix(y_test, y_test_pred)

# Print the confusion matrices
print("Confusion Matrix (Training Set):\n", cm_train)
print("Confusion Matrix (Testing Set):\n", cm_test)


Confusion Matrix (Training Set):
 [[40  0  0]
 [ 0 38  3]
 [ 0  0 39]]
Confusion Matrix (Testing Set):
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


## Bonus: For each of the data sets in this lab, try training with some of the other models you have learned about, recalculate the evaluation metrics, and compare to determine which models perform best on each data set.