In [2]:
import numpy as np
import pandas as pd
from pandas import DataFrame
from pandas.api.types import is_string_dtype

from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.preprocessing import OrdinalEncoder
from sklearn import clone
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor, HistGradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import BayesianRidge
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import r2_score

from keras.models import Sequential
from keras.layers import Dense, Input
from scikeras.wrappers import KerasRegressor
import tensorflow as tf

In [3]:
# DOCUMENTATION CELL
SequentialEnsembleDoc = '''
    Implements an ensemble of different models to create a meta-model with much better accuracy, though at a higher resource demand. It is similar to Gradient Boosting.

    This is achieved by using the same training algorithm (it must be supervised, meaning the expected solution must be known in advance) with different data in each iteration. In this way, model i attempts to compensate for the flaws of model i-1 (the number of models is determined by the n_estimators attribute). This could also be achieved by using different algorithms on the same data, but that was not the case for this project. Furthermore, it can be done sequentially or in parallel, with this project implementing the sequential approach. 

    The meta-model bases the compensation of each model's flaws on minimizing the gradient of their errors. In this case, the chosen error was the quadratic error. As can be seen, this is an academic project where every decision has been made, and thus many more implementations and combinations can be explored to develop different ensemble algorithms, with this being one potential solution.

    The algorithms used to train the data must be **Regressors**, as we need continuous predictions for both classification and regression problems.

    Attributes:
    -----------
    trainingModel : Regressor
        The algorithm or regressor used to train the data in each iteration. The performance of the meta-model will be highly determined by this model.

    objective : numpy array    
        The array possesing the objective variable values.

    csv : str
        Path to the CSV file to analyze. The target variable must be in the last column.

    n_estimators : int : default_value = 15
        The number of iterations to be performed. Note that not all of them may be completed due to early stopping. This attribute also impacts the model's performance. More iterations means better accuracy but more excution time.

    lr : float : default_value = 0.01
        The learning rate, which measures the importance of each individual model's prediction. Typically takes low values between 0 and 1.

    sample_size : float : default_value = 0.75
        The percentage of data to be used for training. The rest will be used for evaluation via early stopping. Must be between 0 and 1. This attribute also impacts the model's performance. The more data, better accuracy but more execution time.

    task_type : str : default_value = 'regression'
        The type of task: classification or regression. It should only be set to classification if the target variable is boolean (True, False, 0, 1, Yes, No, etc.). Setting it to classification for a regression task will result in poor accuracy.

    early_stopping_patience : int : default_value = 6
        The number of iterations the algorithm will accept a model with low importance to the meta-model before halting.  Its function is to reduce execution time without losing accuracy

    epsilon : float : default_value = 10^-4
        The acceptance value for a model to be considered irrelevant. It is the evaluation score of model i minus the evaluation score of model i-1. Its function is to reduce execution time without losing accuracy

    normalization : bool : default_value = False
        Defines wether the data should be normalized or not. If some like Knn are not normalized, they will yield poor results.
        
    Methods:
    --------
    fit(self, X, y):
        Trains the model using the provided training data (X) and objective variable (y) and adjusts the meta-model. Please note they are arrays of the same size.

    predict(self, X):
        Makes predictions using the trained meta-model. It receives X only because y is stored in self.objective.

    Example:
    --------
    >>> model = SecuencialEnsemble(trainingModel=SomeRegressor(), csv='data.csv', n_estimators=100, lr=0.1)
    >>> model.fit(X_train, y_train)
    >>> predictions = model.predict(X_test)

    >>> cross_val_score(SequentialEnsemble(trainingModel=DecisionTreeRegressor(max_depth=5),objective=individual_objective,lr=0.05, n_estimators=200), original_data,individual_objective,scoring="r2",cv=10, n_jobs=-1)
'''


In [None]:
# De los csv solo deben convertirse las variables categóricas (texto) a numéricas, usando el método que considere más adecuado.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [5]:
'''
# Step 1: Initialize initial predictions (pred_0) 

# Step 2: Iterate through all estimators
    # 1. Compute residuals (error between true values and current predictions)
    remainder_i = y - actual_pred  
    # 2. Train a new model (estimator_i) to predict remainders
    estimator_i.fit(X, remainder_i)  
    # 3. Get predictions from the new model
    pred_i = estimator_i.predict(X)  
    # 4. Update predictions with learning rate (lr)
    actual_pred = actual_pred + lr * pred_i
    # 5. OPTIONAL -> Early Stopping
    obtain a specified % of the 80% of the original data which was used in step 2 (training) and use the leftover to evaluate and the % to ¿re-train?

# Step 3: Return all trained models
'''

'\n# Step 1: Initialize initial predictions (pred_0) \n\n# Step 2: Iterate through all estimators\n    # 1. Compute residuals (error between true values and current predictions)\n    remainder_i = y - actual_pred  \n    # 2. Train a new model (estimator_i) to predict remainders\n    estimator_i.fit(X, remainder_i)  \n    # 3. Get predictions from the new model\n    pred_i = estimator_i.predict(X)  \n    # 4. Update predictions with learning rate (lr)\n    actual_pred = actual_pred + lr * pred_i\n    # 5. OPTIONAL -> Early Stopping\n    obtain a specified % of the 80% of the original data which was used in step 2 (training) and use the leftover to evaluate and the % to ¿re-train?\n\n# Step 3: Return all trained models\n'

In [6]:
def transform_csv(csv:str, normalization: bool = False) -> DataFrame:
    '''
    Objective variable must be at the end! Reads a csv and encodes the string values with an sklearn's OrdinalEncoder, excluding the last column.
    '''
    data = pd.read_csv(csv)
    dicrete_atributes = []
    continous_atributes = []
    discrete_atributes_encoder = OrdinalEncoder()
    for column_name in data.columns:
        if (is_string_dtype(data[column_name])):
            dicrete_atributes.append(column_name)
        else:
            continous_atributes.append(column_name)
    atributes = data.iloc[:, 0:-1]
    discrete_atributes_encoder.fit(atributes[dicrete_atributes])
    atributes[dicrete_atributes] = discrete_atributes_encoder.transform(atributes[dicrete_atributes])
    if normalization:
        atributes = StandardScaler().fit_transform(atributes)
    
    return (atributes,data.iloc[:, -1])

class SequentialEnsemble(BaseEstimator, RegressorMixin):
    ''' Documentation can be read through help(SequentialEnsemble)'''
    __doc__ = SequentialEnsembleDoc 

    def __init__(self, trainingModel,objective,n_estimators:int = 15, lr:float = 0.01, sample_size:float = 0.75, early_stopping_patience: int = 6, epsilon:float = 2*10**(-4), task_type: str = "regression") -> None:  
        self.models = []
        self.trainingModel = trainingModel  
        self.objective = objective
        self.n_estimators = n_estimators
        self.lr = lr # regulates the importance of each model's prediction
        self.sample_size = sample_size
        self.epsilon = epsilon
        self.early_stopping_patience = early_stopping_patience
        self.scaler = None
        self.task_type = task_type
        self.mean = np.mean(self.objective)
       

    def fit(self, X, y) -> None: # I pass the attributes to keep cross_val_score format  
        pred_actual = np.mean(y) * np.ones(len(y))
        last_r2 = 0
        for i in range(self.n_estimators):
            # 1. Calculate remainder (gradient of the error (cuadratic here) of all models (pred_actual defines the prediction of all models until i))  
            remainder = y - pred_actual 
            # 0. Random sampling -> Check idx explanations
            n_samples = int(len(X) * self.sample_size)
            idx = np.random.choice(len(X), n_samples, replace=False)
            X_sample = X[idx]
            remainder_sample = remainder[idx]
            # 2. Train base model
            if callable(self.trainingModel):  # A function to create the neural network
                input_shape = X_sample.shape[1]
                model = self.trainingModel(input_shape)  # Create a new network in each iteration
            else:
                model = clone(self.trainingModel) # clone to be able to save each trained model. If self.trainingModel were to be directly trained (using fit) it would keep being constantly overwritten (thus overwriting the models saved in self.models), since we would be modifying the reference to the object, not creating a new one.
            model.fit(X_sample, remainder_sample)
            # 3. Update predictions  
            pred_actual[idx] += self.lr * model.predict(X_sample).flatten()
            # # 5. EARLY STOPPING
            val_idx = np.setdiff1d(np.arange(len(X)), idx) # Data discarded for training is used in validation
            actual_r2 = self.early_stopping(X, y, val_idx, last_r2)
            if (self.early_stopping_patience == 0):
                break
            last_r2 = actual_r2
            # 6. Save model  
            self.models.append(model)
    
    def early_stopping(self, X, y, val_idx, last_r2:float) -> float:
        if len(val_idx) > 0:
                val_pred = self.predict(X[val_idx])
                val_true = y[val_idx]
                actual_r2 = r2_score(val_true, val_pred)
        if(abs(last_r2-actual_r2)<=self.epsilon):
            self.early_stopping_patience -=1
        return actual_r2

    def predict(self, X) -> float:
        '''
        Returns the prediction of the previously trained (fit) meta-model for an objective variable of a given data file compatible with pandas.
        ''' 
        pred = self.mean * np.ones(len(X))
        for model in self.models:
            pred += self.lr * model.predict(X)
        if self.task_type == "classification":
        # Sigmoide to escalate to [0, 1] for classification tasks.
            pred = 1 / (1 + np.exp(-pred))  
            pred = (pred >= 0.5).astype(int)
        return pred

In [14]:
original_data, individual_objective = transform_csv("csv/house_prices.csv")
original_data, individual_objective = transform_csv("csv/house_prices.csv")
original_data, individual_objective = original_data.to_numpy(), individual_objective.to_numpy()

original_data_normalized, objective_data_normalized = transform_csv("csv/house_prices.csv", normalization=True) #The objective is not normalized, this name is to associate it to original_data_normalized
objective_data_normalized = objective_data_normalized.to_numpy()

In [None]:
model =SequentialEnsemble(trainingModel=DecisionTreeRegressor(max_depth=5,random_state=SEED),objective=individual_objective,lr=0.05, n_estimators=90)
model.set_params(trainingModel__max_depth=10)
model.get_params()
#Añadir estos tipos de parametros a la rejilla

{'early_stopping_patience': 6,
 'epsilon': 0.0002,
 'lr': 0.05,
 'n_estimators': 90,
 'normalization': False,
 'objective': array([25.011, 32.   , 39.674, ..., 39.088, 15.004, 38.428]),
 'sample_size': 0.75,
 'task_type': 'regression',
 'trainingModel__ccp_alpha': 0.0,
 'trainingModel__criterion': 'squared_error',
 'trainingModel__max_depth': 10,
 'trainingModel__max_features': None,
 'trainingModel__max_leaf_nodes': None,
 'trainingModel__min_impurity_decrease': 0.0,
 'trainingModel__min_samples_leaf': 1,
 'trainingModel__min_samples_split': 2,
 'trainingModel__min_weight_fraction_leaf': 0.0,
 'trainingModel__monotonic_cst': None,
 'trainingModel__random_state': 357823,
 'trainingModel__splitter': 'best',
 'trainingModel': DecisionTreeRegressor(max_depth=10, random_state=357823)}

In [None]:
param_grid = {'lr': [round(i, 2) for i in np.arange(0.01, 0.21, 0.02)], 'n_estimators': list(range(50, 301, 25)), 'trainingModel__max_depth':range(3,7), 'sample_size':[round(i, 2) for i in np.arange(0.75, 0.91, 0.075)] }

grid = GridSearchCV(
    estimator=SequentialEnsemble(trainingModel=DecisionTreeRegressor(random_state=SEED), objective=individual_objective),
    param_grid=param_grid,scoring='r2',cv=5,n_jobs=-1,verbose=1)

grid.fit(original_data, individual_objective)
results = pd.DataFrame(grid.cv_results_)
results[['params', 'mean_test_score', 'rank_test_score']].to_csv("test/tree_results.csv", index=False)
print("Best params:", grid.best_params_)
print("Best R²:", grid.best_score_)

Fitting 5 folds for each of 1320 candidates, totalling 6600 fits
Best params: {'lr': np.float64(0.09), 'n_estimators': 250, 'sample_size': np.float64(0.9), 'trainingModel__max_depth': 6}
Best R²: 0.9250444957125838


In [None]:
param_grid = {'lr': [round(i, 2) for i in np.arange(0.01, 0.21, 0.02)], 'n_estimators': list(range(50, 301, 25)), 'trainingModel__max_depth':range(3,7), 'sample_size':[round(i, 2) for i in np.arange(0.75, 0.91, 0.075)] }

grid = GridSearchCV(
    estimator=SequentialEnsemble(trainingModel=DecisionTreeRegressor(random_state=SEED), objective=individual_objective),
    param_grid=param_grid,scoring='r2',cv=5,n_jobs=-1,verbose=1)

grid.fit(original_data, individual_objective)
results = pd.DataFrame(grid.cv_results_)
results[['params', 'mean_test_score', 'rank_test_score']].to_csv("test/tree_results_houses.csv", index=False)
print("Best params:", grid.best_params_)
print("Best R²:", grid.best_score_)

Fitting 5 folds for each of 1320 candidates, totalling 6600 fits
Best params: {'lr': np.float64(0.15), 'n_estimators': 100, 'sample_size': np.float64(0.9), 'trainingModel__max_depth': 5}
Best R²: 0.7658994907349611


In [None]:
param_grid = {'lr': [round(i, 2) for i in np.arange(0.01, 0.21, 0.04)], 'n_estimators': list(range(100, 301, 50)), 'trainingModel__n_neighbors':range(1,5), 'trainingModel__metric': ['euclidean','manhattan','minkowski']}

grid = GridSearchCV(
    estimator=SequentialEnsemble(trainingModel=KNeighborsRegressor(n_neighbors=5, metric='euclidean',n_jobs=-1, random_state=SEED), objective=objective_data_normalized),
    param_grid=param_grid,scoring='r2',cv=5,n_jobs=-1,verbose=1)

grid.fit(original_data_normalized, objective_data_normalized)
results = pd.DataFrame(grid.cv_results_)
results[['params', 'mean_test_score', 'rank_test_score']].to_csv("test/knn_results.csv", index=False)
print("Mejores parámetros:", grid.best_params_)
print("Mejor R²:", grid.best_score_)

Fitting 5 folds for each of 300 candidates, totalling 1500 fits
Mejores parámetros: {'lr': np.float64(0.01), 'n_estimators': 150, 'trainingModel__metric': 'manhattan', 'trainingModel__n_neighbors': 4}
Mejor R²: 0.5578239421982146


In [None]:
param_grid = {'lr': [round(i, 2) for i in np.arange(0.01, 0.21, 0.02)], 'n_estimators': list(range(100, 301, 50)), 
              'trainingModel__n_neighbors':range(1,6), 'trainingModel__metric': ['euclidean','manhattan','minkowski'],
              'sample_size':[round(i, 2) for i in np.arange(0.75, 0.92, 0.2)]}

grid = GridSearchCV(
    estimator=SequentialEnsemble(trainingModel=KNeighborsRegressor(n_neighbors=5, metric='euclidean',n_jobs=-1, random_state=SEED), objective=objective_data_normalized),
    param_grid=param_grid,scoring='r2',cv=5,n_jobs=-1,verbose=1)

grid.fit(original_data_normalized, objective_data_normalized)
results = pd.DataFrame(grid.cv_results_)
results[['params', 'mean_test_score', 'rank_test_score']].to_csv("test/knn_houses_results.csv", index=False)
print("Mejores parámetros:", grid.best_params_)
print("Mejor R²:", grid.best_score_)

Fitting 5 folds for each of 750 candidates, totalling 3750 fits
Mejores parámetros: {'lr': np.float64(0.01), 'n_estimators': 200, 'sample_size': np.float64(0.75), 'trainingModel__metric': 'manhattan', 'trainingModel__n_neighbors': 2}
Mejor R²: 0.7473178400234269


## OLD TESTING CODE

In [None]:
res = cross_val_score(SequentialEnsemble(trainingModel=DecisionTreeRegressor(max_depth=6,random_state=SEED),objective=individual_objective,lr=0.05, n_estimators=200),original_data,individual_objective,scoring="r2",cv=10, n_jobs=-1)
print(np.mean(res))
print(res)

0.8906086430977187
[0.90805757 0.90883688 0.88716325 0.88485569 0.85877904 0.88157923
 0.89277301 0.90657009 0.88689478 0.89057691]


In [None]:
res = cross_val_score(SequentialEnsemble(trainingModel=KNeighborsRegressor(n_neighbors=4, metric='manhattan', weights='distance', random_state=SEED),n_estimators = 150,lr=0.01,objective=individual_objective,sample_size=0.9),original_data_normalized,individual_objective,scoring="r2",cv=5, n_jobs=-1)
print(np.mean(res))
print(res)

0.5440459488610172
[0.50043453 0.6484962  0.50811438 0.58651223 0.52509013 0.45134701
 0.59899397 0.57037598 0.52408828 0.52700677]


In [None]:
res = cross_val_score(SequentialEnsemble(trainingModel=GradientBoostingRegressor(max_depth=5, random_state=SEED),objective=individual_objective,lr=0.05, n_estimators=50),original_data,individual_objective,scoring="r2",cv=10, n_jobs=-1)
print(np.mean(res))
print(res)

0.9233852314527965
[0.93968816 0.93757149 0.9148591  0.91572482 0.89584778 0.92318355
 0.92787011 0.92685453 0.93046937 0.92178341]


In [None]:
res = cross_val_score(SequentialEnsemble(trainingModel=HistGradientBoostingRegressor(max_depth=5, random_state=SEED),objective=individual_objective,lr=0.05, n_estimators=1),original_data,individual_objective,scoring="r2",cv=10, n_jobs=-1)
print(np.mean(res))
print(res)

0.06967745237753957
[0.07574769 0.07664559 0.05590779 0.07097361 0.07250447 0.06736902
 0.07393766 0.07638866 0.04987197 0.07742807]


In [None]:
res = cross_val_score(SequentialEnsemble(trainingModel=LinearRegression(random_state=SEED),objective=individual_objective,lr=0.05, n_estimators=200),original_data,individual_objective,scoring="r2",cv=10, n_jobs=-1)
print(np.mean(res))
print(res)

0.11965824227046191
[ 0.23730229  0.18918388  0.17270032  0.21708655 -0.10377557  0.0426274
  0.111561    0.2029627   0.08060447  0.04632938]


In [None]:
res = cross_val_score(SequentialEnsemble(trainingModel=BayesianRidge(random_state=SEED),objective=individual_objective,lr=0.06, n_estimators=200),original_data,individual_objective,scoring="r2",cv=10)
print(np.mean(res))
print(res)
# Bad results because it is a linear model, doesn't correct well non linear remainders like the one we are dealing with.

0.1262970684937635
[0.19712728 0.17128294 0.13593283 0.20394962 0.05053328 0.05437013
 0.18902791 0.13823466 0.07969064 0.04282139]


In [None]:
res = cross_val_score(SequentialEnsemble(trainingModel=GaussianProcessRegressor(random_state=SEED),objective=individual_objective,lr=0.06, n_estimators=200),original_data,individual_objective,scoring="r2",cv=10, n_jobs=-1)
np.mean(res)
print(res)
# Takes too long because it is not good for ensembles

In [None]:
ensemble_tree = SequentialEnsemble(
    trainingModel=DecisionTreeRegressor(max_depth=3, random_state=SEED), # Avoid overadjustment (memorizing the training data) with small trees
    objective=individual_objective,
    n_estimators = 200,
    lr=0.4,
)
ensemble_tree.fit(original_data, individual_objective)
predicciones = ensemble_tree.predict(X=original_data)
print('R2 = ', r2_score(individual_objective,predicciones))

R2 =  0.8881963071958137


In [None]:
def create_network(input_shape) -> Sequential:
    model = Sequential([
        Input(shape=(input_shape,)),  # Data input layer
        Dense(4, activation='relu'),
        Dense(1)  # Linear output for regresion
    ])
    model.compile(optimizer='adam', loss='mse')
    return model  

def create_Regressor_network(input_shape) -> KerasRegressor:
# Wrap the network in a Scikit-learn compatible estimator
    return KerasRegressor(
        model = lambda: create_network(input_shape),
        epochs=30,
        batch_size=32,
        verbose=0
    )

ensemble_neural_network = SequentialEnsemble(
    trainingModel=create_Regressor_network,
    objective=individual_objective,
    lr=0.05
)
ensemble_neural_network.fit(original_data,individual_objective)
predicciones = ensemble_neural_network.predict(original_data)
print(r2_score(individual_objective,predicciones))

In [None]:
ensemble_Bayes = SequentialEnsemble(
    trainingModel=BayesianRidge(random_state=SEED),
    objective=individual_objective,
    n_estimators = 200,
    lr=0.05
)
ensemble_Bayes.fit(original_data, individual_objective)
predicciones = ensemble_Bayes.predict(original_data)
print('R2 = ', r2_score(individual_objective,predicciones))

R2 =  0.14427448642762042


In [None]:
ensemble_kNN = SequentialEnsemble(
    trainingModel=KNeighborsRegressor(n_neighbors=5, weights='distance', random_state=SEED),
    n_estimators = 70,
    lr=0.05,
    normalization=True,
    objective=individual_objective
)
ensemble_kNN.fit(original_data, individual_objective)
predicciones = ensemble_kNN.predict(original_data)
print('R2 = ', r2_score(individual_objective,predicciones))

R2 =  0.9652082136385102


In [None]:
ensemble_Gaussian = SequentialEnsemble(
    trainingModel=GaussianProcessRegressor(random_state=SEED),
    n_estimators = 90,
    lr=0.05,
    objective=individual_objective
)
ensemble_Gaussian.fit(original_data, individual_objective)
predicciones = ensemble_Gaussian.predict(X=original_data)
print('R2 = ', r2_score(individual_objective,predicciones))

R2 =  0.9949326068550854


In [4]:
transform_csv('csv/house_prices.csv')[0]

Unnamed: 0,GarageCars,Condition2,YearBuilt,GarageYrBlt,LandContour,LowQualFinSF,HouseStyle,GarageType,MSSubClass,WoodDeckSF,...,SaleType,MiscVal,BsmtExposure,OpenPorchSF,ExterCond,Fireplaces,FullBath,BsmtQual,MiscFeature,PoolQC
0,2,2.0,1962,1977.0,3.0,0,2.0,5.0,20,0,...,7.0,0,3.0,0,3.0,0,1,3.0,4.0,2.0
1,0,2.0,1914,0.0,3.0,0,4.0,6.0,75,0,...,7.0,0,3.0,291,3.0,1,2,3.0,4.0,2.0
2,2,2.0,1999,1999.0,3.0,0,2.0,1.0,20,0,...,7.0,0,0.0,35,3.0,0,2,2.0,4.0,2.0
3,1,2.0,1948,1948.0,0.0,0,5.0,1.0,20,103,...,7.0,0,3.0,0,1.0,0,3,3.0,4.0,2.0
4,2,2.0,1950,1950.0,3.0,0,2.0,5.0,20,0,...,7.0,0,3.0,29,3.0,0,1,4.0,4.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
555,1,2.0,1959,1959.0,3.0,0,7.0,1.0,80,86,...,7.0,0,0.0,0,1.0,1,1,3.0,4.0,2.0
556,1,2.0,1934,1939.0,1.0,0,5.0,5.0,70,0,...,7.0,0,3.0,0,3.0,1,1,3.0,4.0,2.0
557,2,2.0,1882,1925.0,3.0,0,5.0,1.0,70,0,...,7.0,0,3.0,169,1.0,1,1,3.0,4.0,2.0
558,1,2.0,1953,1953.0,3.0,0,2.0,1.0,20,0,...,7.0,0,3.0,18,3.0,0,1,3.0,4.0,2.0


In [None]:
# TESTING PLAYGROUND
originalData = pd.read_csv('csv/house_prices.csv')
#data.iloc[0]
#data.iloc[0]['GarageCars']
originalData.columns
#data.iloc[0,1]
#key = data.iloc[0,:1]
#key
#is_string_dtype(data[data.columns[4]])
#is_string_dtype(data[data.columns[4]])
#data.columns[0]
originalData["PoolQC"]

0      none
1      none
2      none
3      none
4      none
       ... 
555    none
556    none
557    none
558    none
559    none
Name: PoolQC, Length: 560, dtype: object