# Proyecto Individual 02
🖥️ **Machine Learning** 🖥️ <br>
🔹Zapata, María Belén

--------------------------------------------------------------------- Modelo Supervisado ---------------------------------------------------------------------

In [1]:
#Librerías generales: 
import pandas as pd
import numpy as np
from sklearn.impute import KNNImputer

#librerías para la creación del Pipeline:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline

#Librerías para el Modelo:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score

## 🟣 Pipeline

Para el tratamiento de los datos de los 2 archivos que me fueron entregados, desarrollaré un pipeline para asegurarme de que todos los cambios que se hagan sean correctos. 

🔹 **Creación de las clases:** <br>
Las clases son necesarias para introducirlas en el Pipeline. <br> 
Para este proyecto hago 2 pipelines, ya que algunos cambios son necesarios en el archivo de entrenamiento, pero no están permitidos en el archivo de testeo. 

In [2]:
#Importo librerías necesarias:

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
    #Si bien las librerías son importadas en el primer bloque del notebook, decidí reintroducirlas en los momentos donde son necesarias en el código, para mayor comprensión del paso a paso. 

* **Pipeline_both** = este es utilizable en ambos datasets.

In [3]:
# Fills the null values of the "laundry_options" and "parking_options" columns with the
# value "unknown", mainly so that rows are not discarded later just for having nulls there.
class FillNas(BaseEstimator, TransformerMixin):
    """Replace nulls in the configured columns with the string "unknown".

    Parameters
    ----------
    columns : list of str
        Names of the columns whose nulls are replaced.
    """

    def __init__(self, columns=["laundry_options","parking_options"]):
        self.columns = columns

    def fit(self, X, y=None):
        """Nothing is learned from the data; returns the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with nulls in ``self.columns`` set to "unknown".

        The input frame is left untouched: assigning into ``X`` directly would also
        modify the caller's DataFrame, even when only ``fit`` was requested through
        a pipeline.
        """
        filled = X.copy()
        filled[self.columns] = filled[self.columns].fillna("unknown")
        return filled

# Drops the columns of the dataset that are definitely not needed.
class DropColumns(BaseEstimator, TransformerMixin):
    """Remove a fixed set of columns from a DataFrame.

    Inherits from ``BaseEstimator`` like the other transformers in this notebook,
    which gives it ``get_params`` / ``set_params`` and a readable repr.

    Parameters
    ----------
    columns : list of str
        Names of the columns to drop.
    """

    def __init__(self, columns=["id", "url", "region_url", "image_url"]):
        self.columns = columns

    def fit(self, X, y=None):
        """Nothing is learned from the data; returns the transformer itself."""
        return self

    def transform(self, X):
        """Return a new DataFrame without ``self.columns``.

        Raises ``KeyError`` (from ``DataFrame.drop``) if any of the columns is absent.
        """
        return X.drop(columns=self.columns)

# Build the pipeline, keeping its steps in "processes_both" for tidiness.
# The "both" suffix is a reminder that these changes may also be applied to test.parquet.
processes_both = [
    ('fill_nas', FillNas()),
    ('drop_columns', DropColumns()),
]
pipeline_both = Pipeline(steps=processes_both)

* **Pipeline_train** = este SOLO se debe utilizar en el dataset de entrenamiento. 

In [4]:
# Removes the rows whose value in the description column is repeated, and then drops
# that column altogether, since it is not used afterwards.
class RemoveDuplicates(BaseEstimator, TransformerMixin):
    """Drop rows duplicated on one column, then drop that column.

    Parameters
    ----------
    column_name : str
        Column used to detect duplicates; it is removed from the result.
    """

    def __init__(self, column_name = 'description'):
        self.column_name = column_name

    def fit(self, X, y=None):
        """Nothing is learned from the data; returns the transformer itself."""
        return self

    def transform(self, X):
        """Return a new DataFrame de-duplicated on ``column_name`` and without it."""
        key = self.column_name
        return X.drop_duplicates(subset=[key]).drop(columns=[key])


# Removes every row that still contains a null value.
class DropNullValues(BaseEstimator, TransformerMixin):
    """Drop all rows that have at least one null."""

    def fit(self, X, y=None):
        """Nothing is learned from the data; returns the transformer itself."""
        return self

    def transform(self, X):
        """Return a new DataFrame keeping only the rows without nulls."""
        complete_rows = X.dropna()
        return complete_rows

# Creates the "category_price" column from the 'price' column and then drops 'price'
# from the dataset.
class CreateCategoryPrice(BaseEstimator, TransformerMixin):
    """Derive the binary target ``category_price`` from ``price``.

    ``category_price`` is 1 when ``price <= threshold`` and 0 otherwise.

    Parameters
    ----------
    threshold : int or float, default=999
        Highest price that is still labelled 1.
    """

    def __init__(self, threshold=999):
        self.threshold = threshold

    def fit(self, X, y=None):
        """Nothing is learned from the data; returns the transformer itself."""
        return self

    def transform(self, X):
        """Return a new DataFrame with ``category_price`` added and ``price`` removed.

        ``assign`` is used instead of item assignment so the caller's DataFrame is
        not modified.
        """
        is_low_price = np.where(X['price'] <= self.threshold, 1, 0)
        return X.assign(category_price=is_low_price).drop(columns=['price'])

# Build the pipeline, keeping its steps in "processes_train" for tidiness.
processes_train = [
    ('remove_duplicates', RemoveDuplicates()),
    ('drop_null_values', DropNullValues()),
    ('create_category_price', CreateCategoryPrice()),
]
pipeline_train = Pipeline(steps=processes_train)

# 🟣 Train.parquet

🔹 Comienzo trabajando sobre el archivo de entrenamiento. 

In [5]:
# Load the training dataset.
df_train = pd.read_parquet("train.parquet")
# Display it to check that it was loaded correctly.
df_train

Unnamed: 0,id,url,region,region_url,price,type,sqfeet,beds,baths,cats_allowed,...,wheelchair_access,electric_vehicle_charge,comes_furnished,laundry_options,parking_options,image_url,description,lat,long,state
0,7048013474,https://boise.craigslist.org/apa/d/very-nice-b...,boise,https://boise.craigslist.org,1350,house,1200,2,2.0,1,...,0,0,0,w/d in unit,detached garage,https://images.craigslist.org/00B0B_cPiJMEheZe...,Super cute row house in the Boise bench area. ...,43.5851,-116.2250,id
1,7043931179,https://cosprings.craigslist.org/apa/d/colorad...,colorado springs,https://cosprings.craigslist.org,1115,apartment,694,1,1.0,1,...,0,0,0,w/d in unit,carport,https://images.craigslist.org/00R0R_5XAoSKvfrz...,Windtree Apartment Homes currently has a spaci...,38.9137,-104.7800,co
2,7048254516,https://norfolk.craigslist.org/apa/d/virginia-...,norfolk / hampton roads,https://norfolk.craigslist.org,1129,apartment,900,2,2.0,0,...,0,0,0,w/d hookups,off-street parking,https://images.craigslist.org/00f0f_3ZbTFrsHpZ...,Call Today! show contact info Indian Lakes ...,36.7922,-76.1643,va
3,7041032577,https://phoenix.craigslist.org/nph/apa/d/phoen...,phoenix,https://phoenix.craigslist.org,1580,house,1469,3,2.0,1,...,0,0,0,w/d in unit,,https://images.craigslist.org/00f0f_aXV5Dkd5qk...,Cody Anderson Two Brothers Realty License #: S...,33.5623,-112.0560,az
4,7048588701,https://nashville.craigslist.org/apa/d/antioch...,nashville,https://nashville.craigslist.org,995,apartment,700,1,1.0,1,...,0,0,0,w/d in unit,carport,https://images.craigslist.org/00606_gYOGKClOHv...,To schedule a tour We now book our tour appoin...,36.0595,-86.6592,tn
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
346474,7050982281,https://evansville.craigslist.org/apa/d/evansv...,evansville,https://evansville.craigslist.org,672,apartment,660,1,1.0,1,...,0,0,0,laundry on site,,https://images.craigslist.org/00x0x_9oaHKZ8Ilp...,"The pool, the office, the 24-hour on-site laun...",37.9591,-87.5293,in
346475,7049418251,https://sandiego.craigslist.org/ssd/apa/d/chul...,san diego,https://sandiego.craigslist.org,2122,apartment,1099,2,2.0,1,...,0,0,0,w/d in unit,off-street parking,https://images.craigslist.org/00O0O_6QsU4p5WMh...,Seize your chance to live in our beautiful apa...,32.6279,-117.0370,ca
346476,7048268235,https://columbia.craigslist.org/apa/d/columbia...,columbia,https://columbia.craigslist.org,1014,apartment,1104,2,2.0,1,...,0,0,0,w/d hookups,off-street parking,https://images.craigslist.org/00000_dpNWqzYhGK...,Tucked into the rolling landscape of Southeast...,33.9659,-80.9355,sc
346477,7026721229,https://nd.craigslist.org/apa/d/minot-open-con...,north dakota,https://nd.craigslist.org,935,apartment,1050,2,2.0,0,...,0,0,0,w/d in unit,detached garage,https://images.craigslist.org/00Z0Z_65g6Cty1RX...,Enjoy living at Southwood Apartments! Located ...,48.1995,-101.2800,nd


* Análisis de los datos:

In [6]:
# Size of the dataset as (rows, columns).
df_train.shape

(346479, 22)

In [7]:
# Summary statistics of the numeric columns.
df_train.describe()

Unnamed: 0,id,price,sqfeet,beds,baths,cats_allowed,dogs_allowed,smoking_allowed,wheelchair_access,electric_vehicle_charge,comes_furnished,lat,long
count,346479.0,346479.0,346479.0,346479.0,346479.0,346479.0,346479.0,346479.0,346479.0,346479.0,346479.0,344757.0,344757.0
mean,7040988000.0,9664.42,1066.326,1.903189,1.480339,0.726803,0.707861,0.731738,0.08197,0.012792,0.048141,37.234363,-92.705415
std,8802214.0,4703573.0,20185.37,3.162685,0.608246,0.445602,0.454746,0.443055,0.27432,0.112374,0.214065,5.550956,16.551071
min,7003808000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-43.5333,-163.894
25%,7035988000.0,805.0,750.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,33.4531,-100.784
50%,7043325000.0,1036.0,949.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,37.6501,-87.7108
75%,7048431000.0,1395.0,1150.0,2.0,2.0,1.0,1.0,1.0,0.0,0.0,0.0,41.1379,-81.1746
max,7051292000.0,2768307000.0,8388607.0,1100.0,75.0,1.0,1.0,1.0,1.0,1.0,1.0,102.036,172.633


In [8]:
# Data type of each column.
df_train.dtypes

id                           int64
url                         object
region                      object
region_url                  object
price                        int64
type                        object
sqfeet                       int64
beds                         int64
baths                      float64
cats_allowed                 int64
dogs_allowed                 int64
smoking_allowed              int64
wheelchair_access            int64
electric_vehicle_charge      int64
comes_furnished              int64
laundry_options             object
parking_options             object
image_url                   object
description                 object
lat                        float64
long                       float64
state                       object
dtype: object

* Aplicación de los pipelines: <br>
🔹 Método **fit**: Ajusta el transformador (o modelo) a los datos. (Aprende de los datos lo que necesita para poder transformarlos). <br>
🔹 Método **transform**: Aplica a los datos de entrada lo aprendido en el método "fit". (Transforma los datos de acuerdo a las reglas aprendidas)

In [9]:
# First pipeline: the cleaning steps shared by both datasets.
# None of the steps keeps fitted state (every fit() just returns self), so fitting and
# transforming can be done in a single call.
df_train = pipeline_both.fit_transform(df_train)

# Second pipeline: the training-only steps.
df_train = pipeline_train.fit_transform(df_train)

In [10]:
# Take one last look to confirm the dataset is ready to be used.
df_train

Unnamed: 0,region,type,sqfeet,beds,baths,cats_allowed,dogs_allowed,smoking_allowed,wheelchair_access,electric_vehicle_charge,comes_furnished,laundry_options,parking_options,lat,long,state,category_price
0,boise,house,1200,2,2.0,1,1,1,0,0,0,w/d in unit,detached garage,43.5851,-116.2250,id,0
1,colorado springs,apartment,694,1,1.0,1,1,1,0,0,0,w/d in unit,carport,38.9137,-104.7800,co,0
2,norfolk / hampton roads,apartment,900,2,2.0,0,0,1,0,0,0,w/d hookups,off-street parking,36.7922,-76.1643,va,0
3,phoenix,house,1469,3,2.0,1,1,1,0,0,0,w/d in unit,unknown,33.5623,-112.0560,az,0
4,nashville,apartment,700,1,1.0,1,1,1,0,0,0,w/d in unit,carport,36.0595,-86.6592,tn,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
346470,tampa bay area,apartment,1054,2,2.0,1,1,0,0,0,0,w/d in unit,attached garage,27.9523,-82.4476,fl,0
346471,south dakota,apartment,600,2,1.0,1,1,0,0,0,0,laundry on site,off-street parking,43.7198,-98.0187,sd,1
346475,san diego,apartment,1099,2,2.0,1,1,1,0,0,0,w/d in unit,off-street parking,32.6279,-117.0370,ca,0
346477,north dakota,apartment,1050,2,2.0,0,0,0,0,0,0,w/d in unit,detached garage,48.1995,-101.2800,nd,1


# Creación del modelo

In [11]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score
    #Si bien las librerías son importadas en el primer bloque del notebook, decidí reintroducirlas en los momentos donde son necesarias en el código, para mayor comprensión del paso a paso. 

* Creo el modelo 'Arbol de Clasificación'

In [12]:
# Columns of the dataset the model is trained on. They are listed explicitly so that
# different combinations of columns can be tried easily.
X=df_train[['sqfeet', 'beds', 'baths', 'cats_allowed', 'dogs_allowed', 'smoking_allowed', 'wheelchair_access', 'electric_vehicle_charge', 'comes_furnished', 'lat', 'long']]
# Target variable.
y=df_train['category_price']

# Hold out 20% of df_train to evaluate the model.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Classification tree. The seed is fixed because the tree permutes the features at
# every split; without it, repeated runs can produce different trees and metrics.
model = DecisionTreeClassifier(random_state=42)

# Train the model on the training split.
model.fit(X_train, y_train)

# Predict the target for the held-out split with the trained model.
y_pred = model.predict(X_test)

* Verifico que tanto el accuracy como el recall tengan resultados favorables. 

In [13]:
# Accuracy of the model on the held-out split.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9115114010366371


In [14]:
# Recall of the model on the held-out split.
recall = recall_score(y_test, y_pred)
print("Recall: ", recall)

Recall:  0.8929451924900437


Con estos resultados, puedo determinar que el modelo funciona correctamente, ya que ambos números superan el 0.80. 

# 🟣 Test.parquet

🔹 Procedo a trabajar sobre el archivo de testeo. 

Este dataset fue separado del train.parquet antes de iniciar con el proyecto. Es idéntico al train.parquet, solo que no posee la columna "price", tiene considerablemente menos registros, y dichos registros son distintos a los que hay en el train.parquet. 

In [15]:
# Load the test dataset.
df_test = pd.read_parquet("test.parquet")
# Display it to check that it was loaded correctly.
df_test

Unnamed: 0,id,url,region,region_url,type,sqfeet,beds,baths,cats_allowed,dogs_allowed,...,wheelchair_access,electric_vehicle_charge,comes_furnished,laundry_options,parking_options,image_url,description,lat,long,state
0,7037609789,https://annarbor.craigslist.org/apa/d/wixom-ho...,ann arbor,https://annarbor.craigslist.org,manufactured,1344,3,2.0,0,0,...,0,0,0,w/d in unit,off-street parking,https://images.craigslist.org/00M0M_iNczP1nzIL...,"OPEN HOUSE TODAY! APPLY THIS WEEK, PUT A HOLDI...",42.5333,-83.5763,mi
1,7032406876,https://vermont.craigslist.org/apa/d/randolph-...,vermont,https://vermont.craigslist.org,apartment,1050,2,1.0,0,0,...,0,0,0,w/d hookups,off-street parking,https://images.craigslist.org/00L0L_ecirmYBIzL...,"Think of it, you'll be first to get your mail....",43.9393,-72.5538,vt
2,7037022682,https://annarbor.craigslist.org/apa/d/ann-arbo...,ann arbor,https://annarbor.craigslist.org,apartment,1150,2,2.0,1,1,...,1,0,0,w/d in unit,carport,https://images.craigslist.org/00e0e_dPln2xjo9g...,One of Ann Arbor's most luxurious apartment co...,42.2492,-83.7712,mi
3,7048681802,https://fortcollins.craigslist.org/apa/d/fort-...,fort collins / north CO,https://fortcollins.craigslist.org,apartment,1280,2,2.5,1,1,...,0,0,0,w/d in unit,attached garage,https://images.craigslist.org/00L0L_jlektT5cSd...,"Specials! Move in before January 16th, 2020 an...",40.5501,-105.0350,co
4,7043597870,https://charlottesville.craigslist.org/apa/d/c...,charlottesville,https://charlottesville.craigslist.org,apartment,783,2,1.0,1,1,...,0,0,0,laundry on site,,https://images.craigslist.org/00D0D_cXa4KbZ6ox...,Barracks West Apartments & Townhomes in Charlo...,38.0936,-78.5611,va
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38493,7041556338,https://mobile.craigslist.org/apa/d/daphne-lux...,mobile,https://mobile.craigslist.org,apartment,1180,2,2.0,1,1,...,1,0,0,w/d in unit,detached garage,https://images.craigslist.org/01616_lCR9AY6Vlb...,At Belforest Villas youâll have all the conv...,30.6197,-87.8895,al
38494,7051072582,https://elpaso.craigslist.org/apa/d/el-paso-th...,el paso,https://elpaso.craigslist.org,apartment,1138,3,2.0,1,1,...,0,0,0,w/d hookups,off-street parking,https://images.craigslist.org/01010_fEVpb2QLmX...,Ready for the CrossPointe Experience show con...,31.8045,-105.9660,tx
38495,7048966175,https://tampa.craigslist.org/hil/apa/d/brandon...,tampa bay area,https://tampa.craigslist.org,apartment,743,1,1.0,1,1,...,0,0,0,w/d in unit,off-street parking,https://images.craigslist.org/00r0r_b7LZqSM75f...,To schedule a tour We now book our tour appoin...,27.8971,-82.3387,fl
38496,7044693740,https://mohave.craigslist.org/apa/d/fort-mohav...,mohave county,https://mohave.craigslist.org,house,1276,3,2.0,0,0,...,0,0,0,w/d hookups,attached garage,https://images.craigslist.org/00606_21aHFx5Gtq...,"House for Rent (1 year lease - min. ) - 3 Bed,...",35.0052,-114.5690,az


* Análisis de datos: 

In [16]:
# Size of the dataset. It has one column fewer than df_train:
# the missing one is 'price', as mentioned earlier.
df_test.shape

(38498, 21)

In [17]:
# Summary statistics of the numeric columns.
df_test.describe()

Unnamed: 0,id,sqfeet,beds,baths,cats_allowed,dogs_allowed,smoking_allowed,wheelchair_access,electric_vehicle_charge,comes_furnished,lat,long
count,38498.0,38498.0,38498.0,38498.0,38498.0,38498.0,38498.0,38498.0,38498.0,38498.0,38302.0,38302.0
mean,7040931000.0,1002.062964,1.924749,1.484129,0.727674,0.708426,0.732064,0.083381,0.013585,0.048002,37.225599,-92.657573
std,8783775.0,686.933541,5.665451,0.700228,0.445162,0.454493,0.44289,0.276461,0.115762,0.213774,5.502983,16.359293
min,7004010000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.25383,-159.42
25%,7035888000.0,750.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,33.4717,-99.79
50%,7043099000.0,947.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,37.61905,-87.85785
75%,7048393000.0,1150.0,2.0,2.0,1.0,1.0,1.0,0.0,0.0,0.0,41.1468,-81.242075
max,7051284000.0,95242.0,1100.0,75.0,1.0,1.0,1.0,1.0,1.0,1.0,64.881,94.1248


In [18]:
# Data type of each column.
# They match the data types seen in train.parquet.
df_test.dtypes

id                           int64
url                         object
region                      object
region_url                  object
type                        object
sqfeet                       int64
beds                         int64
baths                      float64
cats_allowed                 int64
dogs_allowed                 int64
smoking_allowed              int64
wheelchair_access            int64
electric_vehicle_charge      int64
comes_furnished              int64
laundry_options             object
parking_options             object
image_url                   object
description                 object
lat                        float64
long                       float64
state                       object
dtype: object

In [19]:
# Count the nulls per column.
# 'lat' and 'long' contain some, and both are features of the model, so they have to
# be filled in before predicting.
df_test.isnull().sum()

id                             0
url                            0
region                         0
region_url                     0
type                           0
sqfeet                         0
beds                           0
baths                          0
cats_allowed                   0
dogs_allowed                   0
smoking_allowed                0
wheelchair_access              0
electric_vehicle_charge        0
comes_furnished                0
laundry_options             7855
parking_options            14005
image_url                      0
description                    0
lat                          196
long                         196
state                          0
dtype: int64

* Aplicación de los pipelines: <br>
⚠️ Recordamos que solo 1 de los dos pipelines se puede utilizar con este dataset. <br>
❌*pipeline_train NO se puede utilizar, porque elimina registros, y no está permitido.*

In [20]:
# Apply the only pipeline allowed on this dataset (it does not remove any rows).
# Its steps keep no fitted state, so fit and transform are done in a single call.
df_test = pipeline_both.fit_transform(df_test)

In [21]:
# Check that the changes were applied.
df_test.head()

Unnamed: 0,region,type,sqfeet,beds,baths,cats_allowed,dogs_allowed,smoking_allowed,wheelchair_access,electric_vehicle_charge,comes_furnished,laundry_options,parking_options,description,lat,long,state
0,ann arbor,manufactured,1344,3,2.0,0,0,1,0,0,0,w/d in unit,off-street parking,"OPEN HOUSE TODAY! APPLY THIS WEEK, PUT A HOLDI...",42.5333,-83.5763,mi
1,vermont,apartment,1050,2,1.0,0,0,1,0,0,0,w/d hookups,off-street parking,"Think of it, you'll be first to get your mail....",43.9393,-72.5538,vt
2,ann arbor,apartment,1150,2,2.0,1,1,1,1,0,0,w/d in unit,carport,One of Ann Arbor's most luxurious apartment co...,42.2492,-83.7712,mi
3,fort collins / north CO,apartment,1280,2,2.5,1,1,0,0,0,0,w/d in unit,attached garage,"Specials! Move in before January 16th, 2020 an...",40.5501,-105.035,co
4,charlottesville,apartment,783,2,1.0,1,1,1,0,0,0,laundry on site,unknown,Barracks West Apartments & Townhomes in Charlo...,38.0936,-78.5611,va


⚠️ Como notamos previamente, existen nulos en algunas de las columnas necesarias para la predicción. <br>
🔹 Para eso, importo KNNImputer para imputar los datos faltantes. 

In [22]:
from sklearn.impute import KNNImputer

# Create a KNNImputer that fills each missing value from its 3 nearest neighbours.
imputer = KNNImputer(n_neighbors=3)

# Impute the missing values of the 'lat' and 'long' columns.
# NOTE(review): the imputer only sees these two columns. If a row is missing both,
# there is no observed value to measure distance with - confirm how KNNImputer fills
# such rows and whether that is acceptable here.
df_test[['lat', 'long']] = imputer.fit_transform(df_test[['lat', 'long']])

# Predicción: 

In [23]:
# Feature matrix for the test file: the same columns used to train the tree.
# NOTE(review): this rebinds X_test, which previously held the validation split
# created with train_test_split.
X_test = df_test[['sqfeet', 'beds', 'baths', 'cats_allowed', 'dogs_allowed', 'smoking_allowed', 'wheelchair_access', 'electric_vehicle_charge', 'comes_furnished', 'lat', 'long']]

# Predict on the test file with the trained model.
predictions = model.predict(X_test)

# Store the predictions in a new "pred" column of df_test.
df_test["pred"] = predictions

In [24]:
# Check that the 'pred' column was added and holds values.
df_test.head()

Unnamed: 0,region,type,sqfeet,beds,baths,cats_allowed,dogs_allowed,smoking_allowed,wheelchair_access,electric_vehicle_charge,comes_furnished,laundry_options,parking_options,description,lat,long,state,pred
0,ann arbor,manufactured,1344,3,2.0,0,0,1,0,0,0,w/d in unit,off-street parking,"OPEN HOUSE TODAY! APPLY THIS WEEK, PUT A HOLDI...",42.5333,-83.5763,mi,0
1,vermont,apartment,1050,2,1.0,0,0,1,0,0,0,w/d hookups,off-street parking,"Think of it, you'll be first to get your mail....",43.9393,-72.5538,vt,0
2,ann arbor,apartment,1150,2,2.0,1,1,1,1,0,0,w/d in unit,carport,One of Ann Arbor's most luxurious apartment co...,42.2492,-83.7712,mi,0
3,fort collins / north CO,apartment,1280,2,2.5,1,1,0,0,0,0,w/d in unit,attached garage,"Specials! Move in before January 16th, 2020 an...",40.5501,-105.035,co,0
4,charlottesville,apartment,783,2,1.0,1,1,1,0,0,0,laundry on site,unknown,Barracks West Apartments & Townhomes in Charlo...,38.0936,-78.5611,va,0


---

# 🔱 Entrega 🔱

Una vez la predicción se ha realizado, y tengo la columna necesaria completa, procedo a hacer el último paso para poder entregar. 

In [25]:
# Export the file to submit: a CSV with the single "pred" column and no index.
df_test[["pred"]].to_csv("BeeluRiddle.csv", index=False)

In [26]:
# Load the exported CSV back.
df_final = pd.read_csv('BeeluRiddle.csv')
# Display it to confirm the file is readable and has the expected content.
df_final

Unnamed: 0,pred
0,0
1,0
2,0
3,0
4,0
...,...
38493,0
38494,1
38495,1
38496,0


---