In [None]:
# Importar librerías

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score


In [None]:
# Cargar datos

base = pd.read_csv('PortoPortugal_50.csv')

In [4]:
base['precio_alto'] = np.where(base['price'] > base['price'].median(), 1, 0)
base['disponibilidad_alta'] = np.where(base['availability_365'] > 180, 1, 0)
base['muchas_resenas'] = np.where(base['number_of_reviews'] > base['number_of_reviews'].median(), 1, 0)
base['es_superhost'] = np.where(base['host_is_superhost'] == 't', 1, 0)
base['reserva_instantanea'] = np.where(base['instant_bookable'] == 't', 1, 0)
base['casa_entera'] = np.where(base['room_type'] == 'Entire home/apt', 1, 0)
base['rating_alto'] = np.where(base['review_scores_rating'] >= 90, 1, 0)
base['limpieza_alta'] = np.where(base['review_scores_cleanliness'] >= 4.5, 1, 0)
base['aceptacion_alta'] = np.where(base['host_acceptance_rate'] >= 0.9, 1, 0)
base['varias_camas'] = np.where(base['beds'] >= 2, 1, 0)


In [None]:
# Variables predictoras y objetivo
casos = [
    {'X': ['accommodates'], 'y': 'precio_alto'},
    {'X': ['availability_365'], 'y': 'disponibilidad_alta'},
    {'X': ['number_of_reviews'], 'y': 'muchas_resenas'},
    {'X': ['host_listings_count'], 'y': 'es_superhost'},
    {'X': ['price'], 'y': 'reserva_instantanea'},
    {'X': ['bedrooms'], 'y': 'casa_entera'},
    {'X': ['review_scores_value'], 'y': 'rating_alto'},
    {'X': ['review_scores_cleanliness'], 'y': 'limpieza_alta'},
    {'X': ['host_acceptance_rate'], 'y': 'aceptacion_alta'},
    {'X': ['beds'], 'y': 'varias_camas'},
]

In [None]:
# Regresiones logísticas

resultados = []

for i, caso in enumerate(casos):
    X = base[caso['X']]
    y = base[caso['y']]

    datos = pd.concat([X, y], axis=1).dropna()
    X = datos[caso['X']]
    y = datos[caso['y']]

    if len(y.unique()) < 2:
        continue

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=7)

    modelo = LogisticRegression()
    modelo.fit(X_train, y_train)

    y_pred = modelo.predict(X_test)

    precision = precision_score(y_test, y_pred)
    exactitud = accuracy_score(y_test, y_pred)
    sensibilidad = recall_score(y_test, y_pred)

    resultados.append({
        'Caso': i+1,
        'Variable dependiente': caso['y'],
        'Variable independiente': caso['X'][0],
        'Precisión': round(precision, 3),
        'Exactitud': round(exactitud, 3),
        'Sensibilidad': round(sensibilidad, 3)
    })

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Tabla de resultados
resultados_final = pd.DataFrame(resultados)
print(resultados_final)

   Caso Variable dependiente     Variable independiente  Precisión  Exactitud  \
0     1          precio_alto               accommodates      0.727      0.672   
1     2  disponibilidad_alta           availability_365      1.000      1.000   
2     3       muchas_resenas          number_of_reviews      1.000      1.000   
3     4         es_superhost        host_listings_count      0.000      0.661   
4     5  reserva_instantanea                      price      0.698      0.698   
5     6          casa_entera                   bedrooms      0.832      0.832   
6     8        limpieza_alta  review_scores_cleanliness      0.983      0.985   
7     9      aceptacion_alta       host_acceptance_rate      0.974      0.976   
8    10         varias_camas                       beds      1.000      1.000   

   Sensibilidad  
0         0.363  
1         1.000  
2         1.000  
3         0.000  
4         1.000  
5         1.000  
6         1.000  
7         1.000  
8         1.000  
