#Installing dependencies

In [2]:
pip install pandas scikit-learn




#Creating the dataset

In [23]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_absolute_error
import numpy as np

# Build the small training dataset: one row per house, with 'Price' as the target column
data = pd.DataFrame(
    [
        ['Downtown', 1200, 3, 10, 350000],
        ['Suburb', 1800, 4, 5, 420000],
        ['Rural', 2400, 5, 15, 300000],
        ['Urban', 1500, 3, 7, 375000],
        ['Coastal', 2000, 4, 8, 450000],
    ],
    columns=['Location', 'Size', 'Bedrooms', 'Age', 'Price'],
)


# Preprocessing data

In [20]:
# One-hot encode the categorical 'Location' column when it is present.
# NOTE: this cell reassigns `data`; once it has run, 'Location' is gone, so a
# second run takes the else branch. Re-run the dataset cell first to start over.
if 'Location' in data.columns:
    # Dense output so the encoded matrix can be wrapped in a DataFrame directly
    encoder = OneHotEncoder(sparse_output=False)
    encoded_locations = encoder.fit_transform(data[['Location']])

    # Reuse data's own index: the column-wise concat below aligns on index labels,
    # so a fresh default index would misalign rows whenever data's index is not 0..n-1
    encoded_df = pd.DataFrame(
        encoded_locations,
        columns=encoder.get_feature_names_out(['Location']),
        index=data.index,
    )

    # Encoded columns first, followed by every remaining column of the original frame
    data = pd.concat([encoded_df, data.drop(columns='Location')], axis=1)
else:
    print("La columna 'Location' no se encuentra en el DataFrame.")
    print("Verifique que el DataFrame contenga la columna correcta antes de continuar.")

# Inspect the first rows of the resulting frame
print(data.head())

   Location_Coastal  Location_Downtown  Location_Rural  Location_Suburb  \
0               0.0                1.0             0.0              0.0   
1               0.0                0.0             0.0              1.0   
2               0.0                0.0             1.0              0.0   
3               0.0                0.0             0.0              0.0   
4               1.0                0.0             0.0              0.0   

   Location_Urban  Size  Bedrooms  Age   Price  
0             0.0  1200         3   10  350000  
1             0.0  1800         4    5  420000  
2             0.0  2400         5   15  300000  
3             1.0  1500         3    7  375000  
4             0.0  2000         4    8  450000  


# Splitting the data and training the model

In [21]:
# Target (y) is the price; the features (X) are every other column
y = data['Price']
X = data.drop(columns=['Price'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Fit a linear regression on the training rows
model = LinearRegression().fit(X_train, y_train)

# Score the fitted model on the held-out rows with mean absolute error
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(f'Mean Absolute Error: ${mae:.2f}')


Mean Absolute Error: $47960.03


#Predicting the house price

In [24]:
def predict_house_price(location, size, bedrooms, age):
    """Predict the price of a single house with the fitted notebook model.

    Relies on two objects created by earlier cells: ``encoder`` (the fitted
    OneHotEncoder for 'Location') and ``model`` (the fitted regressor).

    Args:
        location: Value for the 'Location' feature. It is passed to
            ``encoder.transform``, which decides how values it was not
            fitted on are handled.
        size: Value for the 'Size' feature.
        bedrooms: Value for the 'Bedrooms' feature.
        age: Value for the 'Age' feature.

    Returns:
        The first (and only) element of ``model.predict`` for this house.
    """
    # Encode through a frame that carries the fitted column name; a bare nested
    # list has no feature names and makes scikit-learn warn about the mismatch
    location_encoded = encoder.transform(pd.DataFrame({'Location': [location]}))
    location_df = pd.DataFrame(location_encoded, columns=encoder.get_feature_names_out(['Location']))

    # Assemble the single-row input frame
    numeric_df = pd.DataFrame({'Size': [size], 'Bedrooms': [bedrooms], 'Age': [age]})
    input_data = pd.concat([location_df, numeric_df], axis=1)

    # Match the exact columns and order the model was fitted on; any column the
    # encoding did not produce is filled with zeros
    input_data = input_data.reindex(columns=list(model.feature_names_in_), fill_value=0)

    # Run the prediction and unwrap the single result
    predicted_price = model.predict(input_data)
    return predicted_price[0]

# Example: estimate the price of a 12-year-old, 3-bedroom Downtown house of size 1300
predicted_price = predict_house_price(location='Downtown', size=1300, bedrooms=3, age=12)
print(f'Estimated Price: ${predicted_price:.2f}')


Estimated Price: $316443.99


