In [1]:
# Importación Librerías
from sklearn.datasets import fetch_california_housing
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR

# KDD Predicción de Precios de Viviendas

1. SELECCIÓN DE DATOS

In [2]:
import pandas as pd

# Read the two California Housing CSV files from local disk and stack them
# into a single frame with a fresh 0..n-1 index.
CSV_PATHS = (
    '/content/sample_data/california_housing_train.csv',
    '/content/sample_data/california_housing_test.csv',
)
df_train, df_test = (pd.read_csv(csv_path) for csv_path in CSV_PATHS)
df = pd.concat([df_train, df_test], ignore_index=True)

2. PREPROCESAMIENTO

In [3]:
# Separate the target column from the remaining predictor columns.
target_col = "median_house_value"
y = df[target_col]
X = df.drop(columns=[target_col])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the
# partition identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

3. TRANSFORMACIÓN

In [4]:
# Fit the scaler on the training rows only, then apply that same fitted
# transformation to both partitions so no test-set statistics are used.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

4. MINERÍA

In [5]:
# Candidate regressors, keyed by the display name used in the metrics table
# (the same key is later used to look the fitted estimator up again).
models = {
    "Regresión Lineal": LinearRegression(),
    # random_state pins the forest's bootstrap/feature sampling so the R2
    # table and the selected model are reproducible on Restart & Run All.
    "Random Forest": RandomForestRegressor(
        n_estimators=200, max_depth=10, random_state=42
    ),
    # NOTE(review): the recorded run shows a negative R2 for SVR; presumably
    # because the target is left unscaled with default hyper-parameters.
    # Confirm before drawing conclusions about SVR from this comparison.
    "SVR": SVR()
}

# One [model name, R2] row per candidate, in dict order.
resultados = []

for name, model in models.items():
    # Each estimator is fitted in place, so `models` holds fitted objects
    # once the loop has finished.
    model.fit(X_train_scaled, y_train)
    pred = model.predict(X_test_scaled)
    r2 = r2_score(y_test, pred)
    resultados.append([name, r2])

# Tabulate the scores; the bare last expression renders the table.
metricas = pd.DataFrame(resultados, columns=["Modelo", "R2"])
metricas


Unnamed: 0,Modelo,R2
0,Regresión Lineal,0.645335
1,Random Forest,0.793988
2,SVR,-0.044314


4.1 SELECCIÓN DEL MEJOR MODELO

In [6]:
# Order the metrics table from highest to lowest R2 and read the model name
# off the first row.
ranking = metricas.sort_values(by="R2", ascending=False)
best_model_name = ranking["Modelo"].iloc[0]

# Fetch the corresponding estimator object from the candidates dict.
best_model = models[best_model_name]

best_model_name

'Random Forest'

4.2 GENERAR EL MODELO Y OTROS ARTEFACTOS PARA LA APLICACIÓN WEB

In [7]:
# Objects to persist with joblib, keyed by output filename. Dict order is the
# order in which the files are written.
artifacts = {
    "modelo.joblib": best_model,
    "scaler.joblib": scaler,
    "metricas.joblib": metricas,
    # Fixed-seed 1000-row sample of the full frame.
    "muestra.joblib": df.sample(1000, random_state=42),
    "correlaciones.joblib": df.corr(),
    "stats.joblib": df.describe(),
}
written = [joblib.dump(obj, filename) for filename, obj in artifacts.items()]

# Show the return value of the final dump as the cell output.
written[-1]


['stats.joblib']

DESPLEGAR LA APLICACIÓN STREAMLIT

In [None]:
!pip install streamlit joblib numpy pandas scikit-learn plotly fpdf2 kaleido statsmodels


Collecting streamlit
  Downloading streamlit-1.53.0-py3-none-any.whl.metadata (10 kB)
Collecting fpdf2
  Downloading fpdf2-2.8.5-py3-none-any.whl.metadata (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.9/76.9 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting kaleido
  Downloading kaleido-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting choreographer>=1.1.1 (from kaleido)
  Downloading choreographer-1.2.1-py3-none-any.whl.metadata (6.8 kB)
Collecting logistro>=1.0.8 (from kaleido)
  Downloading logistro-2.0.1-py3-none-any.whl.metadata (3.9 kB)
Collecting pytest-timeout>=2.4.0 (from kaleido)
  Downloading pytest_timeout-2.4.0-py3-none-any.whl.metadata (20 kB)
Downloading streamlit-1.53.0-py3-none-any.whl (9.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m69.9 MB/s[0m eta [36m0:00:00[0m
[?25

In [None]:
!streamlit run app.py --server.port 8501 --server.address 0.0.0.0 &>/content/streamlit.log &


In [None]:
# Print the Streamlit log file to check what the background server has written so far.
!cat /content/streamlit.log


In [None]:
!wget -q https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb
!dpkg -i cloudflared-linux-amd64.deb


Selecting previously unselected package cloudflared.
(Reading database ... 117528 files and directories currently installed.)
Preparing to unpack cloudflared-linux-amd64.deb ...
Unpacking cloudflared (2025.11.1) ...
Setting up cloudflared (2025.11.1) ...
Processing triggers for man-db (2.10.2-1) ...


In [None]:
# Open a Cloudflare quick tunnel that forwards a public URL to the local
# server on port 8501; the generated URL is printed in the command's log output.
!cloudflared tunnel --url http://localhost:8501


[90m2026-01-20T00:41:42Z[0m [32mINF[0m Thank you for trying Cloudflare Tunnel. Doing so, without a Cloudflare account, is a quick way to experiment and try it out. However, be aware that these account-less Tunnels have no uptime guarantee, are subject to the Cloudflare Online Services Terms of Use (https://www.cloudflare.com/website-terms/), and Cloudflare reserves the right to investigate your use of Tunnels for violations of such terms. If you intend to use Tunnels in production you should use a pre-created named tunnel by following: https://developers.cloudflare.com/cloudflare-one/connections/connect-apps
[90m2026-01-20T00:41:42Z[0m [32mINF[0m Requesting new quick Tunnel on trycloudflare.com...
[90m2026-01-20T00:41:45Z[0m [32mINF[0m +--------------------------------------------------------------------------------------------+
[90m2026-01-20T00:41:45Z[0m [32mINF[0m |  Your quick Tunnel has been created! Visit it at (it may take some time to be reachable):  |
[90m2026