# Export California Housing Dataset to DuckDB

In [1]:
import numpy as np
import pandas as pd

from sklearn import datasets
from sklearn.preprocessing import StandardScaler

import duckdb

### Load California housing data

In [2]:
# Fetch the California housing dataset object from scikit-learn.
cal_house = datasets.fetch_california_housing()

# Keep the feature names; they become the DataFrame column labels in the next
# cell. The bare expression on the last line makes the notebook display them.
cal_house_cols = cal_house.feature_names
cal_house_cols

['MedInc',
 'HouseAge',
 'AveRooms',
 'AveBedrms',
 'Population',
 'AveOccup',
 'Latitude',
 'Longitude']

In [3]:
# Assemble features and target into one DataFrame: one column per feature
# name, plus the target values stored under 'Value'. Built in a single
# expression so `cal_house_data` is only ever bound to the finished frame.
cal_house_data = pd.DataFrame(cal_house.data, columns=cal_house_cols).assign(
    Value=cal_house.target
)

# Preview the first rows as the cell output.
cal_house_data.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,Value
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


### Connect to the DB

In [4]:
# Open a connection to the DuckDB database file at ../test.db (path is relative
# to the notebook's working directory). `con` is used, and closed, by the
# table-creation cell below, so re-run this cell before re-running that one.
con = duckdb.connect("../test.db")

### Create the cal_house table and populate

In [5]:
# Rebuild the cal_house table from scratch and load the DataFrame into it.
# The try/finally guarantees the connection is closed even when one of the
# statements fails, instead of only on the success path.
try:
    con.sql("DROP TABLE IF EXISTS cal_house")

    # Double-quote every column name (doubling any embedded quote) so names
    # containing spaces, punctuation or SQL keywords still produce valid SQL.
    quoted_cols = ['"' + str(c).replace('"', '""') + '"' for c in cal_house_data.columns]

    # NOTE(review): FLOAT is DuckDB's single-precision type; if the DataFrame
    # columns are float64 the values are narrowed on insert. Consider DOUBLE
    # if full precision matters -- kept as FLOAT here to leave the schema as is.
    create_sql = "CREATE TABLE cal_house (" + " , ".join(c + " FLOAT" for c in quoted_cols) + ");"
    con.sql(create_sql)

    # `cal_house_data` in the FROM clause is the pandas DataFrame built earlier
    # in the notebook; DuckDB looks up Python variables referenced as tables.
    con.sql("INSERT INTO cal_house (" + ", ".join(quoted_cols) + ") SELECT * FROM cal_house_data")

    # Print the stored rows to confirm the load.
    con.sql("SELECT * FROM cal_house").show()
finally:
    con.close()

┌────────┬──────────┬───────────┬────────────┬────────────┬───────────┬──────────┬───────────┬───────┐
│ MedInc │ HouseAge │ AveRooms  │ AveBedrms  │ Population │ AveOccup  │ Latitude │ Longitude │ Value │
│ float  │  float   │   float   │   float    │   float    │   float   │  float   │   float   │ float │
├────────┼──────────┼───────────┼────────────┼────────────┼───────────┼──────────┼───────────┼───────┤
│ 8.3252 │     41.0 │  6.984127 │  1.0238096 │      322.0 │ 2.5555556 │    37.88 │   -122.23 │ 4.526 │
│ 8.3014 │     21.0 │ 6.2381372 │  0.9718805 │     2401.0 │ 2.1098418 │    37.86 │   -122.22 │ 3.585 │
│ 7.2574 │     52.0 │  8.288136 │  1.0734463 │      496.0 │   2.80226 │    37.85 │   -122.24 │ 3.521 │
│ 5.6431 │     52.0 │  5.817352 │  1.0730593 │      558.0 │ 2.5479453 │    37.85 │   -122.25 │ 3.413 │
│ 3.8462 │     52.0 │  6.281853 │   1.081081 │      565.0 │ 2.1814673 │    37.85 │   -122.25 │ 3.422 │
│ 4.0368 │     52.0 │  4.761658 │   1.103627 │      413.0 │ 2.1398964 │  