In [1]:
import polars as pl
import pickle
import sklearn
import pandas as pd

In [2]:
data_path = r'C:\Users\fscielzo\Documents\Docker-Apps\madrid-house-prices-app\data\madrid_houses.csv'
madrid_houses_df = pl.read_csv(data_path)
variables_to_remove = ['', 'id', 'district', 'neighborhood']
madrid_houses_df = madrid_houses_df.select(pl.exclude(variables_to_remove))

unique_values = madrid_houses_df['house_type'].unique().to_list()
unique_values = [str(x) for x in unique_values]
new_values = [f'House type {i}' for i in unique_values]
replace_dict = dict(zip(unique_values, new_values))
madrid_houses_df = madrid_houses_df.with_columns(madrid_houses_df['house_type'].cast(str))
madrid_houses_df = madrid_houses_df.with_columns(pl.col('house_type').replace(replace_dict))

In [3]:
madrid_houses_df.columns

['sq_mt_built',
 'n_rooms',
 'n_bathrooms',
 'n_floors',
 'sq_mt_allotment',
 'floor',
 'buy_price',
 'is_renewal_needed',
 'has_lift',
 'is_exterior',
 'energy_certificate',
 'has_parking',
 'house_type']

In [4]:
madrid_houses_df.to_pandas().dtypes

sq_mt_built           float64
n_rooms                 int64
n_bathrooms             int64
n_floors                int64
sq_mt_allotment       float64
floor                   int64
buy_price               int64
is_renewal_needed        bool
has_lift                 bool
is_exterior              bool
energy_certificate      int64
has_parking              bool
house_type             object
dtype: object

In [5]:
X = madrid_houses_df.to_pandas()

In [6]:
features_dtypes = X.dtypes
features_metadata = {}
for col in X.columns:
    if features_dtypes[col] == 'bool': # boolean case
        features_metadata[col] = {'dtype': features_dtypes[col],
                                  'unique_values': [str(x) for x in X[col].unique()]}
    elif features_dtypes[col] in ['int64', 'float64']: # numerical case
        features_metadata[col] = {'dtype': features_dtypes[col],
                                  'min': X[col].min(),
                                  'max': X[col].max()}        
    else: # string case
        features_metadata[col] = {'dtype': X.dtypes[col],
                                  'unique_values': X[col].unique()}        

In [7]:
features_metadata

{'sq_mt_built': {'dtype': dtype('float64'), 'min': 13.0, 'max': 2400.0},
 'n_rooms': {'dtype': dtype('int64'), 'min': 0, 'max': 24},
 'n_bathrooms': {'dtype': dtype('int64'), 'min': 1, 'max': 16},
 'n_floors': {'dtype': dtype('int64'), 'min': 1, 'max': 7},
 'sq_mt_allotment': {'dtype': dtype('float64'), 'min': 0.0, 'max': 21000.0},
 'floor': {'dtype': dtype('int64'), 'min': -5, 'max': 10},
 'buy_price': {'dtype': dtype('int64'), 'min': 36000, 'max': 8800000},
 'is_renewal_needed': {'dtype': dtype('bool'),
  'unique_values': ['False', 'True']},
 'has_lift': {'dtype': dtype('bool'), 'unique_values': ['False', 'True']},
 'is_exterior': {'dtype': dtype('bool'), 'unique_values': ['True', 'False']},
 'energy_certificate': {'dtype': dtype('int64'), 'min': 0, 'max': 7},
 'has_parking': {'dtype': dtype('bool'), 'unique_values': ['False', 'True']},
 'house_type': {'dtype': dtype('O'),
  'unique_values': array(['House type 1', 'House type 4', 'House type 3', 'House type 5',
         'House type 2

---

In [9]:
# Load the model
with open(r'C:\Users\fscielzo\Documents\Docker-Apps\madrid-house-prices-app\data\model.pkl', 'rb') as file:
    model = pickle.load(file)

In [10]:
X_new_1 = pd.read_csv('user_data_testing.csv')
X_new_1

Unnamed: 0,sq_mt_built,n_rooms,n_bathrooms,n_floors,sq_mt_allotment,floor,is_renewal_needed,has_lift,is_exterior,energy_certificate,has_parking,house_type
0,85,3,2,1,100,2,False,True,True,1,True,House type 1
1,120,4,3,2,150,1,True,False,True,2,False,House type 1
2,60,2,1,1,80,3,False,True,False,2,True,House type 3
3,95,3,2,1,700,4,False,False,True,3,False,House type 3
4,110,3,2,2,500,1,True,False,True,4,True,House type 2


In [14]:
X_new_2 = pd.read_csv('user_data_testing_2.csv')
X_new_2

Unnamed: 0,has_parking,n_rooms,sq_mt_built,n_bathrooms,n_floors,sq_mt_allotment,floor,is_renewal_needed,has_lift,is_exterior,energy_certificate,house_type
0,True,3,85,2,1,100,2,False,True,True,1,House type 1
1,False,4,120,3,2,150,1,True,False,True,2,House type 1
2,True,2,60,1,1,80,3,False,True,False,2,House type 3
3,False,3,95,2,1,700,4,False,False,True,3,House type 3
4,True,3,110,2,2,500,1,True,False,True,4,House type 2


In [12]:
model.predict(X_new_1)

array([306649.46937077, 380539.56362939, 266079.85287455, 515217.93674244,
       505299.28420115])

In [16]:
model.predict(X_new_2)

KeyError: "['n_rooms'] not in index"