# In [1]:
import pandas as pd
import json

def clean_and_normalize_data(df, normalization_range=(-1, 1)):
    """Clean a housing DataFrame and min-max scale its numerical columns.

    Processing steps, in order:

    1. Copy ``df`` so the caller's frame is left untouched.
    2. Replace the ``'Bajo'`` floor label with ``'0'`` and coerce ``floor`` to
       a number; values that cannot be parsed become NaN.
    3. Map the two heating flags from ``True``/``False`` to ``1``/``0``; any
       other value becomes NaN.
    4. One-hot encode ``house_type_id`` into integer ``house_type_*`` columns.
    5. Drop every row that still contains a missing value.
    6. Min-max scale each known numerical column into ``normalization_range``.
       A column whose values are all equal is left unscaled and gets no entry
       in ``params``.
    7. Shuffle the rows with a fixed seed and reset the index.

    Args:
        df: Input DataFrame. Must contain ``floor``,
            ``has_individual_heating``, ``has_central_heating`` and
            ``house_type_id``; the numerical columns are scaled only if
            present.
        normalization_range: ``(low, high)`` target interval for scaling.

    Returns:
        A ``(processed_df, normalization_info)`` tuple. ``normalization_info``
        is a dict holding the per-column scaling parameters (``params``), the
        original column names, the distinct non-missing house types, the
        numerical column names, the generated dummy column names and the
        target range.

    Raises:
        KeyError: If one of the required columns is missing from ``df``.
    """
    df_processed = df.copy()

    original_columns = df_processed.columns.tolist()

    # 'Bajo' is the only non-numeric floor label handled here; it is treated
    # as floor 0. Anything else non-numeric turns into NaN and is dropped below.
    df_processed['floor'] = df_processed['floor'].replace('Bajo', '0')
    df_processed['floor'] = pd.to_numeric(df_processed['floor'], errors='coerce')

    for flag_col in ('has_individual_heating', 'has_central_heating'):
        df_processed[flag_col] = df_processed[flag_col].map({True: 1, False: 0})

    # Missing house types get no dummy column, so they are left out of the
    # recorded list as well. This also keeps NaN out of the JSON metadata.
    house_types = df_processed['house_type_id'].dropna().unique().tolist()

    # dtype=int gives 0/1 columns regardless of the pandas version; the
    # default became bool in pandas 2.0, which would be written as True/False.
    df_processed = pd.get_dummies(
        df_processed,
        columns=['house_type_id'],
        prefix='house_type',
        dtype=int,
    )

    df_processed = df_processed.dropna()

    numerical_cols = ['sq_mt_built', 'sq_mt_useful', 'n_rooms', 'n_bathrooms',
                      'floor', 'built_year', 'buy_price']

    normalization_info = {
        'params': {},
        'original_columns': original_columns,
        'house_types': house_types,
        'numerical_columns': numerical_cols,
        'dummy_columns': [col for col in df_processed.columns
                          if col.startswith('house_type_')],
        'normalization_range': normalization_range,
    }

    target_min = normalization_range[0]
    target_max = normalization_range[1]

    for col in numerical_cols:
        if col not in df_processed.columns:
            continue
        min_val = df_processed[col].min()
        max_val = df_processed[col].max()
        if max_val == min_val:
            # Constant column: scaling would divide by zero, so leave it as is.
            continue
        df_processed[col] = (target_min +
                             (df_processed[col] - min_val) *
                             (target_max - target_min) /
                             (max_val - min_val))
        # Plain floats so the parameters can be serialized to JSON.
        normalization_info['params'][col] = {
            'min': float(min_val),
            'max': float(max_val),
            'target_min': float(target_min),
            'target_max': float(target_max),
        }

    # Fixed seed keeps the shuffled row order reproducible between runs.
    df_processed = df_processed.sample(frac=1, random_state=42).reset_index(drop=True)

    return df_processed, normalization_info

def save_normalization_info(normalization_info, filename):
    """
    Write the normalization metadata to `filename` as JSON indented by 4 spaces
    """
    encoder = json.JSONEncoder(indent=4)
    with open(filename, 'w') as out_file:
        # Stream the encoded chunks straight into the file.
        out_file.writelines(encoder.iterencode(normalization_info))

def load_normalization_info(filename):
    """
    Read `filename` and return the JSON document it contains
    """
    with open(filename, 'r') as in_file:
        raw_text = in_file.read()
        return json.loads(raw_text)

def denormalize_predictions(predictions, normalization_info):
    """
    Map predictions from the normalized range back onto the 'buy_price' scale
    """
    price = normalization_info['params']['buy_price']

    # Distance of each prediction from the low end of the normalized range.
    offset = predictions - price['target_min']
    original_span = price['max'] - price['min']
    target_span = price['target_max'] - price['target_min']

    return price['min'] + offset * original_span / target_span

def denormalize_feature(value, feature_name, normalization_info):
    """
    Map one normalized feature value back to its original scale.
    Features without stored parameters are returned unchanged.
    """
    all_params = normalization_info['params']
    if feature_name in all_params:
        spec = all_params[feature_name]
        offset = value - spec['target_min']
        original_span = spec['max'] - spec['min']
        target_span = spec['target_max'] - spec['target_min']
        return spec['min'] + offset * original_span / target_span

    return value

# Load the filtered dataset once; both variants below are derived from it.
df = pd.read_csv('filtered_dataset.csv')

# "tanh" variant: numerical columns scaled into (-1, 1).
df_processed_tanh, params_tanh = clean_and_normalize_data(
    df,
    normalization_range=(-1, 1),
)
save_normalization_info(params_tanh, 'normalization_params_tanh.json')
df_processed_tanh.to_csv('processed_housing_data_tanh.csv', index=False)

# "sigmoid" variant: numerical columns scaled into (0, 1).
df_processed_sigmoid, params_sigmoid = clean_and_normalize_data(
    df,
    normalization_range=(0, 1),
)
save_normalization_info(params_sigmoid, 'normalization_params_sigmoid.json')
df_processed_sigmoid.to_csv('processed_housing_data_sigmoid.csv', index=False)