```markdown
This is a notebook version of the `train.py` script. Use it to verify that training works as intended.
```

In [1]:
# Library Import
import pandas as pd
from utils.Dataloader import PricingWizardDataset
from utils.DataTransformation import base_regression_pipeline, ridge_regression_pipeline
from models.base_linear_regression import linear_regression
from models.regularized_regression import regularized_regression

In [2]:
# Load data
# Build the project's dataset wrapper with its default arguments. Construction
# prints a summary of what was loaded (dataset name, row/column counts,
# outlier removal, train/test sizes and random state).
data = PricingWizardDataset()

Dataset Loaded: post_preprocessing_without_dummies
	Number of Rows: 283055
	Number of Columns: 22
	Outlier Removal: True
	Train Size: 0.8
	Test Size: 0.2
	Random State: 42


## Base Linear Regression

In [3]:
# 1. Data transformations
# Hand the base-regression preparation pipeline to the dataset's
# apply_function; the two prints only bracket the step for progress feedback.
# NOTE(review): the return value is not captured, so this presumably updates
# `data` in place — confirm in utils.Dataloader.
print('Applying data preparation...')    
data.apply_function(base_regression_pipeline)
print('Done.')

Applying data preparation...
Done.


In [4]:
# 2. Stratify Split
# Split the dataset, stratifying on the 'log_listing_price' column.
# NOTE(review): val_size=0 presumably means no validation subset is created,
# and return_splits=False presumably keeps the splits on `data` rather than
# returning them (nothing is captured here) — confirm in utils.Dataloader.
data.stratify_train_test_split(y_column='log_listing_price', 
                               val_size=0,
                               return_splits=False)

Dependent variable distribution is equal across all subsets


In [5]:
# 3. Train model
# Fit the baseline linear regression on the prepared dataset. The returned
# mapping is read below through the keys 'r2', 'mse', 'mae' and 'rmse'.
results = linear_regression(data)

# 4. Print results
# One (label, key) pair per metric; labels are reproduced exactly as before,
# so MAE and RMSE are printed without a trailing colon.
print('Test Results:')
for label, metric_key in (
    ('R2 Score:', 'r2'),
    ('MSE:', 'mse'),
    ('MAE', 'mae'),
    ('RMSE', 'rmse'),
):
    print(label, results[metric_key])

Test Results:
R2 Score: 0.5452212770709474
MSE: 0.4381693860784058
MAE 0.5134875030165035
RMSE 0.6619436426754213


## Regularized Regression

In [6]:
# Reset Dataset
# NOTE(review): presumably restores `data` to its freshly loaded state so the
# ridge pipeline is not applied on top of the base-regression transformations
# from the previous section — confirm in utils.Dataloader.
data.reset_dataset()

# 1. Data transformations
# Hand the ridge-regression preparation pipeline to the dataset's
# apply_function; the two prints only bracket the step for progress feedback.
print('Applying data preparation...')    
data.apply_function(ridge_regression_pipeline)
print('Done.')

Applying data preparation...
Done.


In [7]:
# 2. Stratify Split
# Split the dataset again after the reset, stratifying on the
# 'log_listing_price' column.
# NOTE(review): val_size=0 presumably means no validation subset is created,
# and return_splits=False presumably keeps the splits on `data` rather than
# returning them (nothing is captured here) — confirm in utils.Dataloader.
data.stratify_train_test_split(y_column='log_listing_price', 
                               val_size=0,
                               return_splits=False)

Dependent variable distribution is equal across all subsets


In [8]:
# 3. Train model
# Trains the regularized regression. Per the recorded output, this runs a
# GridSearchCV over 13 alpha candidates with 5-fold cross-validation (65 fits),
# so the cell is slow. The returned mapping is read later through the keys
# 'r2', 'mse', 'mae' and 'rmse'.
# NOTE(review): the positional -1 is presumably an n_jobs-style "use all
# cores" setting — confirm against models.regularized_regression.
results = regularized_regression(data, -1)

Training model using GridSearchCV: regularized_regression
Fitting 5 folds for each of 13 candidates, totalling 65 fits
[CV] END ........................alpha=0.0031622776601683794; total time=  30.5s
[CV] END ........................alpha=0.0031622776601683794; total time=  30.4s
[CV] END ........................................alpha=0.001; total time=  30.6s
[CV] END .........................................alpha=0.01; total time=  30.9s
[CV] END .........................................alpha=0.01; total time=  31.3s
[CV] END ........................alpha=0.0031622776601683794; total time=  31.3s
[CV] END ........................................alpha=0.001; total time=  31.3s
[CV] END ........................................alpha=0.001; total time=  31.5s
[CV] END ........................................alpha=0.001; total time=  31.7s
[CV] END ........................alpha=0.0031622776601683794; total time=  31.7s
[CV] END ........................alpha=0.0031622776601683794; total tim

In [16]:
# 4. Print results
# Report the metrics held in `results` from the regularized-regression run.
# One (label, key) pair per metric; labels are reproduced exactly as before,
# so MAE and RMSE are printed without a trailing colon.
print('Test Results:')
for label, metric_key in (
    ('R2 Score:', 'r2'),
    ('MSE:', 'mse'),
    ('MAE', 'mae'),
    ('RMSE', 'rmse'),
):
    print(label, results[metric_key])

Test Results:
R2 Score: 0.5695965129992877
MSE: 0.4146843776034087
MAE 0.4972702697816776
RMSE 0.6439599192522845
