# CS5228 Rental Prediction

In [3]:
import os

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, ElasticNet
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

# Load the preprocessed train/test CSVs. The training file carries the target
# column 'monthly_rent'; every other training column is used as a feature.
train_data = pd.read_csv('./data/processed/train.csv')
test_data = pd.read_csv('./data/processed/test.csv')

X_train, y_train = train_data.drop(columns='monthly_rent'), train_data['monthly_rent'].copy()

# Select the test features by name so they line up with the training features
# even if the test CSV orders its columns differently or carries extra columns.
# Fail early with a readable message if a training feature is absent.
feature_columns = list(X_train.columns)
missing_columns = [col for col in feature_columns if col not in test_data.columns]
if missing_columns:
    raise ValueError(f'Test data is missing feature columns: {missing_columns}')
X_test = test_data[feature_columns]

# Standardize data: the scaler is fitted on the training features only, then the
# same fitted transformation is applied to the test features.
# NOTE(review): with scikit-learn's default output settings X_train and X_test
# are NumPy arrays (not DataFrames) from here on - confirm if set_output is used.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [4]:

# Initialize models
# Candidate regressors keyed by display name. The name is what the training
# loop prints in its progress messages and uses to name each prediction CSV.
# Every name must appear exactly once: a dict literal silently keeps only the
# last value for a repeated key, so a duplicated entry just builds an estimator
# that is immediately discarded.
models = {
    'Linear Regression': LinearRegression(),
    'Elastic Net': ElasticNet(),
    'Gradient Boosting': GradientBoostingRegressor(random_state=42, verbose=1),
    'Random Forest': RandomForestRegressor(random_state=42, verbose=1, n_jobs=-1),
    'SVM': SVR(),
}

# Make sure the output directory exists before any model is trained, so a
# missing folder cannot make to_csv fail after a model has already been fitted.
predictions_dir = './data/predictions/'
os.makedirs(predictions_dir, exist_ok=True)

# train models and save results
# Each model is fitted on the training set, used to predict the test set, and
# its predictions are written to '<predictions_dir><model name>.csv'.
for name, model in models.items():
    print(f'Training {name}...')

    # Train model
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f'{name} training completed.')

    # Create a dataframe with two columns: Id & Predicted
    # Id is the 0-based position of each prediction in y_pred.
    result_df = pd.DataFrame({
        'Id': range(len(y_pred)),
        'Predicted': y_pred
    })
    save_path = predictions_dir + name + '.csv'
    # index=False keeps the pandas row index out of the file; 'Id' already
    # serves as the row identifier.
    result_df.to_csv(save_path, index=False)
    print(f'{name} training result saved.\n')

Training Linear Regression...
Linear Regression training completed.
Linear Regression training result saved.

Training Elastic Net...
Elastic Net training completed.
Elastic Net training result saved.

Training Gradient Boosting...
      Iter       Train Loss   Remaining Time 
         1      452893.7174           11.81s
         2      424531.7705           10.96s
         3      401333.5645           10.05s
         4      382028.9000           10.05s
         5      366314.1370            9.72s
         6      352992.6863            9.56s
         7      341995.6910            9.27s
         8      332833.8340            9.05s
         9      325058.2537            8.80s
        10      318219.4335            8.61s
        20      283501.1700            7.22s
        30      270326.3257            6.05s
        40      262633.4454            5.04s
        50      257603.6594            4.13s
        60      254099.0730            3.26s
        70      251260.8389            2.42s
  

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    5.8s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.1s
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:    0.1s finished


Random Forest training completed.
Random Forest training result saved.

Training SVM...
SVM training completed.
SVM training result saved.

