# CS5228 Rental Prediction

In [3]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, ElasticNet
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

# load data
train_data = pd.read_csv('./data/processed/train.csv')
test_data = pd.read_csv('./data/processed/test.csv')

X_train, y_train = train_data.drop(columns='monthly_rent'), train_data['monthly_rent'].copy()
X_test = test_data

# Standardize data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [4]:

# Initialize models
models = {
    'Linear Regression': LinearRegression(),
    'Elastic Net': ElasticNet(),
    'Gradient Boosting': GradientBoostingRegressor(random_state=42, verbose=1),
    'Random Forest': RandomForestRegressor(random_state=42, verbose=1, n_jobs=-1),
    'Gradient Boosting': GradientBoostingRegressor(random_state=42, verbose=1, n_estimators=300, max_depth=4, learning_rate=0.15, subsample=0.8, max_features=0.8),
    # 'SVM': SVR(),
}

# train models and save results
for name, model in models.items():
    print(f'Training {name}...')
    
    # Train model
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f'{name} training completed.')

    # Create a dataframe with two columns: Id & Predicted
    result_df = pd.DataFrame({
        'Id': range(len(y_pred)),
        'Predicted': y_pred
    })
    save_path = './data/predictions/' + name + '.csv'
    result_df.to_csv(save_path, index=False)
    print(f'{name} training result saved.\n')

Training Linear Regression...
Linear Regression training completed.
Linear Regression training result saved.

Training Elastic Net...
Elastic Net training completed.
Elastic Net training result saved.

Training Gradient Boosting...
      Iter       Train Loss      OOB Improve   Remaining Time 
         1      428402.0293       59344.6650           40.98s
         2      383360.6869       37570.2770           39.54s
         3      352093.8604       34212.2632           35.98s
         4      326951.2757       19487.4451           34.83s
         5      309183.6291       19326.6161           33.93s
         6      296014.6186       19883.6203           33.32s
         7      284161.5803        9762.3773           32.89s
         8      272746.9503        -183.0310           32.48s
         9      269125.7007       20152.9672           32.28s
        10      260476.4373       -5226.8048           31.99s
        20      237276.9671       -7087.7901           29.15s
        30      230414.

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    3.8s


Random Forest training completed.
Random Forest training result saved.



[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    9.6s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:    0.1s finished
