# Submission

In [12]:
import numpy as np
import pandas as pd

from sklearn.linear_model import Lasso, Ridge
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_squared_log_error

import utils.visualization_tb as vi
import utils.mining_data_tb as md

import warnings

# Suppress every warning for the rest of the session (no category or module
# filter is given, so this also hides deprecation and convergence warnings).
warnings.filterwarnings("ignore")

In [3]:
### Train data
# Read the labelled training set; the first CSV column becomes the index.
train = pd.read_csv("data/train.csv", index_col=0)
train_target = train["SalePrice"]
train_data = train.drop(columns="SalePrice")

### Test data
test_data = pd.read_csv("data/test.csv", index_col=0)
# Keep the test index so the test rows can be recovered after the join
ids = test_data.index

# Stack train and test features row-wise and run the project's
# preparation helper on the combined frame.
dataset = md.ready_to_use(pd.concat([train_data, test_data]))

# Split the prepared frame back into its two parts:
# rows labelled by `ids` are the Kaggle test set, everything else is train.
test_data = dataset.loc[ids]
train_data = dataset.drop(index=ids)

In [16]:
# NOTE: `seed` is not passed to either model in this cell.
seed = 42
scaler = MinMaxScaler()
models = Ridge(alpha = 10), Lasso(alpha = 10)

#### Preprocessing
# Scaling does not depend on the model, so it is done once, before the loop,
# instead of re-converting and re-scaling the data on every iteration.
# The scaler learns each feature's min/max from the training data only.
train_data = scaler.fit_transform(np.array(train_data))
train_target = np.array(train_target)
# Apply the *training* min/max to the test data (transform, not fit_transform).
# Refitting on the test set would scale it with different parameters than the
# ones the models were trained on and distort the predictions.
test_data = scaler.transform(np.array(test_data))

for model in models:
    #### Training
    model.fit(train_data, train_target)

    #### Prediction
    predictions = model.predict(test_data)

    #### Submission
    # One CSV per model, indexed by the original test ids; the file name
    # embeds the estimator's string representation.
    submission = pd.DataFrame(predictions, index = ids, columns = ["SalePrice"])
    submission.to_csv(f"submissions/submission_{str(model)}.csv")