In [None]:
import warnings

import numpy as np
import pandas as pd
import sklearn.linear_model as lm
import sklearn.preprocessing as pre
import sklearn.model_selection as ms
import matplotlib.pyplot as plt
import seaborn as sns

import harness

np.warnings.filterwarnings('ignore', 'Ill-conditioned matrix')

In [None]:
# Load the train and test CSVs the same way: first column becomes the index.
flights_train, flights_test = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ('data/flights_train.csv', 'data/flights_test.csv')
)

In [None]:
# Run the raw training frame through harness.clean_train. The result feeds
# feature engineering, the train feature/target extraction, and validation below.
flights_clean = harness.clean_train(flights_train)

In [None]:
# Feature-engineering steps combined with harness.chain, listed in the order
# they are passed: date parts, haul, weather.
add_features = harness.chain(
    harness.add_date_parts,
    harness.add_haul,
    harness.add_weather,
)

In [None]:
# Apply the feature chain to the cleaned training data. This frame is the
# extra argument paired with add_all_grouped_stats when x_transform is built.
flights_with_features = add_features(flights_clean)

In [None]:
# Input-side pipeline handed to the DataTransformer below. Steps are given to
# harness.chain in this order.
x_transform = harness.chain(
    harness.keep_only_test_columns,
    add_features,
    # NOTE(review): a [function, argument] pair rather than a bare callable.
    # Presumably harness.chain supplies flights_with_features (training data
    # with features) as an extra argument to add_all_grouped_stats — confirm
    # against harness.chain.
    [harness.add_all_grouped_stats, flights_with_features],
    harness.only_numeric,
    harness.scale,
)

In [None]:
# Wrap the input pipeline in a DataTransformer; it is used below to extract
# the training features/target and to score the model.
transformer = harness.DataTransformer(x_transform)

In [None]:
# Extract the transformed features and the target from the cleaned training
# data. Both results expose `.values`, which train_model relies on.
x_train, y_train = transformer.extract_transform(flights_clean)

In [None]:
def train_model():
    """Fit a linear regression on the training split and save it via the harness.

    Reads the notebook-level ``x_train`` and ``y_train``, fits on their
    underlying arrays (``.values``), and saves the fitted estimator under the
    name 'everything_linear_test_model'. Returns nothing.
    """
    # fit() returns the estimator itself, so construction and fitting chain.
    fitted = lm.LinearRegression().fit(x_train.values, y_train.values)
    harness.save(fitted, 'everything_linear_test_model')

In [None]:
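# Left disabled so the notebook reuses the saved model. Uncomment to refit and
# re-save 'everything_linear_test_model', which the next cell loads.
#train_model()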

In [None]:
# Load the estimator saved under the name train_model() writes, then pair it
# with the transformer so it can be validated on un-transformed frames.
# NOTE(review): this cell fails on a fresh checkout unless the saved model
# already exists — presumably train_model() must have been run once; confirm.
linreg = harness.load('everything_linear_test_model')
linreg_model = harness.TrainedModel(linreg, transformer)

In [None]:
# Score on the training split using the raw arrays, matching how train_model
# fits the estimator (on `.values`). Passing the DataFrame here instead makes
# scikit-learn warn that X has feature names the model was not fitted with.
linreg.score(x_train.values, y_train.values)

In [None]:
# Validate the wrapped model on the cleaned training frame.
# NOTE(review): flights_clean is the same frame x_train/y_train were extracted
# from, so unless harness.TrainedModel.validate holds data out internally this
# is an in-sample check — confirm.
linreg_model.validate(flights_clean)

In [None]:
# Score the same estimator through the transformer's own score method, for
# comparison with the plain linreg.score result above.
# NOTE(review): how this differs from linreg.score depends on
# harness.DataTransformer.score — confirm.
transformer.score(linreg, x_train, y_train)