In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import OrderedDict
sns.set_context('poster')

# Importing from my own modules
import sys
sys.path.append('../financial_forecasting/')
from utils import load_data, wMSE, train_and_test_models
from preprocessing import Imputer, LogTransformer, MeanEncoder, TreeBinner, GroupStats

# Load preprocessed data

In [2]:
# Feature matrices for the three splits.
X_train = pd.read_csv('../data/preprocessed/train.csv')
X_val = pd.read_csv('../data/preprocessed/validation.csv')
X_test = pd.read_csv('../data/preprocessed/test.csv')

# The `squeeze=True` keyword of read_csv was deprecated in pandas 1.4 and
# removed in 2.0; calling .squeeze('columns') on the result is the documented
# replacement and still turns a one-column file into a Series.
weights_train = pd.read_csv('../data/preprocessed/train_weights.csv').squeeze('columns')
weights_val = pd.read_csv('../data/preprocessed/validation_weights.csv').squeeze('columns')

y_train = pd.read_csv('../data/preprocessed/train_target.csv').squeeze('columns')
y_val = pd.read_csv('../data/preprocessed/validation_target.csv').squeeze('columns')

# Null model

In [3]:
class NullModel():
    """Baseline model that predicts the per-stock mean of the training target.

    fit() stores the mean of column 'y' for every value of column 'Stock';
    predict() looks those means up for each row, falling back to 0 for
    stocks that were not present when the model was fitted.
    """

    def __init__(self):
        # Series of mean 'y' indexed by 'Stock'; stays None until fit() runs.
        self.stock_mean = None

    def fit(self, df):
        """Compute and store the mean of 'y' for each 'Stock' in `df`.

        Parameters
        ----------
        df : pandas.DataFrame
            Must contain the columns 'Stock' and 'y'.
        """
        self.stock_mean = df.groupby('Stock')['y'].mean()

    def predict(self, df):
        """Return the stored per-stock mean for every row of `df`.

        Parameters
        ----------
        df : pandas.DataFrame
            Must contain the column 'Stock'.

        Returns
        -------
        pandas.Series
            Named 'y' and aligned with `df`'s index; stocks unseen during
            fit() are predicted as 0.
        """
        preds = df['Stock'].map(self.stock_mean).rename('y')
        # fillna returns a new Series rather than modifying in place, so its
        # result has to be the value returned; otherwise unseen stocks would
        # leak through as NaN.
        return preds.fillna(0)

In [4]:
# Multiplier applied to both reported errors; 1 leaves them unscaled.
scale = 1

# NullModel.fit reads the 'Stock' and 'y' columns from a single frame, so the
# target is placed next to the training features first.
# (presumably y_train is a Series named 'y' -- confirm against the CSV header)
df = pd.concat([X_train, y_train], axis=1)

# Fit the per-stock mean baseline.
clf = NullModel()
clf.fit(df)

# Predict on the training and validation splits.
preds_train, preds_val = clf.predict(X_train), clf.predict(X_val)

# Weighted MSE on each split.
train_error = wMSE(preds=preds_train, y=y_train, weights=weights_train)
val_error = wMSE(preds=preds_val, y=y_val, weights=weights_val)

# NOTE: the second number is the validation-split error, even though the
# printed label reads "Test error".
report = 'Train error: {} Test error: {} \n'
print(report.format(train_error * scale, val_error * scale))

Train error: 1.3026031058363355e-06 Test error: 1.265388501519961e-06 

