In [49]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

In [50]:
def print_score(model, x, y):
    """Print two evaluation figures for ``model`` on the sample ``(x, y)``.

    Args:
        model: Object exposing ``score(x, y)`` and ``predict(x)``.
        x: Features forwarded unchanged to ``model``.
        y: True targets; must support ``model.predict(x) - y``.

    Prints:
        ``Score``: ``model.score(x, y)`` multiplied by 100, four decimals.
        ``Kaggle Score``: root-mean-squared error between ``model.predict(x)``
        and ``y``, rendered with zero decimals.
    """
    score_pct = model.score(x, y) * 100
    print('Score:        {:.4f}%'.format(score_pct))

    residuals = model.predict(x) - y
    rmse = np.sqrt(np.mean(residuals ** 2))
    # NOTE(review): if the targets are 0/1 labels the RMSE lies in [0, 1], so
    # the zero-decimal format can only ever show 0 or 1 - confirm this is intended.
    print('Kaggle Score: {:.0f}'.format(rmse))

In [51]:
# Load the training table; later cells split off its 'Buy' column as the target.
df = pd.read_csv('data/stock_XY_train.csv')

# Custom Model

In [52]:
class CustomModel:
    """Random-guess baseline with a minimal ``fit`` / ``predict`` / ``score`` API.

    Every sample is assigned 0 or 1 uniformly at random, ignoring the features.
    The guesses are cached so that repeated calls on a sample of the same
    length (e.g. ``score`` followed by ``predict``) see the same predictions.

    Args:
        random_state: Optional seed. ``None`` (the default) draws from NumPy's
            global RNG, so ``np.random.seed`` still controls the output; any
            other value seeds a private ``np.random.RandomState``.
    """

    def __init__(self, random_state=None):
        # Cached guesses from the most recent ``predict`` call (None until then).
        self.prediction = None
        # ``np.random`` (module) and ``RandomState`` both expose ``choice``.
        self._rng = np.random if random_state is None else np.random.RandomState(random_state)

    def fit(self, x, y):
        """Do nothing: the baseline does not learn from the data."""
        pass

    def predict(self, x: np.ndarray):
        """Return one random 0/1 guess per row of ``x``.

        Args:
            x: Any sized container; only ``len(x)`` is used.

        Returns:
            Array of ``len(x)`` values drawn from ``{0, 1}``. The same array is
            returned again while the requested length stays unchanged.
        """
        n_samples = len(x)
        # Regenerate when the cache is missing or was built for a sample of a
        # different size; otherwise a stale, wrong-length array would leak out.
        if self.prediction is None or len(self.prediction) != n_samples:
            self.prediction = self._rng.choice([0, 1], size=n_samples)

        return self.prediction

    def score(self, x, y):
        """Return the fraction of rows whose guess equals the true label.

        Args:
            x: Features; forwarded to ``predict``.
            y: True labels, one per row of ``x``.

        Returns:
            Accuracy in ``[0, 1]``.

        Raises:
            ValueError: If ``y`` is empty or its length differs from ``x``.
        """
        y_true = np.asarray(y)
        y_pred = np.asarray(self.predict(x))
        if len(y_true) == 0:
            raise ValueError('Cannot score an empty sample.')
        if len(y_true) != len(y_pred):
            raise ValueError(
                'x and y have different lengths: {} != {}'.format(len(y_pred), len(y_true))
            )
        # Vectorised comparison instead of a Python-level loop over pairs.
        return np.mean(y_pred == y_true)
    

In [53]:
# Hold out 40% of the rows for evaluation; the fixed random_state makes the
# split itself repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    df.drop(['Buy'], axis=1),
    df['Buy'],
    test_size=0.4,
    random_state=42,
)

# The baseline draws its guesses from NumPy's global RNG. Seed it so the
# printed scores are identical on every Restart & Run All.
np.random.seed(42)

custom_model = CustomModel()
custom_model.fit(x_train, y_train)
print_score(custom_model, x_test, y_test)

Score:        50.0202%
Kaggle Score: 1


# Submission File Generation

In [60]:
# Read the unlabeled test set, discard its 'Unnamed: 0' column and let
# 'Unnamed: 0.1' take over that name.
dfTest = (
    pd.read_csv('data/stock_X_test.csv')
    .drop(columns='Unnamed: 0')
    .rename(columns={'Unnamed: 0.1': 'Unnamed: 0'})
)

# Fresh model, fitted on the complete training frame.
custom_model = CustomModel()
custom_model.fit(df.drop(columns=['Buy']), df['Buy'])

# Attach one predicted label per test row, then preview the result.
dfTest['Buy'] = custom_model.predict(dfTest)
dfTest.head()

Unnamed: 0.1,Unnamed: 0,Ticker,Revenue,Revenue Growth,Cost of Revenue,Gross Profit,R&D Expenses,SG&A Expense,Operating Expenses,Operating Income,...,Receivables growth,Inventory Growth,Asset Growth,Book Value per Share Growth,Debt Growth,R&D Expense Growth,SG&A Expenses Growth,Sector,Yr,Buy
0,17685,CMCSA,94507000000.0,0.1115,0.0,94507000000.0,0.0,64822000000.0,75498000000.0,19009000000.0,...,0.257,0.0,0.3426,0.0722,0.7309,0.0,0.1308,Consumer Cyclical,18,1
1,17686,KMI,14144000000.0,0.032,7288000000.0,6856000000.0,0.0,601000000.0,3062000000.0,3794000000.0,...,0.0345,-0.092,-0.0024,0.0076,-0.0137,0.0,-0.1265,Energy,18,0
2,17687,INTC,70848000000.0,0.1289,27111000000.0,43737000000.0,13543000000.0,6750000000.0,20421000000.0,23316000000.0,...,0.1989,0.0387,0.0382,0.1014,-0.0169,0.039,-0.0942,Technology,18,0
3,17688,MU,30391000000.0,0.4955,12500000000.0,17891000000.0,2141000000.0,813000000.0,2897000000.0,14994000000.0,...,0.4573,0.1511,0.2275,0.6395,-0.5841,0.1738,0.0942,Technology,18,1
4,17689,GE,121615000000.0,0.0285,95461000000.0,26154000000.0,0.0,18111000000.0,40711000000.0,-14557000000.0,...,-0.2781,-0.2892,-0.1575,-0.4487,-0.2297,0.0,0.0308,Industrials,18,1


In [62]:
# Write only the id column and the predicted label. index=False (the
# documented boolean form) keeps the DataFrame's row index out of the file.
dfTest[['Unnamed: 0', 'Buy']].to_csv('submission.csv', index=False, header=True)
