In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Linear Regression Model
from sklearn.linear_model import LinearRegression 
# to save model
import joblib
# use for standardization 
from scipy import stats
# load API
import jpx_tokyo_market_prediction

In [None]:
# Load the four price files (train + supplemental, primary + secondary)
INPUT_DIR = "../input/jpx-tokyo-stock-exchange-prediction"
stock_prices = pd.read_csv(f"{INPUT_DIR}/train_files/stock_prices.csv")
secondary_stock_prices = pd.read_csv(f"{INPUT_DIR}/train_files/secondary_stock_prices.csv")
supplemental_prices = pd.read_csv(f"{INPUT_DIR}/supplemental_files/stock_prices.csv")
supplemental_secondary_stock_prices = pd.read_csv(f"{INPUT_DIR}/supplemental_files/secondary_stock_prices.csv")

# Combine all price rows into one frame, in the same order as before.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and each
# chained append re-copied the growing frame; a single pd.concat does the same
# stacking in one pass. ignore_index=True gives every row a unique label
# instead of repeating each file's own 0..n-1 labels.
stock_prices = pd.concat(
    [
        stock_prices,
        secondary_stock_prices,
        supplemental_prices,
        supplemental_secondary_stock_prices,
    ],
    ignore_index=True,
)

In [None]:
# Feature engineering for the training data
def featuring_train(data):
    """Build the model-ready training table from raw daily price rows.

    The input frame is copied first, so the caller's data is left untouched
    and calling this function again on the same input yields the same result.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw price rows. The columns read here are ``Date``, ``RowId``,
        ``Open``, ``High``, ``Low``, ``Close``, ``Volume``,
        ``ExpectedDividend``, ``SupervisionFlag`` and ``Target``; any other
        column is passed through unchanged.

    Returns
    -------
    pandas.DataFrame
        A new frame without ``RowId``, with ``Date`` parsed to datetime,
        the price/volume columns z-scored, and the derived z-scored columns
        ``Daily_Range`` and ``Mean`` appended (in that order).
    """
    # Work on a copy: mutating the caller's frame made the calling cell
    # non-idempotent (a second run would standardize already-standardized data).
    data = data.copy()

    # string to datetime
    data['Date'] = pd.to_datetime(data['Date'])
    # Fill in the blanks with zeros.
    data['ExpectedDividend'] = data['ExpectedDividend'].fillna(0)
    data['Target'] = data['Target'].fillna(0)
    # Convert boolean to numbers
    data['SupervisionFlag'] = data['SupervisionFlag'].astype(int)

    # Fill missing prices from the previous row, then from the next row for
    # any leading gaps.
    # NOTE(review): the fill follows row order and is not grouped by security,
    # so a gap can be filled from a neighbouring row of another security.
    price_cols = ['Open', 'High', 'Low', 'Close']
    data[price_cols] = data[price_cols].ffill().bfill()

    # Derived features, computed on the raw (not yet standardized) prices
    data['Daily_Range'] = data['Close'] - data['Open']
    # Mid-point of the day's range, truncated to an integer
    data['Mean'] = ((data['High'] + data['Low']) / 2).astype(int)

    # Standardization: z-score each numeric feature over all rows passed in
    for col in ['Open', 'High', 'Low', 'Close', 'Volume', 'Daily_Range', 'Mean']:
        data[col] = stats.zscore(data[col])

    # drop unused data
    return data.drop(columns=['RowId'])

In [None]:
# Build the feature table from the combined price history
data = featuring_train(stock_prices)

In [None]:
# Chronological hold-out split: rows dated before SPLIT_DATE are used for
# training, rows on or after it for evaluation.
# The previous `<` / `>` pair silently dropped every row dated exactly on the
# cutoff from both partitions; `>=` keeps them in the test partition.
SPLIT_DATE = '2022-04-01'
data_train = data[data['Date'] < SPLIT_DATE]
data_test = data[data['Date'] >= SPLIT_DATE]
# reset index for test data
data_test = data_test.reset_index(drop=True)
# Date was only needed for the split; it is not a model feature
data_train = data_train.drop(columns=['Date'])
data_test = data_test.drop(columns=['Date'])

In [None]:
# Split each partition into the objective variable (y) and the learning features (X)
y_train = data_train['Target']
X_train = data_train.drop(columns=['Target'])
y_test = data_test['Target']
X_test = data_test.drop(columns=['Target'])

In [None]:
# Create the linear regression estimator and fit it on the training partition
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# Inspect the fitted model
print(model.coef_)  # partial regression coefficients
print(model.intercept_)  # intercept of the regression line
print(model.get_params())  # estimator parameters

# Evaluate on the held-out partition
test_predictions = model.predict(X_test)
print(test_predictions)  # predicted values
test_score = model.score(X_test, y_test)
print(test_score)  # coefficient of determination

In [None]:
# Persist the fitted model to the working directory
joblib.dump(value=model, filename='regression_model.learn')

In [None]:
# Feature engineering for the test data
def featuring_test(data):
    """Build the model-ready feature table for one batch of test price rows.

    The input frame is copied first, so the frame handed in by the caller is
    left untouched.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw price rows. The columns read here are ``Date``, ``RowId``,
        ``Open``, ``High``, ``Low``, ``Close``, ``Volume``,
        ``ExpectedDividend`` and ``SupervisionFlag``; any other column is
        passed through unchanged.

    Returns
    -------
    pandas.DataFrame
        A new frame without ``RowId`` and ``Date``, with the price/volume
        columns z-scored and the derived z-scored columns ``Daily_Range``
        and ``Mean`` appended (in that order).
    """
    # Work on a copy so the caller's frame is not modified as a side effect.
    data = data.copy()

    # Fill in the blanks with zeros.
    data['ExpectedDividend'] = data['ExpectedDividend'].fillna(0)
    # Convert boolean to numbers
    data['SupervisionFlag'] = data['SupervisionFlag'].astype(int)

    # Fill missing prices from the previous row, then from the next row for
    # any leading gaps.
    # NOTE(review): the fill follows row order and is not grouped by security,
    # so a gap can be filled from a neighbouring row of another security.
    price_cols = ['Open', 'High', 'Low', 'Close']
    data[price_cols] = data[price_cols].ffill().bfill()

    # Derived features, computed on the raw (not yet standardized) prices
    data['Daily_Range'] = data['Close'] - data['Open']
    # Mid-point of the day's range, truncated to an integer
    data['Mean'] = ((data['High'] + data['Low']) / 2).astype(int)

    # Standardization: z-score each numeric feature over the rows passed in.
    # NOTE(review): the mean/std come from this batch only — confirm that this
    # matches how the training features were scaled.
    for col in ['Open', 'High', 'Low', 'Close', 'Volume', 'Daily_Range', 'Mean']:
        data[col] = stats.zscore(data[col])

    # drop unused data
    return data.drop(columns=['RowId', 'Date'])

In [None]:
# Create the competition API environment
# NOTE(review): presumably make_env() can only be called once per kernel session — confirm before re-running this cell
env = jpx_tokyo_market_prediction.make_env()
# Iterator over the test batches served by the API
iter_test = env.iter_test()

In [None]:
# For every batch served by the API: build features, predict, rank, submit
for (prices, options, financials, trades, secondary_prices, sample_prediction) in iter_test:
    # featuring
    x_test = featuring_test(prices)
    # prediction
    y_pred = model.predict(x_test)
    # Predictions are attached by position.
    # NOTE(review): assumes sample_prediction rows are in the same order as prices — TODO confirm
    sample_prediction['Target'] = y_pred
    # Sort in descending order by Target; a stable sort keeps tied predictions
    # in their incoming order so the ranks are deterministic.
    sample_prediction = sample_prediction.sort_values(by="Target", ascending=False, kind="stable")
    # Rank 0 is the highest predicted Target
    sample_prediction['Rank'] = np.arange(len(sample_prediction.index))
    # Restore SecuritiesCode order for the submission
    sample_prediction = sample_prediction.sort_values(by="SecuritiesCode", ascending=True)
    # Selecting the submission columns leaves the helper Target column out;
    # the earlier drop() call discarded its result and did nothing.
    submission = sample_prediction[["Date", "SecuritiesCode", "Rank"]]
    # register your predictions
    env.predict(submission)

In [None]:
# Show the submission frame built for the last batch processed by the loop
print(submission)