In [13]:
import pandas as pd

from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression

In [14]:
def transform(file_path):
    df = pd.read_csv(file_path)

    # Tính giá trị biến động giá cổ phiếu = Giá Open sau - Giá Close hiện tại
    df['fluctuation'] = df['Open'] - df['Close'].shift(1)

    # Tính khoảng thời gian từ phiên hiện tại đến phiên sau (đơn vị minutes)
    df['Date/Time'] = pd.to_datetime(df['Date/Time'].str.strip())
    df['time_diff'] = df['Date/Time'] - df['Date/Time'].shift(1) 
    df['mins_diff'] = df['time_diff'] / pd.Timedelta(minutes=1)

    # Giá Close và Volume hiện tại 
    df['close_previous'] = 0 + df['Close'].shift(1)
    df['volume_previous'] = 0 + df['Volume'].shift(1)

    # Bỏ các row có nan
    df2 = df.dropna()

    return df2


In [15]:
dataset_name = ['FPT', 'MSN', 'PNJ', 'VIC']
df = pd.DataFrame([])

for name in dataset_name:
    file_path = f"dataset/{name}.csv"
    # Transform dataframe
    df_tmp = transform(file_path)
    # merge all dataframes into 1 dataframe
    df = pd.concat([df, df_tmp])   

In [16]:
# Use one-hot-encoding
le = LabelEncoder()
df['Ticker'] = le.fit_transform(df['Ticker'])

ticker = {
    'FPT': 0,
    'MSN': 1, 
    'PNJ': 2,
    'VIC': 3
}

In [17]:
# Convert numeric to string
df2 = df.copy()
df2['fluctuation'] = df2['fluctuation'].round(2)
df2['fluctuation'] = df2['fluctuation'].astype('str')

In [18]:
x = df2[['Ticker', 'mins_diff', 'close_previous']]
y = df2['fluctuation']

scaler = MinMaxScaler()
x_scaled = scaler.fit_transform(x)

x_train, x_test, y_train, y_test = train_test_split(x_scaled, y, test_size=0.2)

In [51]:
model = LogisticRegression()
model.fit(x_train, y_train)
model.score(x_test, y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.5765726539453776

In [19]:
import datetime
import pickle

In [53]:
# save model
# with open('model', 'wb') as f:   # wb là write binary file
#     pickle.dump(model, f)

In [20]:
def convert_datetime(date_time):
    format = '%Y-%m-%d %H:%M:%S'
    datetime_str = datetime.datetime.strptime(date_time, format)
    return datetime_str

In [21]:
def prediction(dateClose, dateOpen, ticker_name, priceClose):
    open_date = convert_datetime(dateOpen)
    close_date = convert_datetime(dateClose)
    
    time_diff = open_date - close_date
    mins_diff = round(time_diff.total_seconds() / 60)

    # open saved model
    with open('model', 'rb') as f:
        model = pickle.load(f)

    pred = model.predict([[ticker[ticker_name], mins_diff, priceClose]])
    return pred

In [56]:
dateClose = '2018-12-28 09:37:00'
dateOpen = '2018-12-28 09:39:00'
ticker_name = 'FPT' 
priceClose = 30.89

fluctuation_pred = prediction(dateClose, dateOpen, ticker_name, priceClose)
print(fluctuation_pred)

['-0.4']


In [57]:
result = float(fluctuation_pred[0])

In [58]:
result

-0.4

In [22]:
from tensorflow import keras
from sklearn.preprocessing import LabelBinarizer

In [23]:
# transfer one-hot-encoding
label_as_binary = LabelBinarizer()
train__y_labels = label_as_binary.fit_transform(y_train)
train__y_labels.shape

(367464, 547)

In [24]:
x_train.shape

(367464, 3)

In [25]:
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(3,)),
    keras.layers.Dense(1000, activation='relu'),
    keras.layers.Dense(800, activation='relu'),
    keras.layers.Dense(547, activation='sigmoid')
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) # category classification
model.fit(x_train, train__y_labels, epochs=5, batch_size=8)

print(model.evaluate(x_test, y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


ValueError: in user code:

    File "e:\Tools\anaconda3\lib\site-packages\keras\src\engine\training.py", line 1972, in test_function  *
        return step_function(self, iterator)
    File "e:\Tools\anaconda3\lib\site-packages\keras\src\engine\training.py", line 1956, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "e:\Tools\anaconda3\lib\site-packages\keras\src\engine\training.py", line 1944, in run_step  **
        outputs = model.test_step(data)
    File "e:\Tools\anaconda3\lib\site-packages\keras\src\engine\training.py", line 1852, in test_step
        self.compute_loss(x, y, y_pred, sample_weight)
    File "e:\Tools\anaconda3\lib\site-packages\keras\src\engine\training.py", line 1139, in compute_loss
        return self.compiled_loss(
    File "e:\Tools\anaconda3\lib\site-packages\keras\src\engine\compile_utils.py", line 265, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "e:\Tools\anaconda3\lib\site-packages\keras\src\losses.py", line 142, in __call__
        losses = call_fn(y_true, y_pred)
    File "e:\Tools\anaconda3\lib\site-packages\keras\src\losses.py", line 268, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "e:\Tools\anaconda3\lib\site-packages\keras\src\losses.py", line 2122, in categorical_crossentropy
        return backend.categorical_crossentropy(
    File "e:\Tools\anaconda3\lib\site-packages\keras\src\backend.py", line 5560, in categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)

    ValueError: Shapes (None, 1) and (None, 547) are incompatible


In [26]:
#save model
with open('neuralNetwork_model', 'wb') as f:   # wb là write binary file
    pickle.dump(model, f)