<a href="https://colab.research.google.com/github/KucharskiR/data-science/blob/main/LSTM_v_1_1_1x150_trained.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **Sieć neuronowa LSTM**

Budowa sieci neuronowej LSTM

#### **Spis treści:**
1. [Import bibliotek](#a0)
2. [Przygotowanie danych](#a1)
3. [Konfiguracja LSTM layer](#a2)
4. [Główny model](#a3)
5. [Klasyfikacja](#a4)
6. [Przykład budowy modelu z kursu](#a5)
7. [Ocena modelu + wykresy](#a6)
8. [Predykcja na podstawie modelu](#a7)
9. [Zip file](#a8)
10. [Extract .tar.gz](#a9)

### <a name='a0'></a> Import bibliotek

In [2]:
import csv
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.optimizers import Adam
from keras.callbacks import LambdaCallback
from keras.initializers import TruncatedNormal
from keras.models import save_model
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, roc_curve
import os

# Notebook-wide display settings: wide, non-scientific NumPy output with 12
# significant digits, and six-decimal floats in pandas tables.
np.set_printoptions(linewidth=150, suppress=True, precision=12)
pd.set_option('display.float_format', '{:.6f}'.format)
# sns.set() (seaborn styling) is left disabled.
print(np.__version__)

1.25.2


### <a name='a1'></a> Przygotowanie danych

In [9]:
# Input files: semicolon-separated CSVs read from ./1_150x9/. The
# "Extract tar gz" cell at the end of the notebook unpacks 1_150x9.tar.gz into
# that directory, so it has to run before this cell on a fresh kernel.
file_features = './1_150x9/1_150x9f.csv'
file_labels = './1_150x9/1_150x9l.csv'

# Output CSV name, used by the (currently commented-out) export code below.
csv_file_name = 'my_data1.csv'

# Data layout and training hyper-parameters.
num_features = 9            # feature columns per timestep
num_labels = 2              # label columns used as model targets
timestepsPerSample = 150    # timesteps per sample
epochs = 5
batch = 32

# Each labels row holds the `num_labels` target columns plus one extra column
# that later cells plot as 'Price'.
num_label_columns = num_labels + 1

# np.genfromtxt parses the fields as floats by default, so no further dtype
# conversion is needed (the *_strings names are historical).
data_strings = np.genfromtxt(file_features, delimiter=';')
labels_strings = np.genfromtxt(file_labels, delimiter=';')

# Features become (samples, timesteps, features); labels (samples, columns).
X = data_strings.reshape((-1, timestepsPerSample, num_features))
Y = labels_strings.reshape((-1, num_label_columns))

# Fail early if the two files do not describe the same number of samples;
# otherwise the mismatch only surfaces later, inside the split or the fit.
if X.shape[0] != Y.shape[0]:
    raise ValueError(
        f'features describe {X.shape[0]} samples but labels describe {Y.shape[0]}'
    )

print(X.shape)
print(Y.shape)

(33300, 150, 9)
(33300, 3)


In [4]:
# Hold out 15% of the samples for testing (test_size=0.15) and train on the
# remaining 85%. shuffle=False keeps the original sample order, so the split
# is not a random draw.
# Y_train / Y_test keep every label-file column; the model targets are sliced
# out of them in the next cell.
x_train, x_test, Y_train, Y_test = train_test_split(X,Y, test_size=0.15, shuffle=False)
print(x_train.shape)

(28305, 150, 9)


In [5]:
# Model targets are the first two label columns; any further column of
# Y_train / Y_test is not used as a target.
y_train, y_test = Y_train[:, :2], Y_test[:, :2]
for preview in (y_train, y_test):
    print(preview[:2])

[[0. 1.]
 [0. 1.]]
[[0. 1.]
 [0. 1.]]


### <a name='a2'></a> Konfiguracja LSTM layer

In [6]:
# Recurrent layer of the network. It is created once here and added to the
# Sequential model in a later cell. Every constructor argument is spelled out
# explicitly so the full configuration is visible in one place.
# NOTE(review): `seed` is not accepted by every Keras release's LSTM
# constructor — TODO confirm it is valid for the installed Keras version.
LstmLayer = LSTM(
    units=200,  # width of the layer output: shown as (None, 200) by model.summary()
    activation="tanh",
    recurrent_activation="sigmoid",
    use_bias=True,
    kernel_initializer="glorot_uniform",
    recurrent_initializer="orthogonal",
    bias_initializer="zeros",
    unit_forget_bias=True,
    kernel_regularizer=None,
    recurrent_regularizer=None,
    bias_regularizer=None,
    activity_regularizer=None,
    kernel_constraint=None,
    recurrent_constraint=None,
    bias_constraint=None,
    dropout=0.0,
    recurrent_dropout=0.0,
    seed=None,
    return_sequences=False,
    return_state=False,
    go_backwards=False,
    stateful=False,
    unroll=False,
    # (timesteps, features) of a single sample, read from the training array
    input_shape=(x_train.shape[1],x_train.shape[2])
)

### <a name='a3'></a> Główny model

In [10]:
# Training-set size and the (fractional) number of batches per epoch.
# STEPS_PER_EPOCH feeds `decay_steps` of the learning-rate schedule.
num_samples = len(x_train)
STEPS_PER_EPOCH = num_samples / batch
num_samples

28305

In [11]:
# Network: the pre-configured LSTM layer, three dense ReLU layers of
# decreasing width, and a `num_labels`-unit sigmoid output layer.
# NOTE(review): each output unit is an independent sigmoid, so the outputs of
# one sample need not sum to 1 — confirm this is intended for these labels.
model = Sequential([
    LstmLayer,
    Dense(100, activation='relu'),
    Dense(50, activation='relu'),
    Dense(25, activation='relu'),
    Dense(num_labels, activation='sigmoid'),
])

# Learning rate follows 0.001 / (1 + step / decay_steps); decay_steps is set
# to 100 epochs' worth of optimizer steps.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001,
    decay_steps=STEPS_PER_EPOCH * 100,
    decay_rate=1,
    staircase=False,
)


def get_optimizer():
    """Return an Adam optimizer whose learning rate is driven by `lr_schedule`."""
    return tf.keras.optimizers.Adam(learning_rate=lr_schedule)


optimizer = get_optimizer()

# Compile with loss, optimizer and the accuracy metric, then show the layers.
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 200)               168000    
                                                                 
 dense_4 (Dense)             (None, 100)               20100     
                                                                 
 dense_5 (Dense)             (None, 50)                5050      
                                                                 
 dense_6 (Dense)             (None, 25)                1275      
                                                                 
 dense_7 (Dense)             (None, 2)                 52        
                                                                 
Total params: 194477 (759.68 KB)
Trainable params: 194477 (759.68 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


### Training

In [12]:
# Train on the training split. The test split is passed as validation data,
# so the val_* values in `history` are computed on the same samples that the
# later evaluation cells use.
history = model.fit(x_train, y_train, epochs=epochs, batch_size=batch, validation_data=(x_test, y_test))

# Save the trained model to an HDF5 file, overwriting an existing one.
model.save("1_150x_trained.h5", overwrite=True, save_format='h5')

# Raw output-layer values for the test split (not class indices).
predict = model.predict(x_test)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  saving_api.save_model(




In [None]:
# Print model outputs next to the true labels without scientific notation.
np.set_printoptions(suppress=True)

# Column layout: model outputs first, then the matching y_test columns.
result = np.concatenate((predict, y_test), axis=1)
print(result)

# evaluate() returns [loss, accuracy] for the compiled metrics.
test_results = model.evaluate(x_test, y_test, verbose=1)
print(f'Test results - Loss: {test_results[0]} - Accuracy: {test_results[1]*100}%')

# A disabled snippet previously followed here: it wrote `predict` to
# `csv_file_name` as a semicolon-separated CSV and then re-saved the file
# without its header row.



### <a name='a4'></a> Predict i Klasyfikacja

In [13]:
# Recompute the model outputs for the test split and preview the first rows.
predict = model.predict(x_test)
predict[:3]



array([[0.7343818 , 0.26479626],
       [0.6977063 , 0.30362442],
       [0.2923241 , 0.69213045]], dtype=float32)

In [14]:
# Threshold every output column independently at 0.5, giving 0/1 integer
# flags with the same shape as `predict`.
predict_classes = (predict > 0.5).astype(int)
np.info(predict_classes)
predict_classes[:3]

class:  ndarray
shape:  (4995, 2)
strides:  (16, 8)
itemsize:  8
aligned:  True
contiguous:  True
fortran:  False
data pointer: 0x581c7d005180
byteorder:  little
byteswap:  False
type: int64


array([[1, 0],
       [1, 0],
       [0, 1]])

**epochs** - ile razy zestaw treningowy zostanie przetworzony przez model. Przy każdej iteracji optymalizator próbuje dopasować wagi, aby funkcja celu została zminimalizowana.

**batch_size** - liczba przykładów treningowych, po której następuje aktualizacja wag

**validation_split** - procent danych użytych do walidacji

In [15]:
# Per-epoch training history as a table, with the epoch index as last column.
metrics = pd.DataFrame(history.history).assign(epoch=history.epoch)
metrics

Unnamed: 0,loss,accuracy,val_loss,val_accuracy,epoch
0,0.586502,0.696167,0.568456,0.707507,0
1,0.565181,0.711252,0.571732,0.6999,1
2,0.560698,0.715704,0.565571,0.705505,2
3,0.559514,0.717258,0.564298,0.707107,3
4,0.552211,0.719802,0.569226,0.702502,4


### <a name='a6'></a> Ocena modelu LSTM

In [16]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Training curves: accuracy on the left panel, loss on the right panel.
fig = make_subplots(rows=1, cols=2)
for column, panel_col in (('accuracy', 1), ('loss', 2), ('val_accuracy', 1), ('val_loss', 2)):
    fig.add_trace(
        go.Scatter(x=metrics['epoch'], y=metrics[column], name=column),
        row=1, col=panel_col,
    )

fig.update_xaxes(title_text='epochs')
# Label each panel separately: an unscoped update_yaxes() call would title
# the loss panel 'accuracy' as well.
fig.update_yaxes(title_text='accuracy', row=1, col=1)
fig.update_yaxes(title_text='loss', row=1, col=2)
fig.update_layout(width=1000, title='Accuracy and Loss')
fig.show()

In [18]:
# Leftover experiment with one-hot encoding (purpose unclear to the author).
from keras.utils import to_categorical

# The encoded copies get their own names. Rebinding `y_train` here would
# replace the training targets with an array of a different shape and break
# every later model.fit(x_train, y_train, ...) call.
# NOTE(review): the targets printed earlier already look one-hot encoded and
# num_classes=10 does not match the 2-unit output layer — confirm whether this
# cell is needed at all.
y_train_cat = to_categorical(y_train, num_classes=10)
y_test_cat = to_categorical(y_test, num_classes=10)

In [20]:
# Loss and accuracy of the current model on the test split; only the accuracy
# is printed.
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
print(test_acc)

0.7025024890899658


### <a name='a7'></a> Graf zielono czerwony

In [21]:
# Połączenie dwóch tablic
concat = np.hstack((predict_classes, Y_test[:,2:]))
print(concat.shape)
print(concat[:3])

(4995, 3)
[[ 1.    0.   79.8 ]
 [ 1.    0.   80.04]
 [ 0.    1.   80.  ]]


In [22]:
import plotly.express as px
import pandas as pd

# Wrap the joined array in a DataFrame with named columns and preview it.
df = pd.DataFrame(data=concat, columns=['Sell', 'Buy', 'Price'])
df.head()

Unnamed: 0,Sell,Buy,Price
0,1.0,0.0,79.8
1,1.0,0.0,80.04
2,0.0,1.0,80.0
3,0.0,1.0,80.14
4,0.0,1.0,80.31


In [23]:
import plotly.graph_objects as go
import pandas as pd

# Price line coloured by the predicted signal: the segment ending at row i is
# green when the 'Buy' flag of row i equals 1 and red otherwise.
#
# All segments of one colour are collected into a single trace, separated by
# None so that the gaps between them are not bridged. Adding one trace per
# segment (thousands of traces for a few thousand rows) makes the figure very
# slow to build and render. Traces are drawn as plain lines.
# Rows are addressed by position, which matches the default 0..n-1 index of a
# DataFrame built directly from an array.
prices = df['Price'].tolist()
buy_flags = df['Buy'].tolist()
segments = {'green': ([], []), 'red': ([], [])}
for i in range(1, len(prices)):
    xs, ys = segments['green' if buy_flags[i] == 1 else 'red']
    xs.extend((i - 1, i, None))
    ys.extend((prices[i - 1], prices[i], None))

fig = go.Figure()
for color, (xs, ys) in segments.items():
    fig.add_trace(go.Scatter(
        x=xs, y=ys, mode='lines', line=dict(color=color),
        connectgaps=False, showlegend=False,
    ))

# Axis labels and title
fig.update_layout(
    xaxis_title='Indeks',
    yaxis_title='Close',
    title='Wykres liniowy z kolorami'
)

# Render the figure
fig.show()

### <a name='a7'></a> Predykcja na podstawie modelu:



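1.   **model.evaluate(x_test, y_test)** - pozwala obliczyć stratę i metryki modelu na podanych danych
2.   **model.predict_classes()** - zwracała przewidziane klasy; metoda została usunięta z nowszych wersji Keras, dlatego poniżej użyto `np.argmax(model.predict(x_test), axis=-1)`
3.   **model.predict()** - pozwala zwrócić prawdopodobieństwo danej klasy (dawna metoda **model.predict_proba()** również została usunięta)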





In [None]:
# Raw model outputs for every test sample, displayed as the cell result.
predictions = model.predict(x_test)
predictions

In [None]:
# Class index per test sample: position of the largest value in each output
# row.
predictions_cls = model.predict(x_test).argmax(axis=-1)
predictions_cls

In [None]:
# Rebuild the per-epoch history table from the current `history` object.
metrics = pd.DataFrame(history.history).assign(epoch=history.epoch)
metrics

In [None]:
# Train the current `model` object for 10 epochs with a 20% validation split
# of the training arrays, and rebind `history` to this run's results.
# Epoch count and batch size are hard-coded here instead of using the
# `epochs` / `batch` settings from the data-preparation cell.
history = model.fit(x_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

In [None]:
# Replace `model` with one loaded from disk and show its architecture.
# NOTE(review): no visible cell writes 'lstm_Model.keras' (the training cell
# saves "1_150x_trained.h5") — confirm the file exists before running.
model = tf.keras.models.load_model('lstm_Model.keras')
model.summary()

### <a name='a5'></a> Przykład budowy modelu z kursu

In [None]:
# Reference architecture from the course, kept for comparison:
# model = Sequential()
# model.add(Flatten(input_shape=(28, 28)))
# model.add(Dense(units=128, activation='relu'))
# model.add(Dense(units=10, activation='softmax'))

# model.compile(optimizer='rmsprop',
#               loss='categorical_crossentropy',
#               metrics=['accuracy'])

# model.summary()

# LSTM variant of the same pattern. A fresh LSTM layer is built from
# LstmLayer's configuration instead of adding LstmLayer itself: a layer object
# added to two models is shared between them, so this example would start
# from, and on training modify, the weights of the model trained above.
example_lstm = LSTM.from_config(LstmLayer.get_config())

# NOTE: this rebinds `model`; the previously trained model object is no longer
# reachable under that name after this cell runs.
model = Sequential()
# Explicit input so the model is built and summary() can report parameters.
model.add(tf.keras.Input(shape=x_train.shape[1:]))
model.add(example_lstm)
model.add(Dense(units=2, activation='softmax')) # <----- output layer

model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

### <a name='a8'></a> Zip file

In [None]:
from zipfile import ZipFile

# Pack the saved model file into a ZIP archive. The `with` block closes the
# archive, which finalises it on disk; the handle is also no longer named
# `zip`, so the builtin zip() stays usable in later cells.
# NOTE(review): no visible cell writes 'lstm_Model.keras' (the training cell
# saves "1_150x_trained.h5") — confirm this is the file meant to be archived.
with ZipFile('my_python_files.zip', 'w') as archive:
    archive.write('lstm_Model.keras')

### <a name='a9'></a> Extract tar gz

In [1]:
# importing the "tarfile" module
import tarfile

# open file
file = tarfile.open('1_150x9.tar.gz')

# extracting a specific file
file.extractall(path='./1_150x9/')

file.close()