<a href="https://colab.research.google.com/github/KucharskiR/data-science/blob/main/LSTM_v_1_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **Sieć neuronowa LSTM**

Budowa sieci neuronowej LSTM

#### **Spis treści:**
1. [Import bibliotek](#a0)
2. [Przygotowanie danych](#a1)
3. [Konfiguracja LSTM layer](#a2)
4. [Główny model](#a3)
5. [Klasyfikacja](#a4)
6. [Przykład budowy modelu z kursu](#a5)
7. [Ocena modelu + wykresy](#a6)
8. [Predykcja na podstawie modelu](#a7)
9. [Zip file](#a8)
10. [Extract .tar.gz](#a9)

### <a name='a0'></a> Import bibliotek

In [None]:
import csv
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.optimizers import Adam
from keras.callbacks import LambdaCallback
from keras.initializers import TruncatedNormal
from keras.models import save_model
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, roc_curve
import os

# Console display settings for NumPy arrays: 12 decimals, no scientific
# notation, lines up to 150 characters wide.
_numpy_print_config = {"linewidth": 150, "suppress": True, "precision": 12}
np.set_printoptions(**_numpy_print_config)

# Render pandas floats with six decimal places.
pd.set_option('display.float_format', '{:.6f}'.format)

# sns.set()   <--- seaborn (left disabled)

# Show which NumPy version the notebook is running on.
print(np.__version__)

### <a name='a1'></a> Przygotowanie danych

In [None]:
# Input files, both semicolon-separated: features and labels.
file_path = '49f.csv'
file_labels = '49.csv'

# Output CSV that later receives the model predictions.
csv_file_name = 'my_data1.csv'

# Dataset layout.
num_samples = 1980          # samples the label array is reshaped into
timestepsPerSample = 120    # time steps grouped into one sample
num_features = 8            # values per time step
num_labels = 2              # label values per sample

# Training hyper-parameters.
batch = 60
epochs = 1

# Batches needed to cover `num_samples` samples once (true division, so this
# is a float).
# NOTE(review): computed from the full dataset, not from the 80% training
# split that is actually passed to fit() - confirm this is intended.
STEPS_PER_EPOCH = num_samples / batch

# Parse both CSV files into NumPy arrays.
data_strings = np.genfromtxt(file_path, delimiter=';')
labels_strings = np.genfromtxt(file_labels, delimiter=';')

# Features -> (samples, time steps, features); labels -> (samples, labels).
X = np.reshape(
    data_strings.astype(float),
    (-1, timestepsPerSample, num_features),
)
Y = np.reshape(
    labels_strings.astype(float),
    (num_samples, num_labels),
)
print(X.shape)

# Unshuffled 80/20 split: the first 80% of the samples train the model and
# the trailing 20% are held out for testing.
# X_train, X_test, y_train, y_test = train_test_split(X,Y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    shuffle=False,
)

# Lower-case alias used by the cells below.
# X_train = X_train.to_numpy()
x_train = X_train

### <a name='a2'></a> Konfiguracja LSTM layer

In [None]:
# Every constructor option of the recurrent layer is spelled out so that it
# can be tuned in one place.
lstm_settings = {
    # Size and non-linearities.
    'units': 200,
    'activation': "tanh",
    'recurrent_activation': "sigmoid",
    'use_bias': True,
    # Weight initialisation.
    'kernel_initializer': "glorot_uniform",
    'recurrent_initializer': "orthogonal",
    'bias_initializer': "zeros",
    'unit_forget_bias': True,
    # Regularisers and constraints (all disabled).
    'kernel_regularizer': None,
    'recurrent_regularizer': None,
    'bias_regularizer': None,
    'activity_regularizer': None,
    'kernel_constraint': None,
    'recurrent_constraint': None,
    'bias_constraint': None,
    # Dropout (disabled) and its random seed.
    'dropout': 0.0,
    'recurrent_dropout': 0.0,
    'seed': None,
    # Sequence handling: only the final output is returned, no state.
    'return_sequences': False,
    'return_state': False,
    'go_backwards': False,
    'stateful': False,
    'unroll': False,
    # (time steps, features) taken from the training array.
    'input_shape': (x_train.shape[1], x_train.shape[2]),
}

LstmLayer = LSTM(**lstm_settings)

### <a name='a3'></a> Główny model

In [None]:
# Main classifier: the configured LSTM layer followed by three ReLU dense
# layers of shrinking width and a sigmoid output layer.
lst = Sequential()

# Recurrent input layer (200 units, see the LstmLayer configuration).
# lst.add(LSTM(units=300, return_sequences=False, input_shape=(x_train.shape[1],x_train.shape[2])))
lst.add(LstmLayer)

# Hidden dense layers, added in the order 100 -> 50 -> 25.
for hidden_units in (100, 50, 25):
    lst.add(Dense(hidden_units, activation='relu'))

# Output layer: one sigmoid unit per label.
lst.add(Dense(num_labels, activation='sigmoid'))

def get_optimizer():
    """Return an Adam optimizer that follows the module-level `lr_schedule`."""
    return tf.keras.optimizers.Adam(learning_rate=lr_schedule)


# Inverse-time decay of the learning rate: starts at 0.001 and decays
# continuously (no staircase) with a time scale of 100 * STEPS_PER_EPOCH
# optimizer steps.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001,
    decay_steps=STEPS_PER_EPOCH * 100,
    decay_rate=1,
    staircase=False,
)

optimizer = get_optimizer()

# Compile with binary cross-entropy and accuracy tracking, then print the
# layer overview.
lst.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
lst.summary()

# Train on the training split; the held-out test split doubles as the
# validation set reported after every epoch.
# history = lst.fit(x_train, y_train, epochs=epochs, batch_size=batch,validation_split=0.2)
history = lst.fit(
    x_train,
    y_train,
    batch_size=batch,
    epochs=epochs,
    validation_data=(X_test, y_test),
)

# Save the trained model, replacing any existing file. The native Keras
# format is inferred from the ".keras" suffix, so the deprecated
# `save_format` argument is not passed.
lst.save("lstm_Model.keras", overwrite=True)

# Lower-case alias for the held-out features.
# x_test = np.reshape(X_test, (X_test.shape[0],timestepsPerSample,X_test.shape[1]))
x_test = X_test

# Model outputs (one sigmoid score per label) for every test sample.
predict = lst.predict(x_test)
# predict = np.argmax(lst.predict(x_test), axis=-1)

# Print plain decimals instead of scientific notation.
np.set_printoptions(suppress=True)

# Place predictions and true labels side by side, one row per sample.
result = np.concatenate((predict, y_test), axis=1)
print(result)

# Loss and accuracy on the test split.
test_results = lst.evaluate(x_test, y_test, verbose=1)
print(f'Test results - Loss: {test_results[0]} - Accuracy: {test_results[1]*100}%')

# Export the predictions to a semicolon-separated CSV without index and
# without a header row.
#
# The file used to be written with a header, read back, stripped of its
# first (header) row and written again; writing once with `header=False`
# produces the same file without the round trip.
prediction_columns = [f'Column{i + 1}' for i in range(predict.shape[1])]
df = pd.DataFrame(predict, columns=prediction_columns)

# df.to_csv(csv_file_name, sep=';', index=False, float_format='%.0f')
df.to_csv(csv_file_name, sep=';', index=False, header=False)

print(f'CSV file name: {csv_file_name}.')



### <a name='a4'></a> Klasyfikacja

In [None]:
# Turn the sigmoid scores into hard 0/1 decisions: strictly above 0.5 -> 1,
# otherwise 0.
# np.info(predict)
# predict_classes = np.argmax(predict, axis=-1)
above_threshold = predict > 0.5
predict_classes = np.where(above_threshold, 1, 0)

# Print array metadata, then display the array as the cell output.
np.info(predict_classes)
predict_classes
# predict

In [None]:
# Re-run inference on the test split and show the raw model outputs as the
# cell result.
predict = lst.predict(x_test)
predict

# Disabled: side-by-side print of predictions vs. labels and test-set
# evaluation (the same steps are performed in the main model cell).
# # Set print options to suppress scientific notation
# np.set_printoptions(suppress=True)

# # Concatenate arrays
# result = np.hstack((predict, y_test))
# print(result)

# test_results = lst.evaluate(x_test, y_test, verbose=1)
# print(f'Test results - Loss: {test_results[0]} - Accuracy: {test_results[1]*100}%')

In [None]:
from keras.utils import to_categorical

# Build one-hot targets with one column per label so that they match the
# `num_labels`-unit softmax output of the model defined below.
#
# The previous call used `num_classes=10` on the already two-column label
# array, which produced a 3-D target (samples, 2, 10) that the model cannot
# consume, and re-running the cell kept adding dimensions to `y_train`.
# Going through argmax first keeps the result 2-D and makes the cell safe to
# re-run.
# NOTE(review): assumes each label row marks exactly one class (one-hot
# style); for truly multi-label data this step should be dropped - confirm.
y_train = to_categorical(np.argmax(y_train, axis=-1), num_classes=num_labels)
y_test_cat = to_categorical(np.argmax(y_test, axis=-1), num_classes=num_labels)

### <a name='a5'></a> Przykład budowy modelu z kursu

In [None]:
# Original course example (dense classifier on 28x28 inputs), kept for
# reference:
# model = Sequential()
# model.add(Flatten(input_shape=(28, 28)))
# model.add(Dense(units=128, activation='relu'))
# model.add(Dense(units=10, activation='softmax'))
#
# model.compile(optimizer='rmsprop',
#               loss='categorical_crossentropy',
#               metrics=['accuracy'])
#
# model.summary()

# Same recipe adapted to this data set: the LSTM layer feeding a two-unit
# softmax output.
# NOTE(review): `LstmLayer` is the very same layer object already added to
# `lst`, so both models presumably share its weights - confirm this is
# intended.
model = Sequential([
    LstmLayer,
    Dense(units=2, activation='softmax'),  # <----- output layer
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

model.summary()

**epochs** - ile razy zestaw treningowy zostanie przetworzony przez model. Przy każdej iteracji optymalizator próbuje dopasować wagi, aby funkcja celu została zminimalizowana.

**batch_size** - liczba przykładów treningowych, po której następuje aktualizacja wag

**validation_split** - ułamek danych treningowych (np. 0.2 = 20%) odkładany do walidacji

In [None]:
# Tabulate the recorded training history (one row per epoch) and append the
# epoch index as the last column; the table is shown as the cell output.
metrics = pd.DataFrame(history.history).assign(epoch=history.epoch)
metrics

### <a name='a6'></a> Ocena modelu LSTM

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Plot the training history: accuracy curves in the left subplot, loss curves
# in the right one.
fig = make_subplots(rows=1, cols=2)

# (metric column, subplot column), in the order the traces are added.
trace_layout = (
    ('accuracy', 1),
    ('loss', 2),
    ('val_accuracy', 1),
    ('val_loss', 2),
)
for metric_name, subplot_col in trace_layout:
    fig.add_trace(
        go.Scatter(x=metrics['epoch'], y=metrics[metric_name], name=metric_name),
        row=1,
        col=subplot_col,
    )

fig.update_xaxes(title_text='epochs')
# Label each y-axis with what it actually shows; a single update_yaxes call
# without row/col would title the loss subplot "accuracy" as well.
fig.update_yaxes(title_text='accuracy', row=1, col=1)
fig.update_yaxes(title_text='loss', row=1, col=2)
fig.update_layout(width=1000, title='Accuracy and Loss')
fig.show()

In [None]:
# Silently score `model` on the test split against the categorical labels
# and report only the accuracy.
evaluation = model.evaluate(X_test, y_test_cat, verbose=0)
test_loss, test_acc = evaluation
print(test_acc)

### <a name='a7'></a> Predykcja na podstawie modelu:



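1.   **model.evaluate(x, y_true)** - pozwala obliczyć metryki modelu
2.   **model.predict_classes()** - pozwala zwrócić odpowiednio przewidziane klasy (metoda usunięta w nowszych wersjach Keras/TensorFlow; zamiast niej: `np.argmax(model.predict(x), axis=-1)`)
3.   **model.predict_proba(), model.predict()** - pozwala zwrócić prawdopodobieństwo danej klasy (`predict_proba()` również usunięto w nowszych wersjach; należy używać `model.predict()`)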





In [None]:
# Softmax outputs of `model` for every test sample, shown as the cell result.
predictions = model.predict(X_test)
predictions

In [None]:
# Predicted class = index of the highest score in each output row.
# predictions_cls = model.predict_classes(X_test)
class_scores = model.predict(X_test)
predictions_cls = class_scores.argmax(axis=-1)
predictions_cls

In [None]:
# Rebuild the history table from the current `history` object: one row per
# epoch with the epoch index appended as the last column.
history_table = pd.DataFrame(history.history)
metrics = history_table.assign(epoch=history.epoch)
metrics

In [None]:
# Train `model` for 10 epochs in batches of 32, holding back 20% of the
# training data for validation; this replaces the earlier `history`.
history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)

In [None]:
# Load the model saved as 'lstm_Model.keras' (this rebinds `model`) and print
# its layer overview.
model = tf.keras.models.load_model('lstm_Model.keras')
model.summary()

### <a name='a8'></a> Zip file

In [None]:
from zipfile import ZipFile

# Pack the saved model into a zip archive. The context manager closes the
# archive, which is what finalises it on disk; the handle is also no longer
# bound to the name `zip`, which shadowed the builtin.
with ZipFile('my_python_files.zip', 'w') as archive:
    archive.write('lstm_Model.keras')

### <a name='a9'></a> Extract tar gz

In [None]:
# importing the "tarfile" module
import tarfile

# Extract a single member from the gzip-compressed tar archive into
# ./content. The context manager closes the archive even if extraction fails.
with tarfile.open('gfg.tar.gz') as archive:
    archive.extract('sample.txt', './content')