# Students:

### Le Hoang Sang

### Vu Dinh Chuong

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import plotly.graph_objects as go
import torch.optim as optim
from torchsummary import summary
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.base import MultiOutputMixin, RegressorMixin, BaseEstimator, _fit_context
from abc import ABCMeta, abstractmethod
from sklearn.utils.validation import check_is_fitted, _check_sample_weight
from sklearn.utils.extmath import safe_sparse_dot
from numbers import Integral

# CLean data

In [None]:
def clean_data(df):
    print("Initial data information:")
    print(df.info())

    # Remove unnecessary columns (if there are columns named 'Unnamed')
    df = df.loc[:, ~df.columns.get_level_values(1).str.contains('^Unnamed')]

    # Handle the 'timestamp' column (if present)
    if ('timestamp', '') in df.columns:
        # Convert to datetime format
        df[('timestamp', '')] = pd.to_datetime(df[('timestamp', '')], errors='coerce')  
        
        # Remove rows with missing timestamp values
        df = df.dropna(subset=[('timestamp', '')]) 

    # Fill missing values using forward fill and backward fill methods
    df = df.fillna(method='ffill').fillna(method='bfill')

    # Normalize 'close' values between 0 and 1 if present
    if ('close', '') in df.columns:
        df[('close', '')] = (df[('close', '')] - df[('close', '')].min()) / (df[('close', '')].max() - df[('close', '')].min())

    # Print out the data after cleaning
    print("Data after cleaning:")
    print(df.head())

    return df


### Explain forward fill and backward fill methods

#### FFill
data = [1, 2, NaN, NaN, 5]

filled_data = [1, 2, 2, 2, 5]

=> 'NaN' values replaced with the previous valid value (2).

#### BFill
data = [1, 2, NaN, NaN, 5]

filled_data = [1, 2, 5, 5, 5]

=>'NaN' values replaced with the next valid value (5).

## Explain Min-max Normalization

normalized_value= (max_value − min_value) / (original_value − min_value)

​
Example:

close = [50, 55, 60, 65, 70]

min(x)=50
max(𝑥)=70

With value 60:
x_normalized = (60−50) / (70−50) = 10 / 20 = 0.5

# Create features

In the DataFrame: The dataset we re working with a MultiIndex data structure, where the first level of the index (such as 'A', 'AAL', etc.) represents the stock symbol, and the second level represents the attributes of the stock (such as 'close', 'open', 'high', etc.).

'A' is a stock symbol representing a specific company or asset.

Other symbols like 'AAL', 'ZION', etc., represent different companies or assets.

Why set symbol = 'A'?

In our code, setting symbol = 'A' focuses on the data of a specific company or asset. 

Since our DataFrame contains data for multiple stock symbols, we need to specify which stock symbol we want to use for feature generation.

In [None]:
def create_features(df):
    df_features = df.copy()

    print("Column names in the DataFrame:", df_features.columns)

    # Define the symbol you want to work with (e.g., 'A', 'AAL', etc.)
    symbol = 'A'

    # Define the columns for the selected stock symbol
    numeric_cols = [(symbol, 'open'), (symbol, 'high'), (symbol, 'low'), (symbol, 'close'), (symbol, 'volume')]

    # Check if these columns exist in the DataFrame
    existing_cols = [col for col in numeric_cols if col in df_features.columns]

    if not existing_cols:
        raise ValueError("Required numeric columns not found in the DataFrame.")

    # Convert the necessary columns to numeric type
    df_features[existing_cols] = df_features[existing_cols].apply(pd.to_numeric, errors='coerce')

    # Remove rows with NaN values after conversion
    df_features = df_features.dropna()

    # Create features if the necessary columns exist
    if (symbol, 'close') in df_features.columns:
        df_features[(symbol, 'return')] = df_features[(symbol, 'close')].pct_change()
        df_features[(symbol, 'ma7')] = df_features[(symbol, 'close')].rolling(window=7).mean()
        df_features[(symbol, 'ma21')] = df_features[(symbol, 'close')].rolling(window=21).mean()
        df_features[(symbol, 'volatility')] = df_features[(symbol, 'close')].rolling(window=7).std()
    
    if (symbol, 'open') in df_features.columns and (symbol, 'close') in df_features.columns:
        df_features[(symbol, 'open_close_ratio')] = df_features[(symbol, 'open')] / df_features[(symbol, 'close')]
    
    if (symbol, 'high') in df_features.columns and (symbol, 'low') in df_features.columns:
        df_features[(symbol, 'high_low_ratio')] = df_features[(symbol, 'high')] / df_features[(symbol, 'low')]

    # Remove rows with NaN values due to calculations
    df_features = df_features.dropna()  

    print("DataFrame with new features:")
    print(df_features.head())

    return df_features

# Plot data

In [None]:
def plot_features(df_features, symbol='A'):
    # Ensure symbol exists in the DataFrame
    if symbol not in df_features.columns.get_level_values(0):
        raise ValueError(f"Symbol '{symbol}' not found in DataFrame columns.")

    # Plot the 'close' price
    plt.figure(figsize=(14, 7))
    plt.plot(df_features.index, df_features[(symbol, 'close')], label=f'{symbol} Close Price', color='b')
    plt.title(f'{symbol} Close Price Over Time')
    plt.xlabel('Time')
    plt.ylabel('Close Price')
    plt.legend()
    plt.show()

    # Plot the 7-day and 21-day moving averages
    plt.figure(figsize=(14, 7))
    plt.plot(df_features.index, df_features[(symbol, 'ma7')], label=f'{symbol} MA7', color='g')
    plt.plot(df_features.index, df_features[(symbol, 'ma21')], label=f'{symbol} MA21', color='r')
    plt.title(f'{symbol} 7-day and 21-day Moving Averages')
    plt.xlabel('Time')
    plt.ylabel('Price')
    plt.legend()
    plt.show()

    # Plot the open/close ratio
    plt.figure(figsize=(14, 7))
    plt.plot(df_features.index, df_features[(symbol, 'open_close_ratio')], label=f'{symbol} Open/Close Ratio', color='orange')
    plt.title(f'{symbol} Open/Close Ratio Over Time')
    plt.xlabel('Time')
    plt.ylabel('Ratio')
    plt.legend()
    plt.show()

    # Plot the high/low ratio
    plt.figure(figsize=(14, 7))
    plt.plot(df_features.index, df_features[(symbol, 'high_low_ratio')], label=f'{symbol} High/Low Ratio', color='brown')
    plt.title(f'{symbol} High/Low Ratio Over Time')
    plt.xlabel('Time')
    plt.ylabel('Ratio')
    plt.legend()
    plt.show()

# Run flow

In [None]:
df = pd.read_csv('dataset.csv', index_col=0, header=[0, 1]).sort_index(axis=1)

In [None]:
df_cleaned = clean_data(df)



In [None]:
# Get 6 first columns
df_features = create_features(df_cleaned.iloc[:, :5])


In [None]:
# Get data for the symbol 'A'
df1 = df_features.loc[:, 'A']
# Get first 6 columns
df1 = df1.iloc[:, :5]
df1

In [None]:
# Stock symbol 'A' behaviour
df1.plot(subplots=True, figsize=(15, 15))
plt.suptitle('A stock attributes from 24/01/2014 to 25/06/2021', y=0.91)
plt.show()

In [None]:
# Split train from  11/09/2017 9:30 A.M. to 17/01/2018 11:50 A.M and test data  from 17/01/2018 11:51A.M. to 16/02/2018 03:59A.M
train = df1.loc['2017-11-12 09:30:00':'2018-01-30 11:50:00']
test = df1.loc['2018-01-30 11:51:00':'2018-02-16 03:59:00']
train.shape

In [None]:
# Standardize the data

scaler = StandardScaler()
train_scaled = scaler.fit_transform(train)
test_scaled = scaler.transform(test)

# Create a function to create sequences of data

def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length])
    return np.array(X), np.array(y)

# Create sequences with a length of 10
seq_length = 11
X_train, y_train = create_sequences(train_scaled, seq_length)
X_test, y_test = create_sequences(test_scaled, seq_length)

# Create data loaders
train_data = TensorDataset(torch.tensor(X_train).float(), torch.tensor(y_train).float())
test_data = TensorDataset(torch.tensor(X_test).float(), torch.from_numpy(y_test).float())

batch_size = 64
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)


In [None]:
from torch import nn

class VanillaLSTM(nn.Module):
    def __init__(self, num_feature):
        super(VanillaLSTM, self).__init__()
        self.lstm  = nn.LSTM(num_feature,64,batch_first=True)
        self.fc    = nn.Linear(64,num_feature)
        
    def forward(self, x):
        output, (hidden, cell) = self.lstm(x)
        x = self.fc(hidden)
        return x

model = VanillaLSTM(5)

In [None]:
# Optiemizer and rmse loss function
import torch.optim as optim

class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
        
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))

criterion = RMSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [None]:
def train(dataloader):
    epoch_loss = 0
    model.train()  
    
    for batch in dataloader:
        optimizer.zero_grad()          
        x,y = batch
        pred = model(x)
        loss = criterion(pred[0],y)        
        loss.backward()               
        optimizer.step()      
        epoch_loss += loss.item()  
        
    return epoch_loss

In [None]:
def evaluate(dataloader):
    epoch_loss = 0
    model.eval()  
    
    with torch.no_grad():
      for batch in dataloader:   
          x,y= batch
          pred = model(x)
          loss = criterion(pred[0],y)              
          epoch_loss += loss.item()  
        
    return epoch_loss / len(dataloader)

In [None]:
n_epochs = 600
best_valid_loss = float('inf')

for epoch in range(1, n_epochs + 1):
    train_loss = train(train_loader)
    valid_loss = evaluate(test_loader)
    
    #save the best model
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model, './checkpoint/saved_weights.pt')

    if (epoch % 200 == 0):
        print(f'\tEpoch: {epoch} | ' + f'\tTrain Loss: {train_loss:.5f} | ' + f'\tVal Loss: {valid_loss:.5f}\n')

In [None]:
model=torch.load('./checkpoint/saved_weights.pt')

In [None]:
X_test = torch.tensor(X_test).float()

with torch.no_grad():
  y_test_pred = model(X_test)

y_test_pred = y_test_pred.numpy()[0]

In [None]:
rmse = np.sqrt(mean_squared_error(y_test, y_test_pred, multioutput='raw_values'))
mse = mean_squared_error(y_test, y_test_pred, multioutput='raw_values')
mae = mean_absolute_error(y_test, y_test_pred, multioutput='raw_values')
r2 = r2_score(y_test, y_test_pred, multioutput='raw_values')
index = ['RMSE','MSE', 'MAE', 'R2 score']

results = pd.DataFrame([rmse, mse, mae, r2], index=index, columns=['close','high','low','open','volume'])
results



train = df1.loc['2017-11-12 09:30:00':'2018-01-30 11:50:00'] 

test = df1.loc['2018-01-30 11:51:00':'2018-02-16 03:59:00']

<div>
<style scoped>
    .dataframe tbody tr th:only-of-type {
        vertical-align: middle;
    }

    .dataframe tbody tr th {
        vertical-align: top;
    }

    .dataframe thead th {
        text-align: right;
    }
</style>
<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>close</th>
      <th>high</th>
      <th>low</th>
      <th>open</th>
      <th>volume</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>RMSE</th>
      <td>0.045912</td>
      <td>0.043599</td>
      <td>0.044478</td>
      <td>0.040182</td>
      <td>0.671828</td>
    </tr>
    <tr>
      <th>MSE</th>
      <td>0.002108</td>
      <td>0.001901</td>
      <td>0.001978</td>
      <td>0.001615</td>
      <td>0.451353</td>
    </tr>
    <tr>
      <th>MAE</th>
      <td>0.024092</td>
      <td>0.018686</td>
      <td>0.021166</td>
      <td>0.014628</td>
      <td>0.253309</td>
    </tr>
    <tr>
      <th>R2 score</th>
      <td>0.998083</td>
      <td>0.998259</td>
      <td>0.998214</td>
      <td>0.998533</td>
      <td>0.272340</td>
    </tr>
  </tbody>
</table>
</div>

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=5)
fig.set_figwidth(25)
fig.set_figheight(5)
for i in range(5):
    ax[i].plot(np.arange(y_train.shape[0], y_train.shape[0]+y_test.shape[0]),
         y_test[:,i], color='black', label='test target')

    ax[i].plot(np.arange(y_train.shape[0], y_train.shape[0]+y_test_pred.shape[0]),
         y_test_pred[:,i], color='red', label='test prediction')

    ax[i].set_xlabel('time [days]')
    ax[i].set_ylabel('price')
    ax[i].legend(loc='best')

ax[0].set_title('future close prices')
ax[1].set_title('future high prices')
ax[2].set_title('future low prices')
ax[3].set_title('future open prices')
ax[4].set_title('future volume prices')

plt.show()

In [49]:
# Predicting the next 1 mins
seq_len = 11
sequences = []
for index in range(len(test) - seq_len + 1): 
    sequences.append(test[index: index + seq_len])
sequences = np.array(sequences)

last_sequence = sequences[-1:, 1:, :]
last_sequence = torch.from_numpy(last_sequence).float()

# Generate predictions
PREDICT = 1
with torch.no_grad():
    for i in range(PREDICT):
        pred_i = model(last_sequence)
        last_sequence = torch.cat((last_sequence, pred_i), dim=1)
        last_sequence = last_sequence[:, 1:, :]

pred_mins = last_sequence.squeeze(0).numpy()

# inverse transform the predicted values
pred_mins = scaler.inverse_transform(pred_mins)

df_pred = pd.DataFrame(
    data=pred_mins,
    columns=['<Open>', '<High>', '<Low>', '<Close>', '<Volume>']
)

print(def_pred)

ValueError: operands could not be broadcast together with shapes (10,5) (10,) (10,5) 

In [50]:
# Predicting the next 5 mins

seq_len = 11
sequences = []
for index in range(len(test) - seq_len + 1): 
    sequences.append(test[index: index + seq_len])
sequences = np.array(sequences)

last_sequence = sequences[-1:, 1:, :]
last_sequence = torch.from_numpy(last_sequence).float()

# Generate predictions
PREDICT = 5
with torch.no_grad():
    for i in range(PREDICT):
        pred_i = model(last_sequence)
        last_sequence = torch.cat((last_sequence, pred_i), dim=1)
        last_sequence = last_sequence[:, 1:, :]

pred_mins = last_sequence.squeeze(0).numpy()

# inverse transform the predicted values
pred_mins = scaler.inverse_transform(pred_mins)

df_pred = pd.DataFrame(
    data=pred_mins,
    columns=['<Open>', '<High>', '<Low>', '<Close>', '<Volume>']
)

ValueError: operands could not be broadcast together with shapes (10,5) (10,) (10,5) 

In [51]:
# Predicting the next 10 mins

seq_len = 11
sequences = []
for index in range(len(test) - seq_len + 1): 
    sequences.append(test[index: index + seq_len])
sequences = np.array(sequences)

last_sequence = sequences[-1:, 1:, :]
last_sequence = torch.from_numpy(last_sequence).float()

# Generate predictions
PREDICT = 10
with torch.no_grad():
    for i in range(PREDICT):
        pred_i = model(last_sequence)
        last_sequence = torch.cat((last_sequence, pred_i), dim=1)
        last_sequence = last_sequence[:, 1:, :]

pred_mins = last_sequence.squeeze(0).numpy()

# inverse transform the predicted values
pred_mins = scaler.inverse_transform(pred_mins)

df_pred = pd.DataFrame(
    data=pred_mins,
    columns=['<Open>', '<High>', '<Low>', '<Close>', '<Volume>']
)

ValueError: operands could not be broadcast together with shapes (10,5) (10,) (10,5) 

In [None]:
# df_features = create_features(df_cleaned)

# print('features', df_features.head())

# plot_features(df_features, symbol='A')

## 10 Feature and technical indicator

In [None]:
# Add EMA12, EMA25, MACD, Boillinger Up and Boillinger Down to df2 dataframe
df2 = df_features.loc[:, 'A']
df2 = df2.iloc[:, :5]
df2['EMA12'] = df2['close'].ewm(span=12, adjust=False).mean()
df2['EMA26'] = df2['close'].ewm(span=26, adjust=False).mean()
df2['MACD'] = df2['EMA12'] - df2['EMA26']
df2['Boillinger Up'] = df2['close'].rolling(window=20).mean() + 2*df2['close'].rolling(window=20).std()
df2['Boillinger Down'] = df2['close'].rolling(window=20).mean() - 2*df2['close'].rolling(window=20).std()
df2

In [None]:
train2 = df2.loc['2017-11-12 09:30:00':'2018-01-30 11:50:00']
test2 = df2.loc['2018-01-30 11:51:00':'2018-02-16 03:59:00']
# Standardize the data

scaler = StandardScaler()
train_scaled2 = scaler.fit_transform(train2)
test_scaled2 = scaler.transform(test2)

# Create a function to create sequences of data

def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length])
    return np.array(X), np.array(y)

# Create sequences with a length of 10
seq_length = 11
X_train2, y_train2 = create_sequences(train_scaled2, seq_length)
X_test2, y_test2 = create_sequences(test_scaled2, seq_length)

# Create data loaders
train_data2 = TensorDataset(torch.tensor(X_train2).float(), torch.tensor(y_train2).float())
test_data2 = TensorDataset(torch.tensor(X_test2).float(), torch.from_numpy(y_test2).float())

batch_size = 64
train_loader2 = DataLoader(train_data2, shuffle=True, batch_size=batch_size)
test_loader2 = DataLoader(test_data2, shuffle=False, batch_size=batch_size)

In [None]:
n_epochs = 600
best_valid_loss = float('inf')

for epoch in range(1, n_epochs + 1):
    train_loss = train(train_loader2)
    valid_loss = evaluate(test_loader2)
    
    #save the best model
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model, './checkpoint/saved_weights2.pt')

    if (epoch % 200 == 0):
        print(f'\tEpoch: {epoch} | ' + f'\tTrain Loss: {train_loss:.5f} | ' + f'\tVal Loss: {valid_loss:.5f}\n')

In [None]:
model=torch.load('./checkpoint/saved_weights2.pt')

In [None]:
X_test2 = torch.tensor(X_test2).float()

with torch.no_grad():
  y_test_pred2 = model(X_test2)
  
y_test_pred2 = y_test_pred2.numpy()[0]

rmse = np.sqrt(mean_squared_error(y_test2, y_test_pred2, multioutput='raw_values'))
mse = mean_squared_error(y_test2, y_test_pred2, multioutput='raw_values')
mae = mean_absolute_error(y_test2, y_test_pred2, multioutput='raw_values')
r2 = r2_score(y_test2, y_test_pred2, multioutput='raw_values')
index = ['RMSE','MSE', 'MAE', 'R2 score']

results = pd.DataFrame([rmse, mse, mae, r2], index=index, columns=['close','high','low','open','volume', 'EMA12', 'EMA26', 'MACD', 'Boillinger Up', 'Boillinger Down'])
results

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=5)
fig.set_figwidth(25)
fig.set_figheight(5)
for i in range(5):
    ax[i].plot(np.arange(y_train.shape[0], y_train.shape[0]+y_test.shape[0]),
         y_test[:,i], color='black', label='test target')

    ax[i].plot(np.arange(y_train.shape[0], y_train.shape[0]+y_test_pred.shape[0]),
         y_test_pred[:,i], color='red', label='test prediction')

    ax[i].set_xlabel('time [days]')
    ax[i].set_ylabel('price')
    ax[i].legend(loc='best')

ax[0].set_title('future close prices')
ax[1].set_title('future high prices')
ax[2].set_title('future low prices')
ax[3].set_title('future open prices')
ax[4].set_title('future volume prices')

# Show EMA12, EMA25, MACD, Boillinger Up and Boillinger Down base one the close price
fig, ax = plt.subplots(nrows=1, ncols=5)
fig.set_figwidth(25)
fig.set_figheight(5)

ax[0].plot(np.arange(y_train.shape[0], y_train.shape[0]+y_test.shape[0]),
          y_test[:,5], color='black', label='test target')

ax[0].plot(np.arange(y_train.shape[0], y_train.shape[0]+y_test_pred.shape[0]),
          y_test_pred[:,5], color='red', label='test prediction')

ax[0].set_xlabel('time [days]')
ax[0].set_ylabel('price')
ax[0].legend(loc='best')
ax[0].set_title('future EMA12 prices')

ax[1].plot(np.arange(y_train.shape[0], y_train.shape[0]+y_test.shape[0]),
          y_test[:,6], color='black', label='test target')

ax[1].plot(np.arange(y_train.shape[0], y_train.shape[0]+y_test_pred.shape[0]),
          y_test_pred[:,6], color='red', label='test prediction')

ax[1].set_xlabel('time [days]')
ax[1].set_ylabel('price')
ax[1].legend(loc='best')
ax[1].set_title('future EMA26 prices')

ax[2].plot(np.arange(y_train.shape[0], y_train.shape[0]+y_test.shape[0]),
          y_test[:,7], color='black', label='test target')  
ax[2].plot(np.arange(y_train.shape[0], y_train.shape[0]+y_test_pred.shape[0]), 
          y_test_pred[:,7], color='red', label='test prediction')
ax[2].set_xlabel('time [days]') 
ax[2].set_ylabel('price')
ax[2].legend(loc='best')
ax[2].set_title('future MACD prices')

ax[3].plot(np.arange(y_train.shape[0], y_train.shape[0]+y_test.shape[0]),
          y_test[:,8], color='black', label='test target')
ax[3].plot(np.arange(y_train.shape[0], y_train.shape[0]+y_test_pred.shape[0]),
          y_test_pred[:,8], color='red', label='test prediction')
ax[3].set_xlabel('time [days]')
ax[3].set_ylabel('price')
ax[3].legend(loc='best')
ax[3].set_title('future Boillinger Up prices')

ax[4].plot(np.arange(y_train.shape[0], y_train.shape[0]+y_test.shape[0]),
          y_test[:,9], color='black', label='test target')
ax[4].plot(np.arange(y_train.shape[0], y_train.shape[0]+y_test_pred.shape[0]),  
          y_test_pred[:,9], color='red', label='test prediction')
ax[4].set_xlabel('time [days]')
ax[4].set_ylabel('price') 
ax[4].legend(loc='best')
ax[4].set_title('future Boillinger Down prices')

plt.show()


In [None]:
# Predicting the next 10 mins
seq_len = 11
sequences = []
for index in range(len(test) - seq_len + 1): 
    sequences.append(test[index: index + seq_len])
sequences = np.array(sequences)

last_sequence = sequences[-1:, 1:, :]
last_sequence = torch.from_numpy(last_sequence).float()

# Generate predictions
PREDICT = 10
with torch.no_grad():
    for i in range(PREDICT):
        pred_i = model(last_sequence)
        last_sequence = torch.cat((last_sequence, pred_i), dim=1)
        last_sequence = last_sequence[:, 1:, :]

pred_mins = last_sequence.squeeze(0).numpy()

# inverse transform the predicted values
pred_mins = scaler.inverse_transform(pred_mins)

df_pred = pd.DataFrame(
    data=pred_mins,
    columns=['<Open>', '<High>', '<Low>', '<Close>', '<Volume>', '<EMA12>', '<EMA26>', '<MACD>', '<Boillinger Up>', '<Boillinger Down>']
)

print(def_pred)