Importing Libraries

In [19]:
import os
import tensorflow as tf
# For data maniplution
import pandas as pd
import numpy as np
# for data visualization
import seaborn as sns
import matplotlib.pyplot as plt
# for confusion Matrix and Train_test
from sklearn.model_selection import train_test_split, KFold, GridSearchCV, TimeSeriesSplit
from sklearn.metrics import mean_absolute_error, classification_report, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import FunctionTransformer
from sklearn.base import clone
from sklearn.pipeline import make_pipeline

import lightgbm as lgbm
from lightgbm import *

from xgboost import XGBClassifier
from xgboost import XGBRegressor
from xgboost import plot_importance

from catboost import CatBoostRegressor

import logging
# Set LightGBM logging level to suppress info logs
lgb_logger = logging.getLogger('lightgbm')
lgb_logger.setLevel(logging.WARNING)

In [2]:
# load train dataset to dataframe
data_train = pd.read_csv('train.csv')

In [39]:
data_test = pd.read_csv('test.csv').drop(['row_id', 'time_id'], axis=1)

In [None]:
# displaying the first 10 rows
data_train.head(10)

###Exploratory Data Analysis

In [None]:
#getting information about the train data
data_train.info()

In [None]:
# getting descriptive statistics about the train data
data_train.describe()

In [None]:
# Shape of the data: number of rows and columns
data_train.shape

In [None]:
# checking for null values
data_train.isna().sum()

In [None]:
# plotting the heatmap for the train dataset
plt.figure(figsize=(16,16))
sns.heatmap(data_train.corr(),annot=True,cmap='crest')
plt.title('Heatmap of the tain dataset')
plt.show()

In [None]:
#Visualize the time series data
plt.figure(figsize=(10, 6))
plt.plot(data_train['target'], label='Target')
plt.title('Time Series Data')
plt.legend()
plt.show()

In [None]:
# dropping all the null values and assigning it to a new dataframe
data_train_wo_na = data_train.dropna(axis=0)

In [None]:
# checking if there is still any null data present
data_train_wo_na.isna().sum()

In [None]:
# This is one more method to deal with the missing values for all the columns with simpleImputer.
from sklearn.impute import SimpleImputer

# Save the original data types
original_dtypes = data_train.dtypes

# Create a SimpleImputer instance
imp_mean = SimpleImputer(strategy='most_frequent')

# Fit and transform the imputer on the original DataFrame
imputed_train_array = imp_mean.fit_transform(data_train)

# Convert the NumPy array back to a DataFrame
imputed_train_df = pd.DataFrame(imputed_train_array, columns=data_train.columns)

# Convert columns back to their original data types
imputed_train_df = imputed_train_df.astype(original_dtypes)

# Now, 'imputed_train_df' is your DataFrame with missing values imputed
# imputed_train_df.head(20)
imputed_train_df.info()



In [3]:
train_df_0 = data_train.copy()
train_df_0.fillna(0, inplace=True)
train_df_0.head(10)

Unnamed: 0,stock_id,date_id,seconds_in_bucket,imbalance_size,imbalance_buy_sell_flag,reference_price,matched_size,far_price,near_price,bid_price,bid_size,ask_price,ask_size,wap,target,time_id,row_id
0,0,0,0,3180602.69,1,0.999812,13380276.64,0.0,0.0,0.999812,60651.5,1.000026,8493.03,1.0,-3.029704,0,0_0_0
1,1,0,0,166603.91,-1,0.999896,1642214.25,0.0,0.0,0.999896,3233.04,1.00066,20605.09,1.0,-5.519986,0,0_0_1
2,2,0,0,302879.87,-1,0.999561,1819368.03,0.0,0.0,0.999403,37956.0,1.000298,18995.0,1.0,-8.38995,0,0_0_2
3,3,0,0,11917682.27,-1,1.000171,18389745.62,0.0,0.0,0.999999,2324.9,1.000214,479032.4,1.0,-4.0102,0,0_0_3
4,4,0,0,447549.96,-1,0.999532,17860614.95,0.0,0.0,0.999394,16485.54,1.000016,434.1,1.0,-7.349849,0,0_0_4
5,5,0,0,0.0,0,1.000635,13552875.92,0.0,0.0,0.999779,1962.72,1.000635,5647.65,1.0,6.779432,0,0_0_5
6,6,0,0,969969.4,1,1.000115,3647503.98,0.0,0.0,0.999506,6663.16,1.000283,3810.48,1.0,-2.499819,0,0_0_6
7,7,0,0,9412959.1,1,0.999818,21261245.87,0.0,0.0,0.999741,5139.2,1.00013,2570.6,1.0,-1.959801,0,0_0_7
8,8,0,0,2394875.85,1,0.999916,9473209.08,0.0,0.0,0.999022,52011.6,1.000041,2169.36,1.0,-5.970001,0,0_0_8
9,9,0,0,3039700.65,-1,1.000969,6248958.45,0.0,0.0,0.999354,6191.0,1.000646,6199.0,1.0,7.970333,0,0_0_9


In [12]:
train_df_mean = data_train.copy()
train_df_mean = train_df_mean.drop(['row_id', 'time_id'], axis = 1)
train_df_mean.fillna(train_df_mean.mean(), inplace=True)
train_df_mean.head(10)

Unnamed: 0,stock_id,date_id,seconds_in_bucket,imbalance_size,imbalance_buy_sell_flag,reference_price,matched_size,far_price,near_price,bid_price,bid_size,ask_price,ask_size,wap,target
0,0,0,0,3180602.69,1,0.999812,13380276.64,1.001713,0.99966,0.999812,60651.5,1.000026,8493.03,1.0,-3.029704
1,1,0,0,166603.91,-1,0.999896,1642214.25,1.001713,0.99966,0.999896,3233.04,1.00066,20605.09,1.0,-5.519986
2,2,0,0,302879.87,-1,0.999561,1819368.03,1.001713,0.99966,0.999403,37956.0,1.000298,18995.0,1.0,-8.38995
3,3,0,0,11917682.27,-1,1.000171,18389745.62,1.001713,0.99966,0.999999,2324.9,1.000214,479032.4,1.0,-4.0102
4,4,0,0,447549.96,-1,0.999532,17860614.95,1.001713,0.99966,0.999394,16485.54,1.000016,434.1,1.0,-7.349849
5,5,0,0,0.0,0,1.000635,13552875.92,1.001713,0.99966,0.999779,1962.72,1.000635,5647.65,1.0,6.779432
6,6,0,0,969969.4,1,1.000115,3647503.98,1.001713,0.99966,0.999506,6663.16,1.000283,3810.48,1.0,-2.499819
7,7,0,0,9412959.1,1,0.999818,21261245.87,1.001713,0.99966,0.999741,5139.2,1.00013,2570.6,1.0,-1.959801
8,8,0,0,2394875.85,1,0.999916,9473209.08,1.001713,0.99966,0.999022,52011.6,1.000041,2169.36,1.0,-5.970001
9,9,0,0,3039700.65,-1,1.000969,6248958.45,1.001713,0.99966,0.999354,6191.0,1.000646,6199.0,1.0,7.970333


In [17]:
train_df_mode = data_train.copy()
train_df_mode = train_df_mode.drop(['row_id', 'time_id'], axis = 1)
mode_values = train_df_mode.mode().iloc[0]
train_df_mode.fillna(mode_values, inplace=True)
train_df_mode.head(10)

Unnamed: 0,stock_id,date_id,seconds_in_bucket,imbalance_size,imbalance_buy_sell_flag,reference_price,matched_size,far_price,near_price,bid_price,bid_size,ask_price,ask_size,wap,target
0,0,0,0,3180602.69,1,0.999812,13380276.64,1.0,1.0,0.999812,60651.5,1.000026,8493.03,1.0,-3.029704
1,1,0,0,166603.91,-1,0.999896,1642214.25,1.0,1.0,0.999896,3233.04,1.00066,20605.09,1.0,-5.519986
2,2,0,0,302879.87,-1,0.999561,1819368.03,1.0,1.0,0.999403,37956.0,1.000298,18995.0,1.0,-8.38995
3,3,0,0,11917682.27,-1,1.000171,18389745.62,1.0,1.0,0.999999,2324.9,1.000214,479032.4,1.0,-4.0102
4,4,0,0,447549.96,-1,0.999532,17860614.95,1.0,1.0,0.999394,16485.54,1.000016,434.1,1.0,-7.349849
5,5,0,0,0.0,0,1.000635,13552875.92,1.0,1.0,0.999779,1962.72,1.000635,5647.65,1.0,6.779432
6,6,0,0,969969.4,1,1.000115,3647503.98,1.0,1.0,0.999506,6663.16,1.000283,3810.48,1.0,-2.499819
7,7,0,0,9412959.1,1,0.999818,21261245.87,1.0,1.0,0.999741,5139.2,1.00013,2570.6,1.0,-1.959801
8,8,0,0,2394875.85,1,0.999916,9473209.08,1.0,1.0,0.999022,52011.6,1.000041,2169.36,1.0,-5.970001
9,9,0,0,3039700.65,-1,1.000969,6248958.45,1.0,1.0,0.999354,6191.0,1.000646,6199.0,1.0,7.970333


In [None]:
imputed_train_df.isna().sum()

In [None]:
imputed_train_df.duplicated().sum()

In [None]:
# checking outliers in the data
plt.figure(figsize=(6,6))
plt.title("Boxplot to detect outlier in the train data", fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
sns.boxplot(x=imputed_train_df['target'])
plt.show()

In [None]:
def find_outliers(dataframe, attribute):
    #Determine the number of rows containing outliers
    # compute the 25th percentile value in target
    percentile25 = dataframe[attribute].quantile(0.25)

    # compute the 75th percentile value in target
    percentile75 = dataframe[attribute].quantile(0.75)

    #compute the interquatile range in target
    iqr = percentile75 - percentile25

    #define upper limit and lower limit for non-outlier values
    upper_limit = percentile75 + (1.5 * iqr)
    lower_limit = percentile25 - (1.5 * iqr)
    print("Upper Limit:", upper_limit)
    print("Lower_Limit:", lower_limit)

    #identify the subset pf data containing outliers in target
    outliers = dataframe[(dataframe[attribute] > upper_limit) | (dataframe[attribute] < lower_limit)]

    # Count how many rows in the data contain outliers in `target`
    print("Number of rows in the data containing outliers in ", attribute, ":", len(outliers))


In [None]:
find_outliers(imputed_train_df, "target")

In [None]:
find_outliers(imputed_train_df, 'wap')


In [None]:
# # Log transformation
imputed_log_train_df = imputed_train_df.copy()
imputed_log_train_df['target'] = np.log1p(pd.to_numeric(imputed_log_train_df['target'], errors='coerce'))
# data_train['target']= np.log1p(data_train['target'])

In [23]:
train_df_0_log = train_df_0.copy()
train_df_0_log['target'] = np.log1p(pd.to_numeric(train_df_0_log['target'], errors='coerce'))


  result = getattr(ufunc, method)(*inputs, **kwargs)


In [26]:
from sklearn.preprocessing import RobustScaler

# Apply robust scaling
scaler = RobustScaler()

In [None]:
imputed_rob_train_df = imputed_train_df.copy()
imputed_rob_train_df['target']= scaler.fit_transform(imputed_rob_train_df['target'].values.reshape(-1, 1))

In [27]:
train_df_0_rob = train_df_0.copy()
train_df_0_rob['target']= scaler.fit_transform(train_df_0_rob['target'].values.reshape(-1, 1))


In [None]:
find_outliers(imputed_log_train_df, 'target')

In [None]:
find_outliers(imputed_rob_train_df, 'target')

In [None]:
imputed_rob_train_df.nunique()

In [None]:
imputed_log_train_df.nunique()

In [None]:
print(data_train['imbalance_buy_sell_flag'].value_counts())

In [None]:
imputed_train_df = imputed_train_df.drop(['row_id', 'time_id'], axis = 1)
imputed_log_train_df = imputed_log_train_df.drop(['row_id', 'time_id'], axis = 1)
imputed_rob_train_df = imputed_rob_train_df.drop(['row_id', 'time_id'], axis = 1)

In [4]:
train_df_0 = train_df_0.drop(['row_id', 'time_id'], axis = 1)

In [7]:
# imputed_train_df_X = imputed_train_df[~imputed_train_df.target.isna()]
# imputed_train_df_Y = imputed_train_df_X.pop('target')

# imputed_log_train_df_X = imputed_log_train_df[~imputed_log_train_df.target.isna()]
# imputed_log_train_df_Y = imputed_log_train_df_X.pop('target')

# imputed_rob_train_df_X = imputed_rob_train_df[~imputed_rob_train_df.target.isna()]
# imputed_rob_train_df_Y = imputed_rob_train_df_X.pop('target')

seed = 69
tss = TimeSeriesSplit(10)

os.environ['PYTHONHASHSEED'] = '69'
tf.keras.utils.set_random_seed(seed)

In [33]:
train_df_0_X = train_df_0[~train_df_0.target.isna()]
train_df_0_Y = train_df_0_X.pop('target')

In [13]:
def imbalance_calculator(x):
    
    list_of_features = ['seconds_in_bucket', 'imbalance_buy_sell_flag', 'imbalance_size', 'matched_size', 'bid_size', 'ask_size',
                'reference_price','far_price', 'near_price', 'ask_price', 'bid_price', 'wap', 'imb_s1', 'imb_s2']
    
    x_copy = x.copy()
    
    x_copy['imb_s1'] = x.eval('(bid_size - ask_size) / (bid_size + ask_size)')
    x_copy['imb_s2'] = x.eval('(imbalance_size - matched_size) / (matched_size + imbalance_size)')
    
    list_of_prices = ['reference_price','far_price', 'near_price', 'ask_price', 'bid_price', 'wap']
    
    for i,a in enumerate(list_of_prices):
        for j,b in enumerate(list_of_prices):
            if i>j:
                x_copy[f'{a}_{b}_imb'] = x.eval(f'({a} - {b}) / ({a} + {b})')
                list_of_features.append(f'{a}_{b}_imb')
                    
    for i,a in enumerate(list_of_prices):
        for j,b in enumerate(list_of_prices):
            for k,c in enumerate(list_of_prices):
                if i>j and j>k:
                    max_ = x[[a,b,c]].max(axis=1)
                    min_ = x[[a,b,c]].min(axis=1)
                    mid_ = x[[a,b,c]].sum(axis=1)-min_-max_

                    x_copy[f'{a}_{b}_{c}_imb2'] = (max_-mid_)/(mid_-min_)
                    list_of_features.append(f'{a}_{b}_{c}_imb2')
    
    return x_copy[list_of_features]

ImbalanceCalculator = FunctionTransformer(imbalance_calculator)

In [None]:
def train_predict_impute(estimator, cv = tss, label = ''):
    
    X = data_train[~data_train.target.isna()].drop(['row_id', 'time_id'], axis = 1)
    y = X.pop('target')
    
    #initiate prediction arrays and score lists
    val_predictions = np.zeros((len(X)))
    #train_predictions = np.zeros((len(sample)))
    train_scores, val_scores = [], []
    
    #training model, predicting prognosis probability, and evaluating metrics
    for fold, (train_idx, val_idx) in enumerate(cv.split(X, y)):
        
        model = clone(estimator)
        
        #define train set
        X_train = X.iloc[train_idx]
        y_train = y.iloc[train_idx]
        
        #define validation set
        X_val = X.iloc[val_idx]
        y_val = y.iloc[val_idx]
        
        #train model
        model.fit(X_train, y_train)
        
        #make predictions
        train_preds = model.predict(X_train)
        val_preds = model.predict(X_val)
                  
        val_predictions[val_idx] += val_preds
        
        #evaluate model for a fold
        train_score = mean_absolute_error(y_train, train_preds)
        val_score = mean_absolute_error(y_val, val_preds)
        
        #append model score for a fold to list
        train_scores.append(train_score)
        val_scores.append(val_score)
    print('Imputed Train Dataframe')
    print(f'Val Score: {np.mean(val_scores):.5f} ± {np.std(val_scores):.5f} | Train Score: {np.mean(train_scores):.5f} ± {np.std(train_scores):.5f} | {label}')
    
    return val_scores, val_predictions

In [35]:
def train_predict_0(estimator, cv = tss, label = ''):
    
    X = train_df_0[~train_df_0.target.isna()]
    y = X.pop('target')
    
    #initiate prediction arrays and score lists
    val_predictions = np.zeros((len(X)))
    #train_predictions = np.zeros((len(sample)))
    train_scores, val_scores = [], []
    
    #training model, predicting prognosis probability, and evaluating metrics
    for fold, (train_idx, val_idx) in enumerate(cv.split(X, y)):
        
        model = clone(estimator)
        
        #define train set
        X_train = X.iloc[train_idx]
        y_train = y.iloc[train_idx]
        
        #define validation set
        X_val = X.iloc[val_idx]
        y_val = y.iloc[val_idx]
        
        #train model
        model.fit(X_train, y_train)
        
        #make predictions
        train_preds = model.predict(X_train)
        val_preds = model.predict(X_val)
                  
        val_predictions[val_idx] += val_preds
        
        #evaluate model for a fold
        train_score = mean_absolute_error(y_train, train_preds)
        val_score = mean_absolute_error(y_val, val_preds)
        
        #append model score for a fold to list
        train_scores.append(train_score)
        val_scores.append(val_score)
    print('Train Dataframe with NA replaced with 0')
    print(f'Val Score: {np.mean(val_scores):.5f} ± {np.std(val_scores):.5f} | Train Score: {np.mean(train_scores):.5f} ± {np.std(train_scores):.5f} | {label}')
    
    return val_scores, val_predictions

In [14]:
def train_predict_mean(estimator, cv = tss, label = ''):
    
    X = train_df_mean[~train_df_mean.target.isna()]
    y = X.pop('target')
    
    #initiate prediction arrays and score lists
    val_predictions = np.zeros((len(X)))
    #train_predictions = np.zeros((len(sample)))
    train_scores, val_scores = [], []
    
    #training model, predicting prognosis probability, and evaluating metrics
    for fold, (train_idx, val_idx) in enumerate(cv.split(X, y)):
        
        model = clone(estimator)
        
        #define train set
        X_train = X.iloc[train_idx]
        y_train = y.iloc[train_idx]
        
        #define validation set
        X_val = X.iloc[val_idx]
        y_val = y.iloc[val_idx]
        
        #train model
        model.fit(X_train, y_train)
        
        #make predictions
        train_preds = model.predict(X_train)
        val_preds = model.predict(X_val)
                  
        val_predictions[val_idx] += val_preds
        
        #evaluate model for a fold
        train_score = mean_absolute_error(y_train, train_preds)
        val_score = mean_absolute_error(y_val, val_preds)
        
        #append model score for a fold to list
        train_scores.append(train_score)
        val_scores.append(val_score)
    print('Train Dataframe with NA replaced with mean')
    print(f'Val Score: {np.mean(val_scores):.5f} ± {np.std(val_scores):.5f} | Train Score: {np.mean(train_scores):.5f} ± {np.std(train_scores):.5f} | {label}')
    
    return val_scores, val_predictions

In [18]:
def train_predict_mode(estimator, cv = tss, label = ''):
    
    X = train_df_mode[~train_df_mode.target.isna()]
    y = X.pop('target')
    
    #initiate prediction arrays and score lists
    val_predictions = np.zeros((len(X)))
    #train_predictions = np.zeros((len(sample)))
    train_scores, val_scores = [], []
    
    #training model, predicting prognosis probability, and evaluating metrics
    for fold, (train_idx, val_idx) in enumerate(cv.split(X, y)):
        
        model = clone(estimator)
        
        #define train set
        X_train = X.iloc[train_idx]
        y_train = y.iloc[train_idx]
        
        #define validation set
        X_val = X.iloc[val_idx]
        y_val = y.iloc[val_idx]
        
        #train model
        model.fit(X_train, y_train)
        
        #make predictions
        train_preds = model.predict(X_train)
        val_preds = model.predict(X_val)
                  
        val_predictions[val_idx] += val_preds
        
        #evaluate model for a fold
        train_score = mean_absolute_error(y_train, train_preds)
        val_score = mean_absolute_error(y_val, val_preds)
        
        #append model score for a fold to list
        train_scores.append(train_score)
        val_scores.append(val_score)
    print('Train Dataframe with NA replaced with mode')
    print(f'Val Score: {np.mean(val_scores):.5f} ± {np.std(val_scores):.5f} | Train Score: {np.mean(train_scores):.5f} ± {np.std(train_scores):.5f} | {label}')
    
    return val_scores, val_predictions

In [36]:
models = [
    # ('XGBoost', XGBRegressor(random_state = seed, objective = 'reg:absoluteerror', tree_method = 'gpu_hist', missing = np.nan)),
    ('LightGBM', LGBMRegressor(random_state = seed, objective = 'mae', device_type = 'gpu')),
    # ('CatBoost', CatBoostRegressor(random_state = seed, objective = 'MAE', verbose = 0))
]

for (label, model) in models:
    ans = train_predict_0(
        make_pipeline(
            ImbalanceCalculator,
            model
        ),
        label = label
    )

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 12043
[LightGBM] [Info] Number of data points in the train set: 476180, number of used features: 49
[LightGBM] [Info] Using GPU Device: Intel(R) UHD Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 49 dense feature groups (23.61 MB) transferred to GPU in 0.036600 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score -0.060201
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 12043
[LightGBM] [Info] Number of data points in the train set: 952360, number of used features: 49
[LightGBM] [Info] Using GPU Device: Intel(R) UHD Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8


In [89]:

model = make_pipeline(
    ImbalanceCalculator,
    LGBMRegressor(random_state = seed, objective = 'mae', device_type = 'gpu', n_estimators=500)
)

model.fit(train_df_0_X, train_df_0_Y)

sample_prediction = pd.read_csv('sample_submission.csv')
sample_prediction['target'] = model.predict(data_test)
    

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 12043
[LightGBM] [Info] Number of data points in the train set: 5237980, number of used features: 49
[LightGBM] [Info] Using GPU Device: Intel(R) UHD Graphics, Vendor: Intel(R) Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 49 dense feature groups (259.76 MB) transferred to GPU in 0.392861 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score -0.060201


In [53]:
print(sample_prediction)

       time_id       row_id    target
0        26290      478_0_0 -1.026933
1        26290      478_0_1  2.105765
2        26290      478_0_2  2.651761
3        26290      478_0_3 -1.230203
4        26290      478_0_4 -1.264654
...        ...          ...       ...
32995    26454  480_540_195 -1.502805
32996    26454  480_540_196 -1.918891
32997    26454  480_540_197  0.124417
32998    26454  480_540_198  1.337283
32999    26454  480_540_199 -2.973901

[33000 rows x 3 columns]


In [75]:
sample_prediction.isna().sum()

time_id    0
row_id     0
target     0
dtype: int64

In [97]:
revealed_data = pd.read_csv('revealed_targets.csv')

In [80]:
revealed_data.isna().sum()

stock_id             0
date_id              0
seconds_in_bucket    0
time_id              0
revealed_target      0
revealed_date_id     0
revealed_time_id     0
dtype: int64

In [98]:
revealed_data_target = revealed_data['revealed_target']
revealed_data_target.head()

0    -2.310276
1   -12.850165
2    -0.439882
3     7.259846
4     4.780292
Name: revealed_target, dtype: float64

In [104]:
predicted_values = sample_prediction['target']


In [105]:
iqbal = pd.read_csv('C:/Users/avani/Downloads/submission.csv')
iqbaltarget = iqbal['target']


In [106]:
mae = mean_absolute_error(iqbaltarget, revealed_data_target)

# Print MAE
print(f'Mean Absolute Error: {mae:.5f}')

Mean Absolute Error: 5.49364


In [107]:
mae = mean_absolute_error(revealed_data_target, predicted_values)

# Print MAE
print(f'Mean Absolute Error: {mae:.5f}')

Mean Absolute Error: 5.81679


In [108]:
mae = mean_absolute_error(iqbaltarget, predicted_values)

# Print MAE
print(f'Mean Absolute Error: {mae:.5f}')

Mean Absolute Error: 1.77453
