In [16]:
import tensorflow as tf
from datetime import datetime, timedelta
import pandas as pd
from sklearn.preprocessing import StandardScaler
import numpy as np
from IPython.display import clear_output
import os
import traceback
from tensorflow.keras import layers, models

In [17]:
class TransformerModel(tf.keras.Model):
    """Encoder-decoder Transformer with an LSTM + softmax classification head.

    ``call`` receives a pair ``(inputs, outputs)``. ``inputs`` goes through the
    encoder stack, ``outputs`` goes through the decoder stack (masked
    self-attention, then cross-attention over the encoder result), and the
    decoder sequence is summarised by an LSTM followed by a softmax layer.

    Args:
        input_dim: Feature count of the encoder input (only used as a shape hint).
        output_dim: Feature count of the decoder input (only used as a shape hint).
        output_dim_s: Number of classes produced by the softmax head.
        d_model: Width of the embeddings and of every attention block.
        num_heads: Number of attention heads per attention layer.
        dff: Hidden width of the position-wise feed-forward blocks.
        max_seq_len: Sequence length of the encoder input.
        max_seq_len_out: Sequence length of the decoder input.
        num_layers: Number of encoder blocks and of decoder blocks.
        rate: Accepted for interface compatibility; no dropout layer is
            created, so it currently has no effect.

    The positional encodings have a fixed length and are added directly to the
    embeddings, so the sequence lengths passed to ``call`` must match
    ``max_seq_len`` / ``max_seq_len_out``.
    """

    def __init__(self,
                 input_dim,
                 output_dim,
                 output_dim_s,
                 d_model,
                 num_heads,
                 dff,
                 max_seq_len,
                 max_seq_len_out,
                 num_layers,
                 rate
                ):
        super().__init__()
        self.d_model = d_model
        self.num_heads = num_heads
        self.num_layers = num_layers
        self.input_shapes = max_seq_len
        # The decoder length now honours max_seq_len_out (it used to be
        # silently replaced by max_seq_len).
        self.output_shapes = max_seq_len_out
        self.input_embedding = tf.keras.layers.Dense(d_model, input_shape=(
            max_seq_len, input_dim))

        self.positional_encoding_in = self._get_positional_encoding(
            max_seq_len, d_model
        )

        # Encoder blocks: self-attention -> add & norm -> feed-forward -> add & norm.
        self.multihead_attentions = [tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
                                     for _ in range(num_layers)]
        self.add_norm_1_layers = [tf.keras.layers.LayerNormalization(epsilon=1e-6)
                                  for _ in range(num_layers)]
        self.feed_forward_layers = [self._get_feed_forward(dff=dff, d_model=d_model)
                                     for _ in range(num_layers)]
        self.add_norm_2_layers = [tf.keras.layers.LayerNormalization(epsilon=1e-6)
                                  for _ in range(num_layers)]

        # Decoder blocks: masked self-attention -> cross-attention -> feed-forward,
        # each followed by add & norm.
        self.output_embedding = tf.keras.layers.Dense(
            d_model, input_shape=(
                max_seq_len_out,
                output_dim
            )
        )
        self.positional_encoding_out = self._get_positional_encoding(
            max_seq_len_out, d_model
        )
        self.multihead_attentions_dec_mask = [tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
                                     for _ in range(num_layers)]
        self.add_norm_1_layers_dec = [tf.keras.layers.LayerNormalization(epsilon=1e-6)
                                  for _ in range(num_layers)]
        self.multihead_attentions_dec = [tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
                                     for _ in range(num_layers)]
        self.add_norm_2_layers_dec = [tf.keras.layers.LayerNormalization(epsilon=1e-6)
                                  for _ in range(num_layers)]
        self.feed_forward_layers_dec = [self._get_feed_forward(dff=dff, d_model=d_model)
                                     for _ in range(num_layers)]
        self.add_norm_3_layers_dec = [tf.keras.layers.LayerNormalization(epsilon=1e-6)
                                  for _ in range(num_layers)]

        # Classification head applied to the decoder output sequence.
        self.final_layer = tf.keras.Sequential([
            tf.keras.layers.Input(shape=(max_seq_len_out, d_model)),
            tf.keras.layers.LSTM(128),
            tf.keras.layers.Dense(output_dim_s, activation='softmax')
        ])

    def _get_positional_encoding(self, max_seq_len, d_model):
        """Return a sinusoidal encoding of shape ``(1, max_seq_len, d_model)``.

        The sine terms fill the first half of the last axis and the cosine
        terms the second half (they are concatenated, not interleaved).
        """
        position = tf.range(max_seq_len, dtype=tf.float32)[:, tf.newaxis]
        div_term = tf.exp(tf.range(0, d_model, 2, dtype=tf.float32) * -(tf.math.log(10000.0) / d_model))
        sin = tf.math.sin(position * div_term)
        cos = tf.math.cos(position * div_term)
        # For odd d_model the concatenation is one column too wide; trimming
        # keeps it addable to the d_model-wide embeddings (no-op when even).
        pos_encoding = tf.concat([sin, cos], axis=-1)[:, :d_model]
        pos_encoding = pos_encoding[tf.newaxis, ...]
        return tf.cast(pos_encoding, dtype=tf.float32)

    def _get_feed_forward(self, dff, d_model):
        """Build the two-layer position-wise feed-forward block (ReLU, then linear)."""
        return tf.keras.Sequential([
            tf.keras.layers.Dense(dff, activation='relu'),
            tf.keras.layers.Dense(d_model)
        ])

    def create_attention_mask( self, seq_len):
        """Return a ``(seq_len, seq_len)`` lower-triangular matrix of ones."""
        mask = tf.linalg.band_part(tf.ones((seq_len, seq_len)), -1, 0)
        return mask

    def call(self, inputs):
        """Run the encoder on ``inputs[0]`` and the decoder on ``inputs[1]``.

        Returns the softmax output of the classification head, one row per
        batch element.
        """
        inputs, outputs = inputs

        # ---- encoder ----
        input_embed = self.input_embedding(inputs)
        pos_encoding_in = self.positional_encoding_in[:, :self.input_shapes, :]
        x = input_embed + pos_encoding_in
        for i in range(self.num_layers):
            multihead_out = self.multihead_attentions[i](x, x)
            add_norm_1_out = self.add_norm_1_layers[i](x + multihead_out)
            feed_forward_out = self.feed_forward_layers[i](add_norm_1_out)
            x = self.add_norm_2_layers[i](add_norm_1_out + feed_forward_out)
        # Name the encoder result explicitly instead of relying on a variable
        # that only exists if the loop above ran at least once.
        encoder_out = x

        # ---- decoder ----
        # NOTE(review): the lower-triangular mask is transposed here, so query
        # position t attends to key positions >= t. A conventional causal mask
        # would be the un-transposed matrix. Kept as-is so trained behaviour
        # does not change; confirm this is intended.
        attention_mask = tf.transpose(self.create_attention_mask(self.output_shapes))
        # Two leading axes so the mask broadcasts over batch and heads.
        attention_mask = tf.expand_dims(attention_mask, axis=0)
        attention_mask = tf.expand_dims(attention_mask, axis=0)

        out_embed = self.output_embedding(outputs)
        post_encoding_out = self.positional_encoding_out[:, :self.output_shapes, :]
        y = out_embed + post_encoding_out
        for i in range(self.num_layers):
            multihead_out_dec_mask = self.multihead_attentions_dec_mask[i](
                query = y, key = y, value = y
                , attention_mask = attention_mask
            )
            add_norm_1_out_dec = self.add_norm_1_layers_dec[i](y + multihead_out_dec_mask)
            multihead_out_dec = self.multihead_attentions_dec[i](
                query = add_norm_1_out_dec, key = encoder_out, value = encoder_out
            )
            add_norm_2_out_dec = self.add_norm_2_layers_dec[i]( add_norm_1_out_dec + multihead_out_dec)
            feed_forward_out_dec = self.feed_forward_layers_dec[i](add_norm_2_out_dec)
            y = self.add_norm_3_layers_dec[i](add_norm_2_out_dec + feed_forward_out_dec)
        final = self.final_layer(y)
        return final

In [18]:
#preopen market predictions
def data_maker_pom_test (i, lookback_days = 360):
    """Load one symbol's CSV and standardise its columns day by day.

    Args:
        i: Path of the CSV file. Its first column is used as the index and it
            must contain a ``timestamp`` column formatted
            ``'%Y-%m-%d %H:%M:%S'`` and a ``pl`` column.
        lookback_days: Number of most recent distinct dates to keep
            (default 360, the value that used to be hard-coded).

    Returns:
        A tuple ``(dff, times, df)``:
        ``dff`` holds every non-timestamp column standardised separately for
        each date, with ``pl`` then replaced by its raw values;
        ``times`` is a one-column frame with the kept timestamps;
        ``df`` is the kept (unscaled) slice of the file with a fresh 0..n-1 index.

    Raises:
        IndexError: If the file has fewer than ``lookback_days`` distinct dates.
    """
    df = pd.read_csv(i, index_col = [0])

    # Distinct dates in order of first appearance.
    trading_days = df.timestamp.apply(lambda x: x.split(' ')[0]).unique()
    if not 1 <= lookback_days <= len(trading_days):
        raise IndexError(
            f"{i}: need {lookback_days} distinct dates, found {len(trading_days)}"
        )
    first_day = trading_days[-lookback_days]

    # NOTE(review): the index label of the first matching row is used as a
    # position, which assumes the CSV index is 0..n-1 -- confirm for all files.
    start = df[df.timestamp.str.contains(first_day)].iloc[0].name
    df = df.iloc[start:].reset_index(drop = True)

    times = pd.DataFrame(df['timestamp'])
    pl = pd.DataFrame(df['pl'])

    st = datetime.strptime(df.timestamp.iloc[ 0 ], '%Y-%m-%d %H:%M:%S')
    et = datetime.strptime(df.timestamp.iloc[-1 ], '%Y-%m-%d %H:%M:%S')

    # Walk the calendar one day at a time; dates without rows are skipped.
    # Frames are collected and concatenated once rather than growing a
    # DataFrame inside the loop.
    scaled_days = []
    while st <= et + timedelta(days = 1):
        day = st.strftime('%Y-%m-%d')
        st += timedelta(days = 1)
        tmp = df[df['timestamp'].str.contains(day)].drop(columns = ['timestamp'])
        if len(tmp) == 0:
            continue
        # A fresh scaler per date: each day is standardised on its own statistics.
        scaled = StandardScaler().fit_transform(tmp)
        scaled_days.append(pd.DataFrame(scaled, columns = list(tmp.columns)))

    dff = pd.concat(scaled_days, ignore_index = True) if scaled_days else pd.DataFrame()
    # Restore the raw (unscaled) pl values.
    dff['pl'] = pl['pl']
    return dff,times,df

def prepare_data_com_pom(df,i,times,timesteps):
    """Collect, for every date, the last ``timesteps + 1`` rows of ``df``.

    Args:
        df: Feature frame containing a ``pl`` column. A ``class`` column
            (1 where ``pl > 0``, else 0) is added to it in place.
        i: Unused; kept for interface compatibility.
        times: Frame with a ``timestamp`` column of ``'YYYY-MM-DD ...'``
            strings whose index labels line up with ``df``'s index.
        timesteps: Number of rows to take before each date's last row.

    Returns:
        ``(fd, col, tdf)`` where ``fd`` is the concatenation of the per-date
        windows with a fresh index, ``col`` is ``fd.columns`` and ``tdf``
        holds the last ``times`` row of every date that contributed a window.

    Dates without rows are skipped. Dates whose window would hold fewer than
    ``timesteps + 1`` rows are skipped too, so every window in ``fd`` has the
    same length.
    """
    # Direction label; written into the caller's frame like the original code did.
    df['class'] = (df['pl'] > 0).astype(int)
    col = list(df.columns)
    fdd = df.copy()

    first_day = datetime.strptime(times.iloc[0].values[0].split(' ')[0], '%Y-%m-%d')
    last_day = datetime.strptime(times.iloc[-1].values[0].split(' ')[0], '%Y-%m-%d')

    windows = []
    day_ends = []
    day = first_day
    while day <= last_day:
        day_rows = times[times['timestamp'].str.contains(day.strftime('%Y-%m-%d'))]
        day += timedelta(days = 1)
        if day_rows.empty:
            # No data for this calendar date (weekend, holiday, ...).
            continue
        ind = day_rows.iloc[-1].name
        window = fdd.loc[ind - timesteps:ind, col]
        if len(window) != timesteps + 1:
            # Not enough history before this date's last row.
            continue
        day_ends.append(day_rows.iloc[[-1]])
        windows.append(window)

    if windows:
        fd = pd.concat(windows).reset_index(drop = True)
        tdf = pd.concat(day_ends)
    else:
        fd = pd.DataFrame(columns = col)
        tdf = pd.DataFrame(columns = ['timestamp'])
    return fd, fd.columns, tdf

def plc_pom(fd):
    """Bucket ``fd['pl']`` into signed magnitude classes and store them in ``fd['plc']``.

    Percentile bounds are computed from ``fd['pl']`` itself:
    ``range_50`` is the [16th, 84th] percentile pair, ``range_75`` the
    [3rd, 97th] pair and ``range_100`` the [min, max] pair. (The variable
    names do not match the share of data each pair actually covers.)

    Classes, evaluated in this order (first match wins):
         1:  0 < pl <= range_50[1]
        -1:  range_50[0] <= pl < 0
         2:  range_50[1] < pl < range_75[1]
        -2:  range_75[0] < pl < range_50[0]
         3:  pl >= range_75[1]
        -3:  pl <= range_75[0]
         0:  anything else (e.g. pl == 0 or NaN)

    Values exactly equal to ``range_50[0]``, ``range_75[0]`` or
    ``range_75[1]`` used to match no branch and were labelled 0; the bounds
    are now inclusive so only genuinely neutral values get class 0.

    Args:
        fd: DataFrame with a numeric ``pl`` column; modified in place.

    Returns:
        ``(fd, (range_50, range_75, range_100))``.
    """
    data = fd['pl']
    range_50 = np.percentile(data, [16, 84])
    range_75 = np.percentile(data, [3, 97])
    range_100 = np.percentile(data, [0, 100])

    inner_lo, inner_hi = range_50
    outer_lo, outer_hi = range_75

    def _bucket(x):
        if 0 < x <= inner_hi:
            return 1
        if inner_lo <= x < 0:
            return -1
        if inner_hi < x < outer_hi:
            return 2
        if outer_lo < x < inner_lo:
            return -2
        if x >= outer_hi:
            return 3
        if x <= outer_lo:
            return -3
        return 0

    fd['plc'] = fd['pl'].apply(_bucket)
    return fd, (range_50,range_75,range_100)

def plc_pom_4(fd):
    """Four-level variant of the pl bucketing; writes the result to ``fd['plc']``.

    Bounds come from percentiles of ``fd['pl']``: ``range_45`` is the
    [27.5th, 72.5th] pair, ``range_50`` the [16th, 84th] pair, ``range_75``
    the [3rd, 97th] pair and ``range_100`` the [min, max] pair.

    Classes are tested in the order 1, -1, 2, -2, 3, -3, 4, -4 and the first
    match wins; a value matching none of them gets 0.

    Args:
        fd: DataFrame with a numeric ``pl`` column; modified in place.

    Returns:
        ``(fd, (range_50, range_75, range_100))`` -- ``range_45`` is not
        part of the returned tuple.
    """
    pl_series = fd['pl']
    range_45 = np.percentile(pl_series, [27.5, 72.5])
    range_50 = np.percentile(pl_series, [16, 84])
    range_75 = np.percentile(pl_series, [3, 97])
    range_100 = np.percentile(pl_series, [0, 100])

    def _grade(value):
        # Guard clauses in the same priority order as the class list above.
        if 0 < value <= range_45[1]:
            return 1
        if range_45[0] < value < 0:
            return -1
        if range_45[1] <= value < range_50[1]:
            return 2
        if range_50[0] < value <= range_45[0]:
            return -2
        if range_50[1] <= value < range_75[1]:
            return 3
        if range_75[0] < value <= range_50[0]:
            return -3
        if value >= range_75[1]:
            return 4
        if value <= range_75[0]:
            return -4
        return 0

    fd['plc'] = pl_series.apply(_grade)
    return fd, (range_50,range_75,range_100)

def splitter_com(df,timesteps):
    """Cut ``df`` into consecutive, non-overlapping windows of ``timesteps + 1`` rows.

    Rows are taken by position, so the result does not depend on ``df``'s
    index labels. Trailing rows that do not fill a whole window are dropped.

    Args:
        df: DataFrame containing the columns ``class``, ``pl`` and ``plc``;
            every other column is treated as a feature.
        timesteps: Window length minus one.

    Returns:
        ``(xx, yy, pll, plcc)`` as float64 arrays of shape
        ``(n_windows, timesteps + 1, k)`` where ``k`` is the number of feature
        columns for ``xx`` and 1 for the other three (class, pl, plc).
    """
    window = timesteps + 1
    n_windows = len(df) // window
    used_rows = n_windows * window

    def _stack(frame):
        # One reshape instead of appending window by window.
        values = frame.iloc[:used_rows].to_numpy(dtype = np.float64)
        return values.reshape(n_windows, window, frame.shape[1])

    xx = _stack(df.drop(columns = ['class','pl','plc']))
    yy = _stack(df[['class']])
    pll = _stack(df[['pl']])
    plcc = _stack(df[['plc']])
    return xx,yy,pll,plcc

def _left_padded_suffixes(arr, timesteps, pad_value):
    """Expand every window of ``arr`` into its ``timesteps + 1`` left-padded suffixes.

    For each sample and each ``j`` in ``1 .. timesteps + 1`` the last ``j``
    steps are kept and the front is filled with ``pad_value`` up to length
    ``timesteps + 1``. Output rows are ordered sample by sample, shortest
    suffix first; the result is float64 of shape
    ``(len(arr) * (timesteps + 1), timesteps + 1, arr.shape[2])``.
    """
    length = timesteps + 1
    if arr.shape[1] < length:
        raise ValueError(
            f"windows have {arr.shape[1]} steps, expected at least {length}"
        )
    out = np.full((arr.shape[0] * length, length, arr.shape[2]), pad_value, dtype = np.float64)
    for j in range(1, length + 1):
        # Rows j-1, j-1+length, ... hold the length-j suffix of each sample.
        out[j - 1::length, length - j:, :] = arr[:, -j:, :]
    return out


def padder_com(x,y,pl,plc,sli,timesteps):
    """Split windows into train/test and expand each into left-padded suffixes.

    The last ``sli`` samples form the test set, the rest the training set.
    ``x`` and ``y`` are padded with -1, ``pl`` and ``plc`` with 0.

    Args:
        x, y, pl, plc: 3-D arrays ``(samples, timesteps + 1, width)`` that
            share the same number of samples.
        sli: Number of trailing samples reserved for testing. Values larger
            than ``len(x)`` put every sample in the test set exactly once
            (previously the negative start index wrapped around and
            duplicated samples).
        timesteps: Window length minus one.

    Returns:
        ``(xtt, ytt, plttt, pltcc, xee, yee, plee, plecc)`` -- the four
        expanded training arrays followed by the four expanded test arrays.
    """
    n_samples = len(x)
    split = min(max(n_samples - sli, 0), n_samples)
    pad_values = ((x, -1), (y, -1), (pl, 0), (plc, 0))

    train = [_left_padded_suffixes(arr[:split], timesteps, value)
             for arr, value in pad_values]
    test = [_left_padded_suffixes(arr[split:n_samples], timesteps, value)
            for arr, value in pad_values]

    xtt, ytt, plttt, pltcc = train
    xee, yee, plee, plecc = test
    return xtt,ytt,plttt, pltcc,xee,yee,plee,plecc


In [19]:
# Load the table of index symbols; the `symbol` column is what the driver loop iterates over.
# NOTE(review): pandas is already imported in the first cell, so this re-import is redundant.
import pandas as pd
df_main = pd.read_csv('../data/market_indices.csv', index_col = [0])
# Bare last expression so the notebook displays the frame.
df_main

Unnamed: 0,symbol
0,CNXFMCG
1,XBANK
2,NIFTYQUALITY30
3,DJUSRD
4,DJUSMC
...,...
155,BELMG
156,DJUSAS
157,DJUSEE
158,AMX


In [20]:
from concurrent.futures import ThreadPoolExecutor
import gc
class StopAtAccuracy(tf.keras.callbacks.Callback):
    """Stop training as soon as a monitored metric reaches a target value.

    Args:
        target: Threshold; training stops once the metric is >= this value.
        monitor: Key to read from the epoch logs. Defaults to ``"accuracy"``;
            pass ``"val_accuracy"`` to stop on validation accuracy instead.
    """

    def __init__(self, target=0.995, monitor="accuracy"):
        super().__init__()
        self.target = target
        self.monitor = monitor

    def on_epoch_end(self, epoch, logs=None):
        """Check the monitored metric and request a stop when the target is met."""
        # `logs` defaults to None; treat that as "no metrics" instead of crashing.
        logs = logs or {}
        acc = logs.get(self.monitor)
        if acc is not None and acc >= self.target:
            print("\nDesired accuracy reached. Stopping training.")
            self.model.stop_training = True




def get_mod_pom(timesteps, input_dim = 162, output_dim = 1, output_dim_s = 6):
    """Build and compile the TransformerModel used for the pre-open predictions.

    Args:
        timesteps: Sequence length of both the encoder and the decoder input.
        input_dim: Feature count of the encoder input (default 162, the value
            that used to be hard-coded).
        output_dim: Feature count of the decoder input (default 1).
        output_dim_s: Number of output classes (default 6).

    Returns:
        A TransformerModel compiled with categorical cross-entropy, the Adam
        optimizer and the ``accuracy`` metric.
    """
    max_seq_len = timesteps
    max_seq_len_out = timesteps
    model = TransformerModel(
        input_dim = input_dim,
        output_dim = output_dim,
        output_dim_s = output_dim_s,
        d_model = 64,#high 128 #64
        num_heads = 8,#2 works fine
        dff = 128,#200
        max_seq_len = max_seq_len,
        max_seq_len_out = max_seq_len_out ,
        num_layers = 1,#high 1
        rate = 0.1
    )
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def run_simulation(i, timesteps, timess):
    """Train and evaluate the model for one symbol.

    Steps: load and window the symbol's data, label it with ``plc_pom``,
    train a fresh model, append the final loss/accuracy to ``record.csv``,
    save the weights and derive one up/down prediction per test day.

    Args:
        i: Symbol name; the data is read from
            ``'../data/Database_sh_backup/<i>.csv'``.
        timesteps: Window length minus one.
        timess: Maximum number of training epochs.

    Returns:
        A DataFrame with columns ``timestamp`` (date string) and ``pred``
        (0 = down, 1 = up) for the test days, or ``None`` if anything failed.
        Errors are printed with their traceback rather than raised.
    """
    try:
        sli = 108  # number of trailing windows held out for testing
        window = timesteps + 1
        sym = '../data/Database_sh_backup/' + i + '.csv'
        dff,times,df = data_maker_pom_test (sym)
        fd, col , tdf = prepare_data_com_pom(dff.copy(),sym,times,timesteps)

        # Fit the pl scaler on training rows only. `sli` counts windows and
        # each window spans `window` rows of fd; the previous slice
        # `[:len(fd) - sli]` therefore still included most of the test rows.
        scaler = StandardScaler()
        scaler.fit(fd[['pl']][:len(fd) - sli * window])
        fd['pl'] = scaler.transform(fd[['pl']])
        # NOTE(review): plc_pom derives its class thresholds from all rows,
        # test rows included -- confirm whether that is acceptable.
        fd,tup = plc_pom(fd)
        x,y,pl,plc = splitter_com(fd,timesteps)
        xt, yt, pltt, pltc,xe, ye, ple, plec = padder_com(x,y,pl,plc,sli,timesteps)

        # One-hot target over the six non-zero plc classes, taken from the
        # last step of each training window. A plc of 0 yields an all-zero row.
        class_values = np.array([-3, -2, -1, 1, 2, 3])
        otrc = (pltc[:, -1, :] == class_values).astype(int)

        model = get_mod_pom(timesteps)
        # Encoder sees the features of steps 1..T, decoder the pl of steps 0..T-1.
        train_inputs = [xt[:, 1:, :], pltt[:, :-1, :]]
        # Forward pass before training, as in the original code (this makes
        # the subclassed model create its weights); the output is discarded.
        model.predict(train_inputs)

        history = model.fit(
            train_inputs, otrc,
            epochs=timess,
            callbacks=[StopAtAccuracy(target=0.995)]
        )

        os.makedirs('../cmg_results/CMG/models', exist_ok = True)
        record_path = '../cmg_results/CMG/record.csv'
        record = pd.DataFrame(
            [[i,history.history['loss'][-1],history.history['accuracy'][-1]]],
            columns = ['sym','loss','accuracy']
        )
        # Append to an existing record, otherwise create it with a header.
        # (The two branches used to be swapped, so each run overwrote the file.)
        if os.path.exists(record_path):
            record.to_csv(record_path, header = False, mode = 'a')
        else:
            record.to_csv(record_path)

        yp = model.predict([xe[:, 1:, :], ple[:, :-1, :]])
        model.save_weights("../cmg_results/CMG/models/"+i+'.h5')
        del model
        gc.collect()

        # Every `window`-th prediction belongs to the full (unpadded) window
        # of a test day. Predict "down" (0) when the three negative classes
        # outweigh the three positive ones, otherwise "up" (1).
        full_windows = yp[timesteps::window]
        down = full_windows[:, 0] + full_windows[:, 1] + full_windows[:, 2]
        up = full_windows[:, 3] + full_windows[:, 4] + full_windows[:, 5]
        li = np.where(down > up, 0, 1).tolist()

        n_days = int(len(yp)/window)
        y_pred = pd.DataFrame(li, columns = ['pred'])
        # Positional slice so that n_days == 0 selects nothing (tdf[-0:] would select everything).
        y_pred['timestamp'] = list(
            tdf.iloc[len(tdf) - n_days:]['timestamp'].apply(lambda x:x.split(' ')[0])
        )
        y_pred = y_pred [['timestamp', 'pred']]

# ================================================
# ⚠️ OHLC Data Usage (Commented Out)
# ================================================
# This section of the code utilizes OHLC (Open, High, Low, Close) financial market data.
# Due to licensing restrictions, the actual OHLC dataset used in this project cannot be
# uploaded or distributed publicly via this repository.
#
# Many OHLC data sources (e.g., Yahoo Finance, Bloomberg, NSE, etc.) allow personal or
# research use, but prohibit redistribution or public sharing of the data files.
#
# To reproduce results:
# - You may use your own OHLC data.
# - Or modify the script to fetch data from APIs (e.g., yfinance).
#
# =================================================

#         #ori =  '../../../../../Database_indices/Database_backup/'+i+'.csv'
#         ori = '../data/Database_backup/'
#         ori_df = pd.read_csv(ori, index_col = [0])    
#         dt_end = datetime.strptime(y_pred.iloc[-1]['timestamp'],'%Y-%m-%d')
#         acc = pd.DataFrame(columns = ['date','accuracy', 'potential'])
#         for ind , row in y_pred.iterrows():
#             dt = datetime.strptime(row.timestamp,'%Y-%m-%d')
#             dt_orig = dt
#             close_previous = ori_df [ori_df.datetime.str.contains(dt_orig.strftime('%Y-%m-%d'))].iloc[-1].close
#             checker = 0
#             while True:
#                 dt += timedelta(days = 1)
#                 if dt > dt_end:
#                     checker = 1
#                     break
#                 sd = ori_df[ori_df.datetime.str.contains(dt.strftime('%Y-%m-%d'))].copy()
#                 if len(sd) > 0:
#                     break
#             if checker == 1:
#                 continue
#             if len(sd) > 0:
#                 sdd = sd[:int(len(sd) / 4)] #first quarter
#                 check = 0
#                 tmp = -1
#                 for indd, roww in sdd.iterrows():
#                     if row.pred == 1:
#                         if roww.close > close_previous:
#                             if tmp < (roww.close - close_previous):
#                                 tmp = roww.close - close_previous
#                             check = 1
#                     else:
#                         if roww.close < close_previous:
#                             if tmp < (close_previous - roww.close):
#                                 tmp = close_previous - roww.close
#                             check = 1
#                 if check == 1:
#                     acc = pd.concat([acc, pd.DataFrame(
#                         [[dt_orig.strftime('%Y-%m-%d'), check, tmp]], columns =  ['date','accuracy', 'potential'])]
#                                    )
#                 else : 
#                     acc = pd.concat([acc, pd.DataFrame(
#                         [[dt_orig.strftime('%Y-%m-%d'), check, tmp]], columns =  ['date','accuracy', 'potential'])]
#                                    )

#         acc.to_csv("results/CMG/accuracy/"+i+".csv")           
#         print(i,acc['accuracy'].mean())
        clear_output(wait=True)
        gc.collect()
        return y_pred
    except Exception:
        print("❌ An error occurred:")
        traceback.print_exc()
        gc.collect()
        return None
    
# Driver: run the simulation symbol by symbol with timesteps=3 and at most 400 epochs.
for i in df_main.symbol:
# for i in df_main[ df_main [df_main.symbol == 'NSEI'].index[0]:].symbol:
    print(i)
    run_simulation(i, 3, 400)
    # NOTE(review): this `break` ends the loop after the first symbol -- presumably a
    # debugging leftover; remove it to process every symbol in df_main.
    break
# Force garbage collection

# NOTE(review): the parallel variant below passes only the symbol to run_simulation,
# which also requires `timesteps` and `timess`; it would fail as written.
# with ThreadPoolExecutor(max_workers=4) as executor:
#     results = list(executor.map(run_simulation, list(df_main['symbol'])))

CNXFMCG
Epoch 1/400
Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoch 14/400
Epoch 15/400
Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch 32/400
Epoch 33/400
Epoch 34/400
Epoch 35/400
Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400
Epoch 42/400
Epoch 43/400
Epoch 44/400
Epoch 45/400
Epoch 46/400
Epoch 47/400
Epoch 48/400
Epoch 49/400
Epoch 50/400
Epoch 51/400
Epoch 52/400
Epoch 53/400
Epoch 54/400
Epoch 55/400
Epoch 56/400
Epoch 57/400
Epoch 58/400
Epoch 59/400
Epoch 60/400
Desired accuracy reached. Stopping training.


In [54]:
# NOTE(review): `test` is not defined in any cell visible in this notebook and this cell's
# execution count (54) is out of sequence, so it will fail on Restart & Run All.
# Define `test` in an earlier cell or remove this cell.
test.shape

(720, 6)

In [21]:
%%time
# get the average accuracy
import glob
import os
import pandas as pd
import glob
import os

# Specify the folder path
folder_path = '../cmg_results/CMG/accuracy/'  # Replace with your folder path
li = []
# Get all CSV files in the folder
csv_files = glob.glob(os.path.join(folder_path, '*.csv'))
for file in csv_files:
    tmp = pd.read_csv(file, index_col = [0])
#     li.append(tmp.loc["accuracy", "f1-score"])
    li.append(tmp["accuracy"].mean())
    
print(sum(li)/len(li)*100)

76.72313084112147
CPU times: user 1.01 s, sys: 92.5 ms, total: 1.1 s
Wall time: 2.85 s
