In [3]:
import os

In [4]:
os.mkdir("utils")

In [5]:
%%writefile utils/randomness.py
import os
import numpy as np
import random as rn
import tensorflow as tf
import pandas as pd

def set_randomness():
  '''
  Seed every random number generator this project relies on.

  Seeds Python's `random` module, NumPy's legacy global generator and
  TensorFlow's global generator with 42, and sets PYTHONHASHSEED to "0".
  Training can still be non-deterministic because we use the GPU.
  '''
  # PYTHONHASHSEED is read when the interpreter starts, so setting it here only
  # affects child processes started afterwards, not the current process.
  os.environ['PYTHONHASHSEED']="0"
  rn.seed(42)
  np.random.seed(42)
  # TensorFlow keeps its own global seed; the two calls above do not touch it.
  tf.random.set_seed(42)

Writing utils/randomness.py


In [6]:
%%writefile EDA.py
"""
The EDA and Cleaning the data.
"""
import numpy as np
import argparse
import pandas as pd
import pathlib
import os
import matplotlib.pyplot as plt
import matplotlib.image as implt


def Visualize(df,check_sizes=None):
  """
  Save a grid of randomly chosen images and, optionally, image-size histograms.

  Parameters
  ----------
  df : DataFrame with an 'image_path' column of readable image files. When
    sizes are checked, 'height' and 'width' columns are added to it in place.
  check_sizes : whether to also plot the height/width distributions. When None
    (the default) the module-level `args.check_sizes` parsed by the command
    line entry point is used, so existing `Visualize(df)` calls keep working.

  Writes plots/random_images.jpg and, when sizes are checked,
  plots/height_distributions.jpg and plots/weigth_distributions.jpg.
  """
  if check_sizes is None:
    check_sizes=args.check_sizes
  os.makedirs("plots",exist_ok=True)

  all_paths=df['image_path'].values
  # replace=False raises when fewer than 32 images exist, so cap the sample size.
  paths=np.random.choice(all_paths,size=min(32,len(all_paths)),replace=False)
  fig,axes=plt.subplots(8,4,figsize=(30,30))
  for ax,path in zip(axes.ravel(),paths):
    ax.imshow(implt.imread(path))
  fig.savefig("plots/random_images.jpg")
  plt.close(fig)

  if check_sizes:
    print("checking height distributions")
    # Read each image once and take both dimensions from that row's own file.
    shapes=[implt.imread(image_path).shape for image_path in df['image_path']]
    df['height']=[shape[0] for shape in shapes]
    df['width']=[shape[1] for shape in shapes]
    plt.figure(figsize=(15,10))
    plt.hist(df['height'])
    plt.title("height distributions")
    plt.savefig("plots/height_distributions.jpg")
    print("checking weigth distributions")
    plt.figure(figsize=(15,10))
    plt.hist(df['width'])
    plt.title("weidth distributions")
    plt.savefig("plots/weigth_distributions.jpg")
  

def str_to_bool(value):
  """
  Parse a command-line boolean such as "True", "false", "1" or "no".

  argparse's `type=bool` calls bool() on the raw string, so every non-empty
  value (including "False") becomes True; this parser reads the text instead.
  Raises argparse.ArgumentTypeError for anything it does not recognise.
  """
  if isinstance(value,bool):
    return value
  text=str(value).strip().lower()
  if text in ("true","t","yes","y","1"):
    return True
  if text in ("false","f","no","n","0"):
    return False
  raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


if __name__=="__main__":
  parser=argparse.ArgumentParser(description="EDA and cleaning")
  parser.add_argument("--data_path",help="path to the data file", type=pathlib.Path,required = True)
  parser.add_argument("--check_sizes",help="whether to check the distributions of height and width of images", 
                      type=str_to_bool,default=False)
  # Defaults to the previously hard-coded location, so existing invocations are unchanged.
  parser.add_argument("--images_dir",help="directory that contains the training images",
                      type=str,default="/content/train_images/")
  args=parser.parse_args()
  df=pd.read_csv(args.data_path)
  # Build the full path of every image from its file name.
  df['image_path']=df['image'].apply(lambda x: os.path.join(args.images_dir,x))
  os.makedirs("processed_data",exist_ok=True)
  df.to_csv("processed_data/cleaned_data.csv",index=False)
  Visualize(df)

Writing EDA.py


In [7]:
%%writefile create_fold.py
"""
This file creates the group kfold split to the data. 
And applies label Encoder to the target Feature.
"""
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GroupKFold
from utils.randomness import *
import argparse
import pickle
import pathlib

def CREATE_FOLD(args):
  """
  Label-encode 'label_group' and assign every row to one of five group folds.

  Reads the CSV at `args.data_path`, replaces 'label_group' with integer codes
  (the fitted encoder is pickled to encoders/label_group_encoder.pkl), adds a
  'gfold' column holding the index of the GroupKFold split in which the row is
  in the test part (groups are the encoded labels), and writes the result to
  processed_data/fold_data.csv.
  """
  frame=pd.read_csv(args.data_path)

  # Encode the target and keep the fitted encoder on disk.
  label_encoder=LabelEncoder()
  frame['label_group']=label_encoder.fit_transform(frame['label_group'])
  print("label encoding is done")
  if not os.path.isdir("encoders"):
    os.mkdir("encoders")
  with open("encoders/label_group_encoder.pkl","wb") as handle:
    pickle.dump(label_encoder,handle)

  # -1 marks rows that have not been assigned to a fold yet.
  frame['gfold']=-1
  splitter=GroupKFold(n_splits=5)
  fold_number=0
  for _,held_out_rows in splitter.split(frame,groups=frame['label_group']):
    frame.loc[held_out_rows,'gfold']=fold_number
    fold_number+=1
  print("created the group kfold")

  if not os.path.isdir("processed_data"):
    os.mkdir("processed_data")
    print("created processed_data directory")
  frame.to_csv("processed_data/fold_data.csv",index=False)
  print("group kfold data is stored at processed_data/fold_data.csv")
if __name__=="__main__":
  # Fix the seeds first, then read the single required option and build the folds.
  set_randomness()
  cli=argparse.ArgumentParser(description="create folds")
  cli.add_argument(
    "--data_path",
    help="path to the data file",
    type=pathlib.Path,
    required=True,
  )
  args=cli.parse_args()
  CREATE_FOLD(args)

Writing create_fold.py


In [8]:
#from tensorflow.keras.applications import EfficientNetB4
#pre_trained_model=EfficientNetB4(include_top=False,input_shape=(512,512,3))
#for i,layer in enumerate(pre_trained_model.layers):
#  print(i,layer.name)

In [9]:
%%writefile params.yaml
# Project hyper-parameters; loaded by params.py and referenced from dvc.yaml.
data:
 # CSV handed to EDA.py via --data_path in dvc.yaml.
 initial_file: "train.csv"
# Passed to training.py as --model_type by dvc.yaml ("image", "text" or "both").
model_type: "image"
image:
 # Not a YAML sequence: this loads as the string "(512,512)" and params.py
 # converts it into a tuple of ints.
 image_size: (512,512)
 # Backbone layers with index >= this value are made trainable, except layers
 # whose name ends in "bn", which stay frozen.
 unfreeze: 324
 pre_trained_name: "EfficientNetB4"
text:
  max_length: 512
  pre_trained_name: "bert-base-uncased"
# Passed to training.py as --lr_callback by dvc.yaml.
scheduler: "one_cycle"

Writing params.yaml


In [11]:
%%writefile dvc.yaml
# Pipeline: eda -> Folds -> training. Values written as ${...} are
# interpolated from params.yaml.
vars:
- params.yaml
stages:
 eda:
  cmd: python EDA.py --data_path ${data.initial_file} --check_sizes True
  # `params` takes parameter *keys* from params.yaml, not interpolated values.
  params:
  - data.initial_file
  deps:
  - EDA.py
  # The raw data itself, so the stage reruns when the input file changes.
  - ${data.initial_file}
  outs:
  - processed_data/cleaned_data.csv
  plots:
  - plots/weigth_distributions.jpg:
      cache: false
  - plots/height_distributions.jpg:
      cache: false
  - plots/random_images.jpg:
      cache: false
 Folds:
  cmd: python create_fold.py --data_path "processed_data/cleaned_data.csv"
  deps:
  - create_fold.py
  - utils/randomness.py
  - processed_data/cleaned_data.csv
  outs:
  - encoders/label_group_encoder.pkl
  - processed_data/fold_data.csv
 training:
  # Folded scalar: the two lines are joined with a space, so no trailing
  # backslash is needed. training.py appends "<fold>.h5" to --save_model_path,
  # hence the trailing "-" to produce the image_test-<fold>.h5 outputs below.
  cmd: >-
   python training.py --data_path processed_data/fold_data.csv --model_type ${model_type}
   --batch_size 32 --save_model_path "models_dir/image_test-" --epochs 30 --lr_callback ${scheduler}
  deps:
  - training.py
  - params.py
  - processed_data/fold_data.csv
  - utils/models.py
  - utils/dataloaders.py
  - utils/randomness.py
  outs:
  - "models_dir/image_test-0.h5"
  - "models_dir/image_test-1.h5"
  - "models_dir/image_test-2.h5"
  - "models_dir/image_test-3.h5"
  - "models_dir/image_test-4.h5"
  params:
  - scheduler
  - model_type
  - image.image_size
  - image.unfreeze
  - image.pre_trained_name
  - text.max_length
  - text.pre_trained_name

Overwriting dvc.yaml


In [12]:
%%writefile params.py

import yaml
def _parse_image_size(raw):
  """
  Turn the configured image size into a tuple of ints.

  Accepts the string form used in params.yaml, e.g. "(512,512)", as well as a
  real YAML sequence such as [512, 512].
  """
  if isinstance(raw,str):
    # Drop surrounding whitespace and brackets, then split on commas.
    raw=raw.strip().strip("()[]").split(",")
  return tuple(int(x) for x in raw)


# Load all hyper-parameters once at import time.
with open("params.yaml") as f:
  HYPERPARAMETERS=yaml.safe_load(f)

HYPERPARAMETERS['image']['image_size']=_parse_image_size(HYPERPARAMETERS['image']['image_size'])

Writing params.py


In [13]:
%%writefile utils/models.py
"""
Below we have models for image, text and combined models and arcface layer

"""
import math
import tensorflow as tf
from tensorflow.keras.layers import Dense,Input,Layer
from tensorflow.keras.models import Model
from tensorflow.keras.applications import EfficientNetB4
from transformers import TFBertModel,TFRobertaModel,TFAlbertModel,TFXLNetModel


class ARCFACE_LAYER(Layer):
  """
  ArcFace head: scaled cosine logits with an additive angular margin.

  Called on `[features, labels]`. For every sample the logit of its own label
  is `s * cos(theta + m)` and every other logit is `s * cos(theta)`, where
  theta is the angle between the L2-normalised feature row and the
  L2-normalised weight column of that class.

  Args:
    m: angular margin in radians.
    s: scale applied to the logits.
    n_classes: number of classes (columns of the weight matrix).
    **kwargs: forwarded to `Layer` (e.g. `name`), which is what lets
      `from_config` rebuild the layer from `get_config()`.
  """
  def __init__(self,m=0.5,s=60,n_classes=11014,**kwargs):
    super(ARCFACE_LAYER,self).__init__(**kwargs)
    self.m=m
    self.s=s
    self.sin_m=math.sin(m)
    self.cos_m=math.cos(m)
    self.n_classes=n_classes
    # theta + m stays below pi exactly when cos(theta) > cos(pi - m).
    self.threshold = math.cos(math.pi - m)
    # Penalty subtracted from cos(theta) when the margin cannot be applied.
    self.mm = math.sin(math.pi - m) * m
  
  def build(self,input_shape):
    # input_shape is [features_shape, labels_shape]; take the feature width.
    prev_layer_units=input_shape[0][1]
    self.w=self.add_weight(shape=(prev_layer_units,self.n_classes),trainable=True)

  def get_config(self):
    # Report the values this instance was built with, not the defaults.
    config=super().get_config()
    config.update({"m":self.m,
                   "s":self.s,
                   "n_classes":self.n_classes})
    return config


  def call(self,inputs):
    prev_layer,y=inputs
    y=tf.cast(y,dtype=tf.int32)
    y_hot=tf.one_hot(y,self.n_classes)
    y_hot=tf.cast(y_hot,dtype=tf.float32)
    w_norm=tf.linalg.l2_normalize(self.w,axis=0)
    x_norm=tf.linalg.l2_normalize(prev_layer,axis=1)
    cos_theta=tf.linalg.matmul(x_norm,w_norm)
    # Keep strictly inside (-1, 1) so the square root below stays well-behaved.
    cos_theta=tf.keras.backend.clip(cos_theta,-1+1e-5,1-1e-5)
    sin_theta=tf.sqrt(1-tf.pow(cos_theta,tf.cast(2,dtype=tf.float32)))
    # cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m)
    cos_theta_m=(cos_theta*self.cos_m)-(sin_theta*self.sin_m)
    # Apply the margin only while theta + m < pi; beyond that cos(theta + m)
    # is no longer monotonic, so fall back to the linear penalty.
    cos_theta_m=tf.where(cos_theta>self.threshold,cos_theta_m,cos_theta-self.mm)
    final=self.s*((y_hot*cos_theta_m)+((1-y_hot)*cos_theta))
    return final
  


def IMAGE_MODEL(image_size,unfreeze_layers_number):
  """
  Build the image classifier: EfficientNetB4 -> max pool -> Dense(512) -> ArcFace -> softmax.

  Backbone layers whose index is at least `unfreeze_layers_number` are
  trainable unless their name ends in "bn"; every other backbone layer is
  frozen. The model takes `(image, label)` inputs because the ArcFace layer
  needs the label.
  """
  tf.keras.backend.clear_session()
  height,width=image_size[0],image_size[1]
  backbone=EfficientNetB4(include_top=False,weights="imagenet",input_shape=(height,width,3))
  label_in=Input((),name="label_input")

  for position,backbone_layer in enumerate(backbone.layers):
    past_cutoff=position>=unfreeze_layers_number
    backbone_layer.trainable=past_cutoff and not backbone_layer.name.endswith("bn")

  pooled=tf.keras.layers.GlobalMaxPooling2D()(backbone.layers[-1].output)
  embedding=Dense(512)(pooled)
  margin_logits=ARCFACE_LAYER()([embedding,label_in])
  probabilities=tf.keras.layers.Softmax()(margin_logits)
  return Model(inputs=(backbone.input,label_in),outputs=probabilities)

def TEXT_MODEL(pre_trained_name,max_length):
  """
  Build the text classifier: BERT -> first-token features -> Dense(512) -> ArcFace -> softmax.

  The features are the last four hidden states concatenated along the feature
  axis, taken at sequence position 0. The model takes the three tokenizer
  tensors plus the label, because the ArcFace layer needs the label.
  """
  tf.keras.backend.clear_session()
  input_ids=Input((max_length,),dtype=tf.int32)
  attention_mask=Input((max_length,),dtype=tf.int32)
  token_type_ids=Input((max_length,),dtype=tf.int32)
  ins=Input((),name="label_input")
  pre_trained=TFBertModel.from_pretrained(pre_trained_name,output_hidden_states=True)
  pre_outputs=pre_trained({"input_ids":input_ids,"attention_mask":attention_mask,
                  "token_type_ids":token_type_ids})
  hidden_states=pre_outputs['hidden_states']
  # Indices -1..-4: with range(4) and [-i] the first pick was [-0], i.e. index 0.
  hidden_layers=[hidden_states[-(i+1)] for i in range(4)]
  x=tf.keras.layers.Concatenate()(hidden_layers)[:,0,:]
  x=Dense(512)(x)
  arc_layer=ARCFACE_LAYER()
  x=arc_layer([x,ins])
  outs=tf.keras.layers.Softmax()(x)
  model=Model(inputs=({"input_ids":input_ids,"attention_mask":attention_mask,
                 "token_type_ids":token_type_ids},ins),outputs=outs)
  return model

def COMBINE_MODEL(max_length,image_size,unfreeze_layers_number,pre_trained_name="bert-base-uncased"):
  """
  Build the joint text + image classifier.

  Text branch: the last four BERT hidden states concatenated along the feature
  axis, taken at sequence position 0. Image branch: DenseNet201 features after
  global max pooling. Both are concatenated and passed through
  Dense(512) -> ArcFace -> softmax.

  Args:
    max_length: token sequence length of the text inputs.
    image_size: (height, width) of the image input.
    unfreeze_layers_number: image-backbone layers with index >= this value are
      trainable unless their name ends in "bn"; the rest are frozen.
    pre_trained_name: BERT checkpoint to load; defaults to the previously
      hard-coded "bert-base-uncased".
  """
  # DenseNet201 is not among the module-level imports; import it here so the
  # function does not fail with a NameError.
  from tensorflow.keras.applications import DenseNet201

  tf.keras.backend.clear_session()
  input_ids=Input((max_length,),dtype=tf.int32)
  attention_mask=Input((max_length,),dtype=tf.int32)
  token_type_ids=Input((max_length,),dtype=tf.int32)
  ins=Input((),name="label_input")
  text_trained=TFBertModel.from_pretrained(pre_trained_name,output_hidden_states=True)
  text_outputs=text_trained({"input_ids":input_ids,"attention_mask":attention_mask,
                  "token_type_ids":token_type_ids})
  hidden_states=text_outputs['hidden_states']
  # Indices -1..-4: with range(4) and [-i] the first pick was [-0], i.e. index 0.
  hidden_layers=[hidden_states[-(i+1)] for i in range(4)]
  x1=tf.keras.layers.Concatenate()(hidden_layers)[:,0,:]
  ################
  # NOTE(review): IMAGE_MODEL uses EfficientNetB4 while this branch uses
  # DenseNet201 - confirm the unfreeze index is meant for this backbone too.
  img_trained=DenseNet201(include_top=False,weights="imagenet",input_shape=(image_size[0],image_size[1],3))
  for i,layer in enumerate(img_trained.layers):
    layer.trainable=(i>=unfreeze_layers_number) and not layer.name.endswith("bn")
  x2=img_trained.layers[-1].output
  x2=tf.keras.layers.GlobalMaxPooling2D()(x2)
  ################
  x=tf.keras.layers.Concatenate()([x1,x2])
  x=Dense(512)(x)
  arc_layer=ARCFACE_LAYER()
  x=arc_layer([x,ins])
  outs=tf.keras.layers.Softmax()(x)
  model=Model(inputs=({"input_ids":input_ids,"attention_mask":attention_mask,
                 "token_type_ids":token_type_ids},
                 img_trained.input,ins),outputs=outs)
  return model

Writing utils/models.py


In [14]:
%%writefile utils/dataloaders.py
"""
Data loaders for the models

"""
import albumentations as A
import tensorflow as tf
import numpy as np
from transformers import BertTokenizer

class IMG_DATA_LOADER(tf.keras.utils.Sequence):
  """
  Keras Sequence that yields batches of images read from a DataFrame.

  Training batches are `((IMAGES, Y), Y)`: the labels are passed to the model
  as a second input and are also the target. Inference batches are `IMAGES`.

  Args:
    dataframe: needs an 'image_path' column, plus 'label_group' unless
      `inference` is True.
    image_size: (height, width) every image is resized to.
    batch_size: number of rows per batch; the last batch may be smaller.
    aug: apply the random augmentations of `Augment_images` to every image.
    shuffle: reshuffle the row order on every `on_epoch_end`.
    inference: return images only and never read 'label_group'.
  """
  def __init__(self,dataframe,image_size,batch_size,aug,shuffle,inference=False):
    self.data=dataframe
    self.batch_size=batch_size
    self.shuffle=shuffle
    self.image_size=image_size
    self.aug=aug
    self.inference=inference
    self.n=0  # cursor used by next()
    self.max_=self.__len__()
    self.indexes=np.arange(self.data.shape[0])
    self.temp_indexes=np.arange(self.data.shape[0])
    if not self.inference:
      self.on_epoch_end()

  def __len__(self):
    """Number of batches per epoch (the last one may be partial)."""
    return int(np.ceil(self.data.shape[0]/self.batch_size))
  
  def on_epoch_end(self):
    """Shuffle the row order in place when shuffling is enabled."""
    if self.shuffle:
      np.random.shuffle(self.temp_indexes)
  
  def next(self):
    """Return the next batch, wrapping around to the first one after the last."""
    # Valid batch numbers are 0..max_-1; `>` let n == max_ through, which
    # produced one empty batch per cycle.
    if self.n>=self.max_:
      self.n=0
    result=self.__getitem__(self.n)
    self.n+=1
    return result
  
  def Augment_images(self,image):
    """Apply random rotation, flip, shift/scale and brightness/contrast changes."""
    transformer=A.Compose([A.Rotate(limit=30,p=0.8),
          A.HorizontalFlip(),
          #A.CoarseDropout(max_height=0.25,max_width=0.25,),
          A.ShiftScaleRotate(shift_limit=0.09,scale_limit=0.2,rotate_limit=0),
          A.RandomBrightnessContrast()
          ])
    image=transformer(image=image)['image']
    return image

  def __getitem__(self,batch):
    """Build batch number `batch`."""
    curr_temp_indexes=self.temp_indexes[batch*self.batch_size:(batch+1)*self.batch_size]
    curr_batch=self.indexes[curr_temp_indexes]
    # float32 matches what img_to_array produces and halves the buffer size.
    IMAGES=np.zeros((len(curr_batch),self.image_size[0],self.image_size[1],3),dtype=np.float32)
    if not self.inference:
      Y=np.zeros((len(curr_batch),))      
    for i,idx in enumerate(curr_batch):
      row=self.data.iloc[idx]
      img=tf.keras.preprocessing.image.load_img(row['image_path'],target_size=self.image_size)
      # NOTE(review): tf.keras EfficientNet models are documented to expect raw
      # [0, 255] pixels - confirm that scaling to [0, 1] here is intended.
      img=tf.keras.preprocessing.image.img_to_array(img)/255.0
      if self.aug:
        img=self.Augment_images(img)
      IMAGES[i,]=img
      if not self.inference:
        # Only touch the label column when it is needed: inference frames
        # may not have one.
        Y[i,]=row['label_group']
    if not self.inference:
      return (IMAGES,Y),Y
    else:
      return IMAGES


class TEXT_DATA_LOADER(tf.keras.utils.Sequence):
  """
  Keras Sequence that yields batches of tokenized titles from a DataFrame.

  Training batches are `((tokens, Y), Y)`, where `tokens` is a dict with
  'input_ids', 'attention_mask' and 'token_type_ids' arrays of shape
  (batch, max_length). Inference batches are just `tokens`.

  Args:
    dataframe: needs a 'title' column, plus 'label_group' unless `inference`
      is True.
    max_length: titles are padded or truncated to this many tokens.
    pre_trained_name: name handed to `BertTokenizer.from_pretrained`.
    batch_size: number of rows per batch; the last batch may be smaller.
    shuffle: reshuffle the row order on every `on_epoch_end`.
    inference: return tokens only and never read 'label_group'.
  """
  def __init__(self,dataframe,max_length,pre_trained_name,batch_size,shuffle,inference=False):
    self.data=dataframe
    self.batch_size=batch_size
    self.shuffle=shuffle
    self.max_length=max_length
    self.pre_trained_name=pre_trained_name
    self.inference=inference
    self.tokenizer=BertTokenizer.from_pretrained(self.pre_trained_name)
    self.n=0  # cursor used by next()
    self.max_=self.__len__()
    self.indexes=np.arange(self.data.shape[0])
    self.temp_indexes=np.arange(self.data.shape[0])
    if not self.inference:
      self.on_epoch_end()

  def __len__(self):
    """Number of batches per epoch (the last one may be partial)."""
    return int(np.ceil(self.data.shape[0]/self.batch_size))
  
  def on_epoch_end(self):
    """Shuffle the row order in place when shuffling is enabled."""
    if self.shuffle:
      np.random.shuffle(self.temp_indexes)
  
  def next(self):
    """Return the next batch, wrapping around to the first one after the last."""
    # Valid batch numbers are 0..max_-1; `>` let n == max_ through, which
    # produced one empty batch per cycle.
    if self.n>=self.max_:
      self.n=0
    result=self.__getitem__(self.n)
    self.n+=1
    return result
  
  def __getitem__(self,batch):
    """Build batch number `batch`."""
    curr_temp_indexes=self.temp_indexes[batch*self.batch_size:(batch+1)*self.batch_size]
    curr_batch=self.indexes[curr_temp_indexes]
    INPUT_IDS=np.zeros((len(curr_batch),self.max_length),dtype=np.int32)
    ATTENTION_MASK=np.zeros((len(curr_batch),self.max_length),dtype=np.int32)
    TOKEN_TYPE_IDS=np.zeros((len(curr_batch),self.max_length),dtype=np.int32)
    if not self.inference:
      Y=np.zeros((len(curr_batch),))
    for i,idx in enumerate(curr_batch):
      row=self.data.iloc[idx]
      tokenized_title=self.tokenizer.encode_plus(row['title'],padding="max_length",
                                                truncation="longest_first",max_length=self.max_length)
      
      INPUT_IDS[i,]=tokenized_title['input_ids']
      ATTENTION_MASK[i,]=tokenized_title['attention_mask']
      TOKEN_TYPE_IDS[i,]=tokenized_title['token_type_ids']
      if not self.inference:
        # Only touch the label column when it is needed: inference frames
        # may not have one.
        Y[i,]=row['label_group']
    tokens={"input_ids":INPUT_IDS,"attention_mask":ATTENTION_MASK,
            "token_type_ids": TOKEN_TYPE_IDS}
    if not self.inference:
      return (tokens,Y),Y
    else:
      return tokens

class BOTH_DATA_LOADER(tf.keras.utils.Sequence):
  """
  Keras Sequence that yields batches of tokenized titles and images.

  Training batches are `((tokens, IMAGES, Y), Y)`, where `tokens` is a dict
  with 'input_ids', 'attention_mask' and 'token_type_ids' arrays of shape
  (batch, max_length). Inference batches are `(tokens, IMAGES)`.

  Args:
    dataframe: needs 'image_path' and 'title' columns, plus 'label_group'
      unless `inference` is True.
    image_size: (height, width) every image is resized to.
    batch_size: number of rows per batch; the last batch may be smaller.
    max_length: titles are padded or truncated to this many tokens.
    text_pre_trained_name: name handed to `BertTokenizer.from_pretrained`.
    aug: apply the random augmentations of `Augment_images` to every image.
    shuffle: reshuffle the row order on every `on_epoch_end`.
    inference: return inputs only and never read 'label_group'.
  """
  def __init__(self,dataframe,image_size,batch_size,max_length,text_pre_trained_name,aug,shuffle,inference=False):
    self.data=dataframe
    self.batch_size=batch_size
    self.shuffle=shuffle
    self.inference=inference
    self.image_size=image_size
    self.aug=aug
    self.max_length=max_length
    self.pre_trained_name=text_pre_trained_name
    self.tokenizer=BertTokenizer.from_pretrained(self.pre_trained_name)
    self.n=0  # cursor used by next()
    self.max_=self.__len__()
    self.indexes=np.arange(self.data.shape[0])
    self.temp_indexes=np.arange(self.data.shape[0])
    if not self.inference:
      self.on_epoch_end()

  def __len__(self):
    """Number of batches per epoch (the last one may be partial)."""
    return int(np.ceil(self.data.shape[0]/self.batch_size))
  
  def on_epoch_end(self):
    """Shuffle the row order in place when shuffling is enabled."""
    if self.shuffle:
      np.random.shuffle(self.temp_indexes)
  
  def next(self):
    """Return the next batch, wrapping around to the first one after the last."""
    # Valid batch numbers are 0..max_-1; `>` let n == max_ through, which
    # produced one empty batch per cycle.
    if self.n>=self.max_:
      self.n=0
    result=self.__getitem__(self.n)
    self.n+=1
    return result
  
  def Augment_images(self,image):
    """Apply random rotation, flip, shift/scale and brightness/contrast changes."""
    transformer=A.Compose([A.Rotate(limit=30,p=0.8),
          A.HorizontalFlip(),
          #A.CoarseDropout(max_height=0.25,max_width=0.25,),
          A.ShiftScaleRotate(shift_limit=0.09,scale_limit=0.2,rotate_limit=0),
          A.RandomBrightnessContrast()
          ])
    image=transformer(image=image)['image']
    return image

  def __getitem__(self,batch):
    """Build batch number `batch`."""
    curr_temp_indexes=self.temp_indexes[batch*self.batch_size:(batch+1)*self.batch_size]
    curr_batch=self.indexes[curr_temp_indexes]
    # float32 matches what img_to_array produces and halves the buffer size.
    IMAGES=np.zeros((len(curr_batch),self.image_size[0],self.image_size[1],3),dtype=np.float32)
    INPUT_IDS=np.zeros((len(curr_batch),self.max_length),dtype=np.int32)
    ATTENTION_MASK=np.zeros((len(curr_batch),self.max_length),dtype=np.int32)
    TOKEN_TYPE_IDS=np.zeros((len(curr_batch),self.max_length),dtype=np.int32)
    if not self.inference:
      Y=np.zeros((len(curr_batch),))
    for i,idx in enumerate(curr_batch):
      row=self.data.iloc[idx]
      img=tf.keras.preprocessing.image.load_img(row['image_path'],target_size=self.image_size)
      img=tf.keras.preprocessing.image.img_to_array(img)/255.0
      if self.aug:
        img=self.Augment_images(img)
      IMAGES[i,]=img
      #############################
      tokenized_title=self.tokenizer.encode_plus(row['title'],padding="max_length",
                                                truncation="longest_first",max_length=self.max_length)
      
      INPUT_IDS[i,]=tokenized_title['input_ids']
      ATTENTION_MASK[i,]=tokenized_title['attention_mask']
      TOKEN_TYPE_IDS[i,]=tokenized_title['token_type_ids']
      if not self.inference:
        # Only touch the label column when it is needed: inference frames
        # may not have one.
        Y[i,]=row['label_group']
    tokens={"input_ids":INPUT_IDS,"attention_mask":ATTENTION_MASK,
            "token_type_ids": TOKEN_TYPE_IDS}
    if not self.inference:
      return (tokens,IMAGES,Y),Y
    else:
      return (tokens,IMAGES)

Writing utils/dataloaders.py


In [15]:
%%writefile training.py
"""
It is the training function for different types of models

"""

import argparse
import pathlib
import pandas as pd
import tensorflow as tf
from utils.models import *
from utils.randomness import *
from utils.dataloaders import *
from params import HYPERPARAMETERS


def CE(y_true,y_pred):
  '''
  Cross-entropy for integer labels: mean over the batch of -log(y_pred[label]).

  y_true holds one integer class id per sample; y_pred holds one row of class
  probabilities per sample. 1e-5 is added inside the log to avoid log(0).
  '''
  # Keras can hand the labels over as (batch, 1). One-hot encoding that gives
  # (batch, 1, classes), which would broadcast against y_pred to
  # (batch, batch, classes) and mix samples, so flatten to (batch,) first.
  y_true=tf.reshape(tf.cast(y_true,dtype=tf.int32),[-1])
  # Take the class count from the predictions instead of hard-coding 11014.
  n_classes=tf.shape(y_pred)[-1]
  y_true=tf.one_hot(y_true,depth=n_classes,dtype=y_pred.dtype)
  ce_loss=y_true*tf.keras.backend.log(y_pred+1e-5)
  batch_loss=tf.reduce_sum(ce_loss,axis=-1)
  return -1*tf.reduce_mean(batch_loss)

def one_cycle(epoch,lr_min=1e-5,lr_max=2e-4):
  """
  Learning rate for `epoch`: linear warm-up, peak, then exponential decay.

  Epochs 0-4 rise linearly from lr_min towards lr_max, epoch 5 is exactly
  lr_max, and every later epoch shrinks the distance to lr_min by a factor of
  0.8 per epoch.
  """
  span=lr_max-lr_min
  if epoch<5:
    # warm-up phase
    return span/5 *(epoch) + lr_min
  if epoch==5:
    # peak
    return lr_max
  # decay phase
  return span * 0.8**(epoch-5) +lr_min

def TRAINING(args):
  """
  Train one model per fold and save the best weights of each.

  For every fold 0-4 the rows whose 'gfold' equals the fold number are the
  validation set and the rest the training set. The model and data loaders
  are chosen by `args.model_type` ("image", "text", anything else builds the
  combined model). The weights with the lowest val_loss are written to
  `args.save_model_path + "<fold>.h5"`.

  Args:
    args: namespace with data_path, model_type, batch_size, save_model_path,
      epochs and lr_callback ("one_cycle", "reduce_lr_plateau" or other for
      no learning-rate callback).
  """
  import gc
  import os

  image_cfg=HYPERPARAMETERS["image"]
  text_cfg=HYPERPARAMETERS["text"]

  # Make sure the checkpoint directory exists before the first save.
  save_dir=os.path.dirname(args.save_model_path)
  if save_dir:
    os.makedirs(save_dir,exist_ok=True)

  df=pd.read_csv(args.data_path)
  for fold in range(5):
    train_data=df.loc[df['gfold']!=fold].drop("gfold",axis=1).reset_index(drop=True)
    test_data=df.loc[df['gfold']==fold].drop("gfold",axis=1).reset_index(drop=True)

    if args.model_type=="image":
      model=IMAGE_MODEL(image_size=image_cfg["image_size"],unfreeze_layers_number=image_cfg['unfreeze'])
      train_dataloader=IMG_DATA_LOADER(dataframe=train_data,image_size=image_cfg['image_size'],
                                       batch_size=args.batch_size,aug=True,shuffle=True)
      test_dataloader=IMG_DATA_LOADER(dataframe=test_data,image_size=image_cfg['image_size'],
                                      batch_size=args.batch_size,aug=False,shuffle=False)

    elif args.model_type=="text":
      model=TEXT_MODEL(pre_trained_name=text_cfg["pre_trained_name"],max_length=text_cfg["max_length"])
      train_dataloader=TEXT_DATA_LOADER(dataframe=train_data,max_length=text_cfg["max_length"],
                                        pre_trained_name=text_cfg["pre_trained_name"],batch_size=args.batch_size,shuffle=True)
      test_dataloader=TEXT_DATA_LOADER(dataframe=test_data,max_length=text_cfg["max_length"],
                                       pre_trained_name=text_cfg["pre_trained_name"],batch_size=args.batch_size,shuffle=False)

    else:
      model=COMBINE_MODEL(max_length=text_cfg["max_length"],image_size=image_cfg["image_size"],
                          unfreeze_layers_number=image_cfg['unfreeze'])
      train_dataloader=BOTH_DATA_LOADER(dataframe=train_data,batch_size=args.batch_size,
                                        image_size=image_cfg['image_size'],
                                        max_length=text_cfg["max_length"],
                                        text_pre_trained_name=text_cfg["pre_trained_name"],aug=True,shuffle=True)
      test_dataloader=BOTH_DATA_LOADER(dataframe=test_data,batch_size=args.batch_size,
                                       image_size=image_cfg['image_size'],
                                       max_length=text_cfg["max_length"],
                                       text_pre_trained_name=text_cfg["pre_trained_name"],aug=False,shuffle=False)
      
    model.compile("Adam",loss=CE)
    # Keep the one-argument wrapper: Keras may call the schedule as
    # schedule(epoch, lr), which would bind lr to one_cycle's lr_min.
    up_down=tf.keras.callbacks.LearningRateScheduler(lambda epoch: one_cycle(epoch),verbose=1)
    reduce_plat=tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss",mode="min",
                                                    patience=5,verbose=1,cooldown=2,
                                                    min_lr=1e-6)
    # NOTE(review): EarlyStopping is left at its default patience, so training
    # stops well before ReduceLROnPlateau's patience=5 can elapse - confirm.
    early=tf.keras.callbacks.EarlyStopping(monitor="val_loss",mode="min",verbose=1)
    saver=tf.keras.callbacks.ModelCheckpoint(filepath=args.save_model_path+f"{fold}.h5",
                                             monitor="val_loss",mode="min",save_best_only=True,
                                             save_weights_only=True)
    callbacks=[early,saver]
    if args.lr_callback=="one_cycle":
      callbacks.append(up_down)
    elif args.lr_callback=="reduce_lr_plateau":
      callbacks.append(reduce_plat)

    if fold==0:
      # summary() prints by itself and returns None, so do not print its result.
      model.summary()
    model.fit(train_dataloader,validation_data=test_dataloader,epochs=args.epochs,
              callbacks=callbacks)
    
    print(f"model training for {fold} is done")
    # Release the finished model before the next fold is built.
    del model
    gc.collect()
  

if __name__=="__main__":
  # Command-line entry point: parse the options, fix the seeds, train all folds.
  parser=argparse.ArgumentParser(description="training the models")
  parser.add_argument("--data_path",help="path to the data file", type=pathlib.Path,required = True)
  parser.add_argument("--model_type",help="type of model", choices=["image","text","both"],type=str,required = True)
  # batch_size and epochs carry defaults, so they are optional; marking them
  # required as well made those defaults unreachable.
  parser.add_argument("--batch_size",help="batch size for the model", type=int, default=32)
  parser.add_argument("--save_model_path",help="model path along with name where to save it", type=str,required = True)
  parser.add_argument("--epochs",help="number of epochs", type=int,default=30)
  parser.add_argument("--lr_callback",help="type of lr scheduler", choices=["one_cycle","reduce_lr_plateau","None"],
                      required=True,type=str)
  args=parser.parse_args()
  set_randomness()
  TRAINING(args)

Writing training.py


In [34]:
!git init

Reinitialized existing Git repository in /content/.git/


In [16]:
%%writefile .gitignore
train_images/
test_images/
sample_data/
sample_submission.csv
shopee-product-matching.zip
*.csv
.config/

Writing .gitignore


In [18]:
!git add .

The file will have its original line endings in your working directory


In [19]:
!git status

On branch main

No commits yet

Changes to be committed:
  (use "git rm --cached <file>..." to unstage)
	new file:   .gitignore
	new file:   EDA.py
	new file:   create_fold.py
	new file:   dvc.yaml
	new file:   latest_py_files.ipynb
	new file:   params.py
	new file:   params.yaml
	new file:   training.py
	new file:   utils/dataloaders.py
	new file:   utils/models.py
	new file:   utils/randomness.py



In [38]:
!git config --global user.name "RavitejaBadugu"
!git config --global user.email "ravi14ashwin@gmail.com"

In [20]:
!git commit -m "initial files"

[main (root-commit) de0b264] initial files
 11 files changed, 1732 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 EDA.py
 create mode 100644 create_fold.py
 create mode 100644 dvc.yaml
 create mode 100644 latest_py_files.ipynb
 create mode 100644 params.py
 create mode 100644 params.yaml
 create mode 100644 training.py
 create mode 100644 utils/dataloaders.py
 create mode 100644 utils/models.py
 create mode 100644 utils/randomness.py


In [21]:
!git branch -M main

In [22]:
!git remote add origin git@github.com:RavitejaBadugu/shop_duplicate.git

error: remote origin already exists.


In [23]:
!git push -u origin main

Branch 'main' set up to track remote branch 'main' from 'origin'.


To https://github.com/RavitejaBadugu/shop_duplicate.git
 * [new branch]      main -> main
