In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import pathlib
from collections import Counter
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import TextVectorization, Embedding, LSTM, Bidirectional, Dense, \
 AdditiveAttention, LayerNormalization, Add

In [None]:
project_dir = "/content/drive/MyDrive/Projects/Suvidha-Foundation-Internship-Project"

In [None]:
train_file_path = os.path.join(project_dir, 'data/preprocessed/finished_files/final_train.csv')
val_file_path = os.path.join(project_dir, 'data/preprocessed/finished_files/final_val.csv')
test_file_path = os.path.join(project_dir, 'data/preprocessed/finished_files/final_test.csv')

In [None]:
def load_data(path):
  data = pd.read_csv(path, header=None)
  data.columns = ['article', 'highlight']
  articles, summaries = data['article'].to_numpy(), data['highlight'].to_numpy()

  return articles, summaries

In [None]:
train_articles_raw, train_summaries_raw = load_data(train_file_path)
val_articles_raw, val_summaries_raw = load_data(val_file_path)
test_articles_raw, test_summaries_raw = load_data(test_file_path)

In [None]:
BATCH_SIZE = 16 # From the paper

BUFFER_SIZE = len(train_articles_raw)

train_raw = tf.data.Dataset \
            .from_tensor_slices((train_articles_raw, train_summaries_raw)) \
            .shuffle(BUFFER_SIZE, seed=2023) \
            .batch(BATCH_SIZE)

val_raw = tf.data.Dataset \
          .from_tensor_slices((val_articles_raw, val_summaries_raw)) \
          .shuffle(BUFFER_SIZE, seed=2023) \
          .batch(BATCH_SIZE)

test_raw = tf.data.Dataset \
          .from_tensor_slices((test_articles_raw, test_summaries_raw)) \
          .shuffle(BATCH_SIZE, seed=2023) \
          .batch(BATCH_SIZE)

In [None]:
for example_article_strings, example_summary_strings in train_raw.take(1):
  print(example_article_strings[:5])
  print()
  print(example_summary_strings[:5])
  break

tf.Tensor(
[b"by . tara brady . a teenager has become the only apprentice wheelwright in britain working at one of only two firms appointed to take care of the queen \xe2\x80\x99 s historic royal carriages . george richards , 17 , has been learning his trade at mike rowland wheelwrights and coachbuilders since september . as part of his three-year training scheme at the smoke and steam-filled workshop in colyton , devon , he has now produced an unusual wooden bicycle for a special visit from the master of the worshipful company of wheelwrights . skills : george richards ( left ) has become an apprentice wheelwright at one of only two firms appointed to take care of the queen 's historic royal carriages . he has been learning the trade at mike rowland wheelwrights . the youngster , who also studies joinery at exeter college , has impressed the old hands at the business - which was formed in 1964 but can be traced back through the family as far as 1360 . greg rowland , 44 , who has now t

In [None]:
# Retrieve the vocabularies using the vocab file
with open(os.path.join(project_dir, 'data/preprocessed/finished_files/vocab'), 'r') as reader:
  vocab = reader.read()

In [None]:
vocab[:20]

'. 12012810\nthe 11803'

In [None]:
vocab.split('\n')[:2]

['. 12012810', 'the 11803789']

In [None]:
[v.split(' ')[0] for v in vocab.split('\n')[:5]]

['.', 'the', ',', 'to', 'a']

In [None]:
vocab_data = [v.split(' ')[0] for v in vocab.split('\n')]

In [None]:
# In the vocabolary we already have the masked token, so we need to remove it
# otherwise TextVectorization would give ValuesError
vocab_data.index('')

200000

In [None]:
vocab_data.remove('')

In [None]:
# Add it to the vocab_data as well as to all the texts
vocab_data.insert(0, '[START]')
vocab_data.insert(1, '[END]')

In [None]:
# As we do not have [START] and [END] included in our context or target,
# let's include them in a function and pass it to the TextVectorization
def add_start_end(text):
  # If already any [START] or [END] token is present, remove them
  text = tf.strings.regex_replace(text, '[START]', '')
  text = tf.strings.regex_replace(text, '[END]', '')

  text = tf.strings.strip(text)
  text = tf.strings.join(['[START]', text, '[END]'], separator=' ')

  return text

In [None]:
example_text = tf.constant('how are you ?')
print(example_text)
print(example_text.numpy().decode())
print(add_start_end(example_text).numpy().decode())

tf.Tensor(b'how are you ?', shape=(), dtype=string)
how are you ?
[START] how are you ? [END]


In [None]:
vocab_data[:10]

['[START]', '[END]', '.', 'the', ',', 'to', 'a', 'and', 'of', 'in']

In [None]:
max_article_tokens = 400
max_summary_tokens = 100

article_vocab_size = 160000
summary_vocab_size = 50000

article_processor = TextVectorization(max_tokens=article_vocab_size+2, # 2 extra for '', [UNK]
                                      standardize=add_start_end,
                                      vocabulary=vocab_data[:article_vocab_size],
                                      output_sequence_length=max_article_tokens)

summary_processor = TextVectorization(max_tokens=summary_vocab_size+2, # 2 extra for '', [UNK]
                                      standardize=add_start_end,
                                      vocabulary=vocab_data[:summary_vocab_size],
                                      output_sequence_length=max_summary_tokens)

In [None]:
article_processor.get_vocabulary()[:10]

['', '[UNK]', '[START]', '[END]', '.', 'the', ',', 'to', 'a', 'and']

In [None]:
example_article_strings[0]

<tf.Tensor: shape=(), dtype=string, numpy=b"by . tara brady . a teenager has become the only apprentice wheelwright in britain working at one of only two firms appointed to take care of the queen \xe2\x80\x99 s historic royal carriages . george richards , 17 , has been learning his trade at mike rowland wheelwrights and coachbuilders since september . as part of his three-year training scheme at the smoke and steam-filled workshop in colyton , devon , he has now produced an unusual wooden bicycle for a special visit from the master of the worshipful company of wheelwrights . skills : george richards ( left ) has become an apprentice wheelwright at one of only two firms appointed to take care of the queen 's historic royal carriages . he has been learning the trade at mike rowland wheelwrights . the youngster , who also studies joinery at exeter college , has impressed the old hands at the business - which was formed in 1964 but can be traced back through the family as far as 1360 . gre

In [None]:
example_article_tokens = article_processor(example_article_strings)
example_article_tokens[0]

<tf.Tensor: shape=(400,), dtype=int64, numpy=
array([     2,     28,      4,   8504,   4611,      4,      8,   1529,
           32,    379,      5,    113,  11606, 138007,     11,    416,
          349,     25,     56,     10,    113,     74,   3150,   3750,
            7,    150,    383,     10,      5,    967,     31,     67,
         2176,    619,  14913,      4,    748,   6002,      6,    835,
            6,     32,     48,   2308,     24,   1663,     25,   1773,
        20137,      1,      9, 118646,    140,    491,      4,     26,
          191,     10,     24,   7056,    601,   2329,     25,      5,
         2310,      9,      1,  10791,     11, 112167,      6,   4135,
            6,     18,     32,     98,   2008,     41,   2139,   4616,
         7500,     13,      8,    717,    692,     29,      5,   3818,
           10,      5,  97512,    276,     10,      1,      4,   2350,
           23,    748,   6002,     54,    114,     53,     32,    379,
           41,  11606, 138007, 

In [None]:
summary_processor.get_vocabulary()[:10]

['', '[UNK]', '[START]', '[END]', '.', 'the', ',', 'to', 'a', 'and']

In [None]:
example_summary_strings[0]

<tf.Tensor: shape=(), dtype=string, numpy=b"[START] george richards , 17 , has been learning his trade since september . he is working at devon 's mike rowland wheelwrights and coachbuilders . the company is one of two firms which takes care of queen 's carriages . a wheelwright is someone who builds or repairs wooden wheels . [END]">

In [None]:
example_summary_tokens = summary_processor(example_summary_strings)
example_summary_tokens[0]

<tf.Tensor: shape=(100,), dtype=int64, numpy=
array([    2,     1,   748,  6002,     6,   835,     6,    32,    48,
        2308,    24,  1663,   140,   491,     4,    18,    17,   349,
          25,  4135,    16,  1773, 20137,     1,     9,     1,     4,
           5,   276,    17,    56,    10,    74,  3150,    57,   915,
         383,    10,   967,    16, 14913,     4,     8,     1,    17,
         582,    44, 11017,    63,  8099,  4616,  5659,     4,     1,
           3,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0])>

In [None]:
def process_text(articles, summaries):
  encoder_in = article_processor(articles)
  target = summary_processor(summaries)
  target_in = target[:, :-1]
  target_out = target[:, 1:]

  return (encoder_in, target_in), target_out


train_ds = train_raw.map(process_text, tf.data.AUTOTUNE)
val_ds = val_raw.map(process_text, tf.data.AUTOTUNE)
test_ds = test_raw.map(process_text, tf.data.AUTOTUNE)

In [None]:
for (ex_context_tok, ex_tar_in), ex_tar_out in train_ds.take(1):
  print(ex_context_tok[0, :10].numpy())
  print()
  print(ex_tar_in[0, :10].numpy())
  print(ex_tar_out[0, :10].numpy())

[    2   550    16  6164   721     7  1023     8 10139 20460]

[    2     1  7618  3993    28   351 14978    10   550    16]
[    1  7618  3993    28   351 14978    10   550    16  8767]


In [None]:
class Encoder(tf.keras.layers.Layer):
  def __init__(self, emb_out, lstm_units, text_processor):
    super(Encoder, self).__init__()
    self.emb_out = emb_out
    self.lstm_units = lstm_units
    self.text_processor = text_processor
    self.vocab_size = text_processor.vocabulary_size()

    # Embedding layer will convert the tokens into vectors
    self.embedding = Embedding(self.vocab_size, self.emb_out)

    # Using Bidirectional layer with lstm cells
    self.rnn = Bidirectional(
        layer= LSTM(self.lstm_units,
                    # Return all output sequences
                    return_sequences=True),
        merge_mode='sum')


  def call(self, x):
    x = self.embedding(x)

    x = self.rnn(x)

    return x

  def convert_inputs(self, texts):
    texts = tf.convert_to_tensor(texts)

    if(len(texts.shape) == 0):
      texts = tf.convert_to_tensor(texts)[tf.newaxis]

    articles = self.text_processor(texts)
    enc_articles = self(articles)

    return enc_articles

In [None]:
class CrossAttention(tf.keras.layers.Layer):
  def __init__(self, use_scale=True, **kwargs):
    super(CrossAttention, self).__init__()

    self.additive_attention = AdditiveAttention(use_scale=use_scale, **kwargs)
    self.layernorm = LayerNormalization()
    self.add = Add()


  def call(self, x, context):
    attn_output, attn_scores = self.additive_attention(
        [x, context],
        return_attention_scores=True
    )

    attn_scores = tf.reduce_mean(attn_scores, axis=1)
    self.last_attention_weights = attn_scores

    x = self.add([x, attn_output])
    x = self.layernorm(x)

    return x

In [None]:
class Decoder(tf.keras.layers.Layer):
  @classmethod
  def add_method(cls, fun):
    setattr(cls, fun.__name__, fun)
    return fun

  def __init__(self, units, text_processor):
    super(Decoder, self).__init__()

    self.text_processor = text_processor
    self.vocab_size = text_processor.vocabulary_size()
    self.word_to_id = tf.keras.layers.StringLookup(vocabulary=text_processor.get_vocabulary(),
                                                   mask_token='',
                                                   oov_token='[UNK]')
    self.id_to_word = tf.keras.layers.StringLookup(vocabulary=text_processor.get_vocabulary(),
                                                   mask_token='',
                                                   oov_token='[UNK]',
                                                   invert=True)
    self.start_token = self.word_to_id('[START]')
    self.end_token = self.word_to_id('[END]')

    self.units = units

    self.embedding = Embedding(self.vocab_size, self.units, mask_zero=True)
    self.rnn = LSTM(self.units, return_sequences=True, return_state=True)
    self.attention = CrossAttention()
    self.output_layer = Dense(self.vocab_size)


  def call(self, context, x, state=None, return_state=False, return_attn=False):
    dec_in = self.embedding(x)

    x, h_state, m_state = self.rnn(dec_in, initial_state=state)

    x = self.attention(x, context)
    self.last_attention_weights = self.attention.last_attention_weights


    logits = self.output_layer(x)

    if return_state:
      out = (logits, [h_state, m_state])

    else:
      out = logits


    return out

In [None]:
class Summarizer(tf.keras.Model):

  @classmethod
  def add_method(cls, fun):
    setattr(cls, fun.__name__, fun)
    return fun

  def __init__(self, enc_emb_dim, units,
               article_processor, summary_processor):

    super().__init__()

    self.encoder = Encoder(enc_emb_dim, units, article_processor)
    self.decoder = Decoder(units, summary_processor)

  def call(self, inputs):
    encoder_in, x = inputs
    context = self.encoder(encoder_in)
    logits = self.decoder(context, x)

    try:
      # Delete the keras mask, so keras doesn't scale the loss+accuracy.
      del logits._keras_mask
    except AttributeError:
      pass

    return logits

In [None]:
UNITS = 256
EMB_DIM = 128
max_article_tokens = 400
vocab_size = summary_processor.vocabulary_size()

model = Summarizer(EMB_DIM, UNITS,
                   article_processor, summary_processor)

In [None]:
def masked_loss(y_true, y_pred):
    # Calculate the loss for each item in the batch.
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction='none')
    loss = loss_fn(y_true, y_pred)

    # Mask off the losses on padding.
    mask = tf.cast(y_true != 0, loss.dtype)
    loss *= mask

    # Return the total.
    return tf.reduce_sum(loss)/tf.reduce_sum(mask)

In [None]:
def masked_acc(y_true, y_pred):
    # Calculate the loss for each item in the batch.
    y_pred = tf.argmax(y_pred, axis=-1)
    y_pred = tf.cast(y_pred, y_true.dtype)

    match = tf.cast(y_true == y_pred, tf.float32)
    mask = tf.cast(y_true != 0, tf.float32)

    return tf.reduce_sum(match)/tf.reduce_sum(mask)

In [None]:
adagrad_opt = tf.keras.optimizers.experimental.Adagrad(
    learning_rate=0.15,
    initial_accumulator_value=0.1,
    clipnorm=2,
)

model.compile(optimizer=adagrad_opt,
              loss=masked_loss,
              metrics=[masked_acc, masked_loss],
              run_eagerly=True)

In [None]:
model.evaluate(val_ds, steps=20, return_dict=True)



{'loss': 10.819047927856445,
 'masked_acc': 0.5702838897705078,
 'masked_loss': 10.819047927856445}

In [None]:
model_dir = os.path.join(project_dir, "models")
model_path = os.path.join(model_dir,"weights.best.hdf5")

history = model.fit(
    train_ds.repeat(),
    epochs=50,
    steps_per_epoch = 1000,
    validation_data=val_ds,
    validation_steps = 20,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(monitor="masked_acc", patience=3),

        tf.keras.callbacks.ModelCheckpoint(model_path, monitor="masked_acc", verbose=1,
                                           save_best_only=True,
                                           save_weights_only=True,
                                           mode='max')])

Epoch 1/50




Epoch 1: masked_acc improved from -inf to 0.11282, saving model to /content/drive/MyDrive/Projects/Suvidha-Foundation-Internship-Project/models/weights.best.hdf5
Epoch 2/50
Epoch 2: masked_acc improved from 0.11282 to 0.16502, saving model to /content/drive/MyDrive/Projects/Suvidha-Foundation-Internship-Project/models/weights.best.hdf5
Epoch 3/50
Epoch 3: masked_acc improved from 0.16502 to 0.18634, saving model to /content/drive/MyDrive/Projects/Suvidha-Foundation-Internship-Project/models/weights.best.hdf5
Epoch 4/50
Epoch 4: masked_acc improved from 0.18634 to 0.19899, saving model to /content/drive/MyDrive/Projects/Suvidha-Foundation-Internship-Project/models/weights.best.hdf5
Epoch 5/50
Epoch 5: masked_acc improved from 0.19899 to 0.20799, saving model to /content/drive/MyDrive/Projects/Suvidha-Foundation-Internship-Project/models/weights.best.hdf5
Epoch 6/50
Epoch 6: masked_acc improved from 0.20799 to 0.21357, saving model to /content/drive/MyDrive/Projects/Suvidha-Foundation-In

In [None]:
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.ylim([0, max(plt.ylim())])
plt.xlabel('Epoch #')
plt.ylabel('CE/token')
plt.legend()

In [None]:
@Decoder.add_method
def get_initial_state(self, articles):
  batch_size = tf.shape(articles)[0]
  start_tokens = tf.fill([batch_size, 1], self.start_token)
  done = tf.zeros([batch_size, 1], dtype=tf.bool)
  embedded = self.embedding(start_tokens)
  return start_tokens, done, self.rnn.get_initial_state(embedded)

In [None]:
@Decoder.add_method
def tokens_to_text(self, tokens):
  words = self.id_to_word(tokens)
  result = tf.strings.reduce_join(words, axis=-1, separator=' ')
  result = tf.strings.regex_replace(result, '^ *\[START\] *', '')
  result = tf.strings.regex_replace(result, ' *\[END\] *$', '')
  return result

In [None]:
@Decoder.add_method
def get_next_token(self, article, next_token, done, state):
  logits, state = self(
    article, next_token,
    state = state,
    return_state=True)

  next_token = tf.argmax(logits, axis=-1)

  # If a sequence produces an `end_token`, set it `done`
  done = done | (next_token == self.end_token)
  # Once a sequence is done it only produces 0-padding.
  next_token = tf.where(done, tf.constant(0, dtype=tf.int64), next_token)

  return next_token, done, state

In [None]:
UNITS = 256
EMB_DIM = 128

model_dir = os.path.join(project_dir, "models")
model_path = os.path.join(model_dir,"weights.best.hdf5")

summarizer = Summarizer(EMB_DIM, UNITS, article_processor, summary_processor)

In [None]:
adagrad_opt = tf.keras.optimizers.experimental.Adagrad(
    learning_rate=0.15,
    initial_accumulator_value=0.1,
    clipnorm=2,
)

summarizer.compile(optimizer=adagrad_opt,
              loss=masked_loss,
              metrics=[masked_acc, masked_loss])

In [None]:
summarizer.evaluate(val_ds, steps=20, return_dict=True)



{'loss': 10.820549011230469,
 'masked_acc': 0.5702435374259949,
 'masked_loss': 10.820549011230469}

In [None]:
summarizer.load_weights(model_path)

In [None]:
article = ["andy murray came close to giving himself some extra preparation time for his wedding next week before ensuring that he still has unfinished \
tennis business to attend to . the world no 4 is into the semi-finals of the miami open , but not before getting a scare from 21 year-old austrian dominic \
thiem , who pushed him to 4-4 in the second set before going down 3-6 6-4 , 6-1 in an hour and three quarters . murray was awaiting the winner from the \
last eight match between tomas berdych and argentina 's juan monaco . prior to this tournament thiem lost in the second round of a challenger event to \
soon-to-be new brit aljaz bedene . andy murray pumps his first after defeating dominic thiem to reach the miami open semi finals . muray throws his sweatband \
into the crowd after completing a 3-6 , 6-4 , 6-1 victory in florida . murray shakes hands with thiem who he described as a ' strong guy ' \
after the game . and murray has a fairly simple message for any of his fellow british tennis players who might be agitated about his imminent arrival \
into the home ranks : do n't complain . instead the british no 1 believes his colleagues should use the assimilation of the world number 83 , originally from \
slovenia , as motivation to better themselves ."]

In [None]:
@Summarizer.add_method
def summarize(self,
              texts, *,
              max_length=120):
  # Process the input texts
  context = self.encoder.convert_inputs(texts)
  batch_size = tf.shape(texts)[0]

  # Setup the loop inputs
  tokens = []
  next_token, done, state = self.decoder.get_initial_state(context)

  for _ in range(max_length):
    # Generate the next token
    next_token, done, state = self.decoder.get_next_token(
        context, next_token, done,  state)

    # Collect the generated tokens
    tokens.append(next_token)


  # Stack the lists of tokens and attention weights.
  tokens = tf.concat(tokens, axis=-1)   # t*[(batch 1)] -> (batch, t)

  result = self.decoder.tokens_to_text(tokens)
  return result

In [None]:
summarizer.summarize(article)

<tf.Tensor: shape=(1,), dtype=string, numpy=
array([b'[UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK]'],
      dtype=object)>

In [None]:
# Train the model more after upgrading it with newest weights
model_dir = os.path.join(project_dir, "models")
model_path = os.path.join(model_dir,"weights.best.hdf5")

adagrad_opt = tf.keras.optimizers.experimental.Adagrad(
    learning_rate=0.15,
    initial_accumulator_value=0.1,
    clipnorm=2,
)

summarizer.compile(optimizer=adagrad_opt,
              loss=masked_loss,
              metrics=[masked_acc, masked_loss])

summarizer.evaluate(val_ds, steps=20, return_dict=True)

summarizer.load_weights(model_path)

history = summarizer.fit(
    train_ds.repeat(),
    epochs=20,
    steps_per_epoch = 100,
    validation_data=val_ds,
    validation_steps = 20,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(monitor="masked_acc", patience=3),

        tf.keras.callbacks.ModelCheckpoint(model_path, monitor="masked_acc", verbose=1,
                                           save_best_only=True,
                                           save_weights_only=True,
                                           mode='max')])

Epoch 1/20
Epoch 1: masked_acc improved from -inf to 0.15678, saving model to /content/drive/MyDrive/Projects/Suvidha-Foundation-Internship-Project/models/weights.best.hdf5
Epoch 2/20
Epoch 2: masked_acc improved from 0.15678 to 0.21303, saving model to /content/drive/MyDrive/Projects/Suvidha-Foundation-Internship-Project/models/weights.best.hdf5
Epoch 3/20
Epoch 3: masked_acc improved from 0.21303 to 0.21528, saving model to /content/drive/MyDrive/Projects/Suvidha-Foundation-Internship-Project/models/weights.best.hdf5
Epoch 4/20
Epoch 4: masked_acc improved from 0.21528 to 0.21719, saving model to /content/drive/MyDrive/Projects/Suvidha-Foundation-Internship-Project/models/weights.best.hdf5
Epoch 5/20
Epoch 5: masked_acc improved from 0.21719 to 0.22129, saving model to /content/drive/MyDrive/Projects/Suvidha-Foundation-Internship-Project/models/weights.best.hdf5
Epoch 6/20
Epoch 6: masked_acc improved from 0.22129 to 0.22307, saving model to /content/drive/MyDrive/Projects/Suvidha-Fo