In [1]:
!pip uninstall nltk -y
!pip install nltk

Found existing installation: nltk 3.9.1
Uninstalling nltk-3.9.1:
  Successfully uninstalled nltk-3.9.1
Collecting nltk
  Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Downloading nltk-3.9.1-py3-none-any.whl (1.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: nltk
Successfully installed nltk-3.9.1


# TXT Parsing and Text Extraction


In [4]:
import nltk
import os
from collections import Counter

nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

def process_txts_in_folder(folder_path):
    total_text = []  # To accumulate the text from all text files

    # Get list of all .txt files in the folder
    txt_files = [f for f in os.listdir(folder_path) if f.endswith('.txt')]

    for txt_file in txt_files:
        txt_path = os.path.join(folder_path, txt_file)
        print(f"Processing: {txt_path}")

        # Open and read the content of the .txt file
        with open(txt_path, 'r', encoding='utf-8') as file:
            text = file.read()

        # Add the text to the total_text list
        total_text.append(text)

    # Return the total concatenated text
    return "\n\n".join(total_text)


folder_path = "/content/data"
all_text = process_txts_in_folder(folder_path)


Processing: /content/data/Mycoplasma pneumoniae.txt
Processing: /content/data/Phase de l'infection aiguë.txt
Processing: /content/data/Culture virale.txt
Processing: /content/data/ANCA.txt
Processing: /content/data/GGT (Gamma GT).txt
Processing: /content/data/Monocytes.txt
Processing: /content/data/Culture de champignons.txt
Processing: /content/data/Test de stimulation à la GnRH.txt
Processing: /content/data/Métanéphrines.txt
Processing: /content/data/Chlore (Cl-).txt
Processing: /content/data/Gradient alvéolo-artériel.txt
Processing: /content/data/17-OH progestérone.txt
Processing: /content/data/Hématologiques.txt
Processing: /content/data/Coproculture (selles).txt
Processing: /content/data/Neutrophiles.txt
Processing: /content/data/Potassium.txt
Processing: /content/data/Fibrinogène.txt
Processing: /content/data/FSH et LH (hormones gonadotropes).txt
Processing: /content/data/Streptocoque A.txt
Processing: /content/data/Créatinine.txt
Processing: /content/data/CA 19-9.txt
Processing:

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


# Custom Text Chunking


In [5]:
def simple_text_splitter(text: str, chunk_size: int, overlap: int) -> list:
    """
    Splits the input text into chunks of a specified size, with optional overlap.

    Parameters:
    - text: The input text to be split.
    - chunk_size: The maximum size of each chunk (in terms of characters).
    - overlap: The number of overlapping characters between consecutive chunks.

    Returns:
    - A list of text chunks, with or without overlap.
    """

    # Initialize variables
    chunks = []
    start = 0
    text_length = len(text)

    while start < text_length:
        # Determine end of the chunk
        end = min(start + chunk_size, text_length)
        chunk = text[start:end].strip()  # Get chunk, and remove any leading/trailing spaces
        chunks.append(chunk)

        # Move the starting point, factoring in the overlap
        start = end - overlap

    return chunks


# Example usage
chunks = simple_text_splitter(text=all_text, chunk_size=2048, overlap=0)

# Print the number of chunks and the first one as an example
print(f"Total chunks: {len(chunks)}")
print(f"First chunk: {chunks[0]}")


Total chunks: 354
First chunk: Analyses Sérologiques : Sérologies Bactériennes

Mycoplasma pneumoniae
1. Définition et Agent Causal
Mycoplasma pneumoniae est une bactérie de petite taille qui appartient à la classe des mycoplasmes. Elle est une cause fréquente de pneumonie communautaire (Pneumonie atypique) et est responsable d'infections respiratoires, souvent de manière asymptomatique ou bénigne. En raison de son absence de paroi cellulaire, elle est naturellement résistante à certains antibiotiques, notamment les bêta-lactamines (comme la pénicilline).

2. Tests Sérologiques Utilisés pour Diagnostiquer Mycoplasma pneumoniae
Les tests sérologiques pour Mycoplasma pneumoniae sont utilisés pour détecter les anticorps contre la bactérie dans le but de diagnostiquer une infection active ou antérieure.

Les tests sérologiques les plus couramment utilisés pour la détection des infections à Mycoplasma pneumoniae incluent :

Test ELISA (Enzyme-Linked Immunosorbent Assay) :

Principe : Ce tes

# Dataset Generator


In [6]:
import google.generativeai as genai
import pandas as pd

# Replace with your valid Google API key
GOOGLE_API_KEY = "AIzaSyBqhiWmNDnfsVUf2e7n-2sAhOVKSCkmS8c"

# Prompt generator with an explicit request for structured output
def prompt(text_chunk):
    return f"""
      Basé sur le texte suivant, générez une question et sa réponse correspondante. Veuillez formater la sortie comme suit :

      Question : [Votre question]
      Réponse : [Votre réponse]
      Texte : {text_chunk}
    """
# Function to interact with Google's Gemini and return a QA pair
def generate_with_gemini(text_chunk:str, temperature:float, model_name:str):
    genai.configure(api_key=GOOGLE_API_KEY)
    generation_config = {"temperature": temperature}

    # Initialize the generative model
    gen_model = genai.GenerativeModel(model_name, generation_config=generation_config)

    # Generate response based on the prompt
    response = gen_model.generate_content(prompt(text_chunk))

    # Extract question and answer from response using keyword
    try:
        question, answer = response.text.split("Answer:", 1)
        question = question.replace("Question:", "").strip()
        answer = answer.strip()
    except ValueError:
        question, answer = "N/A", "N/A"  # Handle unexpected format in response

    return question, answer

### The generate_with_gemini function interacts with the Gemini model and generates a QA pair using the created prompt.

# Running Q&A Generation

In [7]:
def process_text_chunks(text_chunks:list, temperature:int, model_name=str):
    results = []

    # Iterate through each text chunk
    for chunk in text_chunks:
        question, answer = generate_with_gemini(chunk, temperature, model_name)
        results.append({"Text Chunk": chunk, "Question": question, "Answer": answer})

    # Convert results into a Pandas DataFrame
    df = pd.DataFrame(results)
    return df
# Process the text chunks and get the DataFrame
df_results = process_text_chunks(text_chunks=chunks,
                                 temperature=0.7,
                                 model_name="gemini-1.5-flash")
df_results.to_csv("generated_qa_pairs.csv", index=False)



TooManyRequests: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).

### The error TooManyRequests with status code 429 indicates that the request limit has been exceeded, likely due to hitting the rate limit or quota for the Gemini model's API. This can happen when too many requests are sent within a short time frame, causing the API to temporarily block further requests.

### we already prepare our dataset , and we will upload it directly from HuggingFace

# Loading the Dataset


In [8]:
pip install datasets


Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

In [9]:
from datasets import load_dataset

# Load the dataset directly from Hugging Face Hub
dataset = load_dataset("ilyass20/MedAnalyzer")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/31.0 [00:00<?, ?B/s]

neww_data4.json:   0%|          | 0.00/1.03M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2182 [00:00<?, ? examples/s]

# Loading the Model


In [10]:
import torch
from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import (
    InformationRetrievalEvaluator,
    SequentialEvaluator,
)
from sentence_transformers.util import cos_sim
from datasets import load_dataset, concatenate_datasets
from sentence_transformers.losses import MatryoshkaLoss, MultipleNegativesRankingLoss


model_id = "intfloat/multilingual-e5-large"

# Load a model
model = SentenceTransformer(
    model_id, device="cuda" if torch.cuda.is_available() else "cpu"
)

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

modules.json:   0%|          | 0.00/387 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/160k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/57.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/690 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/418 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/201 [00:00<?, ?B/s]

# Defining the Loss Function


In [11]:
# Important: large to small
matryoshka_dimensions = [1024, 768, 512, 256, 128, 64]
inner_train_loss = MultipleNegativesRankingLoss(model)
train_loss = MatryoshkaLoss(
    model, inner_train_loss, matryoshka_dims=matryoshka_dimensions
)

# Defining Training Arguments


In [12]:
from sentence_transformers import SentenceTransformerTrainingArguments
from sentence_transformers.training_args import BatchSamplers

# define training arguments
args = SentenceTransformerTrainingArguments(
    output_dir="bge-finetuned",                 # output directory and hugging face model ID
    num_train_epochs=1,                         # number of epochs
    per_device_train_batch_size=4,              # train batch size
    gradient_accumulation_steps=16,             # for a global batch size of 512
    per_device_eval_batch_size=16,              # evaluation batch size
    warmup_ratio=0.1,                           # warmup ratio
    learning_rate=2e-5,                         # learning rate, 2e-5 is a good value
    lr_scheduler_type="cosine",                 # use constant learning rate scheduler
    optim="adamw_torch_fused",                  # use fused adamw optimizer
    bf16=True,                                  # use bf16 precision
    batch_sampler=BatchSamplers.NO_DUPLICATES,  # MultipleNegativesRankingLoss benefits from no duplicate samples in a batch
    eval_strategy="epoch",                      # evaluate after each epoch
    save_strategy="epoch",                      # save after each epoch
    logging_steps=10,                           # log every 10 steps
    save_total_limit=3,                         # save only the last 3 models
    load_best_model_at_end=True,                # load the best model when training ends
    metric_for_best_model="eval_dim_128_cosine_ndcg@10",  # Optimizing for the best ndcg@10 score for the 128 dimension
)

# Creating the Evaluator


In [13]:
# Assuming `dataset['train']` contains the 'anchor' and 'positive' columns.
# Use indices as unique identifiers for queries and corpus.

corpus = dict(
    zip(range(len(dataset['train']['positive'])),  # Use row indices as unique IDs
        dataset['train']['positive'])
)  # Corpus (cid => document)

queries = dict(
    zip(range(len(dataset['train']['anchor'])),  # Use row indices as unique IDs
        dataset['train']['anchor'])
)  # Queries (qid => anchor)

# Create a mapping of relevant documents (1 in our case) for each query
relevant_docs = {}  # Query ID to relevant documents (qid => set([relevant_cids])
for q_id in queries:
    relevant_docs[q_id] = [q_id]  # Assuming each query has its own relevant document (positive)

matryoshka_evaluators = []
# Iterate over the different dimensions
for dim in matryoshka_dimensions:
    ir_evaluator = InformationRetrievalEvaluator(
        queries=queries,
        corpus=corpus,
        relevant_docs=relevant_docs,
        name=f"dim_{dim}",
        truncate_dim=dim,  # Truncate the embeddings to a certain dimension
        score_functions={"cosine": cos_sim},
    )
    matryoshka_evaluators.append(ir_evaluator)

# Create a sequential evaluator
evaluator = SequentialEvaluator(matryoshka_evaluators)


# Evaluating the Model Before Fine-tuning


In [14]:
results = evaluator(model)

for dim in matryoshka_dimensions:
    key = f"dim_{dim}_cosine_ndcg@10"
    print(f"{key}: {results[key]}")

dim_1024_cosine_ndcg@10: 0.796686101130705
dim_768_cosine_ndcg@10: 0.7908571449288243
dim_512_cosine_ndcg@10: 0.7897130065549273
dim_256_cosine_ndcg@10: 0.7522047002299088
dim_128_cosine_ndcg@10: 0.6801301655511952
dim_64_cosine_ndcg@10: 0.5182511586022086


# Defining the Trainer


In [15]:
from sentence_transformers import SentenceTransformerTrainer

trainer = SentenceTransformerTrainer(
    model=model, # our embedding model
    args=args,  # training arguments we defined above
    train_dataset=dataset.select_columns(
        ["anchor", "positive"]
    ),
    loss=train_loss, # Matryoshka loss
    evaluator=evaluator, # Sequential Evaluator
)

# Starting Fine-tuning


In [16]:
# start training
trainer.train()
# save the best model
trainer.save_model()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch,Training Loss,Validation Loss,Dim 1024 Cosine Accuracy@1,Dim 1024 Cosine Accuracy@3,Dim 1024 Cosine Accuracy@5,Dim 1024 Cosine Accuracy@10,Dim 1024 Cosine Precision@1,Dim 1024 Cosine Precision@3,Dim 1024 Cosine Precision@5,Dim 1024 Cosine Precision@10,Dim 1024 Cosine Recall@1,Dim 1024 Cosine Recall@3,Dim 1024 Cosine Recall@5,Dim 1024 Cosine Recall@10,Dim 1024 Cosine Ndcg@10,Dim 1024 Cosine Mrr@10,Dim 1024 Cosine Map@100,Dim 768 Cosine Accuracy@1,Dim 768 Cosine Accuracy@3,Dim 768 Cosine Accuracy@5,Dim 768 Cosine Accuracy@10,Dim 768 Cosine Precision@1,Dim 768 Cosine Precision@3,Dim 768 Cosine Precision@5,Dim 768 Cosine Precision@10,Dim 768 Cosine Recall@1,Dim 768 Cosine Recall@3,Dim 768 Cosine Recall@5,Dim 768 Cosine Recall@10,Dim 768 Cosine Ndcg@10,Dim 768 Cosine Mrr@10,Dim 768 Cosine Map@100,Dim 512 Cosine Accuracy@1,Dim 512 Cosine Accuracy@3,Dim 512 Cosine Accuracy@5,Dim 512 Cosine Accuracy@10,Dim 512 Cosine Precision@1,Dim 512 Cosine Precision@3,Dim 512 Cosine Precision@5,Dim 512 Cosine Precision@10,Dim 512 Cosine Recall@1,Dim 512 Cosine Recall@3,Dim 512 Cosine Recall@5,Dim 512 Cosine Recall@10,Dim 512 Cosine Ndcg@10,Dim 512 Cosine Mrr@10,Dim 512 Cosine Map@100,Dim 256 Cosine Accuracy@1,Dim 256 Cosine Accuracy@3,Dim 256 Cosine Accuracy@5,Dim 256 Cosine Accuracy@10,Dim 256 Cosine Precision@1,Dim 256 Cosine Precision@3,Dim 256 Cosine Precision@5,Dim 256 Cosine Precision@10,Dim 256 Cosine Recall@1,Dim 256 Cosine Recall@3,Dim 256 Cosine Recall@5,Dim 256 Cosine Recall@10,Dim 256 Cosine Ndcg@10,Dim 256 Cosine Mrr@10,Dim 256 Cosine Map@100,Dim 128 Cosine Accuracy@1,Dim 128 Cosine Accuracy@3,Dim 128 Cosine Accuracy@5,Dim 128 Cosine Accuracy@10,Dim 128 Cosine Precision@1,Dim 128 Cosine Precision@3,Dim 128 Cosine Precision@5,Dim 128 Cosine Precision@10,Dim 128 Cosine Recall@1,Dim 128 Cosine Recall@3,Dim 128 Cosine Recall@5,Dim 128 Cosine Recall@10,Dim 128 Cosine Ndcg@10,Dim 128 Cosine Mrr@10,Dim 128 Cosine Map@100,Dim 64 Cosine Accuracy@1,Dim 64 Cosine Accuracy@3,Dim 64 Cosine Accuracy@5,Dim 64 Cosine Accuracy@10,Dim 64 Cosine Precision@1,Dim 64 Cosine Precision@3,Dim 64 Cosine Precision@5,Dim 64 Cosine Precision@10,Dim 64 Cosine Recall@1,Dim 64 Cosine Recall@3,Dim 64 Cosine Recall@5,Dim 64 Cosine Recall@10,Dim 64 Cosine Ndcg@10,Dim 64 Cosine Mrr@10,Dim 64 Cosine Map@100,Sequential Score
0,1.6676,No log,0.727314,0.900092,0.925756,0.943171,0.727314,0.300031,0.185151,0.094317,0.727314,0.900092,0.925756,0.943171,0.848518,0.816784,0.818873,0.722273,0.898717,0.924381,0.943171,0.722273,0.299572,0.184876,0.094317,0.722273,0.898717,0.924381,0.943171,0.846137,0.813652,0.815758,0.722273,0.896884,0.922548,0.94363,0.722273,0.298961,0.18451,0.094363,0.722273,0.896884,0.922548,0.94363,0.845962,0.813317,0.815288,0.712191,0.889093,0.915215,0.940422,0.712191,0.296364,0.183043,0.094042,0.712191,0.889093,0.915215,0.940422,0.838403,0.804439,0.806482,0.698442,0.875344,0.902841,0.928506,0.698442,0.291781,0.180568,0.092851,0.698442,0.875344,0.902841,0.928506,0.825261,0.790962,0.793419,0.648946,0.831806,0.866636,0.905591,0.648946,0.277269,0.173327,0.090559,0.648946,0.831806,0.866636,0.905591,0.785222,0.745831,0.749063,0.785222


Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

# Evaluating After Fine-tuning


In [19]:
from sentence_transformers import SentenceTransformer

fine_tuned_model = SentenceTransformer(
    args.output_dir, device="cuda" if torch.cuda.is_available() else "cpu"
)
# Evaluate the model
results = evaluator(fine_tuned_model)

# Print the main score
for dim in matryoshka_dimensions:
    key = f"dim_{dim}_cosine_ndcg@10"
    print(f"{key}: {results[key]}")

dim_1024_cosine_ndcg@10: 0.8484494758018222
dim_768_cosine_ndcg@10: 0.8464278023469214
dim_512_cosine_ndcg@10: 0.8471493017957071
dim_256_cosine_ndcg@10: 0.838328950338751
dim_128_cosine_ndcg@10: 0.8253269042594107
dim_64_cosine_ndcg@10: 0.785763822456146


## 📊 **Comparison of Model Performance Before and After Fine-Tuning**

The table below summarizes the **NDCG@10** scores before and after fine-tuning across different dimensions:

| **Dimension** | **Before Fine-Tuning** | **After Fine-Tuning** | **Improvement** |
|---------------|-------------------------|------------------------|-----------------|
| **1024**     | 0.7967                 | 0.8484                | **+0.0517**     |
| **768**      | 0.7981                 | 0.8464                | **+0.0483**     |
| **512**      | 0.7897                 | 0.8471                | **+0.0574**     |
| **256**      | 0.7522                 | 0.8383                | **+0.0861**     |
| **128**      | 0.6081                 | 0.8253                | **+0.2172**     |
| **64**       | 0.5182                 | 0.7858                | **+0.2676**     |

---

### 📝 **Key Observations**

**1. Overall Improvement:**  
- Fine-tuning led to **significant improvement across all dimensions**.  
- Lower dimensions (**64** and **128**) showed the **largest relative improvement**, suggesting fine-tuning effectively compressed meaningful information into smaller embeddings.  

**2. Higher Dimensions Perform Best:**  
- The **best absolute scores** are still observed in **higher dimensions** (**1024**, **768**, **512**), indicating these embeddings capture **richer information**.

**3. Diminishing Returns at High Dimensions:**  
- The **performance improvement** is **more marginal in higher dimensions** compared to lower ones.

---

### 🎯 **Conclusion**
- Fine-tuning was **highly effective**, with **substantial gains in NDCG@10** across all dimensions.  
- Depending on **deployment constraints** (e.g., memory and latency), choosing between:  
   - **512**: Balanced performance and efficiency.  
   - **1024**: Best absolute performance.  

This analysis provides a **clear guideline for embedding dimension selection** based on specific application needs.


# Download the Fine-tuned model

In [17]:
!tar -czvf bge-finetuned.tar.gz bge-finetuned


bge-finetuned/
bge-finetuned/2_Normalize/
bge-finetuned/tokenizer.json
bge-finetuned/checkpoint-34/
bge-finetuned/checkpoint-34/2_Normalize/
bge-finetuned/checkpoint-34/tokenizer.json
bge-finetuned/checkpoint-34/scheduler.pt
bge-finetuned/checkpoint-34/trainer_state.json
bge-finetuned/checkpoint-34/optimizer.pt
bge-finetuned/checkpoint-34/README.md
bge-finetuned/checkpoint-34/tokenizer_config.json
bge-finetuned/checkpoint-34/sentence_bert_config.json
bge-finetuned/checkpoint-34/model.safetensors
bge-finetuned/checkpoint-34/training_args.bin
bge-finetuned/checkpoint-34/sentencepiece.bpe.model
bge-finetuned/checkpoint-34/config_sentence_transformers.json
bge-finetuned/checkpoint-34/rng_state.pth
bge-finetuned/checkpoint-34/1_Pooling/
bge-finetuned/checkpoint-34/1_Pooling/config.json
bge-finetuned/checkpoint-34/special_tokens_map.json
bge-finetuned/checkpoint-34/config.json
bge-finetuned/checkpoint-34/modules.json
bge-finetuned/runs/
bge-finetuned/runs/Dec28_22-43-33_d39409bf452b/
bge-fin

In [20]:
from google.colab import files
files.download('bge-finetuned.tar.gz')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [22]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [23]:
!cp bge-finetuned.tar.gz /content/drive/MyDrive/
