In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Load the h2o-danube3-4b-chat model and wrap it in a text-generation pipeline.
import torch  # imported here because this cell needs it only to detect the accelerator

model_name_h2o = "h2oai/h2o-danube3-4b-chat"

# Use the GPU when one is visible; otherwise stay on CPU (the previous behavior).
device_h2o = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer_h2o = AutoTokenizer.from_pretrained(model_name_h2o)
model_h2o = AutoModelForCausalLM.from_pretrained(
    model_name_h2o,
    # On GPU keep the dtype stored in the checkpoint (same setting as the Phi-3
    # cell) instead of the library default; on CPU the default is left untouched.
    torch_dtype="auto" if device_h2o == "cuda" else None,
)

# Create a text generation pipeline. The device is passed explicitly so the
# pipeline does not silently run on CPU when a GPU is available.
pipe_h2o = pipeline(
    "text-generation",
    model=model_h2o,
    tokenizer=tokenizer_h2o,
    device=device_h2o,
)

# Function to generate a single-paragraph summary
def generate_summary_h2o(input_text, prompt="Analyze the email and provide a detailed paragraph describing its purpose, key ideas, and recommendations:"):
    """
    Generate a single-paragraph summary of ``input_text`` with ``pipe_h2o``.

    The prompt sent to the model is ``prompt``, the input text and the marker
    ``### Paragraph Summary:``; only the text the model writes after that
    marker is returned.

    Args:
        input_text (str): The input text (e.g., an email).
        prompt (str): The task prompt to guide the model.

    Returns:
        str: The generated summary. An empty string when ``input_text`` is not
        a string or is blank (e.g. a NaN cell passed by ``DataFrame.apply``).
        If generation raises, the string ``"Error during generation: <error>"``
        is returned instead of propagating the exception.
    """
    delimiter = "### Paragraph Summary:"

    # Nothing to summarise: avoid prompting the model with "nan" or an empty body.
    if not isinstance(input_text, str) or not input_text.strip():
        return ""

    # Build the complete prompt
    full_prompt = f"{prompt}\n\n{input_text}\n\n{delimiter}"

    try:
        # Generate text using the pipeline
        result = pipe_h2o(
            full_prompt,
            max_new_tokens=150,  # Adjust length for a concise paragraph
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
        )
        generated_text = result[0]["generated_text"]

        # Isolate the model's continuation. Removing the exact prompt prefix is
        # preferred: splitting on the marker alone picks the wrong span when the
        # email itself happens to contain the marker text.
        if generated_text.startswith(full_prompt):
            continuation = generated_text[len(full_prompt):]
        elif delimiter in generated_text:
            continuation = generated_text.split(delimiter, 1)[1]
        else:
            continuation = generated_text  # Fallback if no delimiter is found

        # Stop at a repeated marker, i.e. where the model starts a new section.
        summary = continuation.split(delimiter, 1)[0].strip()

        # Clean up any repeated content: drop everything from the point where
        # the model starts echoing the email headers.
        if "Message-ID" in summary:
            summary = summary.split("Message-ID")[0].strip()

        return summary
    except Exception as e:
        # Best-effort: report the failure as text so a batch run keeps going.
        return f"Error during generation: {e}"

# Example usage
# Raw string: the X-Folder header contains backslashes ("\P", "\A") that are
# not valid escape sequences; the raw prefix keeps the text identical while
# avoiding the invalid-escape warning.
input_text = r"""
Message-ID: <15464986.1075855378456.JavaMail.evans@thyme>
Date: Fri, 4 May 2001 13:51:00 -0700 (PDT)
From: phillip.allen@enron.com
To: john.lavorato@enron.com
Subject: Re:
Mime-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
X-From: Phillip K Allen
X-To: John J Lavorato <John J Lavorato/ENRON@enronXgate@ENRON>
X-cc:
X-bcc:
X-Folder: \Phillip_Allen_Jan2002_1\Allen, Phillip K.'Sent Mail
X-Origin: Allen-P
X-FileName: pallen (Non-Privileged).pst

Traveling to have a business meeting takes the fun out of the trip. Especially if you have to prepare a presentation. I would suggest holding the business plan meetings here then take a trip without any formal business meetings. I would even try and get some honest opinions on whether a trip is even desired or necessary.

As far as the business meetings, I think it would be more productive to try and stimulate discussions across the different groups about what is working and what is not. Too often the presenter speaks and the others are quiet just waiting for their turn. The meetings might be better if held in a round table discussion format.

My suggestion for where to go is Austin. Play golf and rent a ski boat and jet ski's. Flying somewhere takes too much time.
"""

# Generate the paragraph summary
paragraph_summary = generate_summary_h2o(input_text)

# Print only the summary
print(paragraph_summary)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.70k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.79M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/484 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/726 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/18.0k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.96G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.96G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import warnings

# Set random seed for reproducibility
torch.random.manual_seed(0)

# Choose the device at run time so the cell also works without a GPU
# (a hard-coded "cuda" raises when no CUDA device is present).
device_phi3 = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer
model_name_phi3 = "microsoft/Phi-3-mini-128k-instruct"
# NOTE(review): trust_remote_code=True runs Python code shipped in the model
# repository; consider pinning `revision=` so that code cannot change between runs.
model_phi3 = AutoModelForCausalLM.from_pretrained(
    model_name_phi3,
    torch_dtype="auto",
    trust_remote_code=True,
).to(device_phi3)
tokenizer_phi3 = AutoTokenizer.from_pretrained(model_name_phi3)

# Silence warnings. Note that this filter hides every UserWarning raised after
# this point, not only tokenizer-related ones.
warnings.filterwarnings("ignore", category=UserWarning)

# Define the pipeline. The device is passed explicitly so the pipeline and the
# model agree on where inference runs.
pipe_phi3 = pipeline(
    "text-generation",
    model=model_phi3,
    tokenizer=tokenizer_phi3,
    device=device_phi3,
)
# Function to generate a paragraph-style summary
def generate_summary_phi3(input_text, prompt="Analyze the email and provide a detailed paragraph describing its purpose, key ideas, and recommendations:"):
    """
    Generate a paragraph-style summary of ``input_text`` with ``pipe_phi3``.

    The prompt sent to the model is ``prompt``, the input text and the marker
    ``### Paragraph Summary:``; only the text the model writes after that
    marker is returned.

    Args:
        input_text (str): The input text (e.g., an email).
        prompt (str): The task prompt to guide the model.

    Returns:
        str: The generated summary. An empty string when ``input_text`` is not
        a string or is blank (e.g. a NaN cell passed by ``DataFrame.apply``).
        If generation raises, the string ``"Error during generation: <error>"``
        is returned instead of propagating the exception.
    """
    delimiter = "### Paragraph Summary:"

    # Nothing to summarise: avoid prompting the model with "nan" or an empty body.
    if not isinstance(input_text, str) or not input_text.strip():
        return ""

    # Add a clear delimiter for the model to follow
    full_prompt = f"{prompt}\n\n{input_text}\n\n{delimiter}"

    try:
        # Generate text using the pipeline
        result = pipe_phi3(
            full_prompt,
            max_new_tokens=200,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
        )
        generated_text = result[0]["generated_text"]

        # Isolate the model's continuation. Removing the exact prompt prefix is
        # preferred: splitting on the marker alone picks the wrong span when the
        # email itself happens to contain the marker text.
        if generated_text.startswith(full_prompt):
            continuation = generated_text[len(full_prompt):]
        elif delimiter in generated_text:
            continuation = generated_text.split(delimiter, 1)[1]
        else:
            continuation = generated_text  # Fallback if no delimiter is found

        # Stop at a repeated marker, i.e. where the model starts a new section.
        summary = continuation.split(delimiter, 1)[0].strip()

        # Clean up: drop everything from an "Email:" label onwards, where the
        # model starts repeating the input email.
        if "Email:" in summary:
            summary = summary.split("Email:")[0].strip()

        return summary
    except Exception as e:
        # Best-effort: report the failure as text so a batch run keeps going.
        return f"Error during generation: {e}"

# Example usage
# Raw string: the X-Folder header contains backslashes ("\P", "\A") that are
# not valid escape sequences; the raw prefix keeps the text identical while
# avoiding the invalid-escape warning.
input_text = r"""
Message-ID: <15464986.1075855378456.JavaMail.evans@thyme>
Date: Fri, 4 May 2001 13:51:00 -0700 (PDT)
From: phillip.allen@enron.com
To: john.lavorato@enron.com
Subject: Re:
Mime-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
X-From: Phillip K Allen
X-To: John J Lavorato <John J Lavorato/ENRON@enronXgate@ENRON>
X-cc:
X-bcc:
X-Folder: \Phillip_Allen_Jan2002_1\Allen, Phillip K.'Sent Mail
X-Origin: Allen-P
X-FileName: pallen (Non-Privileged).pst

Traveling to have a business meeting takes the fun out of the trip. Especially if you have to prepare a presentation. I would suggest holding the business plan meetings here then take a trip without any formal business meetings. I would even try and get some honest opinions on whether a trip is even desired or necessary.

As far as the business meetings, I think it would be more productive to try and stimulate discussions across the different groups about what is working and what is not. Too often the presenter speaks and the others are quiet just waiting for their turn. The meetings might be better if held in a round table discussion format.

My suggestion for where to go is Austin. Play golf and rent a ski boat and jet ski's. Flying somewhere takes too much time.
"""

# Generate the paragraph-style summary
description = generate_summary_phi3(input_text)

# Print only the summary
print(description)


config.json:   0%|          | 0.00/3.48k [00:00<?, ?B/s]

configuration_phi3.py:   0%|          | 0.00/11.2k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3-mini-128k-instruct:
- configuration_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_phi3.py:   0%|          | 0.00/73.2k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3-mini-128k-instruct:
- modeling_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/16.3k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.44k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.94M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/306 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/599 [00:00<?, ?B/s]

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.
`get_max_cache()` is deprecated for all Cache classes. Use `get_max_cache_shape()` instead. Calling `get_max_cache()` will raise error from v4.48


This email, sent by Phillip K. Allen to John J. Lavorato, discusses the downsides of combining business travel with presentations. Allen suggests that business meetings would be more productive if held in Austin, where attendees could engage in round-table discussions rather than passive listening. He humorously recommends leisure activities such as golf and water sports in lieu of flying, citing the excessive time these flights take.


## Instruction 2


In [1]:
import pandas as pd
from google.colab import drive
# **Step 1: Mount Google Drive and load the file**
# The Drive must be mounted first: the CSV path below lives under the mount point.
drive.mount('/content/drive')

file_path = '/content/drive/MyDrive/emails_final1.csv'  # Replace with your actual path
# Read the whole CSV into a DataFrame; later cells read its 'message' column.
data = pd.read_csv(file_path)

Mounted at /content/drive


In [None]:
# Summarise every email in 'message' with the H2O model. The target column is
# 'resume_h2o_2' (underscore): that is the name the display and evaluation
# cells read, whereas the former 'resume_h2o-2' (hyphen) was never read back.
data['resume_h2o_2'] = data['message'].apply(generate_summary_h2o)


In [None]:
# Preview the first rows of the 'resume_h2o_2' column.
# NOTE(review): `data2` is not assigned in any visible cell (summaries are
# written to `data`) — confirm where it is defined before running.
data2['resume_h2o_2'].head()

Unnamed: 0,resume_h2o_2
0,0 The email is from Phillip Allen to Tim Belde...
1,Phillip Allen suggests holding business meetin...
2,Phillip Allen sends a test email to Leah Van A...
3,Phillip Allen requests a detailed schedule wit...
4,Phillip Allen sends a message about a meeting ...


In [None]:
# Run the Phi-3 summariser on each email and keep the result next to the
# source text, in the 'resume_phi3_2' column.
data['resume_phi3_2'] = data['message'].map(generate_summary_phi3)


In [None]:
# Preview the first rows of the 'resume_phi3_2' column.
# NOTE(review): `data2` is not assigned in any visible cell (summaries are
# written to `data`) — confirm where it is defined before running.
data2['resume_phi3_2'].head()

In [None]:
# Display the DataFrame (notebook rich output of the last expression).
# NOTE(review): `data2` is not assigned in any visible cell — confirm its origin.
data2


Unnamed: 0,file,message,resume_phi3,resume_h2o,resume_phi3_2,resume_h2o_2,resume
0,allen-p/_sent_mail/1.,Message-ID: <18782981.1075855378110.JavaMail.e...,"The email, sent by Phillip Allen to Tim Belden...",The email is a message from Phillip Allen to T...,L'email envoyé par Phillip Allen à Tim Belden ...,0 The email is from Phillip Allen to Tim Belde...,Prévision partagée (14 mai 2001) – Phillip All...
1,allen-p/_sent_mail/10.,Message-ID: <15464986.1075855378456.JavaMail.e...,"The purpose of this email, sent by Phillip K A...","The sender, Phillip Allen, suggests that inste...","Dans un autre email, Phillip Allen propose à J...",Phillip Allen suggests holding business meetin...,Suggestion pour un voyage d'affaires (4 mai 20...
2,allen-p/_sent_mail/100.,Message-ID: <24216240.1075855687451.JavaMail.e...,"The email, sent by Phillip Allen to Leah Van A...",The email from Phillip Allen to Leah Van Arsda...,Un troisième email adressé à Leah Van Arsdall...,Phillip Allen sends a test email to Leah Van A...,Test réussi (18 octobre 2000) – Phillip Allen ...
3,allen-p/_sent_mail/1000.,Message-ID: <13505866.1075863688222.JavaMail.e...,"The email, sent by Phillip Allen to Randall Ga...",Phillip Allen requests a schedule detailing th...,"Dans un email à Randall Gay (23 octobre 2000),...",Phillip Allen requests a detailed schedule wit...,Demande de détails sur les salaires et niveaux...
4,allen-p/_sent_mail/1001.,Message-ID: <30922949.1075863688243.JavaMail.e...,"The email, sent by Phillip Allen to Greg Piper...",- Phillip Allen sends a message about a Tuesda...,Un autre message à Greg Piper fait suite à un...,Phillip Allen sends a message about a meeting ...,Proposition de réunion (31 août 2000) – Philli...
...,...,...,...,...,...,...,...
95,allen-p/_sent_mail/180.,Message-ID: <29919154.1075855689201.JavaMail.e...,The email from Phillip Allen to Mac D. Hargrov...,"Phillip Allen, from Enron, sends a message to ...",The email from Phillip Allen to Mac D. Hargrov...,"Phillip Allen, from Enron, sends a message to ...",I think crude price are undervalued by the tun...
96,allen-p/_sent_mail/181.,Message-ID: <4511963.1075855689223.JavaMail.ev...,"The email, sent by Phillip Allen from Enron to...","The email from ""Lucy Gonzalez"" to Phillip Alle...","The email, sent by Phillip Allen from Enron to...",The email from Lucy Gonzalez to Phillip Allen ...,Summarize the following email by focusing on t...
97,allen-p/_sent_mail/182.,Message-ID: <33111317.1075855689245.JavaMail.e...,"This email, sent by Phillip Allen of Enron to ...",This email is from Phillip Allen to pallen70@h...,"This email, sent by Phillip Allen of Enron to ...",This email is from Phillip Allen to pallen70@h...,"""Lucy Gonzalez"": ""The a/c I bought today for #..."
98,allen-p/_sent_mail/183.,Message-ID: <1665326.1075855689266.JavaMail.ev...,"The email, sent by Phillip Allen to pallen70@h...",A message from Phillip Allen at Enron to Phyll...,"The email, sent by Phillip Allen to pallen70@h...",A message from Phillip Allen at Enron to Phyll...,Summarize the following email by focusing on t...


In [None]:
# Dump every value of the 'resume_h2o' column, one per print call.
for summary_text in data['resume_h2o'].tolist():
    print(summary_text)



@@@@@@@@@@@@@@@@@@@@@@@@@@@@0
The email is a message from Phillip Allen to Tim Belden containing a text-based forecast. The message details the sender's name, the date, and the subject matter of the email, which is the forecast. The email is a part of a series of communications within an EnronXGate folder, indicating that it might be related to Enron's business operations or activities. The message is not encrypted and is in plain text. The email also includes a header that specifies the sender, the recipient, and the email's origin. The email was sent on May 14, 2001, at 4:39 PM, and the sender's email address is phillip
@@@@@@@@@@@@@@@@@@@@@@@@@@@@1
The sender, Phillip Allen, suggests that instead of traveling for business meetings, they should be held in Austin, Texas, to make the trip more enjoyable. He proposes a more interactive meeting format with round table discussions, and he personally recommends Austin for its recreational activities and shorter travel time. The email also 

In [None]:
# Write the DataFrame to Google Drive as CSV, without the index column.
# NOTE(review): this is the same path the input CSV was loaded from, so the
# source file is overwritten — confirm that is intended.
# NOTE(review): `data2` is not assigned in any visible cell — confirm its origin.
output_path = '/content/drive/MyDrive/emails_final1.csv'
data2.to_csv(output_path, index=False)

In [None]:
# Display the DataFrame again after saving (notebook rich output).
# NOTE(review): `data2` is not assigned in any visible cell — confirm its origin.
data2

Unnamed: 0,file,message,resume_phi3,resume_h2o,resume_phi3_2,resume_h2o_2,resume
0,allen-p/_sent_mail/1.,Message-ID: <18782981.1075855378110.JavaMail.e...,"The email, sent by Phillip Allen to Tim Belden...",The email is a message from Phillip Allen to T...,L'email envoyé par Phillip Allen à Tim Belden ...,0 The email is from Phillip Allen to Tim Belde...,Prévision partagée (14 mai 2001) – Phillip All...
1,allen-p/_sent_mail/10.,Message-ID: <15464986.1075855378456.JavaMail.e...,"The purpose of this email, sent by Phillip K A...","The sender, Phillip Allen, suggests that inste...","Dans un autre email, Phillip Allen propose à J...",Phillip Allen suggests holding business meetin...,Suggestion pour un voyage d'affaires (4 mai 20...
2,allen-p/_sent_mail/100.,Message-ID: <24216240.1075855687451.JavaMail.e...,"The email, sent by Phillip Allen to Leah Van A...",The email from Phillip Allen to Leah Van Arsda...,Un troisième email adressé à Leah Van Arsdall...,Phillip Allen sends a test email to Leah Van A...,Test réussi (18 octobre 2000) – Phillip Allen ...
3,allen-p/_sent_mail/1000.,Message-ID: <13505866.1075863688222.JavaMail.e...,"The email, sent by Phillip Allen to Randall Ga...",Phillip Allen requests a schedule detailing th...,"Dans un email à Randall Gay (23 octobre 2000),...",Phillip Allen requests a detailed schedule wit...,Demande de détails sur les salaires et niveaux...
4,allen-p/_sent_mail/1001.,Message-ID: <30922949.1075863688243.JavaMail.e...,"The email, sent by Phillip Allen to Greg Piper...",- Phillip Allen sends a message about a Tuesda...,Un autre message à Greg Piper fait suite à un...,Phillip Allen sends a message about a meeting ...,Proposition de réunion (31 août 2000) – Philli...
...,...,...,...,...,...,...,...
95,allen-p/_sent_mail/180.,Message-ID: <29919154.1075855689201.JavaMail.e...,The email from Phillip Allen to Mac D. Hargrov...,"Phillip Allen, from Enron, sends a message to ...",The email from Phillip Allen to Mac D. Hargrov...,"Phillip Allen, from Enron, sends a message to ...",I think crude price are undervalued by the tun...
96,allen-p/_sent_mail/181.,Message-ID: <4511963.1075855689223.JavaMail.ev...,"The email, sent by Phillip Allen from Enron to...","The email from ""Lucy Gonzalez"" to Phillip Alle...","The email, sent by Phillip Allen from Enron to...",The email from Lucy Gonzalez to Phillip Allen ...,Summarize the following email by focusing on t...
97,allen-p/_sent_mail/182.,Message-ID: <33111317.1075855689245.JavaMail.e...,"This email, sent by Phillip Allen of Enron to ...",This email is from Phillip Allen to pallen70@h...,"This email, sent by Phillip Allen of Enron to ...",This email is from Phillip Allen to pallen70@h...,"""Lucy Gonzalez"": ""The a/c I bought today for #..."
98,allen-p/_sent_mail/183.,Message-ID: <1665326.1075855689266.JavaMail.ev...,"The email, sent by Phillip Allen to pallen70@h...",A message from Phillip Allen at Enron to Phyll...,"The email, sent by Phillip Allen to pallen70@h...",A message from Phillip Allen at Enron to Phyll...,Summarize the following email by focusing on t...


In [2]:
pip install rouge_score

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=2c1773c71bcd6146324f299bc8031c79eca72446cdb0b04bd4a417b39872c910
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


In [3]:
pip install bert-score


Collecting bert-score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bert-score
Successfully installed bert-score-0.3.13


In [5]:
from pathlib import Path

import pandas as pd
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from bert_score import score

# **Step 5: Evaluation with ROUGE, BLEU and BERTScore**
# Every generated-summary column is compared with the reference column 'resume'.
rouge_scorer_tool = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
smoothie = SmoothingFunction().method4

# Columns to evaluate against 'resume'
columns_to_evaluate = ['resume_phi3', 'resume_h2o', 'resume_phi3_2', 'resume_h2o_2']

# Per-row scores, keyed by evaluated column
rouge_scores = {col: {'rouge1': [], 'rouge2': [], 'rougeL': []} for col in columns_to_evaluate}
bleu_scores = {col: [] for col in columns_to_evaluate}
bertscore_scores = {col: [] for col in columns_to_evaluate}  # For storing BERTScore

# Missing cells are read back as NaN (a float); turn them into empty strings so
# the tokenising calls below do not fail on them.
references = data['resume'].fillna("").astype(str).tolist()

for col in columns_to_evaluate:
    candidates = data[col].fillna("").astype(str).tolist()

    for reference, candidate in zip(references, candidates):
        # ROUGE F-measures
        scores = rouge_scorer_tool.score(reference, candidate)
        for metric in rouge_scores[col]:
            rouge_scores[col][metric].append(scores[metric].fmeasure)

        # BLEU on whitespace tokens, with smoothing for short texts
        reference_tokens = reference.split()
        candidate_tokens = candidate.split()
        bleu_score = sentence_bleu([reference_tokens], candidate_tokens, smoothing_function=smoothie)
        bleu_scores[col].append(bleu_score)

    # BERTScore: score the whole column in one call so the scoring model is
    # loaded once per column instead of once per sentence pair.
    if candidates:
        P, R, F1 = score(candidates, references, lang='en')
        bertscore_scores[col] = F1.tolist()

# Average ROUGE, BLEU and BERTScore per column (0 when there are no rows)
average_rouge_scores = {col: {metric: sum(scores) / len(scores) if scores else 0
                              for metric, scores in rouge_scores[col].items()}
                       for col in rouge_scores}

average_bleu_scores = {col: sum(scores) / len(scores) if scores else 0
                       for col, scores in bleu_scores.items()}

average_bertscore_scores = {col: sum(scores) / len(scores) if scores else 0
                            for col, scores in bertscore_scores.items()}

# Display the results
for col in columns_to_evaluate:
    print(f"Scores moyens ROUGE pour {col}:")
    print(f"  ROUGE-1: {average_rouge_scores[col]['rouge1']}")
    print(f"  ROUGE-2: {average_rouge_scores[col]['rouge2']}")
    print(f"  ROUGE-L: {average_rouge_scores[col]['rougeL']}")
    print(f"Score moyen BLEU pour {col}: {average_bleu_scores[col]}")
    print(f"Score moyen BERTScore pour {col}: {average_bertscore_scores[col]}")
    print("\n")

# Save the averaged results to a file (one row per evaluated column), so the
# confirmation message below is only printed after the file really exists.
output_path = "path/to/output/file"  # Replace with the path of your output file
results_table = pd.DataFrame.from_dict(
    {
        col: {
            'rouge1': average_rouge_scores[col]['rouge1'],
            'rouge2': average_rouge_scores[col]['rouge2'],
            'rougeL': average_rouge_scores[col]['rougeL'],
            'bleu': average_bleu_scores[col],
            'bertscore_f1': average_bertscore_scores[col],
        }
        for col in columns_to_evaluate
    },
    orient='index',
)
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
results_table.to_csv(output_path, index_label='column')
print(f"Les résultats ont été enregistrés dans {output_path}.")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['ro

Scores moyens ROUGE pour resume_phi3:
  ROUGE-1: 0.19174042877389882
  ROUGE-2: 0.05444469026867209
  ROUGE-L: 0.12998370549260674
Score moyen BLEU pour resume_phi3: 0.020929190265824796
Score moyen BERTScore pour resume_phi3: 0.8172372645139694


Scores moyens ROUGE pour resume_h2o:
  ROUGE-1: 0.24834541067361487
  ROUGE-2: 0.0931865986870522
  ROUGE-L: 0.18012826021033554
Score moyen BLEU pour resume_h2o: 0.03844031024479857
Score moyen BERTScore pour resume_h2o: 0.8268438225984573


Scores moyens ROUGE pour resume_phi3_2:
  ROUGE-1: 0.2154632100643438
  ROUGE-2: 0.053201035194293145
  ROUGE-L: 0.14664846627891687
Score moyen BLEU pour resume_phi3_2: 0.0256734295101145
Score moyen BERTScore pour resume_phi3_2: 0.8201579874753953


Scores moyens ROUGE pour resume_h2o_2:
  ROUGE-1: 0.2100917209823792
  ROUGE-2: 0.07621098839724227
  ROUGE-L: 0.15629520232369232
Score moyen BLEU pour resume_h2o_2: 0.028991323594741165
Score moyen BERTScore pour resume_h2o_2: 0.8272171658277512


Les rés

# Rapport sur la Comparaison des Prompts et Analyse des Erreurs pour la Tâche de Résumé de Texte

## 1. Introduction

L’objectif de ce projet est d’évaluer les performances de trois modèles de génération de texte (NVIDIA/Minitron-4B-Base, Microsoft/Phi-3-mini-128k-instruct, H2O-Danube3-4B-Chat) sur la tâche de résumé de texte. Deux formulations de prompts ont été utilisées pour chaque modèle :

- **Prompt 1** : "Summarize the email in a concise paragraph."
- **Prompt 2** : "Analyze the email and provide a detailed paragraph describing its purpose, key ideas, and recommendations."

Les scores **ROUGE** et **BLEU** ont été calculés en comparant chaque résumé généré au résumé de référence produit par le modèle Google/Pegasus-CNN-Dailymail, afin d’évaluer la qualité des résumés.

## 2. Résultats par Modèle et Prompt

### 2.1. Microsoft/Phi-3-mini-128k-instruct

| Prompt   | ROUGE-1  | ROUGE-2  | ROUGE-L  | BLEU    |
|----------|----------|----------|----------|---------|
| Prompt 1 | 0.1917   | 0.0544   | 0.1299   | 0.0209  |
| Prompt 2 | 0.2154   | 0.0532   | 0.1466   | 0.0257  |

- **Analyse** :
    - Les scores augmentent avec **Prompt 2** pour **ROUGE-1** (+12.4%) et **ROUGE-L** (+12.8%).
    - Cela montre que le prompt détaillé améliore la richesse du résumé, même si **ROUGE-2** reste stable.
    - **BLEU** montre également une légère amélioration.

### 2.2. H2O-Danube3-4B-Chat

| Prompt   | ROUGE-1  | ROUGE-2  | ROUGE-L  | BLEU    |
|----------|----------|----------|----------|---------|
| Prompt 1 | 0.2100   | 0.0762   | 0.1563   | 0.0290  |
| Prompt 2 | 0.2483   | 0.0932   | 0.1801   | 0.0384  |

- **Analyse** :
    - Les résultats sont maintenant inversés : **Prompt 1** est moins performant que **Prompt 2** pour **ROUGE-1** (0.2100 contre 0.2483) et **ROUGE-2** (0.0762 contre 0.0932).
    - De même, **ROUGE-L** est plus faible avec **Prompt 1** (0.1563) qu'avec **Prompt 2** (0.1801), ce qui suggère que **Prompt 2** est plus adapté pour générer des résumés plus détaillés.

### 2.3. NVIDIA/Minitron-4B-Base

| Prompt   | ROUGE-1  | ROUGE-2  | ROUGE-L  | BLEU    |
|----------|----------|----------|----------|---------|
| Prompt 1 | 0.1155   | 0.0072   | 0.0841   | 0.0057  |
| Prompt 2 | 0.1269   | 0.0084   | 0.0868   | 0.0062  |

- **Analyse** :
    - **Prompt 2** donne de meilleurs scores pour **ROUGE-1** (0.1269 vs 0.1155) et **ROUGE-2** (0.0084 vs 0.0072) comparé à **Prompt 1**. Cela montre que **Prompt 2** génère des résumés plus pertinents en termes de correspondance avec le modèle de référence, particulièrement pour les éléments clés du texte.
    
    - Le score **BLEU** est très faible dans les deux cas (0.0062 pour **Prompt 2** et 0.0057 pour **Prompt 1**), ce qui suggère que les résumés produits ne correspondent pas précisément au texte de référence au niveau des n-grammes, ce qui est typique pour des résumés très concis ou détaillés qui diffèrent structurellement du texte source.
    
    - Le **ROUGE-L** est relativement faible également (0.0868 pour **Prompt 2** et 0.0841 pour **Prompt 1**), ce qui indique que les modèles ne parviennent pas à capturer parfaitement la structure longue du texte source, mais le score reste relativement similaire pour les deux prompts. Cela peut être dû à la difficulté de conserver la structure complète d'un texte plus long, même dans un résumé détaillé.
    
    - Il est important de noter que ces résultats montrent que **NVIDIA/Minitron-4B-Base** a une capacité à générer des résumés cohérents, mais que les scores **BLEU** et **ROUGE-L** restent faibles en comparaison avec les autres modèles comme **H2O-Danube3-4B**, ce qui peut indiquer une incapacité à saisir les relations longues et complexes dans le texte.

### 2.4. stabilityai/stablelm-zephyr-3b

- **Analyse** :
    - Le modèle **stabilityai/stablelm-zephyr-3b** a montré de bons résultats. Bien qu'il n'y ait pas de métriques d’évaluation détaillées dans ce rapport, ce modèle est performant et génère des résumés de qualité qui correspondent bien aux attentes de la tâche.

## 3. Comparaison des Meilleurs Prompts

| Modèle              | Meilleur Prompt | ROUGE-1  | ROUGE-2  | ROUGE-L  | BLEU    |
|---------------------|-----------------|----------|----------|----------|---------|
| Microsoft/Phi-3     | Prompt 2        | 0.2154   | 0.0532   | 0.1466   | 0.0257  |
| H2O-Danube3-4B      | Prompt 2        | 0.2483   | 0.0932   | 0.1801   | 0.0384  |
| NVIDIA/Minitron-4B  | Prompt 2        | 0.1269   | 0.0084   | 0.0868   | 0.0062  |

- **Observation générale** :
    - **H2O-Danube3-4B** avec **Prompt 2** donne les meilleurs scores **ROUGE-1**, **ROUGE-2**, et **ROUGE-L**, avec un **BLEU** de 0.0384, ce qui en fait le modèle le plus performant dans cette comparaison.
    - **Microsoft/Phi-3-mini-128k** obtient de meilleurs résultats avec **Prompt 2**, mais reste en dessous de H2O-Danube3-4B.
    - **NVIDIA/Minitron-4B**, bien que performant, a un score **BLEU** plus faible, ce qui indique que la structure des résumés générés est moins conforme aux attentes des métriques basées sur des n-grammes.

## 4. Temps d’Exécution

| Modèle               | Temps d'exécution |
|----------------------|-------------------|
| NVIDIA/Minitron-4B   | 2h10min           |
| Microsoft/Phi-3-mini | 1h20min           |
| H2O-Danube3-4B       | 1h40min           |
| stablelm-zephyr-3b   | 30min           |

- **Analyse** :
    - **stabilityai/stablelm-zephyr-3b** est le plus rapide (30min) tout en offrant des performances solides avec **Prompt 2**.
    - **Microsoft/Phi-3-mini** (1h20min) et **H2O-Danube3-4B** (1h40min) sont plus lents, mais fournissent les meilleurs scores.
    - **NVIDIA/Minitron-4B** (2h10min) est le modèle le plus lent, malgré des résultats moyens.

## 5. Analyse des Erreurs

### 5.1. Par Prompt

- **Prompt 1** :
    - Génère des résumés plus concis, mais manque parfois de détails critiques comme les recommandations ou idées secondaires.
    - Peut conduire à des résumés plus proches du texte source, mais à un niveau de détail plus faible.

- **Prompt 2** :
    - Fournit des résumés détaillés mais inclut parfois des informations redondantes ou moins pertinentes.

### 5.2. Par Modèle

- **Microsoft/Phi-3-mini** : Bonne capacité d’analyse avec **Prompt 2**, mais tendance à ajouter des informations implicites non présentes dans le texte source.
- **H2O-Danube3-4B** : Résumés équilibrés et précis avec **Prompt 2**, bien qu'une légère perte de détail apparaisse avec **Prompt 1**.
- **NVIDIA/Minitron-4B** : Temps d’exécution élevé avec des résumés parfois moins cohérents pour des textes complexes.

## 6. Conclusion

Le modèle **H2O-Danube3-4B-Chat** avec **Prompt 2** s'est montré le plus performant, avec les meilleurs scores **ROUGE-1**, **ROUGE-2**, et **ROUGE-L**, mais avec un score **BLEU** modeste. Cependant, la complexité du texte et le choix du prompt jouent un rôle crucial dans l'amélioration des résultats. Les résultats montrent que les prompts plus détaillés (comme **Prompt 2**) peuvent parfois offrir une meilleure couverture des informations, mais aussi mener à des résumés moins précis dans certaines situations.

