### In this notebook I've compared the Bangla summarization ROUGE test results of 5 SOTA models. The result dataset is available [here](https://kaggle.com/datasets/5d06d02e6778b7d73c2dd67b30db407e4247ba80e69f9d96f974ea94b05064eb).

### From this comparison, we can see that the Gemma 2 9B model performs best overall among the compared models. LLaMA 3.1 and Mistral FILM are close competitors to Gemma 2 9B. The LLaMA 3 8B model performs worse than the Gemma 2B model.

To see all the comparisons, please scroll down to the end.

In [1]:
import pandas as pd

In [2]:
# Load the per-model ROUGE result tables: for every model prefix there is one
# ROUGE-1 file and one ROUGE-L file. Each pair is read in (rouge1, rougeL) order.
# NOTE(review): the prefixes presumably map to Mistral FILM, Llama 3.1, Gemma 2b,
# Llama 3 and Gemma2 9b respectively -- confirm against the 'Model' column.
film_rouge1, film_rougeL = map(pd.read_csv, (
    '/kaggle/input/ROUGE_test_result/FILM_rouge1_results.csv',
    '/kaggle/input/ROUGE_test_result/FILM_rougeL_results.csv',
))
la31_rouge1, la31_rougeL = map(pd.read_csv, (
    '/kaggle/input/ROUGE_test_result/L31_rouge1_results.csv',
    '/kaggle/input/ROUGE_test_result/L31_rougeL_results.csv',
))
g2b_rouge1, g2b_rougeL = map(pd.read_csv, (
    '/kaggle/input/ROUGE_test_result/g2b_rouge1_results.csv',
    '/kaggle/input/ROUGE_test_result/g2b_rougeL_results.csv',
))
la3_rouge1, la3_rougeL = map(pd.read_csv, (
    '/kaggle/input/ROUGE_test_result/la3_rouge1_results.csv',
    '/kaggle/input/ROUGE_test_result/la3_rougeL_results.csv',
))
gm29_rouge1, gm29_rougeL = map(pd.read_csv, (
    '/kaggle/input/ROUGE_test_result/gm29_rouge1_results.csv',
    '/kaggle/input/ROUGE_test_result/gm29_rougeL_results.csv',
))

In [3]:
import pandas as pd

# Stack the five ROUGE-1 result tables row-wise into a single frame.
dataframes = [
    film_rouge1,
    la31_rouge1,
    g2b_rouge1,
    la3_rouge1,
    gm29_rouge1,
]

# Row-wise stacking (axis=0) is pd.concat's default; ignore_index=True
# renumbers the rows 0..n-1 instead of repeating each table's own index.
combined_df = pd.concat(objs=dataframes, ignore_index=True)


In [None]:
# Sanity check of the five ROUGE-L result tables before they are stacked:
# show each table's (rows, columns) shape instead of dumping every frame in
# full, which would flood the cell output.
{
    'film_rougeL': film_rougeL.shape,
    'la31_rougeL': la31_rougeL.shape,
    'g2b_rougeL': g2b_rougeL.shape,
    'la3_rougeL': la3_rougeL.shape,
    'gm29_rougeL': gm29_rougeL.shape,
}

In [4]:
# Stack the five ROUGE-L result tables into one frame with a fresh 0..n-1 index.
combined_dfL = pd.concat(
    objs=[
        film_rougeL,
        la31_rougeL,
        g2b_rougeL,
        la3_rougeL,
        gm29_rougeL,
    ],
    ignore_index=True,
)


In [5]:
# Per-model mean of the three ROUGE-L columns; 'Model' is turned back into a
# regular column by reset_index().
avg_metricsL = (
    combined_dfL
    .groupby('Model')
    .agg(dict.fromkeys(['R-L Precision', 'R-L Recall', 'R-L F1'], 'mean'))
    .reset_index()
)

# Print the table of averages
print(avg_metricsL)

          Model  R-L Precision  R-L Recall    R-L F1
0      Gemma 2b       0.475575    0.387345  0.393385
1     Gemma2 9b       0.512990    0.430421  0.451765
2       Llama 3       0.409070    0.445607  0.390156
3     Llama 3.1       0.488226    0.431421  0.433025
4  Mistral FILM       0.505167    0.412302  0.432554


In [8]:
# Per-model mean of the three ROUGE-1 columns.
# The result is stored in `avg_metrics`, not `avg_metricsL`: the latter holds
# the ROUGE-L averages, and reusing that name here would silently replace them
# with ROUGE-1 numbers that have different column names.
avg_metrics = combined_df.groupby('Model').agg({
    'R1 Precision': 'mean',
    'R1 Recall': 'mean',
    'R1 F1': 'mean'
}).reset_index()

# Display the average scores
print(avg_metrics)

          Model  R1 Precision  R1 Recall     R1 F1
0      Gemma 2b      0.153978   0.120111  0.123750
1     Gemma2 9b      0.221329   0.176950  0.189677
2       Llama 3      0.124687   0.135009  0.118648
3     Llama 3.1      0.187373   0.161199  0.164028
4  Mistral FILM      0.194095   0.147957  0.159568


In [13]:
import plotly.express as px

# R1 Precision against context size, one coloured line (with markers) per
# value of the 'Model' column. update_layout() returns the figure, so the
# axis and legend titles are set in the same expression.
fig = (
    px.line(
        data_frame=combined_df,
        x='Context size',
        y='R1 Precision',
        color='Model',
        markers=True,
        title='R1 Precision vs Context size for each Model',
    )
    .update_layout(
        xaxis_title='Context size',
        yaxis_title='R1 Precision',
        legend_title='Model',
    )
)

fig.show()


In [16]:
import plotly.express as px

# R1 Recall against context size, one coloured line (with markers) per
# value of the 'Model' column. update_layout() returns the figure, so the
# axis and legend titles are set in the same expression.
fig = (
    px.line(
        data_frame=combined_df,
        x='Context size',
        y='R1 Recall',
        color='Model',
        markers=True,
        title='R1 Recall vs Context size for each Model',
    )
    .update_layout(
        xaxis_title='Context size',
        yaxis_title='R1 Recall',
        legend_title='Model',
    )
)

fig.show()

In [17]:
import plotly.express as px


# R1 F1 against context size, one coloured line (with markers) per value of
# the 'Model' column. update_layout() returns the figure, so the axis and
# legend titles are set in the same expression.
fig = (
    px.line(
        data_frame=combined_df,
        x='Context size',
        y='R1 F1',
        color='Model',
        markers=True,
        title='R1 F1 vs Context size for each Model',
    )
    .update_layout(
        xaxis_title='Context size',
        yaxis_title='R1 F1',
        legend_title='Model',
    )
)

# Render the figure
fig.show()

In [24]:
import pandas as pd
import plotly.graph_objects as go



# The three ROUGE-1 score columns used throughout this cell.
r1_metrics = ('R1 Precision', 'R1 Recall', 'R1 F1')

# Force the score columns to numeric dtype; values that cannot be parsed
# become NaN (errors='coerce') and are then skipped by the mean below.
for metric in r1_metrics:
    combined_df[metric] = pd.to_numeric(combined_df[metric], errors='coerce')

# Per-model mean of each ROUGE-1 column, with 'Model' as a regular column.
avg_metrics = (
    combined_df
    .groupby('Model')
    .agg(dict.fromkeys(r1_metrics, 'mean'))
    .reset_index()
)

# One bar trace per metric, in the order precision / recall / F1.
fig = go.Figure(data=[
    go.Bar(
        x=avg_metrics['Model'],
        y=avg_metrics[metric_name],
        name=metric_name,
        marker_color=bar_color,
    )
    for metric_name, bar_color in zip(r1_metrics, ('blue', 'green', 'red'))
])

# barmode='group' places the three bars of a model side by side;
# the transparent plot background and slanted tick labels are cosmetic.
fig.update_layout(
    title='Comparison of Average R1 Metrics for Each Model',
    xaxis_title='Model',
    yaxis_title='Average Score',
    barmode='group',
    legend_title='Metrics',
    plot_bgcolor='rgba(0,0,0,0)',
    xaxis=dict(tickangle=-45),
)

fig.show()


In [40]:
import plotly.express as px



# R-L Precision against context size, one coloured line (with markers) per
# value of the 'Model' column. update_layout() returns the figure, so the
# axis and legend titles are set in the same expression.
fig = (
    px.line(
        data_frame=combined_dfL,
        x='Context size',
        y='R-L Precision',
        color='Model',
        markers=True,
        title='R-L Precision vs Context size for each Model',
    )
    .update_layout(
        xaxis_title='Context size',
        yaxis_title='R-L Precision',
        legend_title='Model',
    )
)

fig.show()

In [41]:
import plotly.express as px



# R-L Recall against context size, one coloured line (with markers) per
# value of the 'Model' column. update_layout() returns the figure, so the
# axis and legend titles are set in the same expression.
fig = (
    px.line(
        data_frame=combined_dfL,
        x='Context size',
        y='R-L Recall',
        color='Model',
        markers=True,
        title='R-L Recall vs Context size for each Model',
    )
    .update_layout(
        xaxis_title='Context size',
        yaxis_title='R-L Recall',
        legend_title='Model',
    )
)

fig.show()

In [43]:
import plotly.express as px


# R-L F1 against context size, one coloured line (with markers) per value of
# the 'Model' column. update_layout() returns the figure, so the axis and
# legend titles are set in the same expression.
fig = (
    px.line(
        data_frame=combined_dfL,
        x='Context size',
        y='R-L F1',
        color='Model',
        markers=True,
        title='R-L F1 vs Context size for each Model',
    )
    .update_layout(
        xaxis_title='Context size',
        yaxis_title='R-L F1',
        legend_title='Model',
    )
)

fig.show()

In [46]:
import pandas as pd
import plotly.graph_objects as go

# The three ROUGE-L score columns used throughout this cell.
rl_metrics = ('R-L Precision', 'R-L Recall', 'R-L F1')

# Force the score columns to numeric dtype; values that cannot be parsed
# become NaN (errors='coerce') and are then skipped by the mean below.
for metric in rl_metrics:
    combined_dfL[metric] = pd.to_numeric(combined_dfL[metric], errors='coerce')

# Per-model mean of each ROUGE-L column, with 'Model' as a regular column.
avg_metricsL = (
    combined_dfL
    .groupby('Model')
    .agg(dict.fromkeys(rl_metrics, 'mean'))
    .reset_index()
)

# One bar trace per metric, in the order precision / recall / F1.
fig = go.Figure(data=[
    go.Bar(
        x=avg_metricsL['Model'],
        y=avg_metricsL[metric_name],
        name=metric_name,
        marker_color=bar_color,
    )
    for metric_name, bar_color in zip(rl_metrics, ('blue', 'green', 'red'))
])

# barmode='group' places the three bars of a model side by side;
# the transparent plot background and slanted tick labels are cosmetic.
fig.update_layout(
    title='Comparison of Average R-L Metrics for Each Model',
    xaxis_title='Model',
    yaxis_title='Average Score',
    barmode='group',
    legend_title='Metrics',
    plot_bgcolor='rgba(0,0,0,0)',
    xaxis=dict(tickangle=-45),
)

fig.show()
