In [8]:
import warnings

import numpy as np
import pandas as pd

In [9]:
# Path (relative to this notebook) of the consolidated experiment results.
RESULTS_CSV = '../full-experiments/all_results/ALL_results.csv'

# Load every recorded result row into a single DataFrame.
data = pd.read_csv(RESULTS_CSV)

In [4]:
# Collapse the results to a single row per dataset: when a 'Dataset' value
# appears more than once, only its last occurrence in `data` is kept.
df = data.drop_duplicates(
    subset=['Dataset'],
    keep='last',
)

In [10]:
# Quick sanity check: show the first rows, then the list of column names.
for preview in (df.head(), df.columns):
    print(preview)

              Dataset  corr_threshold  const_threshold  seed  \
639  full-dv-data.csv            0.95              0.2   489   

                                    class_distribution  dropout  regular  \
639  target\n0    3871\n1    2575\nName: count, dty...     2575     3871   

    Model  Accuracy  Balanced Accuracy  f1-score    recall  precision  \
639    nb  0.810115           0.809462  0.772321  0.806214   0.741164   

     CM - True Positive  CM - False Negative  CM - False Positive  \
639                3146                  725                  499   

     CM - True Negative  total_examples  dropout_rate  
639                2076            6446      0.399473  
Index(['Dataset', 'corr_threshold', 'const_threshold', 'seed',
       'class_distribution', 'dropout', 'regular', 'Model', 'Accuracy',
       'Balanced Accuracy', 'f1-score', 'recall', 'precision',
       'CM - True Positive', 'CM - False Negative', 'CM - False Positive',
       'CM - True Negative', 'total_examples', 

In [11]:
# Build the LaTeX "datasets" table: one row per dataset in `df` whose dropout
# rate is above MIN_DROPOUT_RATE.
N_FEATURES = 43           # value printed in the "Features" column of every row
MIN_DROPOUT_RATE = 0.25   # datasets at or below this dropout rate are left out

# Compute the totals and the dropout rate.
# `df` was produced by drop_duplicates(); assigning new columns on it in place
# can raise SettingWithCopyWarning, so rebind `df` to the frame returned by
# assign() instead. `df` still ends up carrying both columns.
df = df.assign(
    total_examples=lambda d: d['regular'] + d['dropout'],
    dropout_rate=lambda d: d['dropout'] / d['total_examples'],
)

# Keep only the datasets whose dropout rate is greater than the threshold.
filtered_df = df[df['dropout_rate'] > MIN_DROPOUT_RATE]

# Fixed LaTeX preamble of the table (everything up to and including \midrule).
latex_header = r"""
% ==========
\begin{table*}[htb]
    \centering
    \footnotesize
     \caption{Binary classification tasks generated for the experiments}
    \label{tab:datasets}
    \begin{tabular}{lccccc}
    \toprule

    \multirow{2}{*}{\textbf{Acronym (Regular vs Dropout)}} & \multirow{2}{*}{\textbf{Features}} & \multirow{2}{*}{\textbf{Examples}} & \textbf{Class} & \textbf{Class} & \textbf{Dropout} \\
    & & & \textbf{Regular} & \textbf{Dropout} & \textbf{rate} \\

    \midrule
"""

# Fixed LaTeX closing of the table.
latex_footer = r"""
    \bottomrule
    \end{tabular}
 
\end{table*}
% ==========
"""

# One table line per dataset; collected in a list and joined once instead of
# growing the string with repeated `+=`.
table_rows = []
for _, row in filtered_df.iterrows():
    # Escape underscores in the dataset name so LaTeX prints them literally.
    dataset_name = row['Dataset'].replace('_', r'\_')
    table_rows.append(
        f"        {dataset_name} & {N_FEATURES} & {row['total_examples']} & {row['regular']} & {row['dropout']} & {row['dropout_rate']:.2f} \\\\ \n"
    )

latex = latex_header + "".join(table_rows) + latex_footer

print(latex)



\begin{table*}[htb]
    \centering
    \footnotesize
     \caption{Binary classification tasks generated for the experiments}
    \label{tab:datasets}
    \begin{tabular}{lccccc}
    \toprule

    \multirow{2}{*}{\textbf{Acronym (Regular vs Dropout)}} & \multirow{2}{*}{\textbf{Features}} & \multirow{2}{*}{\textbf{Examples}} & \textbf{Class} & \textbf{Class} & \textbf{Dropout} \\
    & & & \textbf{Regular} & \textbf{Dropout} & \textbf{rate} \\

    \midrule
        full-dv-data.csv & 43 & 6446 & 3871 & 2575 & 0.40 \\ 

    \bottomrule
    \end{tabular}
 
\end{table*}



In [7]:
# Build the LaTeX "datasets" table restricted to an explicit list of datasets.
N_FEATURES = 43   # value printed in the "Features" column of every row

# Datasets to include in the table.
datasets_to_include = [
    "data_dv_last_occurence_FormReg_Desis.csv",
    "data_dv_last_occurence_FormReg_DesisTran.csv",
    "data_dv_last_occurence_Reg_Desis.csv",
    "data_dv_last_occurence_Reg_DesisTran.csv",
    "data_dv_last_occurence_Form_Desis.csv",
    "data_dv_FormReg_Desis.csv",
    "data_dv_FormReg_DesisTran.csv",
    "data_dv_Reg_Desis.csv",
    "data_dv_Reg_DesisTran.csv",
    "data_dv_Form_Desis.csv"
]

# Requested names that are absent from `df` would otherwise be dropped without
# any notice (producing a partial or completely empty table), so report them.
available_datasets = set(df['Dataset'])
missing_datasets = [name for name in datasets_to_include if name not in available_datasets]
if missing_datasets:
    warnings.warn(
        f"{len(missing_datasets)} of {len(datasets_to_include)} requested datasets "
        f"are not present in df and will be missing from the table: {missing_datasets}"
    )

# Keep only the requested datasets and compute the totals and the dropout rate.
# assign() returns a new frame, so no columns are written onto a slice of `df`
# (which is what triggers SettingWithCopyWarning).
filtered_df = df[df['Dataset'].isin(datasets_to_include)].assign(
    total_examples=lambda d: d['regular'] + d['dropout'],
    dropout_rate=lambda d: d['dropout'] / d['total_examples'],
)

# Fixed LaTeX preamble of the table (everything up to and including \midrule).
latex_header = r"""
% ==========
\begin{table*}[htb]
    \centering
    \footnotesize
     \caption{Binary classification tasks generated for the experiments}
    \label{tab:datasets}
    \begin{tabular}{lccccc}
    \toprule

    \multirow{2}{*}{\textbf{Acronym (Regular vs Dropout)}} & \multirow{2}{*}{\textbf{Features}} & \multirow{2}{*}{\textbf{Examples}} & \textbf{Class} & \textbf{Class} & \textbf{Dropout} \\
    & & & \textbf{Regular} & \textbf{Dropout} & \textbf{rate} \\

    \midrule
"""

# Fixed LaTeX closing of the table.
latex_footer = r"""
    \bottomrule
    \end{tabular}
 
\end{table*}
% ==========
"""

# One table line per dataset; collected in a list and joined once instead of
# growing the string with repeated `+=`.
table_rows = []
for _, row in filtered_df.iterrows():
    # Escape underscores in the dataset name so LaTeX prints them literally.
    dataset_name = row['Dataset'].replace('_', r'\_')
    table_rows.append(
        f"        {dataset_name} & {N_FEATURES} & {row['total_examples']} & {row['regular']} & {row['dropout']} & {row['dropout_rate']:.2f} \\\\ \n"
    )

latex = latex_header + "".join(table_rows) + latex_footer

print(latex)



\begin{table*}[htb]
    \centering
    \footnotesize
     \caption{Binary classification tasks generated for the experiments}
    \label{tab:datasets}
    \begin{tabular}{lccccc}
    \toprule

    \multirow{2}{*}{\textbf{Acronym (Regular vs Dropout)}} & \multirow{2}{*}{\textbf{Features}} & \multirow{2}{*}{\textbf{Examples}} & \textbf{Class} & \textbf{Class} & \textbf{Dropout} \\
    & & & \textbf{Regular} & \textbf{Dropout} & \textbf{rate} \\

    \midrule

    \bottomrule
    \end{tabular}
 
\end{table*}

