This notebook goal is to plot a figure using no custom function before we get to design and write useful ones

# Imports

In [38]:
# Third Parties
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import numpy as np 

# Native

# Custom 
# /

# Classes

# Functions

# Open file and prepare for plotting

In [39]:
OG = pd.read_csv(
    '../../data/2025-01-07-2024-10-29-quinquadef4-neat-abstract-bert.csv'
)
GLOBAL_INDEX = OG.index


Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.



In [40]:
OG.columns

Index(['iddef', 'titre', 'soustitre', 'auteurs', 'annee', 'revue', 'num_titre',
       'num_num', 'abstract', 'url_art', 'auteurs_fam', 'auteurs_prenom',
       'source', 'n_auteurs', 'femme', 'annee5', 'majo_femmes', 'sexe3',
       'tag_race', 'tag_inclusif', 'bert_pronoms', 'bert_quanti', 'bert_quali',
       'bert_genre', 'bert_genre_stat', 'bert_classe_large',
       'bert_classe_stricte', 'bert_genre_pas_stat', 'bert_monostricte',
       'bert_interstricte_2plus', 'bert_interstricte_3',
       'bert_interstricte_GC', 'bert_interstricte_GR', 'bert_interstricte_CR',
       'bert_interstricte_cl', 'bert_interstricte_cl2', 'bert_monolarge',
       'bert_interlarge_2plus', 'bert_interlarge_3', 'bert_interlarge_GC',
       'bert_interlarge_GR', 'bert_interlarge_CR', 'bert_interlarge_cl',
       'bert_interlarge_cl2'],
      dtype='object')

In [41]:
df = OG.loc[:,['annee','revue','bert_genre', 'bert_genre_stat',
               'bert_genre_pas_stat']]

The goal is to plot the evolution of the percentage of articles that summon the concept of gender through the year

In [42]:
years = list(set(df['annee']))

In [43]:
# get the percentage of articles summoning the concept of gender through the
# years

# we are going to isolate 4 groups: 
    # all of them
    # ARSS revue
    # a group of revues named TYPE
    # a group of revues named GENERALE

ARSS = ['Actes de la recherche en sciences sociales']
GENERALE = [
    "Actes de la recherche en sciences sociales",
    "L'Année sociologique", 
    "Revue française de sociologie",
    "Sociologie",
    "Sociétés contemporaines",
    "Sociologie du travail"
    ]
TYPE = [
    "Actes de la recherche en sciences sociales",
    "Revue française de sociologie",
    "Revue française de science politique",
    "Archives de sciences sociales des religions",
    "L’Homme",
    "Ethnologie française",
    "Population",
    "Annales. Histoire, Sciences Sociales"
    "Revue d’histoire moderne & contemporaine",
    "Réseaux",
    "Revue française d'économie",
    "Espaces et sociétés"
]

# Through The Years _ 
TTY_ALL = []
TTY_ARSS = []
TTY_TYPE = []
TTY_GENERALE = []

# Creating df dedicated to ARSS, TYPE and GENERALE 
bool_ARSS = [
    revue in ARSS for revue in df.loc[:,'revue']
]
bool_TYPE = [
    revue in TYPE for revue in df.loc[:,'revue']
]
bool_GENERALE = [
    revue in GENERALE for revue in df.loc[:,'revue']
]


df_ARSS = df.loc[bool_ARSS,:]
df_TYPE = df.loc[bool_TYPE,:]
df_GENERALE = df.loc[bool_GENERALE,:]

for y in years :
    TTY_ALL.append(
        df.loc[
            df.loc[:,'annee'] == y,     # rows
            'bert_genre'                # column
        ].mean()                        # take the mean
    )
    TTY_ARSS.append(
        df_ARSS.loc[
            df_ARSS.loc[:,'annee'] == y,
            'bert_genre'
        ].mean()
    )
    TTY_TYPE.append(
        df_TYPE.loc[
            df_TYPE.loc[:,'annee'] == y,
            'bert_genre'
        ].mean()
    )
    TTY_GENERALE.append(
        df_GENERALE.loc[
            df_GENERALE.loc[:,'annee'] == y,
            'bert_genre'
        ].mean()
    )

We are also comparing the definition used to consider an article has summoned the concept of gender. We do the same work for the `stat` definition

In [57]:
# Through The Years _ 
TTY_ALL_STAT = []
TTY_ARSS_STAT = []
TTY_TYPE_STAT = []
TTY_GENERALE_STAT = []

# NOTE regarder comment on traite les na

for y in years :
    TTY_ALL_STAT.append(
        df.loc[
            df.loc[:,'annee'] == y,     # rows
            'bert_genre_stat'                # column
        ].mean()                        # take the mean
    )
    TTY_ARSS_STAT.append(
        df_ARSS.loc[
            df_ARSS.loc[:,'annee'] == y,
            'bert_genre_stat'
        ].mean()
    )
    TTY_TYPE_STAT.append(
        df_TYPE.loc[
            df_TYPE.loc[:,'annee'] == y,
            'bert_genre_stat'
        ].mean()
    )
    TTY_GENERALE_STAT.append(
        df_GENERALE.loc[
            df_GENERALE.loc[:,'annee'] == y,
            'bert_genre_stat'
        ].mean()
    )

# Plotting

The plotting section is first going to turn all the data into a usable data frame. We are going to use the following logic. : 
> Each row is a data, it will contain the year, percentage and any tag used for the actual plotting section

## Turn into a usable dataframe

### Parameters

In [58]:
GRN_colour = 'green'
RED_colour = 'red'
BLU_colour = 'blue'
ORG_colour = 'green'

### 

In [59]:
N = len(years)
ext_vect = ['Définition extensive' for _ in range(N)]
stt_vect = ['Définition statistique' for _ in range(N)]

GRN_vect = ["Toutes " for _ in range(N)]
RED_vect = ["ARSS" for _ in range(N)]
BLU_vect = ["Revues TYPE" for _ in range(N)]
ORG_vect = ["Revues GENERALES" for _ in range(N)]

plotting_df = pd.DataFrame(dict(
    annee       = np.tile(years, 8),
    pourcentage = np.concatenate(
                        (TTY_ALL      , TTY_ALL_STAT,
                         TTY_ARSS     , TTY_ARSS_STAT,
                         TTY_TYPE     , TTY_TYPE_STAT,
                         TTY_GENERALE , TTY_GENERALE_STAT)      
                    ) ,
    line_dash_style = np.concatenate(
                        (ext_vect,  stt_vect,
                         ext_vect,  stt_vect,
                         ext_vect,  stt_vect,
                         ext_vect,  stt_vect)   
    ),
    colour_style = np.concatenate(
                        (GRN_vect,  GRN_vect,
                         RED_vect,  RED_vect,
                         BLU_vect,  BLU_vect,
                         ORG_vect,  ORG_vect)   
    )
))

In [60]:
plotting_df.head()

Unnamed: 0,annee,pourcentage,line_dash_style,colour_style
0,2001,0.076012,Définition extensive,Toutes
1,2002,0.102542,Définition extensive,Toutes
2,2003,0.09319,Définition extensive,Toutes
3,2004,0.083115,Définition extensive,Toutes
4,2005,0.096503,Définition extensive,Toutes


## Actually plotting

In [61]:
fig = px.line(
    plotting_df,
    x = 'annee', y = 'pourcentage',
    color = 'colour_style', line_dash = 'line_dash_style',
    labels = {
            'colour_style' : 'Groupe de revues',
            'line_dash_style' : 'Définition du genre'
        }
)

In [62]:
fig

### Customisation

In [63]:
# Setting title and axis names
fig = fig.update_layout(dict(
    title = ("Évolution de la proportion d'articles publiés invoquant le thème "
             "du genre (def extensive et statistique)"),
    xaxis = dict(title = "Année de publication"),
    yaxis = dict(title = "Part des articles qui invoquent le concept du genre"),    
))

In [64]:
# https://plotly.com/python/reference/layout/xaxis/
# customizing the x axis
fig = fig.update_layout(
    xaxis = dict(
        showline = True,
        linewidth = 2, 
        linecolor = 'rgb(20,20,20)',
        ticks = "outside",           # "outside", "inside", ""
        showticklabels = True,
        tickangle = 0,
        tickfont = dict(
            family = "Arial", size = 12, color = "rgb(80,80,80)"
            ),
        tickvals = [2004,2008,2012,2016,2020],
        showgrid = False,
    )
)

# customizing the y axis
fig = fig.update_layout(
    yaxis = dict(
        showline = False,
        ticks = "outside",
        showticklabels = True,
        tickangle = 0,
        tickfont = dict(
            family = "Arial", size = 12, color = "rgb(80,80,80)"
            ),
        tickvals = [0,0.1,0.2],
        tickwidth = 1,
        showgrid = True,
        gridcolor = 'rgb(80,80,80)',
        gridwidth = 1,

        autorange = False,
        range = [-0.001,0.3]
    )
)

# changing fig background color : 
fig = fig.update_layout(dict(
    plot_bgcolor='rgb(255, 255, 255)',
    paper_bgcolor='rgb(255, 255, 255)',
))

In [65]:
# Not implemented but might be interesting
# fig.update_layout(dict(
#     hovermode = "closest",
#     xaxis = dict(showspikes = True)
# ))

In [66]:
# Adding a dropdown menu https://programminghistorian.org/en/lessons/interactive-visualization-with-plotly#adding-animations-dropdown-bars

fig = fig.update_layout(
    updatemenus=[
        dict(
            type = "buttons",
            buttons=list(
                [ 
                    dict(
                        label="Toutes les courbes", 
                        method="update",
                        args=[
                            {
                                "visible": [
                                    True, True,
                                    True, True,
                                    True, True,
                                    True, True
                                    ]
                            },  # This 'view' show all three types of crime
                            {
                                "title": ("Évolution de la proportion d'articles"
                                          "publiés invoquant le thème du genre "
                                          "(def extensive et statistique)"),
                            },
                        ],
                    ),
                    dict(
                        label="Toutes les revues", 
                        method="update",
                        args=[
                            {
                                "visible": [
                                    True, True,
                                    False, False,
                                    False, False,
                                    False, False
                                    ]
                            },  # This 'view' show all three types of crime
                            {
                                "title": ("Évolution de la proportion d'articles"
                                          " publiés invoquant le thème du genre "
                                          " (def extensive et statistique)\n"
                                          " Toutes les revues confondues"),
                            },
                        ],
                    ),
                    dict(
                        label="Revues spéciales", 
                        method="update",
                        args=[
                            {
                                "visible": [
                                    False, False,
                                    True, True,
                                    True, True,
                                    True, True
                                    ]
                            },  # This 'view' show all three types of crime
                            {
                                "title": ("Évolution de la proportion d'articles"
                                          " publiés invoquant le thème du genre "
                                          " (def extensive et statistique)\n"
                                          " ARSS, groupe TYPE et GENERALE"),
                            },
                        ],
                    ),
                    dict(
                        label="Définition extensive uniquement", 
                        method="update",
                        args=[
                            {
                                "visible": [
                                    True, False,
                                    True, False,
                                    True, False,
                                    True, False
                                    ]
                            },  # This 'view' show all three types of crime
                            {
                                "title": ("Évolution de la proportion d'articles"
                                          " publiés invoquant le thème du genre "
                                          " (def extensive)"),
                            },
                        ],
                    ),
                    dict(
                        label="Définition statistique uniquement", 
                        method="update",
                        args=[
                            {
                                "visible": [
                                    False, True,
                                    False, True,
                                    False, True,
                                    False, True
                                    ]
                            },  # This 'view' show all three types of crime
                            {
                                "title": ("Évolution de la proportion d'articles"
                                          " publiés invoquant le thème du genre"
                                          " (def statistique)"),
                            },
                        ],
                    ),
                ]
            )
        )
    ]
)
# TODO check dimension des boutons 

In [72]:
# setting the dimentions
fig = fig.update_layout(dict(height = 600, width = 1000))

In [73]:
# Customise the legend
fig = fig.update_layout(dict(
    legend = dict(
        bgcolor = 'rgba(230,230,230,0.6)',
        title = dict(text = 'Données exposées', side = "top center",
                     font = dict(color = "red", variant = "all-small-caps")),
        itemdoubleclick = "toggleothers", itemclick = "toggle"
    )
))

In [74]:
# Fixing the hover information
fig = fig.update_traces(hovertemplate = None)
fig = fig.update_layout(hovermode="x")
# Customising hover
fig = fig.update_layout(
    hoverlabel = dict(namelength = 0)
)

# TODO Arrondir les valeurs
# TODO checker si on peut rajouter une ligne directrice

In [75]:
# change the names of the plots 
names_of_curves = [
    'Toutes revues confondues (ext)',
    'Toutes revues confondues (stat)',
    'ARSS (ext)',
    'ARSS (stat)',
    'Revue TYPE (ext)',
    'Revue TYPE (stat)',
    'Revue GENERALE (ext)',
    'Revue GENERALE (stat)',
]

for name, plot in zip(names_of_curves, fig.data):
    plot.update(name = name)

# change the colours of the plots 
colours_of_curves = [
    '#3E50B6',
    '#3E50B6',
    '#29BD00',
    '#29BD00',
    '#F202D6',
    '#F202D6',
    '#F46E01',
    '#F46E01',
]

for name,colour, plot in zip(names_of_curves,colours_of_curves, fig.data):
    plot.update(name = name)
    plot.update(line_color = colour)
    plot.update(line_width = 2)

# The plot

In [76]:
fig.show()#{'responsive' : False})

In [None]:
double click pour griser plutôt que faire disparaitre 

In [56]:
with open('test.html','w') as file:
    file.write(fig.to_html())

In [None]:
# https://plotly.com/python/interactive-html-export/#full-parameter-documentation
fig.write_html('test_.html', include_plotlyjs = False)

In [None]:
fig.data[0].update(name = 'A')

In [None]:
'width' in fig.data[0]