In [1]:
import pandas as pd
import plotly.express as px
import datetime as dt
import numpy as np
import tqdm.notebook as tqdm

In [2]:
def plot_par_jour_de_semaine(df, date):
    fig = px.histogram(df, x="weekday", title=f"Décrets votés par jour de la semaine pour la période {date}",
                      labels= {
                     "Monday": "Lundi",
                     "Tuesday": "Mardi",
                     "Wednesday": "Mercredi",
                     "Thursday": "Jeudi",
                     "Friday": "Vendredi",
                     "Saturday": "Samedi",
                     "Sunday": "Dimanche"}, text_auto=True)
    fig.update_layout(bargap=0.1)
    fig.update_xaxes(automargin=False) 
    fig.update_xaxes(categoryorder='array', categoryarray= ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'])
    fig.show()
    
def plot_par_an(dataframe, date):
    fig = px.histogram(dataframe, x="date",
                       title=f"Décrets votés par jour pour l'année {date}", 
                       nbins=365, 
                       color_discrete_sequence=['rebeccapurple'])
    fig.update_layout(bargap=0.1)
    fig.update_xaxes(showline=True, gridwidth=0.5, gridcolor='White')
    fig.update_xaxes(dtick="M1")
    fig.show()

def count_nbremotscorpus(dataframe):
    count = 0
    dftestest2 = dataframe['texte'].str.findall(r'(\w+)').str.len()
    for line in dftestest2:
        if np.isnan(line) == False:
            count += line
        else:
            pass
    return(count)



In [3]:
#Chargement corpus d'urgence
corpus_urgence = pd.read_csv('sous_corpus/Ensemble_decrets_urgence.tsv', sep='\t')
corpus_urgence["date"] = pd.to_datetime(corpus_urgence["date"])
df = corpus_urgence.sort_values(by="date")
df['weekday'] = df['date'].dt.day_name()
corpus_urgence.drop('Unnamed: 0', axis=1, inplace=True)

#Chargement corpus d'ensemble
corpus_ensemble = pd.read_csv('sous_corpus/Ensemble_vol_concatenes_tries.tsv', sep='\t')
corpus_ensemble["date"] = pd.to_datetime(corpus_ensemble["date"])
df2 = corpus_ensemble.sort_values(by="date")
df2['weekday'] = df2['date'].dt.day_name()

### Analyses corpus urgence

In [4]:
dataframe1=df[df['date'].dt.year == 1789]
dataframe2=df[df['date'].dt.year == 1790]
dataframe3=df[df['date'].dt.year == 1791]
dataframe4=df[df['date'].dt.year == 1792]
dataframe5=df[df['date'].dt.year == 1793]
dataframe6=df[df['date'].dt.year == 1794]
dataframe7=df[df['date'].dt.year == 1795]
dataframe8=df[df['date'].dt.year == 1796]
dataframe9=df[df['date'].dt.year == 1797]
dataframe10=df[df['date'].dt.year == 1798]
dataframe11=df[df['date'].dt.year == 1799]

In [5]:
fig = px.histogram(df, x="date", nbins=3650)
fig.update_layout(bargap=0.1)
fig.show()

In [6]:
liste = [dataframe3,
         dataframe4,
         dataframe8,
         dataframe9,
         dataframe10,
         dataframe11         
        ]
date = [1791, 1792, 1796, 1797, 1798, 1799]
count=0
for dataframe in liste:
    plot_par_jour_de_semaine(dataframe, date[count])
    count += 1

### Analyse corpus d'ensemble

In [7]:
ensemble_decrets_convention=df2[df2['date'] <= '1795-10-26']
ensemble_decrets_convention=ensemble_decrets_convention[ensemble_decrets_convention['date'] >= '1792-09-22']
ensemble_decrets_directoire = df2[df2['date'] >= '1795-10-26']
ensemble_decrets_legislative=df2[df2['date'] >= '1791-10-01']
ensemble_decrets_legislative=ensemble_decrets_legislative[ensemble_decrets_legislative['date'] <= '1792-09-22']
ensemble_decrets_constituante=df2[df2['date'] <= '1791-10-01']

urgence_decrets_directoire = df[df['date'] >= '1795-10-26']
ensemble_decrets_constituante_et_legislative=df[df['date'] <= '1792-09-22']

df2['month'] = df2['date'].dt.month
ensemble1=df2[df2['date'].dt.year == 1789]
ensemble2=df2[df2['date'].dt.year == 1790]
ensemble3=df2[df2['date'].dt.year == 1791]
ensemble4=df2[df2['date'].dt.year == 1792]
ensemble5=df2[df2['date'].dt.year == 1793]
ensemble6=df2[df2['date'].dt.year == 1794]
ensemble7=df2[df2['date'].dt.year == 1795]
ensemble8=df2[df2['date'].dt.year == 1796]
ensemble9=df2[df2['date'].dt.year == 1797]
ensemble10=df2[df2['date'].dt.year == 1798]
ensemble11=df2[df2['date'].dt.year == 1799]

liste = [ensemble_decrets_constituante,
         ensemble_decrets_legislative,
         ensemble_decrets_convention,
         ensemble_decrets_directoire      
        ]
date = ['de la constituante', 'de la législative', 'de la Convention', 'du Directoire']
count=0
for dataframe in liste:
    plot_par_jour_de_semaine(dataframe, date[count])
    count += 1

In [8]:
fig = px.histogram(df2, x="month", title=f"Décrets votés par mois",
                      labels= {
                     "1": "Janvier",
                     "2": "Fevrier",
                     "3": "Mars",
                     "4": "Avril",
                     "5": "Mai",
                     "6": "Juin",
                     "7": "Juillet",
                     "8": "Aout",
                     "9": "Septembre",
                     "10": "Octobre",
                     "11": "Novembre",
                     "12": "Decembre"})
fig.update_layout(bargap=0.1)
fig.update_xaxes(automargin=False) 
fig.update_xaxes(categoryorder='array', categoryarray= ['Janvier', 'Fevrier', 'Mars', 'Avril', 'Mai', 'Juin', 'Juillet','Aout','Septembre','Octobre','Novembre', 'Decembre'])
fig.show()

In [9]:
fig = px.histogram(df2, x="date", nbins=3650)
fig.update_layout(bargap=0.1)
fig.show()

In [10]:
liste = [ensemble1,
         ensemble5]
date = [1789, 1793]
count=0
for dataframe in liste:
    plot_par_jour_de_semaine(dataframe, date[count])
    count += 1

In [11]:
liste = [ensemble1,
         ensemble2,
         ensemble3,
         ensemble4,
         ensemble5,
         ensemble6,
         ensemble7,
         ensemble8,
         ensemble9,
         ensemble10,         
        ]
date = 1789
for dataframe in liste:
    plot_par_an(dataframe, date)
    date += 1

In [12]:
df2['urgence'] = 0
df2['urgence'] = 'décret non urgent'
for index, row in tqdm.tqdm(df2.iterrows(), total=len(df2)):
    ID = df2.iloc[index]['ID']
    for ID2 in df['ID']:
        if ID == ID2:
            df2['urgence'].loc[index] = 'décret urgent'

  0%|          | 0/28963 [00:00<?, ?it/s]



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [13]:
df_test = df2
fig = px.histogram(df_test, x="date", color="urgence", nbins=3650)
fig.update_layout(bargap=0.2)
fig.update_xaxes(showline=True, gridwidth=0.5, gridcolor='White')
fig.update_xaxes(dtick="M3")
fig.show()