# Plot small multiples of the number of words per species

In [None]:
import pandas as pd
import sqlite3
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import math
from classification_1_parsing_parrotorno import make_dictionnary
# Global figure styling: seaborn's "whitegrid" theme, with matplotlib's
# one-letter colour shorthands ("b", "g", "r", ...) remapped to seaborn colours.
sns.set(color_codes=True, style="whitegrid")

In [None]:


# Open the project's SQLite database. The cursor is not used by the queries
# below; it is kept available for ad-hoc statements.
con = sqlite3.connect("DATABASES/project.db")
cur = con.cursor()

# Load both tables entirely into DataFrames:
#   ana_df - per-word analysis table (the plotting code reads its "word_<word>" columns)
#   map_df - species mapping (the plotting code reads its "scientific_name_cites" column)
ana_df = pd.read_sql_query(sql="SELECT * from classification_1_analysis", con=con)
map_df = pd.read_sql_query(sql="SELECT * from mapping_cites", con=con)

In [None]:
%%capture
bag_of_words=make_dictionnary();

In [None]:

def _matched_word_totals(words, stopwords, counts_df):
    """Return ``(kept_words, totals)`` for the words worth plotting.

    Stopwords, repeated words and words that have no ``word_<word>`` column in
    ``counts_df`` are skipped. Order of first appearance is preserved, and
    ``totals[k]`` is the column sum for ``kept_words[k]``.
    """
    kept_words = []
    totals = []
    for word in words:
        # Skip overly common words and never plot the same word twice.
        if word in stopwords or word in kept_words:
            continue
        column_name = f"word_{word}"
        if column_name not in counts_df.columns:
            # No column for this word: nothing to plot.
            continue
        kept_words.append(word)
        totals.append(counts_df[column_name].sum())
    return kept_words, totals


def small_multiples_plot(Sharey=True, row=9, column=8, max_words=11):
    """Plot every species as one bar-chart subplot of a single figure and save it.

    Each subplot shows, for one species, the total number of occurrences of
    each of its words. Bars are coloured on a scale shared by the whole
    figure, and one colorbar is drawn for all subplots. Grid cells that have
    no species are removed from the figure.

    Reads the notebook-level ``ana_df``, ``map_df`` and ``bag_of_words``.

    Parameters
    ----------
    Sharey : bool
        Whether all subplots share the same y-axis.
    row, column : int
        Shape of the subplot grid; cell ``i`` (row-major) shows
        ``bag_of_words[i + 1]`` titled with ``map_df.at[i, "scientific_name_cites"]``.
    max_words : int
        Number of bar slots reserved on every x-axis so that bars have the
        same width in all subplots. The default of 11 was chosen for the
        species with the most matched words (the African grey).

    Returns
    -------
    None
        The figure is written to
        ``./graphes/classification_1_small_multiples_Sharey_<Sharey>.png``.
    """
    # Colour scale shared by all subplots, spanning the per-column totals of
    # ana_df (its first two columns are excluded).
    cmap = 'coolwarm'
    column_totals = ana_df.sum(axis=0).iloc[2:].astype(int)
    lowest = column_totals.values.min()
    highest = column_totals.values.max()
    norm = plt.Normalize(lowest, highest)
    sm = matplotlib.cm.ScalarMappable(cmap=cmap, norm=norm)
    # Older matplotlib releases refuse to draw a colorbar for a mappable
    # without an array; an empty one is enough.
    sm.set_array([])
    colormap = plt.get_cmap(cmap)

    stopwords = ["parrot", "macaw", "parot", "ara", "amazon", "amazone"]
    bar_bottom = 0  # baseline of every bar

    # squeeze=False keeps `axes` two-dimensional even for a single row/column.
    fig, axes = plt.subplots(row, column, figsize=(32, 24), sharey=Sharey,
                             sharex=False, frameon=True, squeeze=False)

    # axes.flat walks the grid row by row, i.e. cell i is axes[i // column, i % column].
    for i, sbplt in enumerate(axes.flat):
        try:
            words = bag_of_words[i + 1]
            title = map_df.at[i, "scientific_name_cites"]
        except (KeyError, IndexError):
            # More grid cells than species: drop the unused subplot.
            fig.delaxes(sbplt)
            continue

        list_words, list_tot = _matched_word_totals(words, stopwords, ana_df)
        x = np.arange(len(list_words))
        if list_words:
            sbplt.bar(x, height=list_tot, width=0.60, bottom=bar_bottom,
                      color=colormap(norm(list_tot)), linewidth=2)
        sbplt.set_title(f"{title}")
        sbplt.set_ylabel('Nombre de mots')
        sbplt.set_xticks(x)
        sbplt.set_xticklabels(list_words, rotation=45)
        # Same x-range everywhere so that bar widths are comparable.
        sbplt.set_xlim(-0.5, max_words - 0.5)
        sbplt.tick_params(axis='both', which='major', labelsize=10)

    fig.suptitle(f"Nombre d'occurrences des mots par espèce (Y-axis shared = {Sharey})", y=1, fontsize=30)
    fig.tight_layout()

    fig.colorbar(sm, ax=axes, pad=0.005, aspect=100,
                 ticks=np.arange(lowest - 1, highest + 1, 100))

    # Save through the figure object rather than the implicit "current figure".
    fig.savefig(f"./graphes/classification_1_small_multiples_Sharey_{Sharey}.png", format="png", bbox_inches='tight')


In [None]:
# Render and save the figure twice: first with a common y-axis, then with
# an independent y-axis per subplot.
for share_y in (True, False):
    small_multiples_plot(Sharey=share_y)