In [1]:
import pandas as pd
import numpy as np
import plotly as px
from colorama import Fore, Back, Style
import json

In [2]:
def get_maires_dict() -> dict:
    
    mairesDict = {
    "Stephane JAY" : [1896, 1904],
    "Charles RIVAIL" : [1904, 1908],
    "Felix VIALLET" : [1908, 1910],
    "Nestor CORNIER" : [1910, 1919],
    "Paul MISTRAL" : [1919, 1932],
    "Leon MARTIN" : [1932, 1935],
    "Paul COCAT" : [1935, 1944],
    "Frederic LAFLEUR" : [1944, 1945],
    "Leon MARTIN" : [1945, 1947],
    "Marius BALLY" : [1947, 1948],
    "Remy PERINETTI" : [1948, 1949],
    "Leon MARTIN" : [1949, 1959],
    "Albert MICHALLON" : [1959, 1965],
    "Hubert DUBEDOUT" : [1965, 1983],
    "Alain CARIGNON" : [1983, 1995],
    "Michel DESTOT" : [1995, 2014],
    "Eric PIOLLE" : [2014, 2021],
}
    return mairesDict

In [3]:
# Couleurs diverses
VERT='\033[0;32m' # vert mince
BLANC='\033[1;37m' # blanc gras
DEFAUT='\033[0;m' # couleur console par defaut
NOIR='\033[0;30m' # noir mince
ROUGE='\033[0;31m' # rouge mince
MARRON='\033[0;33m' # marron mince
BLEU='\033[0;34m' # bleu fonce mince
VIOLET='\033[0;35m' # violet mince
CYAN='\033[0;36m' # cyan mince
GRIS='\033[0;37m' # gris clair mince
BLANCLEGER='\033[0;38m' # blanc mince
ROUGECLAIR='\033[1;31m' # rouge clair gras
VERTCLAIR='\033[1;32m' # vert clair gras
JAUNE='\033[1;33m' # jaune gras
BLEUCLAIR='\033[1;34m' # bleu clair gras
ROSE='\033[1;35m' # rose clair gras
CYANCLAIR='\033[1;36m' # cyan clair gras

# I - Import data

In [4]:
# This csv is imported with a value added in the last ligne
# There was originally a missing value
# Here in a csv : In last rows / col 1 : 30032 was added
file_path = '../data/ESP_PUBLIC.IDENTITE_ARBRE.csv'
df = pd.read_csv(file_path, sep=',', header = 0, index_col=False)
dff = df

# II - Computations

##### Missing years for plantation

In [5]:
# Convert the column year from FLOAT --> INT
# --> Not working because of Nan : dff["ANNEEDEPLANTATION"].astype("int")
missingYear = dff["ANNEEDEPLANTATION"].isnull().value_counts()[1]
numberOfTree = dff["ANNEEDEPLANTATION"].size

print(CYANCLAIR, "Total of Trees -->", BLEUCLAIR, numberOfTree, \
      CYANCLAIR, "<-- with total of -->", BLEUCLAIR, missingYear,\
      CYANCLAIR, "<-- missing years")

[1;36m Total of Trees --> [1;34m 31619 [1;36m <-- with total of --> [1;34m 1438 [1;36m <-- missing years


##### Total of trees for specific year

In [6]:
# First replace the .nan by float(0)
# After that convert float to int
dff["ANNEEDEPLANTATION"].fillna(0).astype("int")

0        1977
1        2001
2        1975
3        1978
4        1960
         ... 
31614    2014
31615    2014
31616    2014
31617    2014
31618    1995
Name: ANNEEDEPLANTATION, Length: 31619, dtype: int64

In [7]:
firstYearPlanted = int(dff["ANNEEDEPLANTATION"].min())
lastYearPlanted = int(dff["ANNEEDEPLANTATION"].max())
print(CYANCLAIR, "The first tree was planted in -->", BLEUCLAIR, firstYearPlanted)
print(CYANCLAIR, "The last tree was planted in --->", BLEUCLAIR, lastYearPlanted)

[1;36m The first tree was planted in --> [1;34m 1900
[1;36m The last tree was planted in ---> [1;34m 2021


##### Study by years

In [8]:
df_years = dff[["ANNEEDEPLANTATION"]]
df_years.value_counts()

ANNEEDEPLANTATION
1975.0               3631
2001.0               1488
1980.0               1445
1970.0               1366
2015.0               1012
                     ... 
1959.0                 16
1924.0                 11
1964.0                  6
1900.0                  4
2021.0                  3
Length: 65, dtype: int64

In [9]:
for key in get_maires_dict().keys():
    print(CYAN, key, ROUGE, get_maires_dict()[key])

[0;36m Stephane JAY [0;31m [1896, 1904]
[0;36m Charles RIVAIL [0;31m [1904, 1908]
[0;36m Felix VIALLET [0;31m [1908, 1910]
[0;36m Nestor CORNIER [0;31m [1910, 1919]
[0;36m Paul MISTRAL [0;31m [1919, 1932]
[0;36m Leon MARTIN [0;31m [1949, 1959]
[0;36m Paul COCAT [0;31m [1935, 1944]
[0;36m Frederic LAFLEUR [0;31m [1944, 1945]
[0;36m Marius BALLY [0;31m [1947, 1948]
[0;36m Remy PERINETTI [0;31m [1948, 1949]
[0;36m Albert MICHALLON [0;31m [1959, 1965]
[0;36m Hubert DUBEDOUT [0;31m [1965, 1983]
[0;36m Alain CARIGNON [0;31m [1983, 1995]
[0;36m Michel DESTOT [0;31m [1995, 2014]
[0;36m Eric PIOLLE [0;31m [2014, 2021]


##### Create a list of tuple from 2 pandas Series

In [10]:
df_tuple = pd.DataFrame()
df_tuple['year_genre'] = list(zip(dff["ANNEEDEPLANTATION"], dff["GENRE_BOTA"]))

## III - Maire study

In [13]:
def find_maire(year:int) -> str:
    mairesDict = get_maires_dict()
    if year:
        for key in mairesDict.keys():
            if int(mairesDict[key][0]) <= int(year) < int(mairesDict[key][1]):
                return key
        
    return np.nan
dff["Maire"] = dff["ANNEEDEPLANTATION"].dropna().apply(find_maire)

In [14]:
# Extract only the four usefull column
df_maire = dff[["Maire", "ANNEEDEPLANTATION", "GENRE_BOTA", "ESPECE"]]

In [59]:
def change_column_name(df:pd.DataFrame) -> pd.DataFrame:
    valid = {"yes": True, "y": True, "ye": True, "Yes":True, "no": False, "n": False, "No": False}
    newNameList = []
    for col in df.columns.tolist():
        print(CYAN, "Do you want do rename the column", ROUGECLAIR, col,CYAN, "y / n")
        res = input()
        
        if res in list(valid.keys()):
        
            if valid[res] == True:
                print(ROUGECLAIR, "New name....")
                newName = input().upper()
                newNameList.append(newName)

            elif valid[res] == False:
                newNameList.append(col)
        else:
            print(ROUGECLAIR, "You exit the program")
            return df
        
    print(VERTCLAIR, newNameList)    
    df.columns = newNameList
    return df

In [60]:
df_maire = change_column_name(df_maire)

[0;36m Do you want do rename the column [1;31m MAIRE [0;36m y / n
d
[1;31m You exit the program


##### Fin the maire who planted the more trees

In [64]:
maire_stats = df_maire.groupby('mair').agg({'years': ['count', 'mean']})

KeyError: 'maire'

In [None]:
maskPiolle = dff["Maire"] == "Eric PIOLLE"
df_piolle = dff[maskPiolle]
df_piolle.sort_values("GENRE_BOTA")[["ANNEEDEPLANTATION", "GENRE_BOTA"]].dropna()