In [None]:
#load data
import pandas as pd

original_data_canton_FR = pd.read_excel(r'../dataset/raw/Dep_15_Resultats_T1_complet.xlsx', sheet_name='Cantons', header=2)
original_data_Bvot_FR        = pd.read_csv('../dataset/inputs/XDataFR_Bvot.csv', sep=';')
original_data_Bvot_targets_FR = pd.read_csv('../dataset/labels/yDataFR_Bvot.csv', sep=';')

data_canton_FR = original_data_canton_FR.copy()
data_Bvot_FR   = original_data_Bvot_FR.copy()
data_Bvot_targets_FR = original_data_Bvot_targets_FR.copy()

In [None]:
def saveData(data, loc):
    # save cher_data as excel
    writer = pd.ExcelWriter(loc)
    
    # write dataframe to excel
    data.to_excel(writer)

    # save the excel
    writer.save()

In [42]:
data_canton_FR.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9097 entries, 0 to 239
Data columns (total 25 columns):
Code du département       9097 non-null object
Libellé du département    9097 non-null object
Code du canton            9097 non-null int64
Libellé du canton         9097 non-null object
Inscrits                  9097 non-null int64
Abstentions               9097 non-null int64
% Abs/Ins                 9097 non-null float64
Votants                   9097 non-null int64
% Vot/Ins                 9097 non-null float64
Blancs                    9097 non-null int64
% Blancs/Ins              9097 non-null float64
% Blancs/Vot              9097 non-null float64
Nuls                      9097 non-null int64
% Nuls/Ins                9097 non-null float64
% Nuls/Vot                9097 non-null float64
Exprimés                  9097 non-null int64
% Exp/Ins                 9097 non-null float64
% Exp/Vot                 9097 non-null float64
N°Panneau                 9097 non-null float64

In [None]:
def getNbBinomes(data):
    return len([header for header in data.columns if "Binôme" in header])

def explodeLines(data):
    initdf = data[['Code du département', 'Libellé du département', 'Code du canton', 
            'Libellé du canton', 'Inscrits', 'Abstentions', '% Abs/Ins', 'Votants',
            '% Vot/Ins', 'Blancs', '% Blancs/Ins', '% Blancs/Vot', 'Nuls', '% Nuls/Ins',
            '% Nuls/Vot', 'Exprimés', '% Exp/Ins', '% Exp/Vot']]

    headers = ['N°Panneau', 'Nuance', 'Binôme', 'Sièges', 'Voix', '% Voix/Ins', '% Voix/Exp']
    
    df = pd.DataFrame()
    for i in range(getNbBinomes(data)):
        partidf = data[[h+'.'+str(i) if i!=0 else h for h in headers]]
        partidf = pd.concat([initdf, partidf], axis=1)
        partidf.columns = pd.Index(initdf.columns.values.tolist() + headers)
        df = pd.concat([df, partidf])
    
    # Remove useless rows   
    df = df.dropna(how='all', subset=headers)
    
    return df

In [None]:
data_canton_FR = explodeLines(data_canton_FR)
# tmp = fr_data[fr_data['Code du département']==63]
# tmp = tmp[tmp['Code du canton']==4]
# tmp

tmp = fr_data.loc[(data_canton_FR['Code du département']==1 ) & (data_canton_FR['Code du canton']==1) ]
tmp

In [61]:
def getNuanceOfElected(data, col_siege='Sièges', col_nuance='Nuance'):
    elected = data[data[col_siege]=='Elus']
    return list(elected[col_nuance].unique()) if len(elected)!=0 else None

def filterBestNuances(data, col_nuance='Nuance', criteria=12.50):
    bestCandidat = data[data['% Voix/Ins']>= criteria]
    if bestCandidat.empty or len(bestCandidat)==1:
        bestCandidat = data.sort_values(by='Voix', ascending=False).iloc[0:2,:]

    return list(bestCandidat[col_nuance].unique())

def getDuels(data, dep, col_dep='Code du département', col_canton='Code du canton', col_siege='Sièges', col_nuance='Nuance'):
    '''
        ATTENTION : data doit etre EXPLODE !
    '''
    data = data[data[col_dep]==dep]
    duels = dict()
    for canton in data[col_canton].unique():
        data_canton = data[data[col_canton]==canton]
        # allow to know if there is a majority in the canton
        elected = getNuanceOfElected(data_canton, col_siege=col_siege, col_nuance=col_nuance) 
        if elected is not None:
            duels[str(canton)]= elected
        else:
            duels[str(canton)] = filterBestNuances(data_canton)
    return duels

def optimizeDuelDict(duels):
    optdic = dict()
    for dep, duelDep in duels.items():
        for canton, duel in duels[dep].items():
            key = ':'.join(duel)
            print(f'processing dep {dep} canton {canton} key {key}')
            if key in optdic.keys():
                print('append :', (dep,canton))
                optdic[key].append((dep, canton))
            else:
                optdic[key]= [(dep, canton)]
    return optdic

In [62]:
duels = dict()
for dep in data_canton_FR['Code du département'].unique():
    duels[str(dep)]= getDuels(data_canton_FR, dep)
    if duels[str(dep)]==[]:
        print('empty list for dep : ', dep)


dep 83 canton 4 key BC-UD:BC-FN
append : ('83', '4')
processing dep 83 canton 5 key BC-FN
append : ('83', '5')
processing dep 83 canton 6 key BC-UD:BC-FN
append : ('83', '6')
processing dep 83 canton 7 key BC-SOC:BC-FN
append : ('83', '7')
processing dep 83 canton 8 key BC-UD:BC-FN
append : ('83', '8')
processing dep 83 canton 9 key BC-UD:BC-FN
append : ('83', '9')
processing dep 83 canton 10 key BC-UD:BC-FN
append : ('83', '10')
processing dep 83 canton 11 key BC-UD:BC-FN
append : ('83', '11')
processing dep 83 canton 12 key BC-UD:BC-FN
append : ('83', '12')
processing dep 83 canton 13 key BC-UD:BC-FN
append : ('83', '13')
processing dep 83 canton 14 key BC-UD:BC-FN
append : ('83', '14')
processing dep 83 canton 15 key BC-UD:BC-FN
append : ('83', '15')
processing dep 83 canton 16 key BC-UG:BC-FN
append : ('83', '16')
processing dep 83 canton 17 key BC-UD:BC-FN
append : ('83', '17')
processing dep 83 canton 18 key BC-FN:BC-UD
append : ('83', '18')
processing dep 83 canton 19 key BC-UD:

In [67]:
optDuels = optimizeDuelDict(duels)

dep 83 canton 4 key BC-UD:BC-FN
append : ('83', '4')
processing dep 83 canton 5 key BC-FN
append : ('83', '5')
processing dep 83 canton 6 key BC-UD:BC-FN
append : ('83', '6')
processing dep 83 canton 7 key BC-SOC:BC-FN
append : ('83', '7')
processing dep 83 canton 8 key BC-UD:BC-FN
append : ('83', '8')
processing dep 83 canton 9 key BC-UD:BC-FN
append : ('83', '9')
processing dep 83 canton 10 key BC-UD:BC-FN
append : ('83', '10')
processing dep 83 canton 11 key BC-UD:BC-FN
append : ('83', '11')
processing dep 83 canton 12 key BC-UD:BC-FN
append : ('83', '12')
processing dep 83 canton 13 key BC-UD:BC-FN
append : ('83', '13')
processing dep 83 canton 14 key BC-UD:BC-FN
append : ('83', '14')
processing dep 83 canton 15 key BC-UD:BC-FN
append : ('83', '15')
processing dep 83 canton 16 key BC-UG:BC-FN
append : ('83', '16')
processing dep 83 canton 17 key BC-UD:BC-FN
append : ('83', '17')
processing dep 83 canton 18 key BC-FN:BC-UD
append : ('83', '18')
processing dep 83 canton 19 key BC-UD:

In [65]:
duels['2']
optDuels[':'.join(['BC-UD', 'BC-FN'])]

{'1': ['BC-UG', 'BC-FN'],
 '2': ['BC-UG', 'BC-FN'],
 '3': ['BC-UG', 'BC-FN'],
 '4': ['BC-UG', 'BC-UD', 'BC-FN'],
 '5': ['BC-UMP', 'BC-FN'],
 '6': ['BC-UG', 'BC-UD', 'BC-FN'],
 '7': ['BC-SOC', 'BC-UD', 'BC-FN'],
 '8': ['BC-UG', 'BC-FN'],
 '9': ['BC-UG', 'BC-FN'],
 '10': ['BC-UG', 'BC-FN'],
 '11': ['BC-UG', 'BC-UD', 'BC-FN'],
 '12': ['BC-UG', 'BC-UD', 'BC-FN'],
 '13': ['BC-UD', 'BC-FN'],
 '14': ['BC-UD', 'BC-FN'],
 '15': ['BC-UD', 'BC-FN'],
 '16': ['BC-UG', 'BC-UD', 'BC-FN'],
 '17': ['BC-UG', 'BC-UD', 'BC-FN'],
 '18': ['BC-UG', 'BC-FN'],
 '19': ['BC-UDI', 'BC-UMP', 'BC-FN'],
 '20': ['BC-FN'],
 '21': ['BC-FN', 'BC-UMP']}

In [None]:

# retourne un dataset 
def getTrainSets(X, y, duel, col_canton='CODCAN', col_dep='Code du département'):
    mask_dep    = pd.Series([False]*len(X))
    mask_canton = pd.Series([False]*len(X))
    for dep in X[col_dep].unique():
        for canton in X[X[col_dep]==dep][col_canton].unique(): #pour chaque canton du departement
            #on considere que les cantons qui ont comme partis les partis du duel
            if all(nuance in X.loc[(X[col_dep]==dep) & (X[col_canton]==canton)] for nuance in duel):
                mask_dep    |= X[col_dep]==dep
                mask_canton |= X[col_canton]==canton

    # filtre les departement dont aucun canton contient notre duel
    X= X[mask_dep]
    y = y[mask_dep]
    return (X[mask_canton], y[mask_canton])
                        


In [None]:
#