# Init

In [None]:
# Import utils
import numpy as np
import pandas as pd
import math
import time
import json
import pyreadr
import pickle
from joblib import dump, load, Parallel, delayed
import os
import copy
import datetime as dt
from tqdm import tqdm


# Import ML models
import sklearn
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import TimeSeriesSplit

from lightgbm import LGBMRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor

from dddex.levelSetKDEx_univariate import LevelSetKDEx, LevelSetKDEx_NN
from dddex.wSAA import RandomForestWSAA, SampleAverageApproximation

# Weights & Biases
import wandb

# Import Gurobi
import gurobipy as gp
from gurobipy import GRB

# Optimization Module
from DataDrivenPatientScheduling import WeightsModel
from DataDrivenPatientScheduling import Experiment

In [None]:
# Central registry of the data locations and of the identifiers used to name
# saved models and result files
directories_setup = {

    # Paths
    'path_data': '/home/fesc/dddex/PatientScheduling/Data',
    'path_models': '/home/fesc/dddex/PatientScheduling/Data/Models',
    'path_results': '/home/fesc/dddex/PatientScheduling/Data/Results',

    # Models
    'LSx_LGBM': 'LSx_LGBM',
    'LSx_NN_LGBM': 'LSx_NN_LGBM',
    'wSAA_RF': 'wSAA_RF',
    'SAA': 'SAA',
}

# Publish every entry as a notebook-level variable (at cell/module scope the
# global namespace is the one being updated)
globals().update(directories_setup)

# Pre-processing

In [None]:
# Load data
# All three inputs live in `path_data` (published by the directory setup), so
# the data directory is defined in exactly one place.
Y_data = pd.read_csv(path_data + '/Y_data.csv')
X_data = pd.read_csv(path_data + '/X_data.csv')
ID_data = pd.read_csv(path_data + '/ID_data.csv')

In [None]:
# Train-test split (for initial model training): keep the rows flagged 'train'
train_mask = ID_data.train_test == 'train'

ID_train = ID_data.loc[train_mask]
X_train = np.array(X_data.loc[train_mask])
y_train = np.array(Y_data.loc[train_mask]).flatten()

In [None]:
# CV folds: three time-series splits over the training rows, materialized as a
# list so the same folds can be handed to every tuning run
tscv = TimeSeriesSplit(n_splits=3)
cv_folds = list(tscv.split(range(len(ID_train))))

# Model training

## (a) LSx - LGBM

In [None]:
# Configuration of the LSx model (LightGBM point estimator + density estimator)
model_setup = {

    ## Point estimator
    'point_estimator_params': {

        # Constructor arguments of the regressor
        'model_params': {
            'random_state': 12345,
            'n_jobs': 4,
            'verbose': -1,
        },

        # Settings of the hyper-parameter search itself
        'tuning_params': {
            'n_iter': 200,
            'scoring': {'MSE': 'neg_mean_squared_error'},
            'return_train_score': True,
            'refit': 'MSE',
            'random_state': 12345,
            'n_jobs': 8,
            'verbose': 0,
        },

        # Candidate values explored by the search
        'hyper_params_grid': {
            'num_leaves': list(range(5, 500, 5)),
            'max_depth': [-1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            'min_child_samples': list(range(5, 500, 5)),
            'learning_rate': [i / 100 for i in range(1, 21)],
            'n_estimators': [100, 200, 300, 400, 500],
            'subsample': [i / 100 for i in range(5, 101, 5)],
            'colsample_bytree': [i / 100 for i in range(5, 100, 5)],
        },
    },

    ## Density estimator
    'density_estimator_params': {

        # Constructor arguments of the density estimator
        'model_params': {
            'weightsByDistance': False,
        },

        # Settings of the tuning run (a finer grid, adding every percentile
        # 0.01..0.99 to these probabilities, was considered as an alternative)
        'tuning_params': {
            'probs': [0.005, 0.025, 0.165, 0.250, 0.500, 0.750, 0.835, 0.975, 0.995],
            'n_jobs': 8,
        },

        # Candidate values explored by the search
        'hyper_params_grid': {
            'binSize': list(range(10, 500, 10)),
        },
    },
}

# Publish point_estimator_params / density_estimator_params as notebook-level
# variables (at cell/module scope the global namespace is the one being updated)
globals().update(model_setup)

# Initialize modules
# Fresh WeightsModel instance; its tune_point_estimator / tune_density_estimator
# methods are called by the tuning cells of this model.
weightsmodel = WeightsModel()

### Tune point estimator

In [None]:
# Update params
# Rebinds the notebook-level names model_params, tuning_params and
# hyper_params_grid to the point-estimator configuration. The density-estimator
# configuration uses the same key names, so cell execution order matters.
locals().update(point_estimator_params)

# Point estimator
point_estimator = LGBMRegressor(**model_params)

# Tune point estimator
# The untuned regressor is replaced by whatever tune_point_estimator returns;
# the call requests random search over the grid using the shared CV folds.
point_estimator = weightsmodel.tune_point_estimator(X_train, y_train, point_estimator, cv_folds, hyper_params_grid, 
                                                    tuning_params, random_search=True, print_time=True)

### Tune density estimator

In [None]:
# Update params
# Overwrites model_params, tuning_params and hyper_params_grid with the
# density-estimator versions (same names as the point-estimator settings).
locals().update(density_estimator_params)

# Density estimator
# Built around the point estimator bound by the preceding tuning cell.
density_estimator = LevelSetKDEx(estimator = point_estimator, **model_params)

# Tune density estimator
# random_search=False is passed here, unlike the point-estimator tuning.
density_estimator = weightsmodel.tune_density_estimator(X_train, y_train, density_estimator, cv_folds, hyper_params_grid, 
                                                        tuning_params, random_search=False, print_time=True)

# Save
# Persisted under the LSx_LGBM model id; the optimization cell for this model
# loads the estimator again from the same path.
save = dump(density_estimator, path_models+'/density_estimator_'+LSx_LGBM+'.joblib')

## (b) LSx NN - LGBM

In [None]:
# Configuration of the LSx NN model (LightGBM point estimator + density estimator)
model_setup = {

    ## Point estimator
    'point_estimator_params': {

        # Constructor arguments of the regressor
        'model_params': {
            'random_state': 12345,
            'n_jobs': 4,
            'verbose': -1,
        },

        # Settings of the hyper-parameter search itself
        'tuning_params': {
            'n_iter': 200,
            'scoring': {'MSE': 'neg_mean_squared_error'},
            'return_train_score': True,
            'refit': 'MSE',
            'random_state': 12345,
            'n_jobs': 8,
            'verbose': 0,
        },

        # Candidate values explored by the search
        'hyper_params_grid': {
            'num_leaves': list(range(5, 500, 5)),
            'max_depth': [-1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
            'min_child_samples': list(range(5, 500, 5)),
            'learning_rate': [i / 100 for i in range(1, 21)],
            'n_estimators': [100, 200, 300, 400, 500],
            'subsample': [i / 100 for i in range(5, 101, 5)],
            'colsample_bytree': [i / 100 for i in range(5, 100, 5)],
        },
    },

    ## Density estimator
    'density_estimator_params': {

        # Constructor arguments of the density estimator
        'model_params': {
            'weightsByDistance': True,
        },

        # Settings of the tuning run (a finer grid, adding every percentile
        # 0.01..0.99 to these probabilities, was considered as an alternative)
        'tuning_params': {
            'probs': [0.005, 0.025, 0.165, 0.250, 0.500, 0.750, 0.835, 0.975, 0.995],
            'n_jobs': 8,
        },

        # Candidate values explored by the search
        'hyper_params_grid': {
            'binSize': list(range(10, 500, 10)),
        },
    },
}

# Publish point_estimator_params / density_estimator_params as notebook-level
# variables (at cell/module scope the global namespace is the one being updated)
globals().update(model_setup)

# Initialize modules
# Fresh WeightsModel instance; its tune_point_estimator / tune_density_estimator
# methods are called by the tuning cells of this model.
weightsmodel = WeightsModel()

### Tune point estimator

In [None]:
# Update params
# Rebinds the notebook-level names model_params, tuning_params and
# hyper_params_grid to the point-estimator configuration. The density-estimator
# configuration uses the same key names, so cell execution order matters.
locals().update(point_estimator_params)

# Point estimator
point_estimator = LGBMRegressor(**model_params)

# Tune point estimator
# The untuned regressor is replaced by whatever tune_point_estimator returns;
# the call requests random search over the grid using the shared CV folds.
point_estimator = weightsmodel.tune_point_estimator(X_train, y_train, point_estimator, cv_folds, hyper_params_grid, 
                                                    tuning_params, random_search=True, print_time=True)

### Tune density estimator

In [None]:
# Update params
# Overwrites model_params, tuning_params and hyper_params_grid with the
# density-estimator versions (same names as the point-estimator settings).
locals().update(density_estimator_params)

# Density estimator
# Built around the point estimator bound by the preceding tuning cell.
density_estimator = LevelSetKDEx_NN(estimator = point_estimator, **model_params)

# Tune density estimator
# random_search=False is passed here, unlike the point-estimator tuning.
density_estimator = weightsmodel.tune_density_estimator(X_train, y_train, density_estimator, cv_folds, hyper_params_grid, 
                                                        tuning_params, random_search=False, print_time=True)

# Save
# Persisted under the LSx_NN_LGBM model id; the optimization cell for this model
# loads the estimator again from the same path.
save = dump(density_estimator, path_models+'/density_estimator_'+LSx_NN_LGBM+'.joblib')

## (c) wSAA - RF

In [None]:
# Configuration of the wSAA model (random-forest based density estimator only)
model_setup = {

    ## Density estimator
    'density_estimator_params': {

        # Constructor arguments of the estimator
        'model_params': {
            'random_state': 12345,
            'n_jobs': 8,
            'verbose': 0,
        },

        # Settings of the tuning run (a finer grid, adding every percentile
        # 0.01..0.99 to these probabilities, was considered as an alternative)
        'tuning_params': {
            'probs': [0.005, 0.025, 0.165, 0.250, 0.500, 0.750, 0.835, 0.975, 0.995],
            'nIter': 200,
            'random_state': 12345,
            'n_jobs': 8,
        },

        # Candidate values explored by the search
        'hyper_params_grid': {
            'n_estimators': [100, 200, 300, 400, 500],
            'max_depth': list(range(1, 11)),
            'min_samples_split': list(range(5, 500, 5)),
            'min_samples_leaf': list(range(5, 500, 5)),
            'max_features': list(range(5, 100, 5)),
            'max_leaf_nodes': list(range(5, 500, 5)),
            'min_impurity_decrease': [i / 100 for i in range(1, 21)],
            'bootstrap': [True],
            'max_samples': [i / 100 for i in range(5, 101, 5)],
        },
    },
}

# Publish density_estimator_params as a notebook-level variable (at cell/module
# scope the global namespace is the one being updated)
globals().update(model_setup)

# Initialize modules
# Fresh WeightsModel instance; its tune_density_estimator method is called by
# the tuning cell of this model.
weightsmodel = WeightsModel()

### Tune density estimator

In [None]:
# Update params
# Rebinds the notebook-level names model_params, tuning_params and
# hyper_params_grid to the wSAA configuration, replacing whatever an earlier
# model section left bound to those names.
locals().update(density_estimator_params)

# Density estimator
density_estimator = RandomForestWSAA(**model_params)

# Tune density estimator
# random_search=True is passed here; the tuning settings of this model carry
# 'nIter' and 'random_state' entries.
density_estimator = weightsmodel.tune_density_estimator(X_train, y_train, density_estimator, cv_folds, hyper_params_grid, 
                                                        tuning_params, random_search=True, print_time=True)

# Save
# Persisted under the wSAA_RF model id; the optimization cell for this model
# loads the estimator again from the same path.
save = dump(density_estimator, path_models+'/density_estimator_'+wSAA_RF+'.joblib')

# Data-driven optimization

In [None]:
# Settings shared by all data-driven optimization runs
optimization_params = {

    # Gurobi params
    'gurobi_params': {
        'LogToConsole': 0,
        'Threads': 2,
    },

    # Cost params: one scenario per critical ratio (CR) with its cost weights
    'cost_params': [
        {'CR': 0.10, 'c_waiting_time': 1, 'c_overtime': 9},
        {'CR': 0.25, 'c_waiting_time': 1, 'c_overtime': 3},
        {'CR': 0.50, 'c_waiting_time': 1, 'c_overtime': 1},
        {'CR': 0.75, 'c_waiting_time': 3, 'c_overtime': 1},
        {'CR': 0.90, 'c_waiting_time': 9, 'c_overtime': 1},
    ],

    # Number of scenarios
    'K': 10 ** 1,

    # Time budget multiplier
    'rho': 1.15,

    # n parallel jobs
    'n_jobs': 32,
}

# Publish every setting as a notebook-level variable (at cell/module scope the
# global namespace is the one being updated)
globals().update(optimization_params)

## (a) LSx - LGBM

In [None]:
# Initialize modules
experiment = Experiment()

# Load the tuned density estimator saved by the training section
density_estimator = load(f'{path_models}/density_estimator_{LSx_LGBM}.joblib')

# Test dates: unique dates flagged 'test', in ascending order
dates = pd.Series(list(set(ID_data.loc[ID_data.train_test == 'test', 'date']))).sort_values()

# Timer
start_time, st_exec, st_cpu = dt.datetime.now().replace(microsecond=0), time.time(), time.process_time()

# Convert the data frames once up front so the conversion is not repeated for
# every date while the task generator is consumed
X_all, y_all = np.array(X_data), np.array(Y_data)

# For each date in the test horizon (one parallel task per date)
with experiment.tqdm_joblib(tqdm(desc='Progress', total=len(dates))) as progress_bar:
    results = Parallel(n_jobs=n_jobs)(
        delayed(experiment.run)(
            X = X_all,
            y = y_all,
            date = date,
            dates = ID_data['date'],
            areas = ID_data['area'],
            weightsModel = density_estimator,
            cost_params = cost_params,
            gurobi_params = gurobi_params,
            K = K,
            rho = rho,
            print_status = False,
        )
        for date in dates
    )

# Finalize: concatenate the per-date outputs into one frame
results = pd.concat(results).reset_index(drop=True)

# Save results (file name encodes the model id, K and rho)
results.to_csv(f'{path_results}/{LSx_LGBM}_K{K}_rho{rho}.csv', sep=',', index=False)

# Time
print('Time:', dt.datetime.now().replace(microsecond=0) - start_time)
print('>> Execution time:', np.around(time.time()-st_exec, 0), "seconds")
print('>> CPU time:', np.around(time.process_time()-st_cpu, 0), "seconds")

## (b) LSx NN - LGBM

In [None]:
# Initialize modules
experiment = Experiment()

# Load the tuned density estimator saved by the training section
density_estimator = load(f'{path_models}/density_estimator_{LSx_NN_LGBM}.joblib')

# Test dates: unique dates flagged 'test', in ascending order
dates = pd.Series(list(set(ID_data.loc[ID_data.train_test == 'test', 'date']))).sort_values()

# Timer
start_time, st_exec, st_cpu = dt.datetime.now().replace(microsecond=0), time.time(), time.process_time()

# Convert the data frames once up front so the conversion is not repeated for
# every date while the task generator is consumed
X_all, y_all = np.array(X_data), np.array(Y_data)

# For each date in the test horizon (one parallel task per date)
with experiment.tqdm_joblib(tqdm(desc='Progress', total=len(dates))) as progress_bar:
    results = Parallel(n_jobs=n_jobs)(
        delayed(experiment.run)(
            X = X_all,
            y = y_all,
            date = date,
            dates = ID_data['date'],
            areas = ID_data['area'],
            weightsModel = density_estimator,
            cost_params = cost_params,
            gurobi_params = gurobi_params,
            K = K,
            rho = rho,
            print_status = False,
        )
        for date in dates
    )

# Finalize: concatenate the per-date outputs into one frame
results = pd.concat(results).reset_index(drop=True)

# Save results (file name encodes the model id, K and rho)
results.to_csv(f'{path_results}/{LSx_NN_LGBM}_K{K}_rho{rho}.csv', sep=',', index=False)

# Time
print('Time:', dt.datetime.now().replace(microsecond=0) - start_time)
print('>> Execution time:', np.around(time.time()-st_exec, 0), "seconds")
print('>> CPU time:', np.around(time.process_time()-st_cpu, 0), "seconds")

## (c) wSAA - RF

In [None]:
# Initialize modules
experiment = Experiment()

# Load the tuned density estimator saved by the training section
density_estimator = load(f'{path_models}/density_estimator_{wSAA_RF}.joblib')

# Test dates: unique dates flagged 'test', in ascending order
dates = pd.Series(list(set(ID_data.loc[ID_data.train_test == 'test', 'date']))).sort_values()

# Timer
start_time, st_exec, st_cpu = dt.datetime.now().replace(microsecond=0), time.time(), time.process_time()

# Convert the data frames once up front so the conversion is not repeated for
# every date while the task generator is consumed
X_all, y_all = np.array(X_data), np.array(Y_data)

# For each date in the test horizon (one parallel task per date)
with experiment.tqdm_joblib(tqdm(desc='Progress', total=len(dates))) as progress_bar:
    results = Parallel(n_jobs=n_jobs)(
        delayed(experiment.run)(
            X = X_all,
            y = y_all,
            date = date,
            dates = ID_data['date'],
            areas = ID_data['area'],
            weightsModel = density_estimator,
            cost_params = cost_params,
            gurobi_params = gurobi_params,
            K = K,
            rho = rho,
            print_status = False,
        )
        for date in dates
    )

# Finalize: concatenate the per-date outputs into one frame
results = pd.concat(results).reset_index(drop=True)

# Save results (file name encodes the model id, K and rho)
results.to_csv(f'{path_results}/{wSAA_RF}_K{K}_rho{rho}.csv', sep=',', index=False)

# Time
print('Time:', dt.datetime.now().replace(microsecond=0) - start_time)
print('>> Execution time:', np.around(time.time()-st_exec, 0), "seconds")
print('>> CPU time:', np.around(time.process_time()-st_cpu, 0), "seconds")

## (d) SAA

In [None]:
# Initialize modules
experiment = Experiment()

# Test dates: unique dates flagged 'test', in ascending order
dates = pd.Series(list(set(ID_data.loc[ID_data.train_test == 'test', 'date']))).sort_values()

# Timer
start_time, st_exec, st_cpu = dt.datetime.now().replace(microsecond=0), time.time(), time.process_time()

# Convert the data frames once up front so the conversion is not repeated for
# every date while the task generator is consumed
X_all, y_all = np.array(X_data), np.array(Y_data)

# For each date in the test horizon (one parallel task per date); the SAA
# benchmark needs no tuned model, so each task gets a fresh instance
with experiment.tqdm_joblib(tqdm(desc='Progress', total=len(dates))) as progress_bar:
    results = Parallel(n_jobs=n_jobs)(
        delayed(experiment.run)(
            X = X_all,
            y = y_all,
            date = date,
            dates = ID_data['date'],
            areas = ID_data['area'],
            weightsModel = SampleAverageApproximation(),
            cost_params = cost_params,
            gurobi_params = gurobi_params,
            K = K,
            rho = rho,
            print_status = False,
        )
        for date in dates
    )

# Finalize: concatenate the per-date outputs into one frame
results = pd.concat(results).reset_index(drop=True)

# Save results (file name encodes the model id, K and rho)
results.to_csv(f'{path_results}/{SAA}_K{K}_rho{rho}.csv', sep=',', index=False)

# Time
print('Time:', dt.datetime.now().replace(microsecond=0) - start_time)
print('>> Execution time:', np.around(time.time()-st_exec, 0), "seconds")
print('>> CPU time:', np.around(time.process_time()-st_cpu, 0), "seconds")

## Post-processing

In [None]:
# Load the per-model result files of the current (K, rho) setting and tag every
# frame with a model label
results_LSx_LGBM = pd.read_csv(f'{path_results}/{LSx_LGBM}_K{K}_rho{rho}.csv').assign(model='LSx_LGBM')

# NOTE(review): the label reads 'LSx_LGBM_NN' while the model id used for files
# is 'LSx_NN_LGBM' — confirm that the differing label is intended.
results_LSx_NN_LGBM = pd.read_csv(f'{path_results}/{LSx_NN_LGBM}_K{K}_rho{rho}.csv').assign(model='LSx_LGBM_NN')

results_wSAA_RF = pd.read_csv(f'{path_results}/{wSAA_RF}_K{K}_rho{rho}.csv').assign(model='wSAA_RF')

results_SAA = pd.read_csv(f'{path_results}/{SAA}_K{K}_rho{rho}.csv').assign(model='SAA')

In [None]:
# Combine: stack the three data-driven models, then attach the SAA benchmark
# columns (suffixed '_SAA') to every matching date / area / CR row
result_cols = ['date', 'area', 'CR', 'cost', 'overtime', 'waiting_time']

results = pd.concat([
    frame[['model'] + result_cols]
    for frame in (results_LSx_LGBM, results_LSx_NN_LGBM, results_wSAA_RF)
])

results = results.merge(
    results_SAA[result_cols],
    on=['date', 'area', 'CR'],
    suffixes=('', '_SAA'),
)

In [None]:
# Coefficient of prescriptiveness: model cost relative to the SAA benchmark.
# Rows where both costs are equal (including 0 / 0) are set to exactly 1.
results['pq'] = (results['cost'] / results['cost_SAA']).mask(results['cost'] == results['cost_SAA'], 1)

In [None]:
# Write the combined comparison table; the file name encodes K and rho
results.to_csv(f'{path_results}/results_K{K}_rho{rho}.csv', sep=',', index=False)

# <<<<<<<<<< ARCHIVE >>>>>>>>>>

In [None]:
# Analyze (median): median coefficient of prescriptiveness per CR, area and model.
# The string alias 'median' selects the groupby median directly; passing the
# numpy callable to agg is deprecated by recent pandas versions.
results.groupby(['CR', 'area', 'model']).agg(pq=('pq', 'median')).reset_index()

In [None]:
# ... for selected CR (string alias 'median' instead of the numpy callable,
# which recent pandas versions deprecate inside agg)
results.loc[results.CR.isin([0.10, 0.25, 0.50])].groupby(['CR', 'area', 'model']).agg(pq=('pq', 'median')).reset_index()

In [None]:
# ... for selected CR and area
# Both conditions filter `results`, so both masks must be built from `results`
# (a mask taken from the aggregated `total` frame has a different index and
# `total` is only defined by a later cell).
results.loc[
    results.CR.isin([0.10, 0.25, 0.50]) &
    results.area.isin(['Bereich_Gastroskopie', 'Bereich_Koloskopie'])].groupby(['CR', 'area', 'model']).agg(
    pq=('pq', 'median')).reset_index()

In [None]:
# Analyze (total) --- NEW
# Ratio of summed model cost to summed SAA cost per CR, area and model. The
# string alias 'sum' is used because passing the builtin callable to agg is
# deprecated by recent pandas versions.
total = results.groupby(['CR', 'area', 'model']).agg(total_cost=('cost', 'sum'), total_cost_SAA=('cost_SAA', 'sum')).reset_index()
total['pq'] = total.total_cost / total.total_cost_SAA
total

In [None]:
# Analyze (total)
# Ratio of summed model cost to summed SAA cost per CR, area and model. The
# string alias 'sum' is used because passing the builtin callable to agg is
# deprecated by recent pandas versions.
total = results.groupby(['CR', 'area', 'model']).agg(total_cost=('cost', 'sum'), total_cost_SAA=('cost_SAA', 'sum')).reset_index()
total['pq'] = total.total_cost / total.total_cost_SAA
total

In [None]:
# ... restricted to the critical ratios 0.10, 0.25 and 0.50
total[total['CR'].isin([0.10, 0.25, 0.50])]

In [None]:
# ... restricted to the selected critical ratios and the two selected areas
total[
    total['CR'].isin([0.10, 0.25, 0.50])
    & total['area'].isin(['Bereich_Gastroskopie', 'Bereich_Koloskopie'])
]

In [None]:
# ... restricted to the selected critical ratios and the two selected areas --- NEW
total[
    total['CR'].isin([0.10, 0.25, 0.50])
    & total['area'].isin(['Bereich_Gastroskopie', 'Bereich_Koloskopie'])
]

# Scaled pinball loss

In [None]:
#### ... 
def predict_quantiles(X, y, date, dates, areas, weightsModel = None,
                      quantiles = None,
                      print_status = False):

    """
    Predict quantiles for all observations of one test date.

    The weights model is fitted on every observation dated before `date`
    (all areas together) and predicts the requested quantiles for the
    observations dated exactly `date`. As a benchmark, a
    SampleAverageApproximation model is fitted separately per area on the
    training targets of that area.

    Arguments:

        X: feature array, indexable by a boolean row mask and aligned with `dates`
        y: target array aligned with `dates` (flattened internally)
        date: the test date; earlier rows are training data
        dates: date of every row (compared against `date` with `<` and `==`)
        areas: area label of every row
        weightsModel: model providing fit(X, y) and
            predict(X, probs=..., outputAsDf=False); the prediction is consumed
            as a mapping from probability to per-row quantile estimates.
            Required despite the None default.
        quantiles: probabilities to predict; None selects
            [0.005, 0.025, 0.165, 0.250, 0.500, 0.750, 0.835, 0.975, 0.995]
        print_status: currently unused

    Returns:

        results(pd.DataFrame): one row per area, test observation and
            probability with the columns area, j (position of the observation
            within its area), prob, q_hat, q_hat_saa and y

    Raises:

        ValueError: if no weights model is provided

    """

    if weightsModel is None:
        raise ValueError("predict_quantiles requires a weightsModel (got None)")

    # A fresh list per call avoids sharing one mutable default between calls
    if quantiles is None:
        quantiles = [0.005, 0.025, 0.165, 0.250, 0.500, 0.750, 0.835, 0.975, 0.995]

    # Train-test split (plain boolean arrays, so positional indexing is
    # independent of any pandas index carried by `dates`)
    in_train = np.asarray(dates < date)
    in_test = np.asarray(dates == date)

    y_train, y_test = y[in_train].flatten(), y[in_test].flatten()
    X_train, X_test = X[in_train], X[in_test]
    areas_train, areas_test = areas[in_train], areas[in_test]

    # Fit weights model
    weightsModel.fit(X_train, y_train)

    # Initialize
    results = {}

    # For each area (in order of first appearance, so the row order of the
    # output is reproducible between runs)
    for area in pd.unique(areas_test):

        # Select test data for current area
        area_rows = np.asarray(areas_test == area)
        y_test_ = y_test[area_rows]
        X_test_ = X_test[area_rows]

        # Fit SAA on data of current area
        saa_model = SampleAverageApproximation()
        saa_model.fit(y_train[np.asarray(areas_train == area)])

        # Predict quantiles for each patient
        q_hat = weightsModel.predict(X_test_, probs=quantiles, outputAsDf=False)
        q_hat_saa = saa_model.predict(X_test_, probs=quantiles, outputAsDf=False)

        # Long format: one row per (prob, j), where j is the row position
        model_part = pd.concat(
            [pd.DataFrame({'prob': p, 'q_hat': q}) for p, q in q_hat.items()]
        ).reset_index(names='j')
        saa_part = pd.concat(
            [pd.DataFrame({'prob': p, 'q_hat_saa': q, 'y': y_test_}) for p, q in q_hat_saa.items()]
        ).reset_index(names='j')

        # Add to results
        results[area] = pd.merge(left = model_part, right = saa_part, on = ['prob', 'j'])

    # Finalize: the dict keys become the 'area' column, the inner index is dropped
    results = pd.concat(results).reset_index(names=['area', '']).drop(columns='')

    return results

In [None]:
# Update params
locals().update(optimization_params)

# NOTE(review): this cell reuses `experiment` and `density_estimator` from the
# most recently executed optimization cell — confirm the intended model is loaded.

# Test dates: unique dates flagged 'test', in ascending order
dates = pd.Series(list(set(ID_data.loc[ID_data.train_test == 'test', 'date']))).sort_values()

# Timer
start_time, st_exec, st_cpu = dt.datetime.now().replace(microsecond=0), time.time(), time.process_time()

# Convert the data frames once up front so the conversion is not repeated for
# every date while the task generator is consumed
X_all, y_all = np.array(X_data), np.array(Y_data)

# For each date in the test horizon (one parallel task per date)
with experiment.tqdm_joblib(tqdm(desc='Progress', total=len(dates))) as progress_bar:
    results = Parallel(n_jobs=n_jobs)(
        delayed(predict_quantiles)(
            X = X_all,
            y = y_all,
            date = date,
            dates = ID_data['date'],
            areas = ID_data['area'],
            weightsModel = density_estimator,
            quantiles = [0.005, 0.025, 0.165, 0.250, 0.500, 0.750, 0.835, 0.975, 0.995],
            print_status = False,
        )
        for date in dates
    )

# Finalize: the concat keys become the 'date' column, the inner index is dropped
results = pd.concat(results, keys=dates).reset_index(names=['date', '']).drop(columns='')

# Time
print('Time:', dt.datetime.now().replace(microsecond=0) - start_time)
print('>> Execution time:', np.around(time.time()-st_exec, 0), "seconds")
print('>> CPU time:', np.around(time.process_time()-st_cpu, 0), "seconds")

In [None]:
# Scaled Pinball Loss
def scaled_pinball_loss(p, q, q_saa, y, **kwargs):

    """
    Mean pinball loss of a model relative to the mean pinball loss of SAA.

    Arguments:

        p: probability level(s) of the predicted quantile; a scalar or one
            value per observation
        q: quantile predictions of the model
        q_saa: quantile predictions of the SAA benchmark
        y: observed values
        **kwargs: accepted and ignored

    Returns:

        spl: pl / pl_saa, where pl is the mean pinball loss of `q` and pl_saa
            that of `q_saa`. Identical losses give exactly 1 (also when both
            are 0); a positive model loss with a zero benchmark loss gives inf.

    """

    # Plain 1-d float arrays, so pandas inputs are combined by position
    p = np.asarray(p, dtype=float).ravel()
    q = np.asarray(q, dtype=float).ravel()
    q_saa = np.asarray(q_saa, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()

    def _mean_pinball_loss(pred):
        # Under-prediction (pred <= y) is weighted by p, over-prediction by 1 - p
        return np.mean(np.where(pred <= y, (y - pred) * p, (pred - y) * (1 - p)))

    # Pinball Loss Model
    pl = _mean_pinball_loss(q)

    # Pinball Loss SAA
    pl_saa = _mean_pinball_loss(q_saa)

    # Scaled Pinball Loss: equal losses are decided before dividing, because
    # 0 / 0 would otherwise turn the result into NaN
    if pl == pl_saa:
        return np.float64(1.0)

    with np.errstate(divide='ignore'):
        spl = pl / pl_saa

    return spl

In [None]:
# Scaled pinball loss per area and probability level: one record per group,
# collected into a frame with the columns area, prob and spl
spl = pd.DataFrame([
    {
        'area': area,
        'prob': prob,
        'spl': scaled_pinball_loss(p=group.prob, q=group.q_hat, q_saa=group.q_hat_saa, y=group.y),
    }
    for (area, prob), group in results.groupby(['area', 'prob'])
])

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Scaled pinball loss per probability level, one line per area.
# Grouping by the plain column name (not a one-element list) keeps the group
# keys scalar, so the legend shows the area name rather than a 1-tuple.
plotData = spl.groupby('area')

# Plot
fig, ax = plt.subplots()
fig.set_size_inches(10, 5)

for area, d in plotData:
    ax.plot(d['prob'], d['spl'], marker='', linestyle='-', ms=2, linewidth=2, label=area)

# Reference line: a scaled loss of 1 means parity with the SAA benchmark
ax.axhline(y=1, color='grey', linestyle='--', linewidth=1)
ax.legend()
plt.show()