In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import tsgm
import tensorflow as tf
from tensorflow import keras

In [2]:
# Render every row of a DataFrame instead of truncating long outputs.
pd.options.display.max_rows = None
# AR6 scenarios database export (file name indicates world region, v1.1).
AR6 = pd.read_csv(filepath_or_buffer='AR6_Scenarios_Database_World_v1.1.csv')

In [None]:
# Scenario data is read from the 'data' sheet of the IAMC 1.5 workbook.
sr = pd.read_excel(io='iamc15_scenario_data_world_r2.0.xlsx', sheet_name='data')
# Preview the first five rows.
sr.head(5)

In [None]:
# Mapping table that is later merged onto the SR data on ('Model', 'Scenario').
sr_Model_Scenario = pd.read_csv(filepath_or_buffer='SR15_mapping_class.csv')
# Display the full mapping table.
sr_Model_Scenario

In [5]:
# One frame per variable of interest: keep only the rows whose 'Variable'
# column equals the given name.
sr_CCS = sr.loc[sr['Variable'].eq('Carbon Sequestration|CCS')]
sr_PrimaryCoal = sr.loc[sr['Variable'].eq('Primary Energy|Coal')]
sr_PrimaryOil = sr.loc[sr['Variable'].eq('Primary Energy|Oil')]
sr_PrimaryGas = sr.loc[sr['Variable'].eq('Primary Energy|Gas')]
sr_FinalLiquid = sr.loc[sr['Variable'].eq('Final Energy|Liquids')]
sr_Kyoto_Gase = sr.loc[sr['Variable'].eq('Emissions|Kyoto Gases')]

In [6]:
#Select the columns for specific years
def select_years(dataframe,years):
    """Return the 'Model', 'Scenario' and requested year columns with gaps mean-filled.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain 'Model', 'Scenario' and one column per requested year,
        labelled with the year as a string (e.g. '2050').
    years : iterable
        Years to keep; each is converted with ``str`` to obtain the column label.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh 0..n-1 index. Missing values in the year
        columns are replaced by that column's mean.

    Notes
    -----
    Values can still be missing after the fill (for example when a year column
    is entirely empty, so its mean is NaN, or when 'Model'/'Scenario' has
    gaps). In that case the affected column names are printed; the frame is
    returned regardless.
    """
    year_columns = [str(year) for year in years]

    selected_dataframe = dataframe.loc[:, ['Model', 'Scenario'] + year_columns].reset_index(drop=True)
    # Column-wise mean imputation, restricted to the year columns.
    column_means = selected_dataframe[year_columns].mean()
    selected_dataframe[year_columns] = selected_dataframe[year_columns].fillna(column_means)

    # Boolean Series indexed by column name: True where the column still has a gap.
    null_mask = selected_dataframe.isnull().any()
    if null_mask.any():
        print(f'There is a missing value')
        # Read the names off the mask itself; using the mask to index the
        # DataFrame would try to select rows and raise.
        missing_columns = null_mask[null_mask].index.tolist()
        print(f"Missing column exists{missing_columns}")

    return selected_dataframe

In [7]:
# Reduce every SR variable frame to the decadal columns 2020, 2030, ..., 2100.
(
    sr_CCS_2020_2100,
    sr_PrimaryCoal_2020_2100,
    sr_PrimaryOil_2020_2100,
    sr_PrimaryGas_2020_2100,
    sr_FinalLiquid_2020_2100,
    sr_Kyoto_Gase_2020_2100,
) = (
    select_years(variable_frame, range(2020, 2101, 10))
    for variable_frame in (
        sr_CCS,
        sr_PrimaryCoal,
        sr_PrimaryOil,
        sr_PrimaryGas,
        sr_FinalLiquid,
        sr_Kyoto_Gase,
    )
)


In [8]:
# Order matters: later cells address these frames by position (0..5).
sr_variables = [
    sr_CCS_2020_2100,          # 0
    sr_FinalLiquid_2020_2100,  # 1
    sr_PrimaryCoal_2020_2100,  # 2
    sr_PrimaryGas_2020_2100,   # 3
    sr_PrimaryOil_2020_2100,   # 4
    sr_Kyoto_Gase_2020_2100,   # 5
]

In [None]:
# Attach the mapping table's columns (including 'class') to every SR variable
# frame via an inner join on ('Model', 'Scenario').
# Frames that already carry a 'class' column are left alone, so re-running this
# cell does not merge a second time (which would yield 'class_x'/'class_y' and
# break the later ['class'] lookups).
sr_variables[:] = [
    frame if 'class' in frame.columns
    else pd.merge(sr_Model_Scenario, frame, on=['Model', 'Scenario'], how='inner').reset_index(drop=True)
    for frame in sr_variables
]
# Only the last expression of a cell is rendered automatically, so the first
# count table has to be displayed explicitly.
display(sr_variables[0]['class'].value_counts())
sr_variables[1]['class'].value_counts()

In [10]:
# For each SR variable (same order as sr_variables), collect the values of one
# year column for one scenario class as a NumPy array.
_1p5_2050 = [frame.loc[frame['class'] == '1p5', '2050'].values for frame in sr_variables]
_1p5_2100 = [frame.loc[frame['class'] == '1p5', '2100'].values for frame in sr_variables]

_2p0_2050 = [frame.loc[frame['class'] == '2p0', '2050'].values for frame in sr_variables]
_2p0_2100 = [frame.loc[frame['class'] == '2p0', '2100'].values for frame in sr_variables]



In [11]:
# Fix the random seeds so that sampling below is repeatable.
global_seed = 3
np.random.seed(global_seed)      # NumPy global generator (used for scatter jitter)
tf.random.set_seed(global_seed)  # TensorFlow global seed (used for latent sampling)

In [12]:
#Load Kyoto_Gases data (2020-2100), consider the case of C1-C8 for simplicity.
ar6_Kyoto_Gases = pd.read_csv('Kyoto Gases.csv')
# Aggregate the eight categories into three integer labels:
# C1-C4 -> 0, C5-C6 -> 1, C7-C8 -> 2.
mapping = {'C1':0,'C2':0,'C3':0,'C4':0,'C5':1,'C6':1,'C7':2,'C8':2}
# Build the filtered, relabelled frame as a new object. The previous
# `frame['Category'].replace(..., inplace=True)` was a chained in-place update on
# a filtered frame; under pandas copy-on-write it does not modify the frame, so
# the labels would remain strings and later `Y == 0` comparisons match nothing.
ar6_Kyoto_Gases = (
    ar6_Kyoto_Gases[ar6_Kyoto_Gases['Category'].isin(list(mapping))]
    .assign(Category=lambda frame: frame['Category'].map(mapping))
    .reset_index(drop=True)
)

In [13]:
# The textual category name is not needed once 'Category' holds the label.
ar6_Kyoto_Gases = ar6_Kyoto_Gases.drop(columns='Category_name')

In [14]:
# Read one CSV per AR6 variable.
# Carbon sequestration via CCS (file name indicates an imputed version).
ar6_CarbonSequestration = pd.read_csv(filepath_or_buffer='Carbon_Sequestration_CCS_imputed.csv')
# Final energy, liquids.
ar6_FinalEnergy_Liquid = pd.read_csv(filepath_or_buffer='Final Energy_Liquids.csv')
# Primary energy by fuel.
ar6_PrimaryEnergy_Gas = pd.read_csv(filepath_or_buffer='Primary Energy_Gas.csv')
ar6_PrimaryEnergy_Oil = pd.read_csv(filepath_or_buffer='Primary Energy_Oil.csv')
ar6_PrimaryEnergy_Coal = pd.read_csv(filepath_or_buffer='PrimaryEnergy_Coal.csv')

In [15]:
# Keep only the (Model, Scenario) pairs that occur in the Kyoto-gas table and
# in every other variable table, by chaining inner joins on the key columns.
ar6_Variables = [
    ar6_CarbonSequestration,
    ar6_FinalEnergy_Liquid,
    ar6_PrimaryEnergy_Coal,
    ar6_PrimaryEnergy_Gas,
    ar6_PrimaryEnergy_Oil,
]
ar6_Model_Scenario = ar6_Kyoto_Gases[['Model', 'Scenario']]
for variable in ar6_Variables:
    ar6_Model_Scenario = ar6_Model_Scenario.merge(
        variable[['Model', 'Scenario']],
        how='inner',
        on=['Model', 'Scenario'],
    )

In [16]:
# Restrict every variable table to the shared (Model, Scenario) pairs and
# discard its 'Category_name' column.
for i, variable_frame in enumerate(ar6_Variables):
    ar6_Variables[i] = (
        ar6_Model_Scenario
        .merge(variable_frame, on=['Model', 'Scenario'], how='inner')
        .drop(columns=['Category_name'])
    )

In [17]:
# Restrict the Kyoto-gas table to the shared (Model, Scenario) pairs as well.
ar6_Kyoto_Gases = ar6_Kyoto_Gases.merge(ar6_Model_Scenario, how='inner', on=['Model', 'Scenario'])

In [18]:
# Kyoto gases become the last entry of the variable list.
ar6_Variables.extend([ar6_Kyoto_Gases])

In [19]:
#Generate the feature tensor X with shape (samples, time steps, features):
#the values of each variable during 2020-2100, 9 time steps, 6 features.
N_TIME_STEPS = 9
N_FEATURES = 6

# Convert each variable table to a plain array of its value columns. The slice
# drops the first three columns and the last one (assumed to be non-value
# columns -- TODO confirm against the CSV layout). Entries that are already
# arrays are kept as they are, so this cell can be re-run safely.
for i, variable_table in enumerate(ar6_Variables):
    if isinstance(variable_table, pd.DataFrame):
        ar6_Variables[i] = variable_table.iloc[:, 3:-1].values
    else:
        ar6_Variables[i] = np.asarray(variable_table)

if len(ar6_Variables) < N_FEATURES:
    raise ValueError(f'Expected at least {N_FEATURES} variables, got {len(ar6_Variables)}')
feature_arrays = ar6_Variables[:N_FEATURES]

# The sample count comes from the data instead of a hard-coded constant, so a
# change in the input files cannot silently truncate rows or index out of range.
n_samples = feature_arrays[0].shape[0]
for values in feature_arrays:
    if values.shape[0] != n_samples or values.shape[1] < N_TIME_STEPS:
        raise ValueError(
            f'Every variable needs {n_samples} rows and at least {N_TIME_STEPS} '
            f'time-step columns, got shape {values.shape}'
        )

# Stack along a new last axis: X[i, j, k] is sample i, time step j, feature k.
X = np.stack([values[:, :N_TIME_STEPS] for values in feature_arrays], axis=-1).astype(np.float64)

In [20]:
# Class label per sample, taken from the Kyoto-gas table's 'Category' column.
Y = ar6_Kyoto_Gases['Category'].to_numpy()

In [21]:
# Split the samples by aggregated category label (0, 1, 2).
C1234_DataSet, C56_DataSet, C78_DataSet = (X[Y == label] for label in (0, 1, 2))

In [22]:
# VAE architecture from the tsgm model zoo for label 0; arguments are
# presumably (time steps, features, latent dimension) = (9, 6, 8).
architecture1 = tsgm.models.zoo["vae_conv5"](9, 6, 8)
encoder1 = architecture1.encoder
decoder1 = architecture1.decoder

In [23]:
# Feature-wise scaler constructed with (0, 1), fitted on the label-0 samples.
scaler_C1234 = tsgm.utils.TSFeatureWiseScaler((0, 1))
scaled_C1234_data = scaler_C1234.fit_transform(C1234_DataSet)

In [24]:
# Same architecture and scaling setup for the label-1 samples.
architecture2 = tsgm.models.zoo["vae_conv5"](9, 6, 8)
encoder2 = architecture2.encoder
decoder2 = architecture2.decoder

scaler_C56 = tsgm.utils.TSFeatureWiseScaler((0, 1))
scaled_C56_data = scaler_C56.fit_transform(C56_DataSet)

In [25]:
# Same architecture and scaling setup for the label-2 samples.
architecture3 = tsgm.models.zoo["vae_conv5"](9, 6, 8)
encoder3 = architecture3.encoder
decoder3 = architecture3.decoder

scaler_C78 = tsgm.utils.TSFeatureWiseScaler((0, 1))
scaled_C78_data = scaler_C78.fit_transform(C78_DataSet)

In [26]:
# Restore the saved weights, grouped per encoder/decoder pair.
# Pair 1
encoder1.load_weights('Policy-encoder1_weights-top6.h5')
decoder1.load_weights('Policy-decoder1_weights-top6.h5')
# Pair 2
encoder2.load_weights('Policy-encoder2_weights-top6.h5')
decoder2.load_weights('Policy-decoder2_weights-top6.h5')
# Pair 3
encoder3.load_weights('Policy-encoder3_weights-top6.h5')
decoder3.load_weights('Policy-decoder3_weights-top6.h5')

In [27]:
# Draw 1000 latent vectors of size 8 per class from a standard normal, decode
# them, and map the decoder output back through the matching scaler.
# The three draws happen in the order z1, z2, z3.
z1, z2, z3 = (tf.random.normal((1000, 8)) for _ in range(3))
Gen_C1234 = scaler_C1234.inverse_transform(decoder1(z1))
Gen_C56 = scaler_C56.inverse_transform(decoder2(z2))
Gen_C78 = scaler_C78.inverse_transform(decoder3(z3))

In [None]:
#SR data versus generated data: one box plot per (year, variable).
#The twelve formerly copy-pasted plotting cells are driven by one helper and a
#spec table; titles, labels, y-limits and output file names are unchanged.

# Position of each plotted year on the time axis of the generated arrays
# (time steps are 2020, 2030, ..., 2100).
YEAR_TO_STEP = {2050: 3, 2100: 8}
# SR values per year: (1.5C class, 2.0C class), each a list indexed by feature.
SR_VALUES_BY_YEAR = {2050: (_1p5_2050, _2p0_2050), 2100: (_1p5_2100, _2p0_2100)}
# One entry per feature, in feature-axis order:
# (name stem, y-label override or None, {year: y-limits, or None for autoscale}).
FEATURE_PLOTS = [
    ('CarbonSequestration', None, {2050: (0, 31000), 2100: (0, 45000)}),
    ('FinalEnergy_Liquid', None, {2050: (0, 400), 2100: (0, 400)}),
    ('PrimaryEnergy_Coal', None, {2050: (0, 450), 2100: (0, 900)}),
    ('PrimaryEnergy_Gas', None, {2050: (0, 450), 2100: (0, 600)}),
    ('PrimaryEnergy_Oil', None, {2050: None, 2100: (0, 400)}),
    ('Kyoto_Gases', 'Kyoto_Gases', {2050: (-20000, 120000), 2100: (-25000, 160000)}),
]


def plot_sr_vs_generated(feature_index, year, name, ylim=None, ylabel=None):
    """Draw, save and show one box plot of SR data against generated samples.

    Five groups are shown: the SR '1p5' and '2p0' classes and the three
    generated sets. Each group gets a box (shifted left of its tick) and a
    jittered scatter of the individual values (shifted right of its tick).

    Parameters
    ----------
    feature_index : int
        Index of the variable in the SR value lists and on the last axis of
        the generated arrays.
    year : int
        Key of YEAR_TO_STEP / SR_VALUES_BY_YEAR (2050 or 2100).
    name : str
        Used in the title, the PDF file name and, by default, the y-label.
    ylim : sequence of two numbers, optional
        Y-axis limits; None keeps matplotlib's automatic limits.
    ylabel : str, optional
        Y-axis label; defaults to ``name``.
    """
    step = YEAR_TO_STEP[year]
    sr_1p5, sr_2p0 = SR_VALUES_BY_YEAR[year]
    category_names = ['1.5C', '2.0C', 'C1234_Generate', 'C56_Generate', 'C78_Generate']
    colors = ['goldenrod', 'royalblue', 'brown', 'orangered', 'olive']
    data_values = [sr_1p5[feature_index], sr_2p0[feature_index]] + [
        generated[:, step, feature_index] for generated in (Gen_C1234, Gen_C56, Gen_C78)
    ]
    tick_positions = np.arange(1, len(category_names) + 1)
    box_positions = tick_positions - 0.25
    scatter_positions = tick_positions + 0.25

    fig, ax = plt.subplots(figsize=(12, 10))
    box_plot = ax.boxplot(data_values, showfliers=False, patch_artist=True,
                          widths=0.25, positions=box_positions)
    for patch, color in zip(box_plot['boxes'], colors):
        patch.set_facecolor(color)
    for median_line in box_plot['medians']:
        median_line.set(color='black')
    # scatterplot: horizontal jitter keeps overlapping points distinguishable
    for position, values, color in zip(scatter_positions, data_values, colors):
        jitter_x = np.random.normal(position, 0.05, size=len(values))
        ax.scatter(jitter_x, values, color=color, s=5.8)

    title = f'Box Plot of {name}(Generate and SR DataSet)'
    ax.set_xlabel('Agg-Category', fontsize=20)
    ax.set_ylabel(name if ylabel is None else ylabel, fontsize=20)
    ax.set_title(title, fontsize=25)
    # set scale
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(category_names)
    if ylim is not None:
        ax.set_ylim(ylim)
    fig.savefig(f'{title}.pdf', format='pdf')
    plt.show()
    # Release the figure so that a dozen large figures do not stay in memory.
    plt.close(fig)


# All 2050 figures first, then all 2100 figures, each in feature order.
for year in YEAR_TO_STEP:
    for feature_index, (stem, ylabel_override, ylims) in enumerate(FEATURE_PLOTS):
        plot_sr_vs_generated(feature_index, year, f'{stem}_{year}',
                             ylim=ylims[year], ylabel=ylabel_override)