In [117]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
pd.options.plotting.backend = 'plotly'

In [118]:
# Open EletricityProductionPlant dataset
df_plants = pd.read_csv('../../Data/ElectricityProductionPlant.csv', sep=',')

# Remove all the rows that have a NaN value
df_plants = df_plants.dropna()

# Keep only the index, Canton, BeginningOfOperation, TotalPower and MainCategory columns
df_plants = df_plants[['Canton', 'BeginningOfOperation', 'TotalPower', 'MainCategory']]

df_plants.head()

Unnamed: 0,Canton,BeginningOfOperation,TotalPower,MainCategory
0,BL,2009-05-05,18.81,maincat_2
1,AG,2011-10-28,5.8,maincat_2
2,BE,2008-10-07,3.0,maincat_2
3,BE,2008-06-27,8.4,maincat_2
4,ZH,2006-04-21,4.8,maincat_2


In [119]:
df_plants = df_plants.sort_values(by=['BeginningOfOperation'])

In [120]:
# Create a new column that contains the cumulative sum of the TotalPower column, reset the sum for each main category and canton
df_plants['CumulativePower'] = df_plants.groupby(['Canton', 'MainCategory'])['TotalPower'].cumsum()
df_plants.head(60)

Unnamed: 0,Canton,BeginningOfOperation,TotalPower,MainCategory,CumulativePower
162021,LU,1863-01-01,200.0,maincat_1,200.0
158324,LU,1886-01-01,740.0,maincat_1,940.0
161948,BE,1888-01-01,120.0,maincat_1,120.0
161991,SO,1888-01-01,240.0,maincat_1,240.0
158370,NW,1890-01-01,240.0,maincat_1,240.0
161901,AG,1892-01-01,120.0,maincat_1,120.0
161787,SG,1893-01-01,550.0,maincat_1,550.0
158403,TI,1894-01-01,950.0,maincat_1,950.0
161708,SG,1894-02-01,300.0,maincat_1,850.0
158418,JU,1894-07-01,7550.0,maincat_1,7550.0


In [129]:
# Create a new dataframe for every canton and put it in a list
df_canton = {}
for canton in df_plants['Canton'].unique():
    df_canton[canton] = df_plants[df_plants['Canton'] == canton]

#Reset the index of the dataframes
for canton in df_canton:
    df_canton[canton] = df_canton[canton].reset_index(drop=True)

In [130]:
for canton in df_canton:
    print(canton)
    for index in range(len(df_canton[canton])-1, 0, -1):
        if df_canton[canton].iloc[index]['BeginningOfOperation'] == df_canton[canton].iloc[index - 1]['BeginningOfOperation'] and df_canton[canton].iloc[index]['Canton'] == df_canton[canton].iloc[index - 1]['Canton']:
            df_canton[canton].iloc[index - 1]['TotalPower'] += df_canton[canton].iloc[index]['TotalPower']
            df_canton[canton].iloc[index - 1]['CumulativePower'] += df_canton[canton].iloc[index]['CumulativePower']
            if index in df_canton[canton].index:
                df_canton[canton].drop(index, axis=0, inplace=True)



LU
BE
SO
NW
AG
SG
TI
JU
VD
FR
ZG
GL
ZH
GR
OW
UR
VS
SZ
BS
GE
TG
BL
SH
AI
NE
AR


In [131]:
df_canton['VS'].head(60)

Unnamed: 0,Canton,BeginningOfOperation,TotalPower,MainCategory,CumulativePower
0,VS,1908-06-01,19200.0,maincat_1,19200.0
1,VS,1909-01-01,28000.0,maincat_1,47200.0
2,VS,1915-01-01,32485.0,maincat_1,79685.0
3,VS,1923-01-01,107000.0,maincat_1,186685.0
4,VS,1926-10-02,29920.0,maincat_1,216605.0
5,VS,1929-10-01,12000.0,maincat_1,228605.0
6,VS,1931-01-01,1512.0,maincat_1,230117.0
8,VS,1942-08-07,7500.0,maincat_1,260657.0
9,VS,1950-12-31,35300.0,maincat_1,295957.0
10,VS,1953-01-01,70418.0,maincat_1,366375.0


In [132]:
import warnings
warnings.filterwarnings('ignore')

df_canton_new = {}
for canton in df_canton:
    df_canton_new[canton] = pd.DataFrame(columns=['Canton', 'BeginningOfOperation', 'TotalPower', 'MainCategory', 'CumulativePower'])
    print(canton)
    cat1 = 0
    cat2 = 0
    cat3 = 0
    cat4 = 0
    for index, row in df_canton[canton].iterrows():
        if row['MainCategory'] == "maincat_1":
            cat1 = row['CumulativePower']
            df_canton_new[canton] = df_canton_new[canton].append({'Canton': row['Canton'], 'BeginningOfOperation': row['BeginningOfOperation'], 'TotalPower': row['TotalPower'], 'MainCategory': row['MainCategory'], 'CumulativePower': row['CumulativePower']}, ignore_index=True)
            df_canton_new[canton] = df_canton_new[canton].append({'Canton': row['Canton'], 'BeginningOfOperation': row['BeginningOfOperation'], 'TotalPower': 0, 'MainCategory': 'maincat_2', 'CumulativePower': cat2}, ignore_index=True)
            df_canton_new[canton] = df_canton_new[canton].append({'Canton': row['Canton'], 'BeginningOfOperation': row['BeginningOfOperation'], 'TotalPower': 0, 'MainCategory': 'maincat_3', 'CumulativePower': cat3}, ignore_index=True)
            df_canton_new[canton] = df_canton_new[canton].append({'Canton': row['Canton'], 'BeginningOfOperation': row['BeginningOfOperation'], 'TotalPower': 0, 'MainCategory': 'maincat_4', 'CumulativePower': cat4}, ignore_index=True)
        elif row['MainCategory'] == "maincat_2":
            cat2 = row['CumulativePower']
            df_canton_new[canton] = df_canton_new[canton].append({'Canton': row['Canton'], 'BeginningOfOperation': row['BeginningOfOperation'], 'TotalPower': 0, 'MainCategory': 'maincat_1', 'CumulativePower': cat1}, ignore_index=True)
            df_canton_new[canton] = df_canton_new[canton].append({'Canton': row['Canton'], 'BeginningOfOperation': row['BeginningOfOperation'], 'TotalPower': row['TotalPower'], 'MainCategory': row['MainCategory'], 'CumulativePower': row['CumulativePower']}, ignore_index=True)
            df_canton_new[canton] = df_canton_new[canton].append({'Canton': row['Canton'], 'BeginningOfOperation': row['BeginningOfOperation'], 'TotalPower': 0, 'MainCategory': 'maincat_3', 'CumulativePower': cat3}, ignore_index=True)
            df_canton_new[canton] = df_canton_new[canton].append({'Canton': row['Canton'], 'BeginningOfOperation': row['BeginningOfOperation'], 'TotalPower': 0, 'MainCategory': 'maincat_4', 'CumulativePower': cat4}, ignore_index=True)
        elif row['MainCategory'] == "maincat_3":
            cat3 = row['CumulativePower']
            df_canton_new[canton] = df_canton_new[canton].append({'Canton': row['Canton'], 'BeginningOfOperation': row['BeginningOfOperation'], 'TotalPower': 0, 'MainCategory': 'maincat_1', 'CumulativePower': cat1}, ignore_index=True)
            df_canton_new[canton] = df_canton_new[canton].append({'Canton': row['Canton'], 'BeginningOfOperation': row['BeginningOfOperation'], 'TotalPower': 0, 'MainCategory': 'maincat_2', 'CumulativePower': cat2}, ignore_index=True)
            df_canton_new[canton] = df_canton_new[canton].append({'Canton': row['Canton'], 'BeginningOfOperation': row['BeginningOfOperation'], 'TotalPower': row['TotalPower'], 'MainCategory': row['MainCategory'], 'CumulativePower': row['CumulativePower']}, ignore_index=True)
            df_canton_new[canton] = df_canton_new[canton].append({'Canton': row['Canton'], 'BeginningOfOperation': row['BeginningOfOperation'], 'TotalPower': 0, 'MainCategory': 'maincat_4', 'CumulativePower': cat4}, ignore_index=True)
        elif row['MainCategory'] == "maincat_4":
            cat4 = row['CumulativePower']
            df_canton_new[canton] = df_canton_new[canton].append({'Canton': row['Canton'], 'BeginningOfOperation': row['BeginningOfOperation'], 'TotalPower': 0, 'MainCategory': 'maincat_1', 'CumulativePower': cat1}, ignore_index=True)
            df_canton_new[canton] = df_canton_new[canton].append({'Canton': row['Canton'], 'BeginningOfOperation': row['BeginningOfOperation'], 'TotalPower': 0, 'MainCategory': 'maincat_2', 'CumulativePower': cat2}, ignore_index=True)
            df_canton_new[canton] = df_canton_new[canton].append({'Canton': row['Canton'], 'BeginningOfOperation': row['BeginningOfOperation'], 'TotalPower': 0, 'MainCategory': 'maincat_3', 'CumulativePower': cat3}, ignore_index=True)
            df_canton_new[canton] = df_canton_new[canton].append({'Canton': row['Canton'], 'BeginningOfOperation': row['BeginningOfOperation'], 'TotalPower': row['TotalPower'], 'MainCategory': row['MainCategory'], 'CumulativePower': row['CumulativePower']}, ignore_index=True)

df_canton_new['ZH'].head(60)

LU
BE
SO
NW
AG
SG
TI
JU
VD
FR
ZG
GL
ZH
GR
OW
UR
VS
SZ
BS
GE
TG
BL
SH
AI
NE
AR


Unnamed: 0,Canton,BeginningOfOperation,TotalPower,MainCategory,CumulativePower
0,ZH,1900-01-01,58.0,maincat_1,58.0
1,ZH,1900-01-01,0.0,maincat_2,0.0
2,ZH,1900-01-01,0.0,maincat_3,0.0
3,ZH,1900-01-01,0.0,maincat_4,0.0
4,ZH,1920-07-01,45220.0,maincat_1,48038.0
5,ZH,1920-07-01,0.0,maincat_2,0.0
6,ZH,1920-07-01,0.0,maincat_3,0.0
7,ZH,1920-07-01,0.0,maincat_4,0.0
8,ZH,1936-01-01,105.0,maincat_1,48143.0
9,ZH,1936-01-01,0.0,maincat_2,0.0


In [134]:
#Sort all the dataframes by BeginningOfOperation and MainCategory
for canton in df_canton_new:
    df_canton_new[canton] = df_canton_new[canton].sort_values(by=['BeginningOfOperation', 'MainCategory'])

#Show rows 60 to 100
df_canton_new['VS'].iloc[80:140]


Unnamed: 0,Canton,BeginningOfOperation,TotalPower,MainCategory,CumulativePower
80,VS,1965-12-31,26563.0,maincat_1,2947696.0
81,VS,1965-12-31,0.0,maincat_2,0.0
82,VS,1965-12-31,0.0,maincat_3,0.0
83,VS,1965-12-31,0.0,maincat_4,0.0
84,VS,1967-01-16,9744.0,maincat_1,2957440.0
85,VS,1967-01-16,0.0,maincat_2,0.0
86,VS,1967-01-16,0.0,maincat_3,0.0
87,VS,1967-01-16,0.0,maincat_4,0.0
88,VS,1968-01-01,351000.0,maincat_1,3308440.0
89,VS,1968-01-01,0.0,maincat_2,0.0


In [135]:
df_canton_new['VS'].plot(kind='area',x='BeginningOfOperation', y='CumulativePower', title='ZH', color='MainCategory', line_group='MainCategory')


In [None]:
px.data.medals_long()[1:30]

Unnamed: 0,nation,medal,count
1,China,gold,10
2,Canada,gold,9
3,South Korea,silver,13
4,China,silver,15
5,Canada,silver,12
6,South Korea,bronze,11
7,China,bronze,8
8,Canada,bronze,12
