In [4]:
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [5]:
# Load the `sql` IPython extension so the %sql / %%sql magics used by the
# following cells are available.
%load_ext sql

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [6]:
import os

# Connection URL for the Eskom database. Set the ESKOM_DATABASE_URL
# environment variable to supply real credentials without writing them into
# the notebook. The fallback is the original local development database, so
# the notebook still runs unchanged when the variable is not set.
eskom_db_url = os.environ.get(
    "ESKOM_DATABASE_URL",
    "postgresql://postgres:postgres@localhost:5430/eskom_db",
)

# Programmatic form of `%sql <url>`: opens the connection that the later
# %sql / %%sql cells reuse.
get_ipython().run_line_magic("sql", eskom_db_url)

In [7]:
%%sql
/* Raw staging table for the Eskom CSV export. It is dropped and recreated on every run so the cell can be re-executed safely. */
DROP TABLE IF EXISTS eskom_data;
CREATE TABLE eskom_data (
    /* TIMESTAMP instead of DATE so that the hour part of each hour-beginning reading is kept rather than truncated to the day. */
    date_time_hour_beginning TIMESTAMP,
    original_res_forecast_before_lockdown NUMERIC,
    residual_forecast NUMERIC,
    rsa_contracted_forecast NUMERIC,
    dispatchable_generation NUMERIC,
    residual_demand NUMERIC,
    rsa_contracted_demand NUMERIC,
    international_exports NUMERIC,
    international_imports NUMERIC,
    thermal_generation NUMERIC,
    nuclear_generation NUMERIC,
    eskom_gas_generation NUMERIC,
    eskom_ocgt_generation NUMERIC,
    hydro_water_generation NUMERIC,
    pumped_water_generation NUMERIC,
    ils_usage NUMERIC,
    manual_load_reduction NUMERIC,
    ios_excl_ils_and_mlr NUMERIC,
    dispatchable_ipp_ocgt NUMERIC,
    eskom_gas_sco NUMERIC,
    eskom_ocgt_sco NUMERIC,
    hydro_water_sco NUMERIC,
    pumped_water_sco NUMERIC,
    pumping NUMERIC,
    wind NUMERIC,
    pv NUMERIC,
    csp NUMERIC,
    other_re NUMERIC,
    total_re NUMERIC,
    wind_installed_capacity NUMERIC,
    pv_installed_capacity NUMERIC,
    csp_installed_capacity NUMERIC,
    other_re_installed_capacity NUMERIC,
    total_re_installed_capacity NUMERIC,
    installed_eskom_capacity NUMERIC,
    total_pclf NUMERIC,
    total_uclf NUMERIC,
    total_oclf NUMERIC,
    total_uclf_and_oclf NUMERIC,
    non_comm_sentout NUMERIC,
    drakensberg_gen_unit_hours NUMERIC,
    palmiet_gen_unit_hours NUMERIC,
    ingula_gen_unit_hours NUMERIC
);


 * postgresql://postgres:***@localhost:5430/eskom_db
Done.
Done.


[]

In [8]:
%%sql
/* Bulk-load the Eskom CSV into eskom_data. The first CSV line is a header and the columns are listed in the order they are loaded. */
COPY eskom_data (
    date_time_hour_beginning,
    original_res_forecast_before_lockdown, residual_forecast, rsa_contracted_forecast,
    dispatchable_generation, residual_demand, rsa_contracted_demand,
    international_exports, international_imports,
    thermal_generation, nuclear_generation, eskom_gas_generation, eskom_ocgt_generation,
    hydro_water_generation, pumped_water_generation,
    ils_usage, manual_load_reduction, ios_excl_ils_and_mlr, dispatchable_ipp_ocgt,
    eskom_gas_sco, eskom_ocgt_sco, hydro_water_sco, pumped_water_sco, pumping,
    wind, pv, csp, other_re, total_re,
    wind_installed_capacity, pv_installed_capacity, csp_installed_capacity,
    other_re_installed_capacity, total_re_installed_capacity, installed_eskom_capacity,
    total_pclf, total_uclf, total_oclf, total_uclf_and_oclf,
    non_comm_sentout,
    drakensberg_gen_unit_hours, palmiet_gen_unit_hours, ingula_gen_unit_hours
)
FROM '/Users/ds_learner16/Documents/Eskom/Eskom_Data_Analysis/ESK2033.csv'
DELIMITER ',' CSV HEADER;

 * postgresql://postgres:***@localhost:5430/eskom_db
37704 rows affected.


[]

In [9]:
%%sql
/* Rebuild eskom_clean as every eskom_data column plus year, month and day parts taken from date_time_hour_beginning, then export the result to CSV. */
DROP TABLE IF EXISTS eskom_clean;

CREATE TABLE eskom_clean AS
SELECT
    EXTRACT(YEAR FROM date_time_hour_beginning) AS year,
    EXTRACT(MONTH FROM date_time_hour_beginning) AS month,
    EXTRACT(DAY FROM date_time_hour_beginning) AS day,
    date_time_hour_beginning,
    original_res_forecast_before_lockdown, residual_forecast, rsa_contracted_forecast,
    dispatchable_generation, residual_demand, rsa_contracted_demand,
    international_exports, international_imports,
    thermal_generation, nuclear_generation, eskom_gas_generation, eskom_ocgt_generation,
    hydro_water_generation, pumped_water_generation,
    ils_usage, manual_load_reduction, ios_excl_ils_and_mlr, dispatchable_ipp_ocgt,
    eskom_gas_sco, eskom_ocgt_sco, hydro_water_sco, pumped_water_sco, pumping,
    wind, pv, csp, other_re, total_re,
    wind_installed_capacity, pv_installed_capacity, csp_installed_capacity,
    other_re_installed_capacity, total_re_installed_capacity, installed_eskom_capacity,
    total_pclf, total_uclf, total_oclf, total_uclf_and_oclf,
    non_comm_sentout,
    drakensberg_gen_unit_hours, palmiet_gen_unit_hours, ingula_gen_unit_hours
FROM eskom_data;

COPY eskom_clean
TO '/Users/ds_learner16/Documents/Eskom/Eskom_Data_Analysis/ESK2033_clean.csv'
WITH DELIMITER ',' CSV HEADER;



 * postgresql://postgres:***@localhost:5430/eskom_db
Done.
37704 rows affected.
37704 rows affected.


[]

In [10]:
%%sql
/* Recreate the empty loadshedding table with one timestamp column and one integer stage column. */
DROP TABLE IF EXISTS loadshedding;

CREATE TABLE loadshedding (
    created_at TIMESTAMP,
    stage      INTEGER
);

 * postgresql://postgres:***@localhost:5430/eskom_db
Done.
Done.


[]

In [11]:
%%sql
/* Bulk-load the EskomSePush history CSV into loadshedding, skipping the header line. */
COPY loadshedding (created_at, stage)
FROM '/Users/ds_learner16/Documents/Eskom/Eskom_Data_Analysis/EskomSePush_history.csv'
DELIMITER ',' CSV HEADER;

 * postgresql://postgres:***@localhost:5430/eskom_db
670 rows affected.


[]

In [12]:
%%sql
/* Rebuild load_shedding_clean with the created_at timestamp split into year, month, day and hour next to the stage. */
DROP TABLE IF EXISTS load_shedding_clean;

CREATE TABLE load_shedding_clean AS
SELECT
    EXTRACT(YEAR FROM ls.created_at) AS year,
    EXTRACT(MONTH FROM ls.created_at) AS month,
    EXTRACT(DAY FROM ls.created_at) AS day,
    EXTRACT(HOUR FROM ls.created_at) AS hour,
    ls.stage
FROM loadshedding AS ls


 * postgresql://postgres:***@localhost:5430/eskom_db
Done.
670 rows affected.


[]

# Number of Rows and Columns 
Row count: an estimated 42824 before cleaning and 37704 after.
Cleaning removed the forecast-only dates and added an average for the original residual forecast before lockdown.

count cols = 41

## Which year had the highest demand?

In [13]:
high_demand = %sql SELECT year as "year" , AVG(residual_demand) as "residual_demand", AVG(rsa_contracted_demand) as "rsa_contracted_demand" FROM eskom_clean GROUP BY year ORDER BY year
high_demand

 * postgresql://postgres:***@localhost:5430/eskom_db
5 rows affected.


year,residual_demand,rsa_contracted_demand
2018,25767.13178469697,26998.60318030303
2019,25219.673938127853,26542.384553082193
2020,23696.602886042805,25117.22037215392
2021,24196.092115525116,25932.20255639269
2022,24394.402998125,26094.779150833332


In [17]:
# Line chart with one line per demand series, plotted against the year.
fig = px.line(
    high_demand,
    x="year",
    y=["residual_demand", "rsa_contracted_demand"],
    title="Residual vs RSA Contracted Demand",
    # With a list of y columns, Plotly Express names the y axis "value" and the
    # legend "variable" unless they are relabelled here.
    labels={"year": "Year", "value": "Average demand", "variable": "Demand series"},
)
# One tick per year keeps the x axis on whole years.
fig.update_xaxes(dtick=1)
fig.show()

## Which of the renewable energy generation methods has the highest capacity?

In [60]:
renew_capacity = %sql SELECT year as "year",AVG(wind_installed_capacity) as "wind",AVG(pv_installed_capacity) as "pv",AVG(csp_installed_capacity) as "csp",AVG(other_re_installed_capacity) as "other" FROM eskom_clean GROUP BY year ORDER BY year
renew_capacity

 * postgresql://postgres:***@localhost:5430/eskom_db
5 rows affected.


year,wind,pv,csp,other
2018,1474.19,311.6363636363636,21.78,3886.6987272727274
2019,1474.19,491.78082191780817,21.78,4067.510821917808
2020,1780.3017486338797,500.0,21.78,4439.6280874316935
2021,2194.0434246575346,500.0,25.00739726027397,5380.93997260274
2022,2212.09,500.0,41.83,5893.468


In [61]:
# Line chart with one line per renewable source, plotted against the year.
fig1 = px.line(
    renew_capacity,
    x="year",
    y=["wind", "pv", "csp", "other"],
    title="Average of renewable energy capacity",
    # With a list of y columns, Plotly Express names the y axis "value" and the
    # legend "variable" unless they are relabelled here.
    labels={"year": "Year", "value": "Average installed capacity", "variable": "Source"},
)
# One tick per year keeps the x axis on whole years.
fig1.update_xaxes(dtick=1)
fig1.show()

## Which year, month, and hour had the most load shedding?

## How much energy is generated yearly by Eskom and other entities in the country?

## Do we import more energy or export more energy?

## Which one of the energy generation methods yields the most power in GW?

## Thermal power produces the most power but has shown a decline in recent years

## Which of the three power stations is the highest performing?