# DRIN Preprocessing Script 2 - Solar and Wind Capacity Factor Calculations

This notebook calculates weekly average solar and wind capacity factors from Renewables.ninja data.



### 1. Import packages and dependencies. 

In [42]:
%load_ext autoreload
import sys
sys.path.append("..") #this is to add the above folder to the package directory
import os
#import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# from pandas.plotting import register_matplotlib_converters
import plotly.express as px
import matplotlib.pyplot as plt
%matplotlib inline 

### 2. Read the input file and setup the dataframe 

In [4]:
# Read every country/technology sheet of the capacity-factor workbook and
# stack them into one long table tagged with the sheet each row came from.

folder_path = os.path.join('..','data')

raw_CFdata = os.path.join(folder_path, 'CapacityFactor for Wind and Solar - All countries RN 2.xlsx')

sheetnames = ['AL Solar IN', 'AL Solar OUT', 'AL WPP IN', 'AL WPP OUT',
              'MK Solar IN', 'MK Solar OUT', 'MK WPP IN', 'MK WPP OUT',
              'ME Solar IN', 'ME Solar OUT', 'ME WPP IN', 'ME WPP OUT',
              'XS Solar IN', 'XS Solar OUT', 'XS WPP IN', 'XS WPP OUT']

# Passing the whole list lets pandas open and parse the workbook once and
# return a {sheet name: DataFrame} dict, instead of re-opening the file for
# every sheet. The first three rows of each sheet are skipped (presumably a
# metadata header above the column names -- verify against the workbook).
sheets_by_name = pd.read_excel(raw_CFdata, sheet_name=sheetnames, skiprows=3)

# Tag each sheet's rows with the sheet name so they stay identifiable after
# concatenation. `.assign` returns a new frame rather than mutating in place.
filenames = sheetnames
list_of_dfs = [sheets_by_name[filename].assign(filename=filename) for filename in filenames]

# Stack all sheets into a single long table with a fresh 0..n-1 index.
combined_df = pd.concat(list_of_dfs, ignore_index=True)

# Keep only the columns used by the capacity-factor calculation.
df = combined_df[['filename','local_time','electricity']].copy()


### 3. Calculate the capacity factors

In [10]:
# Calendar fields derived from the local timestamp of each record.
df['year'] = df['local_time'].dt.year
df['month'] = df['local_time'].dt.month
# ISO week number. `Series.dt.week` was deprecated in pandas 1.1 and removed in
# pandas 2.0; `dt.isocalendar().week` returns the same week numbers.
df['week'] = df['local_time'].dt.isocalendar().week
df['day'] = df['local_time'].dt.day
df['hour'] = df['local_time'].dt.hour

# Mean electricity output per week (rows) and per sheet (columns).
# The string 'mean' avoids the FutureWarning pandas 2.x raises for np.mean.
avg = pd.pivot_table(df, values='electricity', index='week', columns=['filename'], aggfunc='mean', fill_value=0)

INSTALLED_CAPACITY_KW = 1000  # installed capacity behind the 'electricity' values, in kW

# Capacity factor = mean output / installed capacity, rounded to 3 decimals.
all_cf = (avg / INSTALLED_CAPACITY_KW).round(3)
all_cf = all_cf.reset_index().rename(columns={'week': 'TS'})
# isocalendar() yields a nullable UInt32 week; store TS as a plain integer.
all_cf['TS'] = all_cf['TS'].astype('int64')
# Keep time slices 1-52 only. Filtering on the week number (instead of dropping
# row label 52) gives the same result when week 53 exists and does not raise a
# KeyError when the data contains only 52 weeks.
all_cf = all_cf[all_cf['TS'] <= 52]

  df['week'] = df['local_time'].dt.week


### 4. Save the CF tables for each technology and country

In [17]:
# The workbook is written to processed_data/re_capacity_factors, resolved
# relative to the current working directory.

output_folder = os.path.join('processed_data', 're_capacity_factors')
os.makedirs(output_folder, exist_ok = True)

# One output sheet per capacity-factor column (every column except the time slice).
names = all_cf.drop(['TS'], axis=1).columns

file_path=os.path.join(output_folder, 'Solar_and_Wind_CFs_20201029.xlsx')

# Column headers for the years 2020-2055. The last header carries a ':=' suffix
# (presumably required by the downstream model's table format -- confirm).
year_labels = ['20{}'.format(i) for i in range(20, 55)] + ['2055:=']

# The context manager saves and closes the workbook on exit; `writer.save()`
# was removed in pandas 2.0.
with pd.ExcelWriter(file_path, engine='xlsxwriter') as writer:
    for name in names:
        # First column: the time slice under a blank (' ') header; then the same
        # weekly capacity factors repeated for every year.
        table = {' ': all_cf['TS']}
        table.update({label: all_cf[name] for label in year_labels})
        dfname = pd.DataFrame(table)
        # Write the table to its own sheet. In the original cell this call was
        # commented out, so the saved workbook contained none of the tables.
        dfname.to_excel(writer, sheet_name='{}'.format(name), index=False)

# End