In [1]:
# Module Importations
from collections import Counter
import datetime
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd 
import seaborn as sns

# Print versioning information
print('seaborn version =', sns.__version__)
print('numpy version =', np.__version__)  
print('pandas version =', pd.__version__)

seaborn version = 0.10.0
numpy version = 1.19.4
pandas version = 1.1.4


In [2]:
# Custom Modules
from Source.data import load_data
from Source.features import data_munging
from Source.visualisation import figures_helper

In [3]:
# Constants
SAVE_FIGURES = True

In [4]:
# Load Dataset
df_plant1 = load_data.load_pickled_data("df_plant1_feat_eng.pkl")
df_plant2 = load_data.load_pickled_data("df_plant2_feat_eng.pkl")

Loading pickled dataframe started ...
Loading pickled dataframe complete ...
Loading pickled dataframe started ...
Loading pickled dataframe complete ...


In [7]:
df_plant1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 71808 entries, 0 to 71806
Data columns (total 20 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   DATE_TIME        71808 non-null  datetime64[ns]
 1   PLANT_ID         71808 non-null  object        
 2   SOURCE_KEY       71808 non-null  object        
 3   DC_POWER         71808 non-null  float64       
 4   DAILY_YIELD      71808 non-null  float64       
 5   AMB_TEMP         71808 non-null  float64       
 6   MOD_TEMP         71808 non-null  float64       
 7   IRRADIATION      71808 non-null  float64       
 8   DATE             71808 non-null  object        
 9   TIME_OF_DAY      71808 non-null  object        
 10  HOUR             71808 non-null  int64         
 11  DAY              71808 non-null  int64         
 12  WEEKDAY          71808 non-null  object        
 13  MONTH            71808 non-null  int64         
 14  YEAR             71808 non-null  int64

# Identify Suspect Faulty Panels

An assumption is that panels will have zero output if they are in a fault condition (as opposed to a lower output). This assumption is supported by a working principle of inverters that they will stop producing an output if the input voltage from the connected solar panel falls too low (so partial outputs are unlikely).

Panels with larger numbers of zero-output instances may be faulty.

This can be visualised with a cut-off region.

In [13]:

# Count number of zero instances per cell
zero_outputs = df_plant1[df_plant1['DC_POWER'] == 0].groupby('SOURCE_KEY')['DATE_TIME'].count().reset_index().rename(columns = {'DATE_TIME' : 'ZERO_OUTPUTS'})
zero_outputs.sort_values('ZERO_OUTPUTS', ascending = False)

# Create a grouping of source keys and avg daily yield
daily_yield = df_plant1.groupby(['SOURCE_KEY'])[['SOURCE_KEY', 'AVG_DAILY_YIELD']].mean().reset_index().sort_values('AVG_DAILY_YIELD', ascending = False)

# Merge zero instances and avg daily yield to identify cells with lots of downtime
plant1_efficiency = pd.merge(zero_outputs, daily_yield, how = 'inner', on = 'SOURCE_KEY')

plant1_efficiency.head(20)

Unnamed: 0,SOURCE_KEY,ZERO_OUTPUTS,AVG_DAILY_YIELD
0,1BY6WEcLGh8j5v7,1560,3172.957105
1,1IF53ai7Xc0U56Y,1538,3303.726946
2,3PZuoBAID5Wc2HD,1540,3302.496854
3,7JYdWkrLSPkdwr4,1540,3264.622511
4,McdE0feGgRqW7Ca,1540,3270.839253
5,VHMLBKoKgIrUVDU,1541,3345.201309
6,WRmjgnKYAwPKWDb,1540,3238.81483
7,YxYtjZvoooNbGkE,1537,3227.72796
8,ZnxXDlPa8U1GXgE,1540,3318.424157
9,ZoEaEvLYb1n2sOq,1538,3200.990519
