In [43]:
#Import libraries
import pandas as pd
import  numpy as np
import seaborn as sns
import matplotlib.pyplot as plt



import xgboost as xgb
from sklearn import metrics
from sklearn.metrics import auc, accuracy_score,  mean_squared_error,explained_variance_score,r2_score
from sklearn.model_selection import  train_test_split
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
import requests
import warnings


from pvlib import solarposition, irradiance, atmosphere, pvsystem, inverter, temperature
from pvlib.forecast import GFS
from pvlib.pvsystem import PVSystem, retrieve_sam
from pvlib.temperature import TEMPERATURE_MODEL_PARAMETERS
from pvlib.tracking import SingleAxisTracker
from pvlib.modelchain import ModelChain
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# import bs4
from bs4 import BeautifulSoup #l tool for parsing html data
import datetime as datetime
from datetime import date, timedelta
import streamlit as st


## Physical Asset

In [44]:
st.title("Wind Farm Analyser")
st.markdown("""
This application allows users to estimate and forecast power output from proposed 
wind farm sites. It makes use of the followign resurces.
* **Windpowerlib Library** : This is a python library which makes use of turbine data and weather foreast data
 to predict wind turbine and wind farm output.
* **Open Weather API** : This is an interface which provided weather forecasts for geoloecated places.
""")

DeltaGenerator(_root_container=0, _provided_cursor=None, _parent=None, _block_type=None, _form_data=None)

In [45]:

dt = date.today() - timedelta(12)
lst={d.strftime('%Y%m%d') for d in pd.date_range(dt,date.today())}

#the base url address with a blank 'dt' tag that will be egenrate from a loop
url_test = 'https://pvoutput.org/intraday.jsp?id=49819&sid=45346&dt=%s'

# List comprehension to geenrate urls
urls=[url_test %i for i in lst]
urls.sort(key=lambda url: url.split('/')[-1], reverse=False)

urls


['https://pvoutput.org/intraday.jsp?id=49819&sid=45346&dt=20211216',
 'https://pvoutput.org/intraday.jsp?id=49819&sid=45346&dt=20211217',
 'https://pvoutput.org/intraday.jsp?id=49819&sid=45346&dt=20211218',
 'https://pvoutput.org/intraday.jsp?id=49819&sid=45346&dt=20211219',
 'https://pvoutput.org/intraday.jsp?id=49819&sid=45346&dt=20211220',
 'https://pvoutput.org/intraday.jsp?id=49819&sid=45346&dt=20211221',
 'https://pvoutput.org/intraday.jsp?id=49819&sid=45346&dt=20211222',
 'https://pvoutput.org/intraday.jsp?id=49819&sid=45346&dt=20211223',
 'https://pvoutput.org/intraday.jsp?id=49819&sid=45346&dt=20211224',
 'https://pvoutput.org/intraday.jsp?id=49819&sid=45346&dt=20211225',
 'https://pvoutput.org/intraday.jsp?id=49819&sid=45346&dt=20211226',
 'https://pvoutput.org/intraday.jsp?id=49819&sid=45346&dt=20211227',
 'https://pvoutput.org/intraday.jsp?id=49819&sid=45346&dt=20211228']

In [46]:
#generate pages usign requests
pages=[requests.get(j)for j in urls]

#use beautiful soup to parse html
soups=[BeautifulSoup(page.text, 'html.parser') for page in pages]

In [47]:
# This workflow extracts data from the datatables and generates a list of dataframes, one for each day
tables=[soup.find('table',id='tb') for soup in soups]

table_rows = [table.find_all('tr') for table in tables]

results=[]
for table_row in table_rows:
  res=[]
  for tr in table_row:
    td = tr.find_all('td')
    row = [tr.text.strip() for tr in td if tr.text.strip()]
    if row:
        res.append(row)
  results.append(res)

# Generate list of dataframes
dfs = [pd.DataFrame(i, columns=['Date','Time','Energy(kWh)','Efficiency(kWh/kW)','Power(W)','Average(W)',
      'Normalised(kW/kW)','Temperature(C)','Voltage(V)','Energy Used(kWh)','Power Used(W)']) for i in results ]
#Remove first row which picked erroneous data
dfs=[df[1:] for df in dfs]   

In [48]:
# Concatenate list of dataframes into single df
data=pd.concat(dfs)

In [49]:
# Preprocessign data
# Removes "W", 'kWh' labels and thousand separator commas
data['Energy(kWh)'] = data['Energy(kWh)'].str.replace('kWh', '')
data['Efficiency(kWh/kW)'] = data['Efficiency(kWh/kW)'].str.replace('kWh/kW', '')
data['SolarOutput(kW)'] = data['Power(W)'].str.replace('W', '').str.replace(',', '')
data['SolarOutput(kW)'] =data['SolarOutput(kW)'].apply(pd.to_numeric, errors='coerce').multiply(0.001)


data['SolarPowerAverage(kW)'] =data['Average(W)'].str.replace('W', '')
data['SolarPowerAverage(kW)']=data['SolarPowerAverage(kW)'].apply(pd.to_numeric, errors='coerce').multiply(0.001)

data['Normalised(kW/kW)'] = data['Normalised(kW/kW)'].str.replace('kW/kW', '')
data['Temperature(C)'] = data['Temperature(C)'].str.replace('C', '')

# data['Voltage(V)'] = data['Voltage(V)'].str.replace('-', 0)
data['Energy Used(kWh)'] = data['Energy Used(kWh)'].str.replace('kWh', '')
data['PowerUsed(kW)'] = data['Power Used(W)'].str.replace('W', '').str.replace(',', '')
data['PowerUsed(kW)'] =data['PowerUsed(kW)'].apply(pd.to_numeric, errors='coerce').multiply(0.001)

data["Date"]=pd.to_datetime(data['Date'], format='%d/%m/%y') 
data["Date"]=data["Date"].astype(str)
# #Combine Date and Time Columns and convert to Datetime
data['Time']= pd.to_datetime(data['Time']).dt.strftime('%H:%M:%S')
data['Datetime'] =pd.to_datetime(data['Date'] + ' ' + data['Time'])



In [50]:
# # add date as string column

data.drop(['Date','Time'],axis=1,inplace=True)

cols=data.columns.drop(['Datetime'])
data[cols] = data[cols].apply(pd.to_numeric, errors='coerce')


# #Reorder Columns
data=data[['Datetime','Energy(kWh)', 'Efficiency(kWh/kW)', 'SolarOutput(kW)', 'SolarPowerAverage(kW)',
       'Normalised(kW/kW)', 'Temperature(C)', 'Voltage(V)', 'Energy Used(kWh)',
       'PowerUsed(kW)']]


data.drop('Voltage(V)',axis=1,inplace=True)



data=data.fillna(0)
data=data.sort_values(by=['Datetime'],ascending=True)
data['Import/Export']=data['Energy(kWh)']-data['Energy Used(kWh)']
data=data.set_index('Datetime')

In [51]:
data.tail(5)

Unnamed: 0_level_0,Energy(kWh),Efficiency(kWh/kW),SolarOutput(kW),SolarPowerAverage(kW),Normalised(kW/kW),Temperature(C),Energy Used(kWh),PowerUsed(kW),Import/Export
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-12-28 20:40:00,46.726,3.054,0.0,0.0,0.0,16.7,134.177,0.918,-87.451
2021-12-28 20:45:00,46.726,3.054,0.0,0.0,0.0,16.7,134.255,0.934,-87.529
2021-12-28 20:50:00,46.726,3.054,0.0,0.0,0.0,16.1,134.332,0.925,-87.606
2021-12-28 20:55:00,46.726,3.054,0.0,0.0,0.0,16.1,134.409,0.921,-87.683
2021-12-28 21:00:00,46.726,3.054,0.0,0.0,0.0,16.1,134.486,0.923,-87.76


In [52]:
data['Import/Export']=data['Energy(kWh)']-data['Energy Used(kWh)']

In [53]:
data.columns

Index(['Energy(kWh)', 'Efficiency(kWh/kW)', 'SolarOutput(kW)',
       'SolarPowerAverage(kW)', 'Normalised(kW/kW)', 'Temperature(C)',
       'Energy Used(kWh)', 'PowerUsed(kW)', 'Import/Export'],
      dtype='object')

- Energy (kWh) - is the energy generated by the system up to that point in time for the current date
- Efficiency(kWh/kW) - Is calculated by dividing the total energy output (kWh) by the system size (kW)
- Average(W) - Is a calculated value based on the change in energy divided by the change in time.
- Normalised (kW/kW) - Is calculated by dividing Average Power by system size. It is a system performance metric
- Temperature(C) - Ambient Temperature(C)
- Energy USed(kWh) - Total Energy Consumed by the Premises.Consumption = Generation + Import - Export
- Power Used(W) - Average Energy Used Over a Time Interval
- Power(W)- Is the power generated by the system at that moment in time

In [56]:
fig3 = go.Figure()

fig3.add_trace(go.Scatter(
    x=data.index,
    y=data['SolarPowerAverage(kW)']
))

fig3.update_layout(
    title="12-Day Plant Performance"
)
fig3.update_xaxes(rangeslider_visible=True,rangeselector=dict(buttons=list([dict(count=1,label="1h",step="hour",stepmode="backward"),
dict(step="all")],)))

In [57]:
data.head(5)

Unnamed: 0_level_0,Energy(kWh),Efficiency(kWh/kW),SolarOutput(kW),SolarPowerAverage(kW),Normalised(kW/kW),Temperature(C),Energy Used(kWh),PowerUsed(kW),Import/Export
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-12-16 00:00:00,0.0,0.0,0.0,0.0,0.0,20.6,0.0,1.059,0.0
2021-12-16 00:05:00,0.0,0.0,0.0,0.0,0.0,20.6,0.091,1.097,-0.091
2021-12-16 00:10:00,0.0,0.0,0.0,0.0,0.0,20.0,0.177,1.028,-0.177
2021-12-16 00:15:00,0.0,0.0,0.0,0.0,0.0,20.0,0.271,1.129,-0.271
2021-12-16 00:20:00,0.0,0.0,0.0,0.0,0.0,20.0,0.354,0.995,-0.354


In [58]:
# import seaborn as sns
# sns.pairplot(data.sample(200))

### Time Series Forecast

In [59]:
# corr=data.corr()
# plt.figure(figsize=(13,7))
# sns.heatmap(corr,annot=True)
# plt.show()

## Digital Asset

In [62]:
latitude = -28.89367
longitude = 31.46824

# latitude = -28.893597
# longitude = 31.468293

tz ='Africa/Johannesburg'
surface_tilt = 30
surface_azimuth = 180
albedo = 0.2

#Set beginning and end date
end=pd.Timestamp(datetime.date.today(), tz=tz) 
start = end-timedelta(12)

# Define forecast model
fm = GFS()

# Retrieve data from forecast API and perform data preparation
previous_forecast = fm.get_data(latitude, longitude, start, end)
previous_forecast.index = previous_forecast.index.strftime('%Y-%m-%d %H:%M:%S')
previous_forecast.index=pd.to_datetime(previous_forecast.index)

HTTPError: 503 Server Error: Service Unavailable for url: https://thredds.ucar.edu/thredds/catalog.xml

In [None]:
previous_forecast.head()

Unnamed: 0,Downward_Short-Wave_Radiation_Flux_surface_Mixed_intervals_Average,Total_cloud_cover_boundary_layer_cloud_Mixed_intervals_Average,Total_cloud_cover_entire_atmosphere_Mixed_intervals_Average,Temperature_surface,High_cloud_cover_high_cloud_Mixed_intervals_Average,Total_cloud_cover_convective_cloud,Wind_speed_gust_surface,Low_cloud_cover_low_cloud_Mixed_intervals_Average,Medium_cloud_cover_middle_cloud_Mixed_intervals_Average,v-component_of_wind_isobaric,u-component_of_wind_isobaric
2021-12-16 02:00:00,0.0,0.4,62.400002,294.448792,15.0,0.0,10.9799,0.4,53.200001,3.245068,2.723442
2021-12-16 05:00:00,0.08,3.8,100.0,292.741974,100.0,1.91,2.413769,3.7,31.6,0.543561,1.961029
2021-12-16 08:00:00,94.919998,13.8,100.0,295.263977,99.900002,24.35,9.401733,13.4,51.299999,6.283975,1.878523
2021-12-16 11:00:00,129.839996,99.0,100.0,293.749725,93.699997,6.91,10.01511,99.800003,84.5,6.388044,2.129277
2021-12-16 14:00:00,140.300003,96.0,100.0,291.865662,94.699997,3.47,11.16905,99.900002,71.400002,6.170657,2.510225


In [None]:
#resample to three hours to match weather data sampling rate
data_res=data.resample('3H',offset = '2H').mean()

#set datetime limits of solar farm data to match weather data
forecast_dates=previous_forecast.index
start_datetime=forecast_dates[0]

In [None]:
start_datetime

Timestamp('2021-12-16 02:00:00')

In [None]:
list_r=data_res.index
stop_datetime=list_r[-5]

In [None]:
stop_datetime

Timestamp('2021-12-28 05:00:00', freq='3H')

In [None]:
date_ranges=[start_datetime,stop_datetime]
data_res=data_res[start_datetime:stop_datetime]

In [None]:
data_res.head(5)


Unnamed: 0_level_0,Energy(kWh),Efficiency(kWh/kW),SolarOutput(kW),SolarPowerAverage(kW),Normalised(kW/kW),Temperature(C),Energy Used(kWh),PowerUsed(kW),Import/Export
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-12-16 02:00:00,0.0,0.0,0.0,0.0,0.0,18.997222,3.699111,1.068389,-3.699111
2021-12-16 05:00:00,1.268917,0.082889,0.9075,0.489,0.059306,18.766667,6.858056,1.039,-5.589139
2021-12-16 08:00:00,4.858083,0.317583,1.437083,0.0,0.093917,18.133333,9.981,1.044444,-5.122917
2021-12-16 11:00:00,9.449083,0.617556,1.266222,0.0,0.082667,17.35,13.094778,0.980028,-3.645694
2021-12-16 14:00:00,12.808722,0.837139,1.241083,0.155667,0.081139,16.294444,15.891306,0.986667,-3.082583


In [None]:
data_res.head()

Unnamed: 0_level_0,Energy(kWh),Efficiency(kWh/kW),SolarOutput(kW),SolarPowerAverage(kW),Normalised(kW/kW),Temperature(C),Energy Used(kWh),PowerUsed(kW),Import/Export
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-12-16 02:00:00,0.0,0.0,0.0,0.0,0.0,18.997222,3.699111,1.068389,-3.699111
2021-12-16 05:00:00,1.268917,0.082889,0.9075,0.489,0.059306,18.766667,6.858056,1.039,-5.589139
2021-12-16 08:00:00,4.858083,0.317583,1.437083,0.0,0.093917,18.133333,9.981,1.044444,-5.122917
2021-12-16 11:00:00,9.449083,0.617556,1.266222,0.0,0.082667,17.35,13.094778,0.980028,-3.645694
2021-12-16 14:00:00,12.808722,0.837139,1.241083,0.155667,0.081139,16.294444,15.891306,0.986667,-3.082583


In [None]:
previous_forecast.head(5)

Unnamed: 0,Downward_Short-Wave_Radiation_Flux_surface_Mixed_intervals_Average,Total_cloud_cover_boundary_layer_cloud_Mixed_intervals_Average,Total_cloud_cover_entire_atmosphere_Mixed_intervals_Average,Temperature_surface,High_cloud_cover_high_cloud_Mixed_intervals_Average,Total_cloud_cover_convective_cloud,Wind_speed_gust_surface,Low_cloud_cover_low_cloud_Mixed_intervals_Average,Medium_cloud_cover_middle_cloud_Mixed_intervals_Average,v-component_of_wind_isobaric,u-component_of_wind_isobaric
2021-12-16 02:00:00,0.0,0.4,62.400002,294.448792,15.0,0.0,10.9799,0.4,53.200001,3.245068,2.723442
2021-12-16 05:00:00,0.08,3.8,100.0,292.741974,100.0,1.91,2.413769,3.7,31.6,0.543561,1.961029
2021-12-16 08:00:00,94.919998,13.8,100.0,295.263977,99.900002,24.35,9.401733,13.4,51.299999,6.283975,1.878523
2021-12-16 11:00:00,129.839996,99.0,100.0,293.749725,93.699997,6.91,10.01511,99.800003,84.5,6.388044,2.129277
2021-12-16 14:00:00,140.300003,96.0,100.0,291.865662,94.699997,3.47,11.16905,99.900002,71.400002,6.170657,2.510225


In [None]:

#Merge physical asset data with weather API data
merge_df=pd.merge(data_res,previous_forecast, how='inner', left_index=True, right_index=True)

per_hour=data['SolarOutput(kW)'].groupby(data.index.time).mean().reset_index().rename(columns={data.columns[0]:'Datetime'})

In [None]:
merge_df.head(5)

Unnamed: 0,Energy(kWh),Efficiency(kWh/kW),SolarOutput(kW),SolarPowerAverage(kW),Normalised(kW/kW),Temperature(C),Energy Used(kWh),PowerUsed(kW),Import/Export,Downward_Short-Wave_Radiation_Flux_surface_Mixed_intervals_Average,Total_cloud_cover_boundary_layer_cloud_Mixed_intervals_Average,Total_cloud_cover_entire_atmosphere_Mixed_intervals_Average,Temperature_surface,High_cloud_cover_high_cloud_Mixed_intervals_Average,Total_cloud_cover_convective_cloud,Wind_speed_gust_surface,Low_cloud_cover_low_cloud_Mixed_intervals_Average,Medium_cloud_cover_middle_cloud_Mixed_intervals_Average,v-component_of_wind_isobaric,u-component_of_wind_isobaric
2021-12-16 02:00:00,0.0,0.0,0.0,0.0,0.0,18.997222,3.699111,1.068389,-3.699111,0.0,0.4,62.400002,294.448792,15.0,0.0,10.9799,0.4,53.200001,3.245068,2.723442
2021-12-16 05:00:00,1.268917,0.082889,0.9075,0.489,0.059306,18.766667,6.858056,1.039,-5.589139,0.08,3.8,100.0,292.741974,100.0,1.91,2.413769,3.7,31.6,0.543561,1.961029
2021-12-16 08:00:00,4.858083,0.317583,1.437083,0.0,0.093917,18.133333,9.981,1.044444,-5.122917,94.919998,13.8,100.0,295.263977,99.900002,24.35,9.401733,13.4,51.299999,6.283975,1.878523
2021-12-16 11:00:00,9.449083,0.617556,1.266222,0.0,0.082667,17.35,13.094778,0.980028,-3.645694,129.839996,99.0,100.0,293.749725,93.699997,6.91,10.01511,99.800003,84.5,6.388044,2.129277
2021-12-16 14:00:00,12.808722,0.837139,1.241083,0.155667,0.081139,16.294444,15.891306,0.986667,-3.082583,140.300003,96.0,100.0,291.865662,94.699997,3.47,11.16905,99.900002,71.400002,6.170657,2.510225


In [None]:

# Model

merge_dat=merge_df.reset_index()
merge_dat=merge_dat.rename(columns={merge_dat.columns[0]:'Datetime'})
X1=merge_dat.drop(columns=['Efficiency(kWh/kW)','Energy(kWh)','Datetime','Normalised(kW/kW)',
'Temperature(C)','SolarPowerAverage(kW)'])
y1=merge_dat['Energy(kWh)']

X1_train, X1_test, y1_train, y1_test = train_test_split(X1, y1, test_size=0.15, random_state=42
                                                   )

xgb_model= xgb.XGBRegressor(objective="reg:linear", random_state=42)

xgb_model.fit(X1_train, y1_train)

sorted_idx = xgb_model.feature_importances_.argsort()

f_importances=pd.DataFrame({'Feature':X1.columns[sorted_idx], 'Importance':xgb_model.feature_importances_[sorted_idx]})



In [None]:
f_importances=f_importances.sort_values(by="Importance",ascending=False)
f_importances

Unnamed: 0,Feature,Importance
14,Downward_Short-Wave_Radiation_Flux_surface_Mix...,0.815062
13,Medium_cloud_cover_middle_cloud_Mixed_interval...,0.06991
12,Energy Used(kWh),0.040729
11,Import/Export,0.019541
10,Total_cloud_cover_entire_atmosphere_Mixed_inte...,0.014369
9,Total_cloud_cover_convective_cloud,0.00834
8,Wind_speed_gust_surface,0.00772
7,u-component_of_wind_isobaric,0.005445
6,v-component_of_wind_isobaric,0.004807
5,High_cloud_cover_high_cloud_Mixed_intervals_Av...,0.003971


In [None]:
def get_cast(start_date,end_date):    
    from pvlib.pvsystem import PVSystem, retrieve_sam

    from pvlib.temperature import TEMPERATURE_MODEL_PARAMETERS

    from pvlib.tracking import SingleAxisTracker

    from pvlib.modelchain import ModelChain

    sandia_modules = retrieve_sam('sandiamod')

    cec_inverters = retrieve_sam('cecinverter')

    module = sandia_modules['SolarWorld_Sunmodule_250_Poly__2013_']

    inverter = cec_inverters['ABB__TRIO_20_0_TL_OUTD_S1_US_480__480V_']

    temperature_model_parameters = TEMPERATURE_MODEL_PARAMETERS['sapm']['open_rack_glass_glass']

    # model a single axis tracker
    system = SingleAxisTracker(module_parameters=module, inverter_parameters=inverter, temperature_model_parameters=temperature_model_parameters, modules_per_string=15, strings_per_inverter=4)

    # fx is a common abbreviation for forecast
    fx_model = GFS()

    forecast_mod = fx_model.get_processed_data(latitude, longitude, start_date, end_date)

    # use a ModelChain object to calculate modeling intermediates
    mchain = ModelChain(system, fx_model.location)

    # extract relevant data for model chain
    mchain.run_model(forecast_mod)
    acp=mchain.ac.fillna(0)
    return acp


In [None]:
#Calculate AC Power
pac=get_cast(start,end)*0.001
pac=pd.DataFrame(pac,columns = ['PVOutput'])

pac.drop(pac.tail(1).index,inplace=True)
pac.index =pac.index.strftime('%Y-%m-%d %H:%M:%S')
pac.index=pd.to_datetime(pac.index)


data=merge_df.reset_index()
data=data.rename(columns={data.columns[0]:'Datetime'})
data=data.set_index('Datetime')

#Merge two Dataframes
comparison=pd.merge(pac,data, how='inner', left_index=True, right_index=True)
comparison['SolarPowerAverage(kW)']=comparison['SolarPowerAverage(kW)']

comparison=comparison[['PVOutput','SolarPowerAverage(kW)','Efficiency(kWh/kW)']]
comparison=comparison.resample('3H').median()



The SingleAxisTracker class was deprecated in pvlib 0.9.0 and will be removed soon. Use PVSystem with SingleAxisTrackerMount instead.


ModelChain.ac is deprecated and will be removed in v0.10. Use ModelChain.results.ac instead



In [None]:
#Merge two Dataframes
comparison=pd.merge(pac,data, how='inner', left_index=True, right_index=True)
comparison['SolarPowerAverage(kW)']=comparison['SolarPowerAverage(kW)']

comparison=comparison[['PVOutput','SolarPowerAverage(kW)','Efficiency(kWh/kW)']]
comparison=comparison.resample('3H').median()

#-----------------------------------------------------------------------

comparison["Performance_Factor"]=(comparison['SolarPowerAverage(kW)']-comparison['PVOutput'])/comparison['PVOutput']
comparison=comparison.replace([np.inf, -np.inf], np.nan)
comparison=comparison.fillna(0)

comparison_slice=comparison.iloc[5:,]
# create a list of our conditions
conditions = [
    (comparison_slice['Performance_Factor'] == 0)& (comparison_slice['PVOutput'] == 0),
    (comparison_slice['Performance_Factor'] <0.6) & (comparison_slice['SolarPowerAverage(kW)'] >= 0),
    (comparison_slice['Performance_Factor'] >0.6) & (comparison_slice['Performance_Factor'] < 1),
    (comparison_slice['Performance_Factor'] > 1)
    ]

# create a list of the values we want to assign for each condition
values = ['night', 'underperforming', 'normal', 'oveperforming']

# create a new column and use np.select to assign values to it using our lists as arguments
comparison_slice['Class'] = np.select(conditions, values)

le = preprocessing.LabelEncoder()


predictors=comparison_slice.drop(columns=['Class'])
target=comparison_slice['Class']


target=le.fit_transform(target)

X2_train, X2_test, y2_train, y2_test = train_test_split(predictors,target, test_size=0.15, random_state=42
                                                   )

rf =RandomForestClassifier(max_depth=4, random_state=0)

rf.fit(X2_train, y2_train)

#predict on last 5 readings


comparison=comparison.iloc[::-1]
predicted=comparison.iloc[:5,:].reset_index()
new_predictions=y2_pred=rf.predict(predicted.iloc[:5,1:])
new_predictions=le.inverse_transform(new_predictions)

predicted['Performance']=new_predictions
predicted=predicted.set_index('index')
predicted.index.rename('Datetime',inplace=True)
predicted=predicted[['SolarPowerAverage(kW)','Performance']]

predicted['SolarPowerAverage(kW)']=predicted['SolarPowerAverage(kW)'].round(decimals=1)
# predicted=predicted.iloc[::-1]
predicted=predicted.reset_index()
# predicted=predicted.sort_values('Power(W)')
# predicted=predicted.rename({'index':'Datetime'})
#---------------------------------------------------------------------------------------------------

start_1 = pd.Timestamp(datetime.date.today(), tz=tz) 
end_1= start_1 + timedelta(12)



pac1=get_cast(start_1,end_1)*0.001
pac1=pd.DataFrame(pac1,columns = ['PVOutput'])




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


The SingleAxisTracker class was deprecated in pvlib 0.9.0 and will be removed soon. Use PVSystem with SingleAxisTrackerMount instead.


ModelChain.ac is deprecated and will be removed in v0.10. Use ModelChain.results.ac instead



In [None]:
pac1

Unnamed: 0,PVOutput
2021-12-28 02:00:00+02:00,0.000000
2021-12-28 05:00:00+02:00,0.000000
2021-12-28 08:00:00+02:00,5.538877
2021-12-28 11:00:00+02:00,10.231031
2021-12-28 14:00:00+02:00,9.287839
...,...
2022-01-08 11:00:00+02:00,4.110704
2022-01-08 14:00:00+02:00,3.620440
2022-01-08 17:00:00+02:00,1.043456
2022-01-08 20:00:00+02:00,0.000000
