In [1]:
import pandas as pd
import os
import re
from datetime import datetime
import seaborn as sns
import matplotlib.pyplot as plt
import itertools
import shutil
from scipy import stats
from scipy.stats import norm, skew, f, ttest_ind, ttest_rel, mannwhitneyu, levene, bartlett
import requests
import random
import researchpy as rp
import numpy as np
from sklearn.svm import OneClassSVM
import warnings

In [2]:
warnings.filterwarnings("ignore", category=FutureWarning)

In [3]:
inverters = {
    'inv01':'wms01',
    'inv02':'wms01',
    'inv03':'wms01',
    'inv04':'wms01',
    'inv05':'wms01',
    'inv06':'wms02',
    'inv07':'wms02',
    'inv08':'wms02',
    'inv09':'wms02',
    'inv10':'wms02',
}

In [4]:
def dataClean(year, inv):
       
    inputDataPath = "/Users/qilu/Desktop/Advanticsys/Data/" + str(year)
    filenames = os.listdir(inputDataPath)
    

    for file in filenames:         
        inv_today_match = re.search(re.escape(inv) + r'_today_energy', file)    
        if inv_today_match:
            df_today_energy = pd.read_csv(inputDataPath+'/'+file,delimiter=';',index_col=None, header=0)
            
            df_today_energy['Time_Diff'] = df_today_energy['Timestamp'].diff()
            df_today_energy['Energy_Diff'] = df_today_energy['Value'].diff()  
            
        
            df_today_energy = df_today_energy[(df_today_energy[['Time_Diff']] == 60000).all(axis=1)]            
            df_today_energy = df_today_energy[(df_today_energy[['Energy_Diff']] > 0).all(axis=1)]              
        
            df_today_energy = df_today_energy.drop(['Time_Diff'], axis=1)  
            df_today_energy = df_today_energy.drop(['Value'], axis=1)              
            
            df_today_energy.rename(columns={"Energy_Diff": "Energy", "Signal": "Inverter"}, inplace=True)
            df_today_energy['Inverter'] = df_today_energy['Inverter'].str.split('_').str[0]
            
            todayEnergyQ1 = df_today_energy.Energy.quantile(0.25)
            todayEnergyQ3 = df_today_energy.Energy.quantile(0.75)
            todayEnergyUpper = todayEnergyQ3 + 1.5*(todayEnergyQ3-todayEnergyQ1)
            df_today_energy = df_today_energy[(df_today_energy[['Energy']] < todayEnergyUpper).all(axis=1)]
            
    for file in filenames:         
        inv_total_match = re.search(re.escape(inv) + r'_total_energy', file)    
        if inv_total_match:
            df_total_energy = pd.read_csv(inputDataPath+'/'+file,delimiter=';',index_col=None, header=0)
            
            df_total_energy = df_total_energy[(df_total_energy[['Value']] > 0).all(axis=1)]
            df_total_energy.rename(columns={"Value": "Total_Energy", "Signal": "Inverter"}, inplace=True)
            df_total_energy['Inverter'] = df_total_energy['Inverter'].str.split('_').str[0]

    for file in filenames:         
        inv_temp_match = re.search(re.escape(inv) + r'_temp', file)    
        if inv_temp_match:
            df_inv_temp = pd.read_csv(inputDataPath+'/'+file,delimiter=';',index_col=None, header=0)
            df_inv_temp.loc[:, 'Value'] = df_inv_temp.Value.apply(lambda x: x/10) 
            
            df_inv_temp = df_inv_temp[(df_inv_temp[['Value']] > 0).all(axis=1)]            
            df_inv_temp.rename(columns={"Value": "Inv_Temp", "Signal": "Inverter"}, inplace=True)          
            df_inv_temp['Inverter'] = df_inv_temp['Inverter'].str.split('_').str[0]   
            
    for file in filenames:         
        wms_temp_match = re.search(re.escape(inverters[inv]) + r'_temp', file)    
        if wms_temp_match:
            df_wms_temp = pd.read_csv(inputDataPath+'/'+file,delimiter=';',index_col=None, header=0)
            df_wms_temp.loc[:, 'Value'] = df_wms_temp.Value.apply(lambda x: x/10) 
            
            df_wms_temp = df_wms_temp[(df_wms_temp[['Value']] > 0).all(axis=1)]           
            df_wms_temp.rename(columns={"Value": "Wms_Temp", "Signal": "Inverter"}, inplace=True)          
            df_wms_temp['Inverter'] = df_wms_temp['Inverter'].str.split('_').str[0]        
            
    for file in filenames:         
        wms_irr_match = re.search(re.escape(inverters[inv]) + r'_irradiance', file)    
        if wms_irr_match:
            df_wms_irr = pd.read_csv(inputDataPath+'/'+file,delimiter=';',index_col=None, header=0)
            
            df_wms_irr = df_wms_irr[(df_wms_irr[['Value']] > 0).all(axis=1)]                
            df_wms_irr.rename(columns={"Value": "Wms_Irr", "Signal": "Inverter"}, inplace=True)          
            df_wms_irr['Inverter'] = df_wms_irr['Inverter'].str.split('_').str[0]

          
    df = pd.merge(df_today_energy,
                  df_total_energy[['Timestamp', 'Total_Energy']],
                  on='Timestamp') 
    
    df = pd.merge(df,
                  df_inv_temp[['Timestamp', 'Inv_Temp']],
                  on='Timestamp') 
    
    df = pd.merge(df,
                  df_wms_temp[['Timestamp', 'Wms_Temp']],
                  on='Timestamp') 
    
    df = pd.merge(df,
                  df_wms_irr[['Timestamp', 'Wms_Irr']],
                  on='Timestamp')    
    
    
    totalEnergyQ1 = df.Total_Energy.quantile(0.25)
    totalEnergyQ3 = df.Total_Energy.quantile(0.75)
    totalEnergyUpper = totalEnergyQ3 + 1.5*(totalEnergyQ3-totalEnergyQ1)
    df = df[(df[['Total_Energy']] < totalEnergyUpper).all(axis=1)]     
    
    invTempQ1 = df.Inv_Temp.quantile(0.25)
    invTempQ3 = df.Inv_Temp.quantile(0.75)
    invTempUpper = invTempQ3 + 1.5*(invTempQ3-invTempQ1)
    invTempLower = invTempQ1 - 1.5*(invTempQ3-invTempQ1)           
    df = df[((df[['Inv_Temp']] > invTempLower)
                              & (df[['Inv_Temp']] < invTempUpper)).all(axis=1)]        
    
    wmsTempQ1 = df.Wms_Temp.quantile(0.25)
    wmsTempQ3 = df.Wms_Temp.quantile(0.75)
    wmsTempUpper = wmsTempQ3 + 1.5*(wmsTempQ3-wmsTempQ1)
    wmsTempLower = wmsTempQ1 - 1.5*(wmsTempQ3-wmsTempQ1)           
    df = df[((df[['Wms_Temp']] > wmsTempLower)
                              & (df[['Wms_Temp']] < wmsTempUpper)).all(axis=1)]    
    
    wmsIrrQ1 = df.Wms_Irr.quantile(0.25)
    wmsIrrQ3 = df.Wms_Irr.quantile(0.75)
    wmsIrrUpper = wmsIrrQ3 + 1.5*(wmsIrrQ3-wmsIrrQ1)
    wmsIrrLower = wmsIrrQ1 - 1.5*(wmsIrrQ3-wmsIrrQ1)           
    df = df[((df[['Wms_Irr']] > wmsIrrLower)
                              & (df[['Wms_Irr']] < wmsIrrUpper)).all(axis=1)]     
    
#     df.loc[:, 'Date'] = pd.to_datetime(df[['Date']].stack()).unstack()
#     df.loc[:, 'Only_Date'] = df.Date.apply(lambda x: str(x.date()))
#     df.loc[:, 'Hour'] = df.Date.apply(lambda x: int(str(x.time()).split(':')[0]))
#     df.loc[:, 'Minute'] = df.Date.apply(lambda x: int(str(x.time()).split(':')[1]))
    df = df.drop(['Timestamp'], axis=1)    
    
    #     This part need to be deleted once we have prediction model
    df['random'] = random.uniform(0, 1)
    df.loc[:, 'random'] = df.random.apply(lambda x:  random.uniform(0.5, 1.5)) 
    df['Predict_Energy'] = df['Energy']*df['random']
    df = df.drop(['random'], axis=1)   
    
    return df
    

In [5]:
def cleanFile(dataYear):
    df_all = pd.DataFrame()
    df_year = pd.DataFrame()
    dataYear = dataYear
    invNo = ['inv01',
             'inv02',
             'inv03',
             'inv04',
             'inv05',
             'inv06',
             'inv07',
             'inv08',
             'inv09',
             'inv10']
    for year in dataYear:
        for inv in invNo:
            df_year = df_year.append(dataClean(year, inv), ignore_index = True)
        
    df_all = df_all.append(df_year)
    return df_all

In [6]:
cleanFile([2020,2021]).to_csv('/Users/qilu/Desktop/Advanticsys/Data/Output_Data/all_combine_data_by_minute_for_prediction.csv', index=False)

In [7]:
df_all = cleanFile([2020,2021])

In [8]:
# Covert date column into datetime format
df_all['Date'] = pd.to_datetime(df_all['Date'])

In [9]:
# get the date part
df_all['Only_Date'] = df_all['Date'].dt.date

In [10]:
# get the hour part
df_all['Hour'] = df_all['Date'].dt.hour

In [11]:
# get the minute part
df_all['Minute'] = df_all['Date'].dt.minute

In [12]:
df_all.tail()

Unnamed: 0,Date,Inverter,Energy,Total_Energy,Inv_Temp,Wms_Temp,Wms_Irr,Predict_Energy,Only_Date,Hour,Minute
3368166,2021-12-31 16:53:00,Inv10,1.0,107315.0,38.5,22.1,25.0,1.171642,2021-12-31,16,53
3368167,2021-12-31 17:01:00,Inv10,1.0,107315.0,38.5,22.1,28.0,0.785966,2021-12-31,17,1
3368168,2021-12-31 17:09:00,Inv10,1.0,107315.0,38.5,22.2,7.0,1.395564,2021-12-31,17,9
3368169,2021-12-31 17:21:00,Inv10,1.0,107316.0,38.5,21.2,17.0,1.019273,2021-12-31,17,21
3368170,2021-12-31 17:30:00,Inv10,1.0,107316.0,38.5,20.8,9.0,1.271987,2021-12-31,17,30


In [13]:
# set formula to allocate minutes into 15 mins interval
def quarter(x):
    if x < 16:
        return 15
    elif x < 31:
        return 30
    elif x < 46:
        return 45
    else:
        return 59

In [14]:
df_all.loc[:, 'Quarter'] = df_all.Minute.apply(lambda x: quarter(x))

In [15]:
# create a column combine time and inverter No.
df_all['Date_Hour_Quarter_Inv'] = df_all['Only_Date'].astype(str) + '_' + df_all['Hour'].astype(str) + '_' + df_all['Quarter'].astype(str) + '_' + df_all['Inverter'].astype(str)

In [16]:
# Combine minutes into 15 min interval
df_all = df_all.drop(['Date', 'Minute'], axis=1)
df_all.rename(columns={"Quarter": "Minute"}, inplace=True)

In [17]:
# group data into 15 mins interval
agg_functions = {'Date_Hour_Quarter_Inv': 'first',
                 'Inverter': 'first',
                 'Energy': 'sum',
                 'Total_Energy': 'max',     
                 'Inv_Temp': 'mean',                 
                 'Wms_Temp': 'mean',
                 'Wms_Irr': 'max',                 
                 'Predict_Energy': 'sum',                   
                 'Only_Date': 'first',
                 'Hour': 'first',                                     
                 'Minute': 'first',}
#create new DataFrame by combining rows with same id values
df_all = df_all.groupby(df_all['Date_Hour_Quarter_Inv']).aggregate(agg_functions)   
df_all = df_all.drop(['Date_Hour_Quarter_Inv'], axis=1)   


In [18]:
df_all['Date'] = df_all['Only_Date'].astype(str) +" "+ df_all['Hour'].astype(str) + ':' + df_all['Minute'].astype(str) + ':' + '00'

In [19]:
df_all = df_all.drop(['Hour', 'Minute'], axis=1)

In [20]:
df_all['Period_Label'] = 'reporting_period'

In [21]:
current_end_date = df_all['Only_Date'].max()
current_start_date = current_end_date - pd.DateOffset(days=10)
previous_start_date = current_end_date - pd.DateOffset(days=20)
previous_month_end_date = current_end_date - pd.DateOffset(months=1)
previous_month_start_date = previous_month_end_date - pd.DateOffset(days=10)
previous_year_end_date = current_end_date - pd.DateOffset(years=1)
previous_year_start_date = previous_year_end_date - pd.DateOffset(days=10)

In [22]:
df_all.loc[(df_all['Only_Date']<=current_end_date) & (df_all['Only_Date']>current_start_date), 'Period_Label'] = 'current_period'

In [23]:
df_all.loc[(df_all['Only_Date']<=current_start_date) & (df_all['Only_Date']>previous_start_date), 'Period_Label'] = 'previous_period'

In [24]:
df_all.loc[(df_all['Only_Date']<=previous_month_end_date) & (df_all['Only_Date']>previous_month_start_date), 'Period_Label'] = 'previous_month'

In [25]:
df_all.loc[(df_all['Only_Date']<=previous_year_end_date) & (df_all['Only_Date']>previous_year_start_date), 'Period_Label'] = 'previous_year'

In [26]:
df_all = df_all.drop(['Only_Date'], axis=1)

In [27]:
df_all

Unnamed: 0_level_0,Inverter,Energy,Total_Energy,Inv_Temp,Wms_Temp,Wms_Irr,Predict_Energy,Date,Period_Label
Date_Hour_Quarter_Inv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-01_10_15_Inv03,Inv03,16.0,5170.0,43.830769,22.915385,721.0,15.178446,2020-01-01 10:15:00,reporting_period
2020-01-01_10_15_Inv04,Inv04,18.0,8722.0,34.871429,22.921429,721.0,18.560393,2020-01-01 10:15:00,reporting_period
2020-01-01_10_15_Inv05,Inv05,22.0,11308.0,37.560000,22.893333,721.0,20.291965,2020-01-01 10:15:00,reporting_period
2020-01-01_10_30_Inv03,Inv03,17.0,5172.0,42.976923,23.692308,884.0,16.229366,2020-01-01 10:30:00,reporting_period
2020-01-01_10_30_Inv04,Inv04,18.0,8723.0,35.746154,23.723077,884.0,18.526685,2020-01-01 10:30:00,reporting_period
...,...,...,...,...,...,...,...,...,...
2021-12-31_9_59_Inv06,Inv06,45.0,174485.0,35.050000,23.142857,405.0,43.708580,2021-12-31 9:59:00,current_period
2021-12-31_9_59_Inv07,Inv07,42.0,168419.0,36.535714,23.142857,405.0,38.603362,2021-12-31 9:59:00,current_period
2021-12-31_9_59_Inv08,Inv08,43.0,168580.0,34.307143,23.142857,405.0,47.442557,2021-12-31 9:59:00,current_period
2021-12-31_9_59_Inv09,Inv09,24.0,99757.0,35.150000,23.142857,405.0,27.049869,2021-12-31 9:59:00,current_period


In [28]:
def dataForAnalysis(df, invNo):
       
   
    df = df[(df[['Inverter']] == invNo).all(axis=1)] 

    df_c = df[(df[['Period_Label']] == 'current_period').all(axis=1)]   
    df_p = df[(df[['Period_Label']] == 'previous_period').all(axis=1)]   
    df_pm = df[(df[['Period_Label']] == 'previous_month').all(axis=1)]   
    df_py = df[(df[['Period_Label']] == 'previous_year').all(axis=1)]    
    
    # c: current, ce: current energy, ci: current irradiance, cit: current inv temp, cwt: current wms temp
    # p: previous, pe: previous energy, pi: previous irradiance, pit: previous inv temp, pwt: previous wms temp
    # pm: previous month, pme: previous month energy, pmi: previous month irradiance, pmit: previous month inv temp, pmwt: previous month wms temp
    # py: previous year, pye: previous year energy, pyi: previous year irradiance, pyit: previous year inv temp, pywt: previous year wms temp
    
    ce = df_c['Energy']
    ci = df_c['Wms_Irr']
    cit = df_c['Inv_Temp']
    cwt = df_c['Wms_Temp']

    pe = df_p['Energy']
    pi = df_p['Wms_Irr']
    pit = df_p['Inv_Temp']
    pwt = df_p['Wms_Temp']

    pme = df_pm['Energy']
    pmi = df_pm['Wms_Irr']
    pmit = df_pm['Inv_Temp']
    pmwt = df_pm['Wms_Temp']

    pye = df_py['Energy']
    pyi = df_py['Wms_Irr']
    pyit = df_py['Inv_Temp']
    pywt = df_py['Wms_Temp']
    
    #normality test - <0.05 not normal distribution, >=0.05 normal
    shapiro_ce = stats.shapiro(ce).pvalue
    shapiro_ci = stats.shapiro(ci).pvalue
    shapiro_cit = stats.shapiro(cit).pvalue
    shapiro_cwt = stats.shapiro(cwt).pvalue
    
    shapiro_pe = stats.shapiro(pe).pvalue
    shapiro_pi = stats.shapiro(pi).pvalue
    shapiro_pit = stats.shapiro(pit).pvalue
    shapiro_pwt = stats.shapiro(pwt).pvalue
    
    shapiro_pme = stats.shapiro(pme).pvalue
    shapiro_pmi = stats.shapiro(pmi).pvalue
    shapiro_pmit = stats.shapiro(pmit).pvalue
    shapiro_pmwt = stats.shapiro(pmwt).pvalue
    
    shapiro_pye = stats.shapiro(pye).pvalue
    shapiro_pyi = stats.shapiro(pyi).pvalue
    shapiro_pyit = stats.shapiro(pyit).pvalue
    shapiro_pywt = stats.shapiro(pywt).pvalue    
    
    #equal medium (kruskal) test, <0.05 not equal, >=0.05 equal
    medium_cpe = stats.kruskal(ce, pe).pvalue
    medium_cpme = stats.kruskal(ce, pme).pvalue
    medium_cpye = stats.kruskal(ce, pye).pvalue

    medium_cpi = stats.kruskal(ci, pi).pvalue
    medium_cpmi = stats.kruskal(ci, pmi).pvalue
    medium_cpyi = stats.kruskal(ci, pyi).pvalue

    medium_cpit = stats.kruskal(cit, pit).pvalue
    medium_cpmit = stats.kruskal(cit, pmit).pvalue
    medium_cpyit = stats.kruskal(cit, pyit).pvalue

    medium_cpwt = stats.kruskal(cwt, pwt).pvalue
    medium_cpmwt = stats.kruskal(cwt, pmwt).pvalue
    medium_cpywt = stats.kruskal(cwt, pywt).pvalue   
    
    # equal variance (levene) test, <0.05 not equal, >=0.05 equal
    var_cpe = stats.levene(ce, pe).pvalue
    var_cpme = stats.levene(ce, pme).pvalue
    var_cpye = stats.levene(ce, pye).pvalue 
    
    var_cpi = stats.levene(ci, pi).pvalue
    var_cpmi = stats.levene(ci, pmi).pvalue
    var_cpyi = stats.levene(ci, pyi).pvalue 
    
    var_cpit = stats.levene(cit, pit).pvalue
    var_cpmit = stats.levene(cit, pmit).pvalue
    var_cpyit = stats.levene(cit, pyit).pvalue     
    
    var_cpwt = stats.levene(cwt, pwt).pvalue
    var_cpmwt = stats.levene(cwt, pmwt).pvalue
    var_cpywt = stats.levene(cwt, pywt).pvalue    
    
    # mean comparison (mannwhitneyu) test, <0.05 current less, >=0.05 current not less
    mean_cpe = stats.mannwhitneyu(ce, pe, alternative="less").pvalue
    mean_cpme = stats.mannwhitneyu(ce, pme, alternative="less").pvalue
    mean_cpye = stats.mannwhitneyu(ce, pye, alternative="less").pvalue

    mean_cpi = stats.mannwhitneyu(ci, pi, alternative="less").pvalue
    mean_cpmi = stats.mannwhitneyu(ci, pmi, alternative="less").pvalue
    mean_cpyi = stats.mannwhitneyu(ci, pyi, alternative="less").pvalue

    mean_cpit = stats.mannwhitneyu(cit, pit, alternative="less").pvalue
    mean_cpmit = stats.mannwhitneyu(cit, pmit, alternative="less").pvalue
    mean_cpyit = stats.mannwhitneyu(cit, pyit, alternative="less").pvalue
  
    mean_cpwt = stats.mannwhitneyu(cwt, pwt, alternative="less").pvalue
    mean_cpmwt = stats.mannwhitneyu(cwt, pmwt, alternative="less").pvalue
    mean_cpywt = stats.mannwhitneyu(cwt, pywt, alternative="less").pvalue
    

    cvp = ''
    cvpe = ''
    if mean_cpe < 0.05:
        cvpe = 'less' 
    elif mean_cpe >= 0.05 and var_cpe < 0.05:
        cvpe = 'not_less_with_variation'
    elif mean_cpe >= 0.05 and var_cpe >= 0.05:
        cvpe = 'not_less'    
        
    cvpi = ''
    if mean_cpi < 0.05:
        cvpi = 'less'
    elif mean_cpi >= 0.05 and var_cpi < 0.05:
        cvpi = 'not_less_with_variation'
    elif mean_cpi >= 0.05 and var_cpi >= 0.05:
        cvpi = 'not_less'    
        
    cvp = cvpe + ' ' + cvpi      
        
    cvpm = ''
    cvpme = ''
    if mean_cpme < 0.05:
        cvpme = 'less'
    elif mean_cpme >= 0.05 and var_cpme < 0.05:
        cvpme = 'not_less_with_variation'
    elif mean_cpme >= 0.05 and var_cpme >= 0.05:
        cvpme = 'not_less'    
        
    cvpmi = ''
    if mean_cpmi < 0.05:
        cvpmi = 'less'
    elif mean_cpmi >= 0.05 and var_cpmi < 0.05:
        cvpmi = 'not_less_with_variation'
    elif mean_cpmi >= 0.05 and var_cpmi >= 0.05:
        cvpmi = 'not_less'    
        
    cvpm = cvpme + ' ' + cvpmi      
        
    cvpy = ''
    cvpye = ''
    if mean_cpye < 0.05:
        cvpye = 'less'
    elif mean_cpye >= 0.05 and var_cpye < 0.05:
        cvpye = 'not_less_with_variation'
    elif mean_cpye >= 0.05 and var_cpye >= 0.05:
        cvpye = 'not_less'    
        
    cvpyi = ''
    if mean_cpyi < 0.05:
        cvpyi = 'less'
    elif mean_cpyi >= 0.05 and var_cpyi < 0.05:
        cvpyi = 'not_less_with_variation'
    elif mean_cpyi >= 0.05 and var_cpyi >= 0.05:
        cvpyi = 'not_less'    
        
    cvpy = cvpye + ' ' + cvpyi  
    
    
    
    df['Current_vs_Previous'] = ' '
    df['Current_vs_Previous_Month'] = ' '
    df['Current_vs_Previous_Year'] = ' '
    df.loc[df['Period_Label'] == 'current_period', 'Current_vs_Previous'] = cvp
    df.loc[df['Period_Label'] == 'current_period', 'Current_vs_Previous_Month'] = cvpm
    df.loc[df['Period_Label'] == 'current_period', 'Current_vs_Previous_Year'] = cvpy
  

    return df
    
    



In [29]:
dataForAnalysis(df_all, 'Inv01')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Current_vs_Previous'] = ' '
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Current_vs_Previous_Month'] = ' '
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Current_vs_Previous_Year'] = ' '


Unnamed: 0_level_0,Inverter,Energy,Total_Energy,Inv_Temp,Wms_Temp,Wms_Irr,Predict_Energy,Date,Period_Label,Current_vs_Previous,Current_vs_Previous_Month,Current_vs_Previous_Year
Date_Hour_Quarter_Inv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2020-03-15_12_45_Inv01,Inv01,6.0,12332.0,36.483333,36.883333,948.0,5.830083,2020-03-15 12:45:00,reporting_period,,,
2020-03-15_12_59_Inv01,Inv01,8.0,12333.0,41.825000,35.787500,949.0,7.374860,2020-03-15 12:59:00,reporting_period,,,
2020-03-15_13_15_Inv01,Inv01,8.0,12334.0,45.087500,34.975000,952.0,9.141270,2020-03-15 13:15:00,reporting_period,,,
2020-03-15_13_30_Inv01,Inv01,8.0,12335.0,47.250000,35.200000,946.0,8.469210,2020-03-15 13:30:00,reporting_period,,,
2020-03-15_13_45_Inv01,Inv01,8.0,12336.0,48.600000,35.437500,920.0,8.262580,2020-03-15 13:45:00,reporting_period,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-31_8_59_Inv01,Inv01,16.0,144878.0,42.146154,20.676923,189.0,15.388478,2021-12-31 8:59:00,current_period,not_less_with_variation not_less,not_less_with_variation not_less,less not_less
2021-12-31_9_15_Inv01,Inv01,21.0,144880.0,42.956250,21.125000,229.0,16.873524,2021-12-31 9:15:00,current_period,not_less_with_variation not_less,not_less_with_variation not_less,less not_less
2021-12-31_9_30_Inv01,Inv01,41.0,144884.0,40.166667,22.013333,736.0,35.909264,2021-12-31 9:30:00,current_period,not_less_with_variation not_less,not_less_with_variation not_less,less not_less
2021-12-31_9_45_Inv01,Inv01,44.0,144888.0,37.700000,23.253333,663.0,44.465492,2021-12-31 9:45:00,current_period,not_less_with_variation not_less,not_less_with_variation not_less,less not_less


In [30]:
df_15min_combine = pd.DataFrame()
invNo = ['Inv01',
         'Inv02',
         'Inv03',
         'Inv04',
         'Inv05',
         'Inv06',
         'Inv07',
         'Inv08',
         'Inv09',
         'Inv10']

for inv in invNo:
    df_15min_combine = df_15min_combine.append(dataForAnalysis(df_all,inv), ignore_index = True)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Current_vs_Previous'] = ' '
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Current_vs_Previous_Month'] = ' '
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Current_vs_Previous_Year'] = ' '
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Current_vs_Previous'] = ' '
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Current_vs_Previous_Month'] = ' '
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Current_vs_Previous_Year'] = ' '
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_

In [31]:
df_15min_combine

Unnamed: 0,Inverter,Energy,Total_Energy,Inv_Temp,Wms_Temp,Wms_Irr,Predict_Energy,Date,Period_Label,Current_vs_Previous,Current_vs_Previous_Month,Current_vs_Previous_Year
0,Inv01,6.0,12332.0,36.483333,36.883333,948.0,5.830083,2020-03-15 12:45:00,reporting_period,,,
1,Inv01,8.0,12333.0,41.825000,35.787500,949.0,7.374860,2020-03-15 12:59:00,reporting_period,,,
2,Inv01,8.0,12334.0,45.087500,34.975000,952.0,9.141270,2020-03-15 13:15:00,reporting_period,,,
3,Inv01,8.0,12335.0,47.250000,35.200000,946.0,8.469210,2020-03-15 13:30:00,reporting_period,,,
4,Inv01,8.0,12336.0,48.600000,35.437500,920.0,8.262580,2020-03-15 13:45:00,reporting_period,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
274414,Inv10,11.0,107245.0,32.272727,20.718182,176.0,11.050831,2021-12-31 8:59:00,current_period,not_less_with_variation not_less,not_less_with_variation not_less,less not_less
274415,Inv10,16.0,107247.0,32.800000,21.214286,243.0,15.915434,2021-12-31 9:15:00,current_period,not_less_with_variation not_less,not_less_with_variation not_less,less not_less
274416,Inv10,29.0,107250.0,33.453333,22.346667,662.0,26.564144,2021-12-31 9:30:00,current_period,not_less_with_variation not_less,not_less_with_variation not_less,less not_less
274417,Inv10,34.0,107253.0,34.620000,23.386667,680.0,29.657538,2021-12-31 9:45:00,current_period,not_less_with_variation not_less,not_less_with_variation not_less,less not_less


In [32]:
df_15min_combine.to_csv('/Users/qilu/Desktop/Advanticsys/Data/Output_Data/15min_combine_data_for_report.csv', index=False)