In [1]:
import pandas as pd
import os
import re
from datetime import datetime
import researchpy as rp
import seaborn as sns
import matplotlib.pyplot as plt
import itertools
import shutil
from scipy import stats
from scipy.stats import norm, skew, f, ttest_ind, ttest_rel, mannwhitneyu, levene, bartlett
import requests
import random
import numpy as np
from sklearn.svm import OneClassSVM


In [2]:
# Read 2 years combine data
df_all = pd.read_csv('/Users/qilu/Desktop/Advanticsys/Data/Output_Data/all_combine.csv',index_col=None, header=0)
df_all.head()

Unnamed: 0,Date,Inverter,Energy,Total_Energy,Inv_Temp,Wms_Temp,Wms_Irr,Predict_Energy
0,2021-01-01 07:03:00,Inv01,1.0,69998.0,31.8,16.9,29.0,0.699852
1,2021-01-01 07:06:00,Inv01,1.0,69998.0,32.2,16.9,39.0,0.877316
2,2021-01-01 07:09:00,Inv01,1.0,69999.0,32.6,17.0,34.0,0.741333
3,2021-01-01 07:12:00,Inv01,1.0,69999.0,33.1,17.0,36.0,1.305307
4,2021-01-01 07:15:00,Inv01,1.0,69999.0,33.6,17.0,38.0,1.362297


In [3]:
# Covert date column into datetime format
df_all['Date'] = pd.to_datetime(df_all['Date'])

In [4]:
# get the date part
df_all['Only_Date'] = df_all['Date'].dt.date

In [5]:
# get the hour part
df_all['Hour'] = df_all['Date'].dt.hour

In [6]:
# get the minute part
df_all['Minute'] = df_all['Date'].dt.minute

In [7]:
df_all.head()

Unnamed: 0,Date,Inverter,Energy,Total_Energy,Inv_Temp,Wms_Temp,Wms_Irr,Predict_Energy,Only_Date,Hour,Minute
0,2021-01-01 07:03:00,Inv01,1.0,69998.0,31.8,16.9,29.0,0.699852,2021-01-01,7,3
1,2021-01-01 07:06:00,Inv01,1.0,69998.0,32.2,16.9,39.0,0.877316,2021-01-01,7,6
2,2021-01-01 07:09:00,Inv01,1.0,69999.0,32.6,17.0,34.0,0.741333,2021-01-01,7,9
3,2021-01-01 07:12:00,Inv01,1.0,69999.0,33.1,17.0,36.0,1.305307,2021-01-01,7,12
4,2021-01-01 07:15:00,Inv01,1.0,69999.0,33.6,17.0,38.0,1.362297,2021-01-01,7,15


In [8]:
# set formula to allocate minutes into 15 mins interval
def quarter(x):
    if x < 16:
        return 15
    elif x < 31:
        return 30
    elif x < 46:
        return 45
    else:
        return 60

In [9]:
df_all.loc[:, 'Quarter'] = df_all.Minute.apply(lambda x: quarter(x))

In [10]:
df_all.head()

Unnamed: 0,Date,Inverter,Energy,Total_Energy,Inv_Temp,Wms_Temp,Wms_Irr,Predict_Energy,Only_Date,Hour,Minute,Quarter
0,2021-01-01 07:03:00,Inv01,1.0,69998.0,31.8,16.9,29.0,0.699852,2021-01-01,7,3,15
1,2021-01-01 07:06:00,Inv01,1.0,69998.0,32.2,16.9,39.0,0.877316,2021-01-01,7,6,15
2,2021-01-01 07:09:00,Inv01,1.0,69999.0,32.6,17.0,34.0,0.741333,2021-01-01,7,9,15
3,2021-01-01 07:12:00,Inv01,1.0,69999.0,33.1,17.0,36.0,1.305307,2021-01-01,7,12,15
4,2021-01-01 07:15:00,Inv01,1.0,69999.0,33.6,17.0,38.0,1.362297,2021-01-01,7,15,15


In [11]:
# create a column combine time and inverter No.
df_all['Date_Hour_Quarter_Inv'] = df_all['Only_Date'].astype(str) + '_' + df_all['Hour'].astype(str) + '_' + df_all['Quarter'].astype(str) + '_' + df_all['Inverter'].astype(str)

In [12]:
df_all.tail()

Unnamed: 0,Date,Inverter,Energy,Total_Energy,Inv_Temp,Wms_Temp,Wms_Irr,Predict_Energy,Only_Date,Hour,Minute,Quarter,Date_Hour_Quarter_Inv
3368166,2020-12-31 17:21:00,Inv09,1.0,46748.0,36.9,23.1,42.0,0.784433,2020-12-31,17,21,30,2020-12-31_17_30_Inv09
3368167,2020-12-31 17:26:00,Inv09,1.0,46748.0,36.9,23.0,34.0,1.043442,2020-12-31,17,26,30,2020-12-31_17_30_Inv09
3368168,2020-12-31 17:32:00,Inv09,1.0,46748.0,36.9,22.8,26.0,0.745356,2020-12-31,17,32,45,2020-12-31_17_45_Inv09
3368169,2020-12-31 17:39:00,Inv09,1.0,46748.0,36.9,22.7,17.0,0.72568,2020-12-31,17,39,45,2020-12-31_17_45_Inv09
3368170,2020-12-31 17:52:00,Inv09,1.0,46749.0,36.9,22.5,2.0,1.311872,2020-12-31,17,52,60,2020-12-31_17_60_Inv09


In [13]:
# Combine minutes into 15 min interval
df_all = df_all.drop(['Date', 'Minute'], axis=1)
df_all.rename(columns={"Quarter": "Minute"}, inplace=True)

In [14]:
# group data into 15 mins interval
agg_functions = {'Date_Hour_Quarter_Inv': 'first',
                 'Inverter': 'first',
                 'Energy': 'sum',
                 'Total_Energy': 'max',     
                 'Inv_Temp': 'mean',                 
                 'Wms_Temp': 'mean',
                 'Wms_Irr': 'max',                 
                 'Predict_Energy': 'sum',                   
                 'Only_Date': 'first',
                 'Hour': 'first',                                     
                 'Minute': 'first',}
#create new DataFrame by combining rows with same id values
df_all = df_all.groupby(df_all['Date_Hour_Quarter_Inv']).aggregate(agg_functions)   
df_all = df_all.drop(['Date_Hour_Quarter_Inv'], axis=1)   


In [15]:
df_all.tail()

Unnamed: 0_level_0,Inverter,Energy,Total_Energy,Inv_Temp,Wms_Temp,Wms_Irr,Predict_Energy,Only_Date,Hour,Minute
Date_Hour_Quarter_Inv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-12-31_9_60_Inv06,Inv06,45.0,174485.0,35.05,23.142857,405.0,48.893694,2021-12-31,9,60
2021-12-31_9_60_Inv07,Inv07,42.0,168419.0,36.535714,23.142857,405.0,40.733614,2021-12-31,9,60
2021-12-31_9_60_Inv08,Inv08,43.0,168580.0,34.307143,23.142857,405.0,41.512694,2021-12-31,9,60
2021-12-31_9_60_Inv09,Inv09,24.0,99757.0,35.15,23.142857,405.0,25.012765,2021-12-31,9,60
2021-12-31_9_60_Inv10,Inv10,26.0,107256.0,35.621429,23.142857,405.0,29.205809,2021-12-31,9,60


In [16]:
df_all['Date'] = df_all['Only_Date'].astype(str) +" "+ df_all['Hour'].astype(str) + ':' + df_all['Minute'].astype(str) + ':' + '00'


In [17]:
df_all

Unnamed: 0_level_0,Inverter,Energy,Total_Energy,Inv_Temp,Wms_Temp,Wms_Irr,Predict_Energy,Only_Date,Hour,Minute,Date
Date_Hour_Quarter_Inv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-01-01_10_15_Inv03,Inv03,16.0,5170.0,43.830769,22.915385,721.0,16.027770,2020-01-01,10,15,2020-01-01 10:15:00
2020-01-01_10_15_Inv04,Inv04,18.0,8722.0,34.871429,22.921429,721.0,21.298455,2020-01-01,10,15,2020-01-01 10:15:00
2020-01-01_10_15_Inv05,Inv05,22.0,11308.0,37.560000,22.893333,721.0,21.242665,2020-01-01,10,15,2020-01-01 10:15:00
2020-01-01_10_30_Inv03,Inv03,17.0,5172.0,42.976923,23.692308,884.0,18.996703,2020-01-01,10,30,2020-01-01 10:30:00
2020-01-01_10_30_Inv04,Inv04,18.0,8723.0,35.746154,23.723077,884.0,16.968059,2020-01-01,10,30,2020-01-01 10:30:00
...,...,...,...,...,...,...,...,...,...,...,...
2021-12-31_9_60_Inv06,Inv06,45.0,174485.0,35.050000,23.142857,405.0,48.893694,2021-12-31,9,60,2021-12-31 9:60:00
2021-12-31_9_60_Inv07,Inv07,42.0,168419.0,36.535714,23.142857,405.0,40.733614,2021-12-31,9,60,2021-12-31 9:60:00
2021-12-31_9_60_Inv08,Inv08,43.0,168580.0,34.307143,23.142857,405.0,41.512694,2021-12-31,9,60,2021-12-31 9:60:00
2021-12-31_9_60_Inv09,Inv09,24.0,99757.0,35.150000,23.142857,405.0,25.012765,2021-12-31,9,60,2021-12-31 9:60:00


In [18]:
df_all = df_all.drop(['Hour', 'Minute'], axis=1)

In [19]:
# save grouped data into file
# df_all.to_csv('/Users/qilu/Desktop/Advanticsys/Data/Output_Data/15_min_summary.csv', index=False)

In [20]:
def dataForAnalysis(invNo, reportDate):
       
#     dirpath = "/Users/qilu/Desktop/Advanticsys/Data/Output_Data/Data_for_analysis_15mins_interval"
    
    df = df_all[(df_all[['Inverter']] == invNo).all(axis=1)] 
    
    current_end_date = reportDate
    current_start_date = current_end_date - pd.DateOffset(days=10)
    previous_start_date = current_end_date - pd.DateOffset(days=20)
    previous_month_end_date = current_end_date - pd.DateOffset(months=1)
    previous_month_start_date = previous_month_end_date - pd.DateOffset(days=10)
    previous_year_end_date = current_end_date - pd.DateOffset(years=1)
    previous_year_start_date = previous_year_end_date - pd.DateOffset(days=10)
    
    df_c = df[((df[['Only_Date']] <= current_end_date)
                              & (df[['Only_Date']] > current_start_date)).all(axis=1)]  
    df_p = df[((df[['Only_Date']] <= current_start_date)
                                  & (df[['Only_Date']] > previous_start_date)).all(axis=1)]     
    df_pm = df[((df[['Only_Date']] <= previous_month_end_date)
                                  & (df[['Only_Date']] > previous_month_start_date)).all(axis=1)]   
    df_py = df[((df[['Only_Date']] <= previous_year_end_date)
                                  & (df[['Only_Date']] > previous_year_start_date)).all(axis=1)]  
    
    df_c_inv = df_c[(df_c[['Inverter']] == invNo).all(axis=1)] 
    df_p_inv = df_p[(df_p[['Inverter']] == invNo).all(axis=1)]     
    df_pm_inv = df_pm[(df_pm[['Inverter']] == invNo).all(axis=1)]    
    df_py_inv = df_py[(df_py[['Inverter']] == invNo).all(axis=1)]   
    
    # c: current, ce: current energy, ci: current irradiance, cit: current inv temp, cwt: current wms temp
    # p: previous, pe: previous energy, pi: previous irradiance, pit: previous inv temp, pwt: previous wms temp
    # pm: previous month, pme: previous month energy, pmi: previous month irradiance, pmit: previous month inv temp, pmwt: previous month wms temp
    # py: previous year, pye: previous year energy, pyi: previous year irradiance, pyit: previous year inv temp, pywt: previous year wms temp
    
    ce = df_c_inv['Energy']
    ci = df_c_inv['Wms_Irr']
    cit = df_c_inv['Inv_Temp']
    cwt = df_c_inv['Wms_Temp']

    pe = df_p_inv['Energy']
    pi = df_p_inv['Wms_Irr']
    pit = df_p_inv['Inv_Temp']
    pwt = df_p_inv['Wms_Temp']

    pme = df_pm_inv['Energy']
    pmi = df_pm_inv['Wms_Irr']
    pmit = df_pm_inv['Inv_Temp']
    pmwt = df_pm_inv['Wms_Temp']

    pye = df_py_inv['Energy']
    pyi = df_py_inv['Wms_Irr']
    pyit = df_py_inv['Inv_Temp']
    pywt = df_py_inv['Wms_Temp']
    
    # normality test - <0.05 not normal distribution, >=0.05 normal
    shapiro_ce = stats.shapiro(ce).pvalue
    shapiro_ci = stats.shapiro(ci).pvalue
    shapiro_cit = stats.shapiro(cit).pvalue
    shapiro_cwt = stats.shapiro(cwt).pvalue
    
    shapiro_pe = stats.shapiro(pe).pvalue
    shapiro_pi = stats.shapiro(pi).pvalue
    shapiro_pit = stats.shapiro(pit).pvalue
    shapiro_pwt = stats.shapiro(pwt).pvalue
    
    shapiro_pme = stats.shapiro(pme).pvalue
    shapiro_pmi = stats.shapiro(pmi).pvalue
    shapiro_pmit = stats.shapiro(pmit).pvalue
    shapiro_pmwt = stats.shapiro(pmwt).pvalue
    
    shapiro_pye = stats.shapiro(pye).pvalue
    shapiro_pyi = stats.shapiro(pyi).pvalue
    shapiro_pyit = stats.shapiro(pyit).pvalue
    shapiro_pywt = stats.shapiro(pywt).pvalue    
    
    # equal medium (kruskal) test, <0.05 not equal, >=0.05 equal
    medium_cpe = stats.kruskal(ce, pe).pvalue
    medium_cpme = stats.kruskal(ce, pme).pvalue
    medium_cpye = stats.kruskal(ce, pye).pvalue

    medium_cpi = stats.kruskal(ci, pi).pvalue
    medium_cpmi = stats.kruskal(ci, pmi).pvalue
    medium_cpyi = stats.kruskal(ci, pyi).pvalue

    medium_cpit = stats.kruskal(cit, pit).pvalue
    medium_cpmit = stats.kruskal(cit, pmit).pvalue
    medium_cpyit = stats.kruskal(cit, pyit).pvalue

    medium_cpwt = stats.kruskal(cwt, pwt).pvalue
    medium_cpmwt = stats.kruskal(cwt, pmwt).pvalue
    medium_cpywt = stats.kruskal(cwt, pywt).pvalue   
    
    # equal variance (levene) test, <0.05 not equal, >=0.05 equal
    var_cpe = stats.levene(ce, pe).pvalue
    var_cpme = stats.levene(ce, pme).pvalue
    var_cpye = stats.levene(ce, pye).pvalue 
    
    var_cpi = stats.levene(ci, pi).pvalue
    var_cpmi = stats.levene(ci, pmi).pvalue
    var_cpyi = stats.levene(ci, pyi).pvalue 
    
    var_cpit = stats.levene(cit, pit).pvalue
    var_cpmit = stats.levene(cit, pmit).pvalue
    var_cpyit = stats.levene(cit, pyit).pvalue     
    
    var_cpwt = stats.levene(cwt, pwt).pvalue
    var_cpmwt = stats.levene(cwt, pmwt).pvalue
    var_cpywt = stats.levene(cwt, pywt).pvalue    
    
    # mean comparison (mannwhitneyu) test, <0.05 current not less, >=0.05 current less
    mean_cpe = stats.mannwhitneyu(ce, pe, alternative="less").pvalue
    mean_cpme = stats.mannwhitneyu(ce, pme, alternative="less").pvalue
    mean_cpye = stats.mannwhitneyu(ce, pye, alternative="less").pvalue

    mean_cpi = stats.mannwhitneyu(ci, pi, alternative="less").pvalue
    mean_cpmi = stats.mannwhitneyu(ci, pmi, alternative="less").pvalue
    mean_cpyi = stats.mannwhitneyu(ci, pyi, alternative="less").pvalue

    mean_cpit = stats.mannwhitneyu(cit, pit, alternative="less").pvalue
    mean_cpmit = stats.mannwhitneyu(cit, pmit, alternative="less").pvalue
    mean_cpyit = stats.mannwhitneyu(cit, pyit, alternative="less").pvalue
  
    mean_cpwt = stats.mannwhitneyu(cwt, pwt, alternative="less").pvalue
    mean_cpmwt = stats.mannwhitneyu(cwt, pmwt, alternative="less").pvalue
    mean_cpywt = stats.mannwhitneyu(cwt, pywt, alternative="less").pvalue
      
    cvp = ''
    cvpe = ''
    if mean_cpe < 0.05 and var_cpe < 0.05:
        cvpe = 'changed_var' 
    elif mean_cpe < 0.05 and var_cpe >= 0.05:
        cvpe = 'not_less'
    elif mean_cpe >= 0.05 and var_cpe < 0.05:
        cvpe = 'less'
    elif mean_cpe >= 0.05 and var_cpe >= 0.05:
        cvpe = 'less'    
        
    cvpi = ''
    if mean_cpi < 0.05 and var_cpi < 0.05:
        cvpi = 'changed_var' 
    elif mean_cpi < 0.05 and var_cpi >= 0.05:
        cvpi = 'not_less'
    elif mean_cpi >= 0.05 and var_cpi < 0.05:
        cvpi = 'less'
    elif mean_cpi >= 0.05 and var_cpi >= 0.05:
        cvpi = 'less'    
        
    cvp = cvpe + ' ' + cvpi      
    if cvpe == 'not_less':
        cvp += ' normal'  
    elif cvpe == 'changed_var':
        cvp += ' changed_var' 
    elif cvpe == 'less' and cvpi == 'not_less':
        cvp += ' less'
    elif cvpe == 'less' and cvpi == 'changed_var':
        cvp += ' less'
    else:
        cvp += ' normal'
        
    cvpm = ''
    cvpme = ''
    if mean_cpme < 0.05 and var_cpme < 0.05:
        cvpme = 'changed_var' 
    elif mean_cpme < 0.05 and var_cpme >= 0.05:
        cvpme = 'not_less'
    elif mean_cpme >= 0.05 and var_cpme < 0.05:
        cvpme = 'less'
    elif mean_cpme >= 0.05 and var_cpme >= 0.05:
        cvpme = 'less'    
        
    cvpmi = ''
    if mean_cpmi < 0.05 and var_cpmi < 0.05:
        cvpmi = 'changed_var' 
    elif mean_cpmi < 0.05 and var_cpmi >= 0.05:
        cvpmi = 'not_less'
    elif mean_cpmi >= 0.05 and var_cpmi < 0.05:
        cvpmi = 'less'
    elif mean_cpmi >= 0.05 and var_cpmi >= 0.05:
        cvpmi = 'less'    
        
    cvpm = cvpme + ' ' + cvpmi      
    if cvpme == 'not_less':
        cvpm += ' normal'  
    elif cvpme == 'changed_var':
        cvpm += ' changed_var' 
    elif cvpme == 'less' and cvpmi == 'not_less':
        cvpm += ' less'
    elif cvpme == 'less' and cvpmi == 'changed_var':
        cvpm += ' less'
    else:
        cvpm += ' normal'
        
    cvpy = ''
    cvpye = ''
    if mean_cpye < 0.05 and var_cpye < 0.05:
        cvpye = 'changed_var' 
    elif mean_cpye < 0.05 and var_cpye >= 0.05:
        cvpye = 'not_less'
    elif mean_cpye >= 0.05 and var_cpye < 0.05:
        cvpye = 'less'
    elif mean_cpye >= 0.05 and var_cpye >= 0.05:
        cvpye = 'less'    
        
    cvpyi = ''
    if mean_cpyi < 0.05 and var_cpyi < 0.05:
        cvpyi = 'changed_var' 
    elif mean_cpyi < 0.05 and var_cpyi >= 0.05:
        cvpyi = 'not_less'
    elif mean_cpyi >= 0.05 and var_cpyi < 0.05:
        cvpyi = 'less'
    elif mean_cpyi >= 0.05 and var_cpyi >= 0.05:
        cvpyi = 'less'    
        
    cvpy = cvpye + ' ' + cvpyi      
    if cvpye == 'not_less':
        cvpy += ' normal'  
    elif cvpye == 'changed_var':
        cvpy += ' changed_var' 
    elif cvpye == 'less' and cvpyi == 'not_less':
        cvpy += ' less'
    elif cvpye == 'less' and cvpyi == 'changed_var':
        cvpy += ' less'
    else:
        cvpy += ' normal'
                

    df['Current_vs_Previous'] = cvp
    df['Current_vs_Previous_Month'] = cvpm
    df['Current_vs_Previous_Year'] = cvpy    
    return df
    
    
#     filenames = os.listdir(dirpath)
    

In [21]:
def saveDataForAnalysis(compareDate):
    
    outputpath = "/Users/qilu/Desktop/Advanticsys/Data/Output_Data/Data_for_analysis_15mins_interval/"     
    invNo = ['Inv01',
             'Inv02',
             'Inv03',
             'Inv04',
             'Inv05',
             'Inv06',
             'Inv07',
             'Inv08',
             'Inv09',
             'Inv10']
    for inv in invNo:
        df = dataForAnalysis(inv, compareDate)
        df.to_csv(outputpath+str(inv)+'_15min_data.csv', index=False)
    
    analysisDataFiles = []     
    filenames = os.listdir(outputpath)
    
    for file in filenames:         
        inv_match = re.search('Inv', file)    
        if inv_match:
            analysisDataFiles.append(outputpath+file)   
        df_comb = pd.concat((pd.read_csv(filenames, index_col=None, header=0) for filenames in analysisDataFiles))        
        df_comb.to_csv(outputpath+'combine_15min_data.csv', index=False)
        
    return

In [22]:
saveDataForAnalysis(df_all['Only_Date'].max())

  result = libops.scalar_compare(x.ravel(), y, op)
  result = libops.scalar_compare(x.ravel(), y, op)
  result = libops.scalar_compare(x.ravel(), y, op)
  result = libops.scalar_compare(x.ravel(), y, op)
  result = libops.scalar_compare(x.ravel(), y, op)
  result = libops.scalar_compare(x.ravel(), y, op)
  result = libops.scalar_compare(x.ravel(), y, op)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Current_vs_Previous'] = cvp
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Current_vs_Previous_Month'] = cvpm
A value is trying to be set on a copy of a

  result = libops.scalar_compare(x.ravel(), y, op)
  result = libops.scalar_compare(x.ravel(), y, op)
  result = libops.scalar_compare(x.ravel(), y, op)
  result = libops.scalar_compare(x.ravel(), y, op)
  result = libops.scalar_compare(x.ravel(), y, op)
  result = libops.scalar_compare(x.ravel(), y, op)
  result = libops.scalar_compare(x.ravel(), y, op)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Current_vs_Previous'] = cvp
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Current_vs_Previous_Month'] = cvpm
A value is trying to be set on a copy of a

  result = libops.scalar_compare(x.ravel(), y, op)
  result = libops.scalar_compare(x.ravel(), y, op)
  result = libops.scalar_compare(x.ravel(), y, op)
  result = libops.scalar_compare(x.ravel(), y, op)
  result = libops.scalar_compare(x.ravel(), y, op)
  result = libops.scalar_compare(x.ravel(), y, op)
  result = libops.scalar_compare(x.ravel(), y, op)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Current_vs_Previous'] = cvp
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Current_vs_Previous_Month'] = cvpm
A value is trying to be set on a copy of a

  result = libops.scalar_compare(x.ravel(), y, op)
  result = libops.scalar_compare(x.ravel(), y, op)
  result = libops.scalar_compare(x.ravel(), y, op)
  result = libops.scalar_compare(x.ravel(), y, op)
  result = libops.scalar_compare(x.ravel(), y, op)
  result = libops.scalar_compare(x.ravel(), y, op)
  result = libops.scalar_compare(x.ravel(), y, op)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Current_vs_Previous'] = cvp
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Current_vs_Previous_Month'] = cvpm
A value is trying to be set on a copy of a