In [22]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import stats
from datetime import datetime
dateparse = lambda x: datetime.strptime(x, '%m/%d/%Y %H:%M')
import plotly.express as px
import plotly.graph_objects as go
from mpl_toolkits import mplot3d
from sklearn import preprocessing, svm 
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression 
import random
import plotly.subplots as sp
from scipy.interpolate import interp1d
from scipy.optimize import curve_fit
from sklearn.metrics import r2_score

In [23]:
# Load the inverter export. Column 5 of the CSV is parsed as a timestamp,
# exposed under the name 'datetime', and used as the index of the frame.
# `infer_datetime_format` is deliberately not passed: pandas deprecated it and
# applies a strict, consistent format inference by default, so the argument
# only produced a deprecation warning.
df = pd.read_csv('./inv3_emi_data_t345678.csv',
                encoding="utf-8-sig",
                header=0,
                parse_dates={'datetime':[5]},
                index_col=['datetime']
                )


The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.



In [24]:
# --- Calendar features taken from the DatetimeIndex -------------------------
stamps = df.index
df['time'] = stamps
calendar_parts = {
    'year': stamps.year,
    'month': stamps.month,
    'day': stamps.day,
    'hour': stamps.hour,
    'dayofweek': stamps.dayofweek,
    'dayname': stamps.day_name(),
}
for column, values in calendar_parts.items():
    df.loc[:, column] = pd.Series(values, index=stamps)

# Hour counters within the week / month, built from the columns added above.
df.loc[:, 'hourofweek'] = df['hour'] + 24 * df['dayofweek']
df.loc[:, 'hourofmonth'] = df['hour'] + 24 * (df['day'] - 1)
df.loc[:, 'date'] = pd.Series(stamps.date, index=stamps)

# 'ngay' is a day counter: proleptic ordinal of the date minus 738236
# (written as 738216 + 20, exactly as before). Rows with a negative counter
# are discarded.
df['date_ordinal'] = pd.to_datetime(df['date']).map(lambda stamp: stamp.toordinal())
df.loc[:, 'ngay'] = df['date_ordinal'] - 738216 - 20
df = df[df['ngay'] >= 0]

# --- Derived quantities ------------------------------------------------------
ambient = df['Ambient temperature(℃)']
irradiance = df['Irradiance(W/㎡)']

# NOTE(review): presumably a cell-temperature estimate from ambient temperature
# and irradiance — the 800 and (25-20) constants are not explained here; confirm.
df.loc[:, 'Tcell'] = ambient + irradiance / 800 * (25 - 20)

# NOTE(review): presumably the expected power for the measured irradiance with
# a 0.0045-per-degree derating around 25 — verify the constants with the author.
# The operand order is kept as-is so the floating-point results do not change.
df.loc[:, 'Pm'] = 2.172*1.303*220*2*590*irradiance/1000*(1-0.0045*(df['Tcell']-25))*20.5/100

# 'Hieu suat': measured input power (kW -> W) relative to 'Pm', as a percentage.
df.loc[:, 'Hieu suat'] = df['Total input power(kW)']*1000/df['Pm']*100

# --- Row filters (all must hold) ---------------------------------------------
status_ok = df['Inverter status'].isin(
    ['Grid connected', 'Grid connected : power limited']
)
keep = (
    status_ok
    & (df['Ambient temperature(℃)'] > 6)
    & (df['Hieu suat'] > 70)
    & (df['Hieu suat'] < 100)
    & (df['ngay'] < 155)
)
df = df[keep]
df

Unnamed: 0_level_0,Unnamed: 0.1,Unnamed: 0,Site Name,Management Domain,ManageObject_x,Ambient temperature(℃),PV Temperature(℃),Wind speed(m/s),Wind direction(°),Daily irradiation(MJ/㎡),...,dayofweek,dayname,hourofweek,hourofmonth,date,date_ordinal,ngay,Tcell,Pm,Hieu suat
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-03-22 05:55:00,2713,2713,VNM - Đà Nẵng - Nhà Máy,/BCG Energy,1020C0058922/EM001020C0058922,25.3,25.9,0.5,,0.011,...,1,Tuesday,29,509,2022-03-22,738236,0,25.414375,2751.080484,71.026639
2022-03-22 06:00:00,2714,2714,VNM - Đà Nẵng - Nhà Máy,/BCG Energy,1020C0058922/EM001020C0058922,25.4,25.9,0.0,,0.018,...,1,Tuesday,30,510,2022-03-22,738236,0,25.545625,3500.669147,71.929105
2022-03-22 06:05:00,2715,2715,VNM - Đà Nẵng - Nhà Máy,/BCG Energy,1020C0058922/EM001020C0058922,25.3,26.1,0.0,,0.025,...,1,Tuesday,30,510,2022-03-22,738236,0,25.460000,3847.714410,73.472189
2022-03-22 06:10:00,2716,2716,VNM - Đà Nẵng - Nhà Máy,/BCG Energy,1020C0058922/EM001020C0058922,25.5,26.3,0.4,,0.033,...,1,Tuesday,30,510,2022-03-22,738236,0,25.701250,4834.438280,74.320941
2022-03-22 06:20:00,2718,2718,VNM - Đà Nẵng - Nhà Máy,/BCG Energy,1020C0058922/EM001020C0058922,25.7,26.9,0.0,,0.056,...,1,Tuesday,30,510,2022-03-22,738236,0,25.976250,6627.854137,75.620252
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-08-23 17:35:00,27639,3558,VNM - Đà Nẵng - Nhà Máy,/BCG Energy,1020C0058922/EM001020C0058922,31.5,32.4,4.4,,23.973,...,1,Tuesday,41,545,2022-08-23,738390,154,31.758750,6045.737617,81.627757
2022-08-23 17:40:00,27640,3559,VNM - Đà Nẵng - Nhà Máy,/BCG Energy,1020C0058922/EM001020C0058922,31.3,32.0,2.4,,23.984,...,1,Tuesday,41,545,2022-08-23,738390,154,31.504375,4780.894165,79.817705
2022-08-23 17:45:00,27641,3560,VNM - Đà Nẵng - Nhà Máy,/BCG Energy,1020C0058922/EM001020C0058922,31.3,31.9,5.3,,23.993,...,1,Tuesday,41,545,2022-08-23,738390,154,31.455000,3626.706836,78.390676
2022-08-23 17:50:00,27642,3561,VNM - Đà Nẵng - Nhà Máy,/BCG Energy,1020C0058922/EM001020C0058922,31.2,31.6,1.0,,23.999,...,1,Tuesday,41,545,2022-08-23,738390,154,31.311250,2604.773800,75.668759


In [25]:
def _month_band(frame, month, low, high, offset=0.0):
    """Select one month's rows whose 'Hieu suat' lies strictly inside (low, high).

    Parameters
    ----------
    frame : DataFrame with 'month' and 'Hieu suat' columns.
    month : calendar month number to keep.
    low, high : exclusive bounds on 'Hieu suat'.
    offset : value subtracted from 'Hieu suat' of the selected rows (0 = none).

    Returns
    -------
    An independent copy of the selected rows, so the adjustment never writes
    into (a view of) `frame`.
    """
    in_band = (
        (frame['month'] == month)
        & (frame['Hieu suat'] > low)
        & (frame['Hieu suat'] < high)
    )
    band = frame[in_band].copy()
    if offset:
        band['Hieu suat'] = band['Hieu suat'] - offset
    return band


# Per-month efficiency bands (and the offsets applied to June-August).
df11 = _month_band(df, 3, 82, 84)
df12 = _month_band(df, 4, 79, 82)
df13 = _month_band(df, 5, 76, 81)
df14 = _month_band(df, 6, 77, 81, offset=0.5)
df15 = _month_band(df, 7, 77, 81, offset=0.7)
df16 = _month_band(df, 8, 78, 79, offset=0.8)

# The monthly subsets are disjoint, so stacking them yields the same rows as
# the former chain of all-column outer merges, without the join cost.
df1 = pd.concat([df11, df12, df13, df14, df15, df16], ignore_index=True)

# Daily mean efficiency, one row per (month, date, ngay).
df1 = df1.groupby(['month', 'date', 'ngay'])['Hieu suat'].mean().reset_index()

# NOTE(review): the values below replace computed daily means with hand-entered
# numbers; their provenance is not documented in this notebook — confirm.
# Keys are row labels of the freshly reset (0..n-1) index.
manual_daily_values = {
    3: 82.627341,
    4: 82.468742,
    8: 81.931552,
    9: 81.527654,
    37: 79.527654,
    35: 79.627654,
    36: 79.427654,
    32: 80.327654,
    33: 80.227654,
    34: 80.127654,
}
# A single .loc write updates df1 itself. The previous chained form
# df1['Hieu suat'][i] = v raised SettingWithCopyWarning and does not modify
# df1 when pandas copy-on-write is active. A missing label raises KeyError.
df1.loc[list(manual_daily_values.keys()), 'Hieu suat'] = list(manual_daily_values.values())
print(df1)

# Daily efficiency over calendar date, with a range slider on the x axis.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=df1['date'],
    y=df1['Hieu suat'],
    name="Hiệu suất"))
fig.update_traces(showlegend=True)
fig.update_xaxes(rangeslider_visible=True)
fig.update_layout(yaxis_range=[60, 100])
fig.show()

     month        date  ngay  Hieu suat
0        3  2022-03-22     0  82.665928
1        3  2022-03-23     1  82.909011
2        3  2022-03-24     2  82.861872
3        3  2022-03-25     3  82.627341
4        3  2022-03-26     4  82.468742
..     ...         ...   ...        ...
149      8  2022-08-19   150  77.732727
150      8  2022-08-20   151  77.724142
151      8  2022-08-21   152  77.635580
152      8  2022-08-22   153  77.663887
153      8  2022-08-23   154  77.741234

[154 rows x 4 columns]




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/i

In [26]:
# Linear trend of the daily efficiency over the whole observation window.
x = df1['ngay']
y = df1['Hieu suat']
mymodel = np.poly1d(np.polyfit(x, y, 1))

# Evaluate the trend over the span of days actually present in the data
# rather than over hard-coded end points.
myline = np.linspace(x.min(), x.max(), 100)

fig1 = go.Figure()
fig1.add_trace(go.Scatter(
    x=x,
    y=y,
    name="Hiệu suất"))
fig1.add_trace(go.Scatter(
    x=myline,
    y=mymodel(myline),
    name="Xu hướng"))

# Goodness of fit of the straight line on the observed days.
print(r2_score(y, mymodel(x)))

# 'Hieu suat' is a percentage (power ratio * 100), so the axis is labelled in
# % — the same unit the June-August figure uses — instead of "Kw".
fig1.update_layout(
    yaxis_range=[70, 90],
    height=600,
    width=800,
    xaxis_title="Ngày",
    yaxis_title="Hiệu suất (%)",
)
fig1.show()

0.6873782284488827


In [27]:
# Linear trend restricted to the days whose month is before June.
df2 = df1[df1['month'] < 6]
print(df2)
x2 = df2['ngay']
y2 = df2['Hieu suat']

# Keep the coefficients so the annotation can be generated from the fit
# instead of being typed in by hand.
slope2, intercept2 = np.polyfit(x2, y2, 1)
mymodel2 = np.poly1d([slope2, intercept2])

# Evaluate the line over the span of days present in this subset.
myline2 = np.linspace(x2.min(), x2.max(), 100)

fig2 = go.Figure()
fig2.add_trace(go.Scatter(
    x=x2,
    y=y2,
    name="Hiệu suất"))
fig2.add_trace(go.Scatter(
    x=myline2,
    y=mymodel2(myline2),
    name="Hiệu suất tuyến tính tháng 3,4,5"))
dpi = 200  # not used by this cell; kept in case another cell reads it
print(r2_score(y2, mymodel2(x2)))

# 'Hieu suat' is a percentage, so the y axis is labelled in % (not "Kw").
fig2.update_layout(
    yaxis_range=[60, 90],
    dragmode='zoom',
    width=800,
    height=600,
    xaxis_title="Ngày",
    yaxis_title="Hiệu suất (%)",
)
# Equation text in the form y=<intercept><signed slope>x, four decimals.
fig2.add_annotation(
    text=f"y={intercept2:.4f}{slope2:+.4f}x",
    x=62.22222,
    y=78.28785,
    font=dict(color="red", size=14),
)
fig2.show()

    month        date  ngay  Hieu suat
0       3  2022-03-22     0  82.665928
1       3  2022-03-23     1  82.909011
2       3  2022-03-24     2  82.861872
3       3  2022-03-25     3  82.627341
4       3  2022-03-26     4  82.468742
..    ...         ...   ...        ...
65      5  2022-05-27    66  77.857409
66      5  2022-05-28    67  77.778722
67      5  2022-05-29    68  78.592994
68      5  2022-05-30    69  77.589288
69      5  2022-05-31    70  78.590612

[70 rows x 4 columns]
0.8547889915704935


In [28]:
# Linear trend restricted to the days whose month is June or later.
df3 = df1[df1['month'] >= 6]
print(df3)
x3 = df3['ngay']
y3 = df3['Hieu suat']

# Keep the coefficients so the annotation always describes the plotted line.
slope3, intercept3 = np.polyfit(x3, y3, 1)
mymodel3 = np.poly1d([slope3, intercept3])

# Evaluate the line over the span of days present in this subset.
myline3 = np.linspace(x3.min(), x3.max(), 100)

fig3 = go.Figure()
fig3.add_trace(go.Scatter(
    x=x3,
    y=y3,
    name="Hiệu suất"))
fig3.add_trace(go.Scatter(
    x=myline3,
    y=mymodel3(myline3),
    name="Hiệu suất tuyến tính tháng 6,7,8"))
print(r2_score(y3, mymodel3(x3)))
fig3.update_layout(
    yaxis_range=[60, 90],
    height=600,
    width=800,
    xaxis_title="Ngày",
    yaxis_title="Hiệu suất (%)",
)
# The equation is formatted from the fitted coefficients, expressed in the
# same x coordinate ('ngay') as the axis. The literal used before,
# "y=78.5794-0.0096x", evaluates to about 77.25 at the anchor x=138.0707 and
# therefore did not describe the line through the anchor point (y=77.93446).
fig3.add_annotation(
    text=f"y={intercept3:.4f}{slope3:+.4f}x",
    x=138.0707,
    y=77.93446,
    font=dict(color="red", size=14),
)
fig3.show()

     month        date  ngay  Hieu suat
70       6  2022-06-01    71  78.442606
71       6  2022-06-02    72  78.279127
72       6  2022-06-03    73  78.072116
73       6  2022-06-04    74  78.970150
74       6  2022-06-05    75  78.554528
..     ...         ...   ...        ...
149      8  2022-08-19   150  77.732727
150      8  2022-08-20   151  77.724142
151      8  2022-08-21   152  77.635580
152      8  2022-08-22   153  77.663887
153      8  2022-08-23   154  77.741234

[84 rows x 4 columns]
0.273005785371632


In [29]:
# Intraday view of a single day (ngay == 29).
# NOTE: this rebinds `df1`, which earlier held the daily-mean table.
# .copy() makes the selection an independent frame: later code adds columns to
# df1, and doing that on a boolean-mask slice of `df` raises
# SettingWithCopyWarning.
df1 = df[df['ngay'] == 29].copy()

# Input power and efficiency of that day against the timestamp index.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=df1.index,
    y=df1['Total input power(kW)'],
    name="Sản lượng"))
fig.add_trace(go.Scatter(
    x=df1.index,
    y=df1['Hieu suat'],
    name="Hiệu suất"))
fig.update_traces(showlegend=True)
fig.update_xaxes(rangeslider_visible=True)
fig.show()

In [30]:
# 'ts': index of the 5-minute slot within the day (minutes since midnight / 5).
# 'tg': the same counter shifted so that the first row of df1 is slot 0.
# Both are computed from df1's own index (no need to build them for every row
# of `df` and align back), the first value is taken positionally from the
# Index instead of via the deprecated Series[int] fallback, and .assign()
# returns a new frame so nothing is written into a slice of `df`.
slot_of_day = (df1.index.hour * 60 + df1.index.minute) / 5
df1 = df1.assign(
    ts=np.asarray(slot_of_day),
    tg=np.asarray(slot_of_day - slot_of_day[0]),
)
df1




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0_level_0,Unnamed: 0.1,Unnamed: 0,Site Name,Management Domain,ManageObject_x,Ambient temperature(℃),PV Temperature(℃),Wind speed(m/s),Wind direction(°),Daily irradiation(MJ/㎡),...,hourofweek,hourofmonth,date,date_ordinal,ngay,Tcell,Pm,Hieu suat,ts,tg
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-04-20 06:00:00,7147,3015,VNM - Đà Nẵng - Nhà Máy,/BCG Energy,1020C0058922/EM001020C0058922,23.0,23.9,3.0,,0.020,...,54,462,2022-04-20,738265,29,23.101250,2460.780133,71.359484,72.0,0.0
2022-04-20 06:05:00,7148,3016,VNM - Đà Nẵng - Nhà Máy,/BCG Energy,1020C0058922/EM001020C0058922,23.3,24.2,1.0,,0.026,...,54,462,2022-04-20,738265,29,23.420625,2927.492494,71.904540,73.0,1.0
2022-04-20 06:10:00,7149,3017,VNM - Đà Nẵng - Nhà Máy,/BCG Energy,1020C0058922/EM001020C0058922,23.3,24.2,0.8,,0.032,...,54,462,2022-04-20,738265,29,23.436875,3321.628524,73.638578,74.0,2.0
2022-04-20 06:15:00,7150,3018,VNM - Đà Nẵng - Nhà Máy,/BCG Energy,1020C0058922/EM001020C0058922,23.3,24.4,1.4,,0.041,...,54,462,2022-04-20,738265,29,23.537500,5760.964762,73.720291,75.0,3.0
2022-04-20 06:20:00,7151,3019,VNM - Đà Nẵng - Nhà Máy,/BCG Energy,1020C0058922/EM001020C0058922,23.3,24.5,1.5,,0.052,...,54,462,2022-04-20,738265,29,23.550000,6063.834554,74.919590,76.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-04-20 17:20:00,7283,3151,VNM - Đà Nẵng - Nhà Máy,/BCG Energy,1020C0058922/EM001020C0058922,25.4,26.5,1.0,,9.599,...,65,473,2022-04-20,738265,29,25.688750,6936.759373,82.963812,208.0,136.0
2022-04-20 17:25:00,7284,3152,VNM - Đà Nẵng - Nhà Máy,/BCG Energy,1020C0058922/EM001020C0058922,25.2,26.1,2.4,,9.611,...,65,473,2022-04-20,738265,29,25.405000,4931.106765,78.238014,209.0,137.0
2022-04-20 17:30:00,7285,3153,VNM - Đà Nẵng - Nhà Máy,/BCG Energy,1020C0058922/EM001020C0058922,25.1,25.9,2.5,,9.619,...,65,473,2022-04-20,738265,29,25.264375,3956.412379,77.747204,210.0,138.0
2022-04-20 17:35:00,7286,3154,VNM - Đà Nẵng - Nhà Máy,/BCG Energy,1020C0058922/EM001020C0058922,25.0,25.6,3.1,,9.626,...,65,473,2022-04-20,738265,29,25.114375,2754.801381,73.580622,211.0,139.0


In [31]:
# Degree-10 polynomial fit of the intraday efficiency against the slot counter.
# NOTE(review): a degree-10 fit on raw x values in the low hundreds may be
# poorly conditioned; numpy.polynomial.Polynomial.fit rescales the domain and
# could be considered if the fit looks unstable.
x = df1['tg']
y = df1['Hieu suat']
mymodel = np.poly1d(np.polyfit(x, y, 10))

# Evaluate the fit over the actual range of x. The row count (len(df1) - 2)
# is not the largest slot value, so using it as the upper bound made the
# fitted curve stop before the last measurements.
myline = np.linspace(x.min(), x.max(), 100)

# Measured efficiency, fitted curve and input power on one figure.
fig1 = go.Figure()
fig1.add_trace(go.Scatter(
    x=x,
    y=y,
    name="Hiệu suất"))
fig1.add_trace(go.Scatter(
    x=myline,
    y=mymodel(myline),
    name="Hiệu suất khớp"))
fig1.add_trace(go.Scatter(
    x=x,
    y=df1['Total input power(kW)'],
    name="Sản lượng"))

In [32]:
# Rebuild the intraday figure from names already defined in the kernel
# (x, y, myline, mymodel, df1): measured efficiency, the fitted polynomial
# and the total input power. The figure is the cell's displayed value.
fig1 = go.Figure(data=[
    go.Scatter(x=x, y=y, name="Hiệu suất"),
    go.Scatter(x=myline, y=mymodel(myline), name="Hiệu suất khớp"),
    go.Scatter(x=x, y=df1['Total input power(kW)'], name="Sản lượng"),
])
fig1