In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("2017_2019.csv")
df.drop(['Unnamed: 18'],inplace=True,axis=1)

In [3]:
dates = set()
for i,data in df.iterrows():
    dates.add(pd.to_datetime( 
                             str(data['Year'])[:-2]+'-'+str(data['Month'])[:-2] + '-' + str(data['Day'])[:-2],
                             format="%Y-%m-%d"
                             ))

In [4]:
def get_day(df:pd.DataFrame,day:pd.Timestamp) -> pd.DataFrame:
    
    return df.loc[ (df ['Year'] == day.year) & 
                  (df['Day'] == day.day) &
                  (df['Month'] == day.month)]
    
def get_records_night(df:pd.DataFrame,day:pd.Timestamp):
    records_beafore_df = get_n_days_beafore(df,day,1)
    records_beafore_df = records_beafore_df.loc[(records_beafore_df.Hour > 12)  & (records_beafore_df["Clearsky DNI"] != 0)]
    
    records_df = get_day(df,day)
    records_df = records_df.loc[(records_df.Hour < 12) &( records_df.DNI == 0) &(records_df["Clearsky DNI"] != 0)]
    
    return(pd.concat([records_beafore_df,records_df]))
    
    
  
def get_n_days_beafore(df:pd.DataFrame,day:pd.Timestamp,n:int):
    day_beafore = day - pd.offsets.Day(n)
    return get_day(df,day_beafore)

def get_working_day(df:pd.DataFrame,day:pd.Timestamp):
    records = get_day(df,day)
    return records.loc[(records.DNI != 0) | (records["Clearsky DNI"] != 0)]

    
    

In [5]:
_ = pd.Series( list(dates)).sort_values()
get_records_night(df,_.iloc[2])

Unnamed: 0,Year,Month,Day,Hour,Minute,Temperature,Clearsky DHI,Clearsky DNI,Clearsky GHI,Dew Point,DHI,DNI,GHI,Relative Humidity,Solar Zenith Angle,Surface Albedo,Pressure,Wind Speed
148,2017,1,2,13,0,7.5,69,738,296,-1.7,69,738,296,52.3,72.09,0.13,1000,0.9
149,2017,1,2,13,15,7.2,65,702,258,-1.7,65,702,258,53.38,74.03,0.13,1000,0.8
150,2017,1,2,13,30,6.8,60,658,219,-1.7,60,658,219,54.86,76.07,0.13,1000,0.8
151,2017,1,2,13,45,6.5,54,610,179,-2.4,54,610,179,52.89,78.21,0.13,1000,0.8
152,2017,1,2,14,0,6.1,48,542,138,-2.4,48,542,138,54.36,80.44,0.13,1000,0.7
153,2017,1,2,14,15,6.0,40,455,98,-2.4,40,455,98,54.74,82.74,0.13,1000,0.7
154,2017,1,2,14,30,5.9,31,342,60,-2.4,31,342,60,55.12,85.11,0.13,1000,0.6
155,2017,1,2,14,45,5.7,19,199,28,-3.1,19,199,28,52.99,87.49,0.13,1000,0.6
215,2017,1,3,5,45,3.8,19,155,25,0.7,9,0,9,80.07,87.59,0.13,1000,3.0
216,2017,1,3,6,0,4.3,31,285,55,0.7,20,0,20,77.31,85.21,0.13,1000,3.1


In [143]:
df['Dew Point']

0        -2.6
1        -2.6
2        -2.6
3        -2.7
4        -2.7
         ... 
105115    4.0
105116    3.9
105117    3.9
105118    3.9
105119    3.9
Name: Dew Point, Length: 105120, dtype: float64

In [6]:
df[df.isna().any(axis=1)]

Unnamed: 0,Year,Month,Day,Hour,Minute,Temperature,Clearsky DHI,Clearsky DNI,Clearsky GHI,Dew Point,DHI,DNI,GHI,Relative Humidity,Solar Zenith Angle,Surface Albedo,Pressure,Wind Speed


In [163]:
records_df =pd.DataFrame()
for idx,date in enumerate( pd.Series( list(dates)).sort_values().values):
    
    date = pd.to_datetime(date)
    day = get_working_day(df,date)
    record = pd.DataFrame([{'date':date}])
    
    record['month'] = date.month
    record['len_day'] = len(day)
    record['temp_mean'] = day.Temperature.mean()
    record['press_mean'] = day.Pressure.mean()
    record['wind_mean'] = day["Wind Speed"].mean()
    record['Dew_Point_mean'] = day['Dew Point'].mean()
    
    
    record['temp_max'] = day.Temperature.max()
    record['press_max'] = day.Pressure.max()
    record['wind_max'] = day["Wind Speed"].max()
    
    record['temp_min'] = day.Temperature.min()
    record['press_min'] = day.Pressure.min()
    record['wind_min'] = day["Wind Speed"].min()
    
    night = get_records_night(df,date)
    
    if idx == 0:
        record['len_night'] = 0
    else:
        record['len_night'] = len(night)
        record['night_temp_mean'] = night.Temperature.mean()
        record['night_press_mean'] = night.Pressure.mean()
        record['night_wind_mean'] = night["Wind Speed"].mean()
        
        record['night_temp_max'] = night.Temperature.max()
        record['night_press_max'] = night.Pressure.max()
        record['night_wind_max'] = night["Wind Speed"].max()
        
        record['night_temp_min'] = night.Temperature.min()
        record['night_press_min'] = night.Pressure.min()
        record['night_wind_min'] = night["Wind Speed"].min()
    
    record['Y'] = (day["Clearsky DNI"].sum() + day['DNI'].sum()) / record['len_day']
    records_df = pd.concat([records_df,record])
records_df

Unnamed: 0,date,month,len_day,temp_mean,press_mean,wind_mean,Dew_Point_mean,temp_max,press_max,wind_max,...,Y,night_temp_mean,night_press_mean,night_wind_mean,night_temp_max,night_press_max,night_wind_max,night_temp_min,night_press_min,night_wind_min
0,2017-01-01,1,37,4.910811,1001.648649,1.878378,-2.297297,7.3,1003,2.7,...,1397.243243,,,,,,,,,
0,2017-01-02,1,37,6.035135,1000.918919,1.335135,-2.008108,8.2,1002,2.0,...,1391.621622,3.863636,1000.727273,1.709091,6.7,1002.0,2.0,1.0,1000.0,1.6
0,2017-01-03,1,37,8.983784,999.756757,2.970270,3.662162,11.5,1001,3.6,...,1068.054054,5.828571,1000.285714,1.835714,7.5,1001.0,3.6,3.8,1000.0,0.6
0,2017-01-04,1,37,10.167568,996.621622,3.432432,6.948649,11.8,998,3.9,...,685.891892,8.826667,998.400000,2.533333,11.8,999.0,3.8,7.4,996.0,1.7
0,2017-01-05,1,37,14.021622,988.594595,6.070270,10.881081,14.7,990,6.5,...,538.972973,12.834615,990.846154,5.188462,14.7,995.0,6.5,9.4,988.0,2.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,2019-12-27,12,37,8.959459,996.324324,3.143243,2.900000,11.0,997,4.0,...,1206.540541,9.762500,989.875000,3.962500,11.0,991.0,4.4,8.6,989.0,3.7
0,2019-12-28,12,37,8.727027,991.810811,1.532432,2.745946,10.0,993,2.2,...,595.216216,8.468182,993.545455,1.740909,10.1,996.0,2.2,5.6,991.0,1.0
0,2019-12-29,12,37,7.818919,992.486486,4.994595,2.291892,9.9,993,5.6,...,636.756757,6.684211,992.157895,3.405263,9.4,993.0,5.5,3.5,991.0,1.2
0,2019-12-30,12,37,7.432432,987.621622,6.756757,4.213514,8.6,990,7.9,...,566.675676,7.439394,988.303030,6.748485,9.2,993.0,7.9,4.5,987.0,4.1


In [164]:
records_df[records_df.isna().any(axis=1)]

Unnamed: 0,date,month,len_day,temp_mean,press_mean,wind_mean,Dew_Point_mean,temp_max,press_max,wind_max,...,Y,night_temp_mean,night_press_mean,night_wind_mean,night_temp_max,night_press_max,night_wind_max,night_temp_min,night_press_min,night_wind_min
0,2017-01-01,1,37,4.910811,1001.648649,1.878378,-2.297297,7.3,1003,2.7,...,1397.243243,,,,,,,,,


In [165]:
def get_Data_n_records(df,num):
    X = []
    Y = []
    for i in range(len(df)):
        if i < num+1:
            pass
        else:
            X.append(df.iloc[i-num-1:i-1].values)
            Y.append(df.iloc[i].Y)
    
    
    return np.array(X),np.array(Y)

X,Y = get_Data_n_records(records_df,3)

In [166]:
import pandas as pd
import numpy as np 
import keras as kr
import sklearn as skl
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import tensorflow as tf
from sklearn.preprocessing import normalize
from sklearn.metrics import recall_score,r2_score

In [167]:
records_df.drop(["date"], inplace= True,axis= 1)
records_df.dropna(inplace=True)

In [168]:
records_df[records_df.isna().any(axis=1)]

Unnamed: 0,month,len_day,temp_mean,press_mean,wind_mean,Dew_Point_mean,temp_max,press_max,wind_max,temp_min,...,Y,night_temp_mean,night_press_mean,night_wind_mean,night_temp_max,night_press_max,night_wind_max,night_temp_min,night_press_min,night_wind_min


In [170]:
X.shape

(1091, 3, 24)

In [179]:
num = 14
X,Y = get_Data_n_records(records_df,num)
scaler = skl.preprocessing.MinMaxScaler(feature_range=(0, 1))
scaler.fit(records_df)


Y = pd.Series(Y)
Y=(Y-Y.min())/(Y.max()-Y.min())
x_train,x_test,y_train,y_test = train_test_split(X,Y)
x_train,x_val,y_train,y_val = train_test_split(x_train,y_train)

# regr = RandomForestRegressor(random_state=0)
# regr.fit(x_train, y_train)

model = kr.Sequential()
model.add(kr.layers.LSTM(5,input_shape=(num, X.shape[2])))
model.add(kr.layers.Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam',metrics=tf.keras.metrics.MeanAbsoluteError())
hist = model.fit(x_train, y_train, epochs=200, batch_size=64, validation_data=(x_val, y_val))

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [178]:
r2_score(y_test.values,model.predict(x_test).reshape(-1,1))




-0.000354961724970293