In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("2017_2019.csv")
df.drop(['Unnamed: 18'],inplace=True,axis=1)

In [3]:
dates = set()
for i,data in df.iterrows():
    dates.add(pd.to_datetime( 
                             str(data['Year'])[:-2]+'-'+str(data['Month'])[:-2] + '-' + str(data['Day'])[:-2],
                             format="%Y-%m-%d"
                             ))

In [4]:
def get_day(df:pd.DataFrame,day:pd.Timestamp) -> pd.DataFrame:
    
    return df.loc[ (df ['Year'] == day.year) & 
                  (df['Day'] == day.day) &
                  (df['Month'] == day.month)]
    
def get_records_night(df:pd.DataFrame,day:pd.Timestamp):
    records_beafore_df = get_n_days_beafore(df,day,1)
    records_beafore_df = records_beafore_df.loc[(records_beafore_df.Hour > 12)  & (records_beafore_df["Clearsky DNI"] != 0)]
    
    records_df = get_day(df,day)
    records_df = records_df.loc[(records_df.Hour < 12) &( records_df.DNI == 0) &(records_df["Clearsky DNI"] != 0)]
    
    return(pd.concat([records_beafore_df,records_df]))
    
    
  
def get_n_days_beafore(df:pd.DataFrame,day:pd.Timestamp,n:int):
    day_beafore = day - pd.offsets.Day(n)
    return get_day(df,day_beafore)

def get_working_day(df:pd.DataFrame,day:pd.Timestamp):
    records = get_day(df,day)
    return records.loc[(records.DNI != 0) | (records["Clearsky DNI"] != 0)]

    
    

In [5]:
_ = pd.Series( list(dates)).sort_values()
get_records_night(df,_.iloc[2])

Unnamed: 0,Year,Month,Day,Hour,Minute,Temperature,Clearsky DHI,Clearsky DNI,Clearsky GHI,Dew Point,DHI,DNI,GHI,Relative Humidity,Solar Zenith Angle,Surface Albedo,Pressure,Wind Speed
148,2017,1,2,13,0,7.5,69,738,296,-1.7,69,738,296,52.3,72.09,0.13,1000,0.9
149,2017,1,2,13,15,7.2,65,702,258,-1.7,65,702,258,53.38,74.03,0.13,1000,0.8
150,2017,1,2,13,30,6.8,60,658,219,-1.7,60,658,219,54.86,76.07,0.13,1000,0.8
151,2017,1,2,13,45,6.5,54,610,179,-2.4,54,610,179,52.89,78.21,0.13,1000,0.8
152,2017,1,2,14,0,6.1,48,542,138,-2.4,48,542,138,54.36,80.44,0.13,1000,0.7
153,2017,1,2,14,15,6.0,40,455,98,-2.4,40,455,98,54.74,82.74,0.13,1000,0.7
154,2017,1,2,14,30,5.9,31,342,60,-2.4,31,342,60,55.12,85.11,0.13,1000,0.6
155,2017,1,2,14,45,5.7,19,199,28,-3.1,19,199,28,52.99,87.49,0.13,1000,0.6
215,2017,1,3,5,45,3.8,19,155,25,0.7,9,0,9,80.07,87.59,0.13,1000,3.0
216,2017,1,3,6,0,4.3,31,285,55,0.7,20,0,20,77.31,85.21,0.13,1000,3.1


In [6]:
df[df.isna().any(axis=1)]

Unnamed: 0,Year,Month,Day,Hour,Minute,Temperature,Clearsky DHI,Clearsky DNI,Clearsky GHI,Dew Point,DHI,DNI,GHI,Relative Humidity,Solar Zenith Angle,Surface Albedo,Pressure,Wind Speed


In [7]:
records_df =pd.DataFrame()
for idx,date in enumerate( pd.Series( list(dates)).sort_values().values):
    
    date = pd.to_datetime(date)
    day = get_working_day(df,date)
    record = pd.DataFrame([{'date':date}])
    
    record['month'] = date.month
    record['len_day'] = len(day)
    record['temp_mean'] = day.Temperature.mean()
    record['press_mean'] = day.Pressure.mean()
    record['wind_mean'] = day["Wind Speed"].mean()
    
    
    night = get_records_night(df,date)
    
    if idx == 0:
        record['len_night'] = 0
    else:
        record['len_night'] = len(night)
        record['night_temp_mean'] = night.Temperature.mean()
        record['night_press_mean'] = night.Pressure.mean()
        record['night_wind_mean'] = night["Wind Speed"].mean()
    
    record['Y'] = (day["Clearsky DNI"].sum() + day['DNI'].sum()) / record['len_day']
    records_df = pd.concat([records_df,record])
records_df

Unnamed: 0,date,month,len_day,temp_mean,press_mean,wind_mean,len_night,Y,night_temp_mean,night_press_mean,night_wind_mean
0,2017-01-01,1,37,4.910811,1001.648649,1.878378,0,1397.243243,,,
0,2017-01-02,1,37,6.035135,1000.918919,1.335135,11,1391.621622,3.863636,1000.727273,1.709091
0,2017-01-03,1,37,8.983784,999.756757,2.970270,14,1068.054054,5.828571,1000.285714,1.835714
0,2017-01-04,1,37,10.167568,996.621622,3.432432,15,685.891892,8.826667,998.400000,2.533333
0,2017-01-05,1,37,14.021622,988.594595,6.070270,26,538.972973,12.834615,990.846154,5.188462
...,...,...,...,...,...,...,...,...,...,...,...
0,2019-12-27,12,37,8.959459,996.324324,3.143243,8,1206.540541,9.762500,989.875000,3.962500
0,2019-12-28,12,37,8.727027,991.810811,1.532432,22,595.216216,8.468182,993.545455,1.740909
0,2019-12-29,12,37,7.818919,992.486486,4.994595,19,636.756757,6.684211,992.157895,3.405263
0,2019-12-30,12,37,7.432432,987.621622,6.756757,33,566.675676,7.439394,988.303030,6.748485


In [8]:
records_df[records_df.isna().any(axis=1)]

Unnamed: 0,date,month,len_day,temp_mean,press_mean,wind_mean,len_night,Y,night_temp_mean,night_press_mean,night_wind_mean
0,2017-01-01,1,37,4.910811,1001.648649,1.878378,0,1397.243243,,,


In [145]:
records_df.dropna(inplace=True)
records_df.drop(["date"], inplace= True,axis= 1)

In [146]:
def get_Data_n_records(df, num, flatten_x=False):
    X = []
    Y = []
    for i in range(len(df)):
        if i < num+1:
            pass
        else:
            X.append(df.iloc[i-num-1:i-1].values)
            Y.append(df.iloc[i].Y)
    
    if flatten_x:  # Option to return one long row containing the X data
        # long_x = []
        X = np.array(X)
        first, second, third = X.shape
        return  X.reshape(first, second * third), np.array(Y)

    
    return np.array(X), np.array(Y)

X,Y = get_Data_n_records(records_df, 3)

In [151]:
TESTED_RECORD = 1

X, _ = get_Data_n_records(records_df, TESTED_RECORD)
X = np.array(X)
first, second, third = X.shape
X = X.reshape(first, second * third)

X_flattened, _ = get_Data_n_records(records_df, TESTED_RECORD, flatten_x=True)
assert X.shape == X_flattened.shape

for i in range(len(X)):
    if sum([X[i, x] != X_flattened[i, x] for x in range(X.shape[1])]) >= 1:
        control = [X[i, x] != X_flattened[i, x] for x in range(X.shape[1])]
assert (X == X_flattened).all()


assert pd.Series(X_flattened[1]).sum() == pd.Series(X[1]).sum()

(1092, 10)
(1092, 10)


In [133]:
f1 = pd.DataFrame(X_flattened)#[:, control])
f1.head()

(1092, 11)
(1092, 11)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,2017-01-02,1,37,6.035135,1000.918919,1.335135,11,1391.621622,3.863636,1000.727273,1.709091
1,2017-01-03,1,37,8.983784,999.756757,2.97027,14,1068.054054,5.828571,1000.285714,1.835714
2,2017-01-04,1,37,10.167568,996.621622,3.432432,15,685.891892,8.826667,998.4,2.533333
3,2017-01-05,1,37,14.021622,988.594595,6.07027,26,538.972973,12.834615,990.846154,5.188462
4,2017-01-06,1,37,13.254054,983.567568,9.543243,33,474.27027,13.8,985.090909,9.306061


In [134]:
f2 = pd.DataFrame(X)
f2.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,2017-01-02,1,37,6.035135,1000.918919,1.335135,11,1391.621622,3.863636,1000.727273,1.709091
1,2017-01-03,1,37,8.983784,999.756757,2.97027,14,1068.054054,5.828571,1000.285714,1.835714
2,2017-01-04,1,37,10.167568,996.621622,3.432432,15,685.891892,8.826667,998.4,2.533333
3,2017-01-05,1,37,14.021622,988.594595,6.07027,26,538.972973,12.834615,990.846154,5.188462
4,2017-01-06,1,37,13.254054,983.567568,9.543243,33,474.27027,13.8,985.090909,9.306061


In [149]:
# import seaborn as sns
# sns.heatmap(f2 == f1)
# (f2 == f1)

In [13]:
!pip install keras

Collecting keras
  Downloading keras-2.11.0-py2.py3-none-any.whl (1.7 MB)
     ---------------------------------------- 1.7/1.7 MB 6.3 MB/s eta 0:00:00
Installing collected packages: keras
Successfully installed keras-2.11.0


In [None]:
!conda install tensorflow

In [14]:
import pandas as pd
import numpy as np 
import keras as kr
import sklearn as skl
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import tensorflow as tf

ModuleNotFoundError: No module named 'tensorflow'

In [None]:
X.shape

In [None]:
X,Y = get_Data_n_records(records_df,3)


x_train,x_test,y_train,y_test = train_test_split(X,Y)

# regr = RandomForestRegressor(random_state=0)
# regr.fit(x_train, y_train)

mlp_model = kr.Sequential([
    kr.layers.Flatten(input_shape = (3,10)),
    kr.layers.Dense(128, activation = 'relu'),
    kr.layers.BatchNormalization(),
    kr.layers.Dropout(0.1),
    kr.layers.Dense(64, activation='relu'),
    kr.layers.BatchNormalization(),
    kr.layers.Dropout(0.1),
    kr.layers.Dense(1, activation = 'sigmoid')
])

mlp_model.compile()
history = mlp_model.fit(x_train, y_train, batch_size=128, epochs=100, validation_data=(x_test, y_test))