# import

In [2]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np 
from sklearn import datasets
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

import warnings
warnings.filterwarnings('ignore')

# data

In [3]:
# Input files, given relative to the notebook's working directory.
# Forward slashes are used on purpose: Windows accepts them as path
# separators too, whereas a backslash is a separator on Windows only.
data_path = '../data/data_ready.csv'
traffic_path = '../data/traffic_data.xlsx'

In [4]:
# Main table: the first CSV column becomes the row index.
data = pd.read_csv(data_path, index_col=0)

# Traffic table: sheet 'udaljenosti' of the workbook, first column as index.
traffic = pd.read_excel(
    traffic_path,
    sheet_name='udaljenosti',
    index_col=0,
    engine='openpyxl',
)

# Functions for normalising the traffic data

In [5]:
def traffic_norm(x):
    """Return column ``x`` of the global ``traffic`` table scaled by its maximum.

    The row labelled ``x`` is removed first. The remaining values of column
    ``x`` are divided by their maximum (taken over all remaining rows), and
    only then are the rows whose label contains ``'_80'`` selected.

    Parameters
    ----------
    x : label
        Must exist both as a row label and as a column label of ``traffic``.
        The notebook passes entries of ``sites``.

    Returns
    -------
    pandas.DataFrame
        A single column named ``'norm'``, restricted to the ``'_80'`` rows
        and sorted by row label.
    """
    site_column = traffic.drop(index=x)[[x]]
    scaled = (site_column / site_column.max()).rename(columns={x: 'norm'})
    return scaled.filter(like='_80', axis=0).sort_index()

In [6]:
def traffic_s_norm(data,y):
    """Scale every ``'_80'`` column of ``data`` by the matching ``y.norm`` value.

    The ``'_80'`` columns are sorted by name and paired *positionally* with
    the values of ``y.norm``; labels are not compared. Pairing stops at the
    shorter of the two sequences, so surplus columns are returned unscaled.

    Parameters
    ----------
    data : pandas.DataFrame
        Source table; it is not modified.
    y : pandas.DataFrame
        Must expose a ``norm`` column (e.g. the result of ``traffic_norm``).

    Returns
    -------
    pandas.DataFrame
        The sorted ``'_80'`` columns of ``data`` after scaling.
    """
    counters = data.filter(like='_80', axis=1).sort_index(axis=1)
    pairs = list(zip(counters.columns, y.norm))
    for column_name, weight in pairs:
        counters[column_name] = counters[column_name] * weight
    return counters

# Function for assembling the data

In [7]:
def data_sites(site_1,pollutant_1):
    """Build the feature table and target column for one site.

    Reads the global ``data`` table and combines, in this order: every
    column whose name contains ``site_1``, the weather columns and the
    calendar columns. The pollutant column is split off as the target.

    Parameters
    ----------
    site_1 : str
        Substring used to select the site's columns.
    pollutant_1 : str
        Name of the target column in ``data``.

    Returns
    -------
    (X, Y) : tuple of pandas.DataFrame
        ``X`` holds the features, ``Y`` the single pollutant column.
    """
    weather_columns = [
        'Cloud_Cover_Mean',
        'Temperature_Air_2m_Max_Day_Time',
        'Temperature_Air_2m_Min_Night_Time',
        'Wind_Speed_10m_Mean',
    ]
    calendar_columns = [
        'year', 'dayofyear',
        'month_Apr', 'month_Aug', 'month_Dec', 'month_Feb', 'month_Jan',
        'month_Jul', 'month_Jun', 'month_Mar', 'month_May', 'month_Nov',
        'month_Oct', 'month_Sep',
        'weekday_Friday', 'weekday_Monday', 'weekday_Saturday',
        'weekday_Sunday', 'weekday_Thursday', 'weekday_Tuesday',
        'weekday_Wednesday',
        'season_fall', 'season_spring', 'season_summer', 'season_winter',
        'holiday', 'holiday_school',
    ]

    frame = data.copy()
    combined = pd.concat(
        [
            frame.filter(like=site_1, axis=1),
            frame[[pollutant_1]],
            frame[weather_columns],
            frame[calendar_columns],
        ],
        axis=1,
    )
    features = combined.drop(columns=pollutant_1)
    target = combined[[pollutant_1]]
    return features, target

# Function without traffic features

In [8]:
def data_sites_bez_traffic(site_1,pollutant_1):
    """Build the feature table and target for one site, without traffic columns.

    Reads the global ``data`` table and combines every column whose name
    contains ``site_1`` with the weather and calendar columns. Columns whose
    name contains ``'_80'`` (the traffic columns) are then removed from the
    features, together with the pollutant column, which becomes the target.

    Parameters
    ----------
    site_1 : str
        Substring used to select the site's columns.
    pollutant_1 : str
        Name of the target column in ``data``.

    Returns
    -------
    (X, Y) : tuple of pandas.DataFrame
        ``X`` holds the features (no ``'_80'`` columns, no pollutant),
        ``Y`` the single pollutant column.

    Raises
    ------
    KeyError
        If ``pollutant_1`` or one of the weather/calendar columns is missing
        from ``data``.
    """
    weather_columns = [
        'Cloud_Cover_Mean',
        'Temperature_Air_2m_Max_Day_Time',
        'Temperature_Air_2m_Min_Night_Time',
        'Wind_Speed_10m_Mean',
    ]
    calendar_columns = [
        'year', 'dayofyear',
        'month_Apr', 'month_Aug', 'month_Dec', 'month_Feb', 'month_Jan',
        'month_Jul', 'month_Jun', 'month_Mar', 'month_May', 'month_Nov',
        'month_Oct', 'month_Sep',
        'weekday_Friday', 'weekday_Monday', 'weekday_Saturday',
        'weekday_Sunday', 'weekday_Thursday', 'weekday_Tuesday',
        'weekday_Wednesday',
        'season_fall', 'season_spring', 'season_summer', 'season_winter',
        'holiday', 'holiday_school',
    ]

    df = data.copy()
    d = pd.concat(
        [
            df.filter(like=site_1, axis=1),
            df[[pollutant_1]],
            df[weather_columns],
            df[calendar_columns],
        ],
        axis=1,
    )

    # Both drops must be chained on the same frame. Dropping from `d` twice
    # (as an earlier version did) discards the first result and leaves the
    # traffic columns in X.
    traffic_columns = d.filter(like='_80', axis=1).columns
    X = d.drop(columns=traffic_columns)
    # errors='ignore': the pollutant is already gone if its own name happens
    # to contain '_80'.
    X = X.drop(columns=pollutant_1, errors='ignore')
    Y = d[[pollutant_1]]
    return X,Y

# Function with normalised traffic

In [9]:
def data_sites_s_norm_traffic(site_1,pollutant_1,traffic_1):
    """Build features and target for one site, swapping in scaled traffic columns.

    Works like the plain feature builder, except that the ``'_80'`` columns
    taken from the global ``data`` table are removed and the columns of
    ``traffic_1`` whose name contains ``site_1`` are appended in their place.

    Parameters
    ----------
    site_1 : str
        Substring used to select columns in both ``data`` and ``traffic_1``.
    pollutant_1 : str
        Name of the target column in ``data``.
    traffic_1 : pandas.DataFrame
        Table of replacement traffic columns; it is not modified.

    Returns
    -------
    (X, Y) : tuple of pandas.DataFrame
        ``X`` holds the features, ``Y`` the single pollutant column.
    """
    weather_columns = [
        'Cloud_Cover_Mean',
        'Temperature_Air_2m_Max_Day_Time',
        'Temperature_Air_2m_Min_Night_Time',
        'Wind_Speed_10m_Mean',
    ]
    calendar_columns = [
        'year', 'dayofyear',
        'month_Apr', 'month_Aug', 'month_Dec', 'month_Feb', 'month_Jan',
        'month_Jul', 'month_Jun', 'month_Mar', 'month_May', 'month_Nov',
        'month_Oct', 'month_Sep',
        'weekday_Friday', 'weekday_Monday', 'weekday_Saturday',
        'weekday_Sunday', 'weekday_Thursday', 'weekday_Tuesday',
        'weekday_Wednesday',
        'season_fall', 'season_spring', 'season_summer', 'season_winter',
        'holiday', 'holiday_school',
    ]

    frame = data.copy()
    site_traffic = traffic_1.copy().filter(like=site_1, axis=1)

    combined = pd.concat(
        [
            frame.filter(like=site_1, axis=1),
            frame[[pollutant_1]],
            frame[weather_columns],
            frame[calendar_columns],
        ],
        axis=1,
    )

    # Replace the '_80' columns of `data` with the supplied ones.
    raw_traffic_columns = combined.filter(like='_80', axis=1).columns
    without_raw_traffic = combined.drop(columns=raw_traffic_columns)
    features = pd.concat([without_raw_traffic, site_traffic], axis=1)
    features = features.drop(columns=pollutant_1)

    target = combined[[pollutant_1]]
    return features, target

# Function for preparing the train/test data

In [10]:
def prep_data(X,Y,train_end='2020-01-02',test_start='2020-01-03',test_end='2020-03-10',scaler=None):
    """Split ``X``/``Y`` by index label, scale the features and build tensors.

    Rows up to and including ``train_end`` form the training set; rows from
    ``test_start`` to ``test_end`` (both inclusive) form the test set. The
    scaler is fitted on the training features only and then applied to the
    test features.

    Parameters
    ----------
    X, Y : pandas.DataFrame
        Features and target sharing the same index. The index must support
        label slicing with the given bounds.
    train_end, test_start, test_end : label, optional
        Split boundaries. The defaults reproduce the split originally
        hard-coded in this notebook.
    scaler : object with ``fit_transform``/``transform``, optional
        Defaults to the notebook-level ``sc``. The scaler is (re)fitted in
        place on every call.

    Returns
    -------
    tuple
        ``(X_train_t, X_test_t, Y_train_t, y_test, X_train)``: the scaled
        train/test features and the train target as float tensors, the test
        target as an ``(n, 1)`` numpy array, and the scaled training
        features as returned by ``scaler.fit_transform``.

    Raises
    ------
    ValueError
        If either split selects no rows.
    """
    if scaler is None:
        scaler = sc  # notebook-level scaler defined in the hyperparameter section

    X_train = X.loc[:train_end]
    X_test = X.loc[test_start:test_end]
    if len(X_train) == 0 or len(X_test) == 0:
        raise ValueError(
            'prep_data: empty split '
            f'(train rows: {len(X_train)}, test rows: {len(X_test)})'
        )

    y_train = Y.loc[:train_end].values.reshape((-1, 1))
    y_test = Y.loc[test_start:test_end].values.reshape((-1, 1))

    # Fit on the training rows only so no test statistics leak into scaling.
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    X_train_t = torch.FloatTensor(X_train)
    Y_train_t = torch.FloatTensor(y_train).reshape(-1,1)
    X_test_t = torch.FloatTensor(X_test)
    return X_train_t,X_test_t,Y_train_t,y_test,X_train

# Model function

In [11]:
def model_torch(input_size,hidden_size,hidden_size_1,hidden_size_2,hidden_size_3):
    """Create a fully connected network with four hidden layers and one output.

    Each hidden ``Linear`` layer is followed by a ``ReLU``; the final
    ``Linear`` layer maps to a single output and has no activation.

    Parameters
    ----------
    input_size : int
        Number of input features.
    hidden_size, hidden_size_1, hidden_size_2, hidden_size_3 : int
        Widths of the four hidden layers, in order.

    Returns
    -------
    torch.nn.Sequential
        Nine modules: four (Linear, ReLU) pairs followed by the output Linear.
    """
    widths = [input_size, hidden_size, hidden_size_1, hidden_size_2, hidden_size_3]

    modules = []
    for fan_in, fan_out in zip(widths[:-1], widths[1:]):
        modules.append(torch.nn.Linear(fan_in, fan_out))
        modules.append(torch.nn.ReLU())
    # Output layer: single output unit, no activation.
    modules.append(torch.nn.Linear(widths[-1], 1))

    return torch.nn.Sequential(*modules)

# Training function

In [12]:
def training(model,epochs,X_tr,Y_tr):
    """Train ``model`` with full-batch gradient steps and return the loss history.

    The loss function and the optimizer are read from the notebook-level
    names ``loss_func`` and ``optimizer``; the optimizer must have been
    created over ``model.parameters()`` before this function is called.

    Parameters
    ----------
    model : callable torch module
        Network to train; it is updated in place through ``optimizer``.
    epochs : int
        Number of optimisation steps. Each step uses all of ``X_tr``.
    X_tr, Y_tr : torch.Tensor
        Training inputs and targets.

    Returns
    -------
    list of float
        Loss value recorded at every epoch.
    """
    error = []
    for _ in range(epochs):
        # Clear gradients first so nothing left over from earlier work is
        # accumulated into this step.
        optimizer.zero_grad()

        # Use the model passed in, not whatever a global name is bound to.
        y_pred = model(X_tr)
        loss = loss_func(y_pred, Y_tr)
        loss.backward()
        optimizer.step()

        error.append(loss.item())
    return error

# Sites and pollutants

In [13]:
# Site name fragments used to select each site's columns.
sites = [
    'Nord',
    'Sud',
    'West',
    'Ost',
    'DonBosco',
]

# Target column names per pollutant. The training loop pairs these with
# `sites` positionally via zip(), so the order must match the order of `sites`.
pollutant_O3 = ['N_O3', 'S_O3']
pollutant_PM10 = ['N_PM10K', 'S_PM10K', 'W_PM10K', 'O_PM10K', 'D_PM10K']
pollutant_NO2 = ['N_NO2', 'S_NO2', 'W_NO2', 'O_NO2', 'D_NO2']


# Hyperparameters

In [14]:
# Shared feature scaler used by prep_data(): it is fitted on the training
# rows and then reused to transform the test rows, so every prep_data() call
# refits this same object.
sc = StandardScaler()

In [15]:
# epochs = 4000
# hidden_size = 300
# hidden_size_1 = 150
# hidden_size_2 = 100
# hidden_size_3 = 80


# learning_rate = 0.01

# funkcije 

In [16]:
# data_sites(site_1,pollutant_1): all data
# data_sites_bez_traffic(site_1,pollutant_1): all data without traffic
# data_sites_s_norm_traffic(site_1,pollutant_1,traffic_1): all data with normalised traffic

# Loop to train on all data

In [17]:
# for site,pollutant in zip(sites,pollutant_NO2):
#     temp_traffic = traffic_norm(site) # normalizirani promet
#     norm_traffic = traffic_s_norm(data,temp_traffic) #normalizirani promet
#     X,Y = data_sites_bez_traffic(site,pollutant)
#     X_train_t,X_test_t,Y_train_t,y_test,X_train=prep_data(X,Y)
#     input_size = X_train.shape[1]
#     torch_model = model_torch(input_size,hidden_size,hidden_size_1,hidden_size_2,hidden_size_3)
#     loss_func = torch.nn.MSELoss() #mean square error as loss metric
#     optimizer = torch.optim.Adam(torch_model.parameters(), lr=learning_rate)
#     train_error = training(torch_model,epochs,X_train_t,Y_train_t)
# #     plt.plot(train_error)
# #     plt.ylabel('Loss')
# #     plt.title('Training Loss')
# #     plt.show()
#     ypredict = torch_model(X_test_t)
#     ypredict_np = ypredict.detach().numpy()
#     print(f'{site} {pollutant}')
#     print(f'R2_Score:', r2_score(y_test, ypredict_np))
#     print(f'MSE:', mean_squared_error(y_test, ypredict_np, squared=True))
    
#     plt.plot(y_test)#_inverse)
#     plt.plot(ypredict_np)#_inverse)
#     plt.show()
    

# Training loop


In [24]:
# Numbers of training epochs to try.
epohe = [2000, 3000, 5000]

# Hidden-layer widths, one list per hidden layer. The search loop zips the
# four lists together, so position k across them describes one architecture
# (the first is 10-8-4-2, the last 1000-500-300-150).
hidden_sizes = [10, 50, 100, 200, 300, 500, 1000]
hidden_sizes_1 = [8, 40, 80, 150, 200, 350, 500]
hidden_sizes_2 = [4, 30, 40, 100, 130, 200, 300]
hidden_sizes_3 = [2, 15, 20, 50, 80, 100, 150]

# Learning rates to try.
learning_rates = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5]

In [None]:
# Grid search: for every combination of epoch count, architecture and learning
# rate, train one fresh model per (site, PM10 column) pair and record its R2
# and MSE on the test split. Results accumulate column-wise in `dictionary`.
# NOTE(review): no random seed is set in this notebook, so weight
# initialisation, and therefore the scores, will differ between runs.
dictionary = {'Site':[],'pollutant':[],'R2':[],'MSE':[],'Epoch':[],'hidden':[],'lr':[]}

# The prepared tensors depend only on the site, not on the hyperparameters,
# so they are built once here instead of once per grid point. The traffic
# normalisation helpers are not called: the no-traffic feature set used here
# does not consume their result.
prepared = {}
for site,pollutant in zip(sites,pollutant_PM10):
    X,Y = data_sites_bez_traffic(site,pollutant)
    prepared[site] = prep_data(X,Y)

for epochs in epohe:
    print(f'Start {epochs}')
    for hidden_size,hidden_size_1,hidden_size_2,hidden_size_3 in zip(hidden_sizes,hidden_sizes_1,hidden_sizes_2,hidden_sizes_3):
        # A new list per architecture: it is stored by reference in the results.
        hidden = [hidden_size,hidden_size_1,hidden_size_2,hidden_size_3]
        print(f'Start {hidden}')
        for lr in learning_rates:
            print(f'Start {lr}')
            for site,pollutant in zip(sites,pollutant_PM10):
                X_train_t,X_test_t,Y_train_t,y_test,X_train = prepared[site]
                input_size = X_train.shape[1]

                # training() looks up `loss_func` and `optimizer` in the
                # notebook namespace, so these names must be bound at top
                # level before it is called.
                torch_model = model_torch(input_size,hidden_size,hidden_size_1,hidden_size_2,hidden_size_3)
                loss_func = torch.nn.MSELoss() #mean square error as loss metric
                optimizer = torch.optim.Adam(torch_model.parameters(), lr=lr)
                train_error = training(torch_model,epochs,X_train_t,Y_train_t)

                # Evaluation only: no autograd graph is needed.
                with torch.no_grad():
                    ypredict = torch_model(X_test_t)
                ypredict_np = ypredict.numpy()

                r2 = r2_score(y_test, ypredict_np)
                # Plain MSE is the default. The `squared` keyword is not
                # passed because newer scikit-learn releases reject it.
                mse = mean_squared_error(y_test, ypredict_np)

                dictionary['Site'].append(site)
                dictionary['pollutant'].append(pollutant)
                dictionary['R2'].append(r2)
                dictionary['MSE'].append(mse)
                dictionary['lr'].append(lr)
                dictionary['Epoch'].append(epochs)
                dictionary['hidden'].append(hidden)
            print(f'Done {lr}')
        print(f'Done {hidden}')
    print(f'Done {epochs}')

Start 2000
Start [10, 8, 4, 2]
Start 0.1
Done 0.1
Start 0.01
Done 0.01
Start 0.001
Done 0.001
Start 0.0001
Done 0.0001
Start 1e-05
Done 1e-05
Done [10, 8, 4, 2]
Start [50, 40, 30, 15]
Start 0.1
Done 0.1
Start 0.01
Done 0.01
Start 0.001
Done 0.001
Start 0.0001
Done 0.0001
Start 1e-05
Done 1e-05
Done [50, 40, 30, 15]
Start [100, 80, 40, 20]
Start 0.1
Done 0.1
Start 0.01
Done 0.01
Start 0.001
Done 0.001
Start 0.0001
Done 0.0001
Start 1e-05
Done 1e-05
Done [100, 80, 40, 20]
Start [200, 150, 100, 50]
Start 0.1
Done 0.1
Start 0.01
Done 0.01
Start 0.001
Done 0.001
Start 0.0001
Done 0.0001
Start 1e-05
Done 1e-05
Done [200, 150, 100, 50]
Start [300, 200, 130, 80]
Start 0.1
Done 0.1
Start 0.01
Done 0.01
Start 0.001
Done 0.001
Start 0.0001
Done 0.0001
Start 1e-05
Done 1e-05
Done [300, 200, 130, 80]
Start [500, 350, 200, 100]
Start 0.1
Done 0.1
Start 0.01
Done 0.01
Start 0.001
Done 0.001
Start 0.0001
Done 0.0001
Start 1e-05
Done 1e-05
Done [500, 350, 200, 100]
Start [1000, 500, 300, 150]
Start 0.1

In [None]:
# Turn the column-wise results collected by the grid search into a table
# (one row per trained model).
test = pd.DataFrame.from_dict(dictionary)

In [None]:
# Display the results table.
test

In [23]:
# Save the results table as an Excel workbook in the current working directory.
test.to_excel('test1.xlsx')