In [37]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neighbors import LocalOutlierFactor
from keras.models import Sequential
from keras.layers import Dense
import pandas as pd
import seaborn as sns
import random

def get_model(n_inputs, n_outputs):
    """Build and compile a small fully connected network.

    Architecture: Dense(128, relu) -> Dense(48, relu) -> Dense(n_outputs),
    where the final layer is created without an explicit activation.

    Args:
        n_inputs: Number of input features (passed as ``input_dim`` of the
            first layer).
        n_outputs: Number of units in the output layer.

    Returns:
        A ``Sequential`` model compiled with the 'mae' loss and the 'adam'
        optimizer.
    """
    network = Sequential([
        Dense(128, input_dim=n_inputs, activation='relu'),
        Dense(48, activation='relu'),
        Dense(n_outputs),
    ])
    network.compile(loss='mae', optimizer='adam')
    return network

# Column names of the satellite measurements, grouped by product prefix.
# Only `no2group` and `uvlayer` are used below (dropped from the test set);
# the other groups are kept for reference.

# The sensor_zenith_angle column appears in the loaded frames; the original list
# repeated sensor_azimuth_angle in its place.
# NOTE(review): solar_azimuth_angle was added to complete the sensor/solar
# azimuth/zenith set that the other groups carry; confirm against the CSV header.
so2group = ['SulphurDioxide_SO2_column_number_density','SulphurDioxide_SO2_column_number_density_amf',
            'SulphurDioxide_SO2_slant_column_number_density','SulphurDioxide_cloud_fraction',
            'SulphurDioxide_sensor_azimuth_angle','SulphurDioxide_sensor_zenith_angle',
            'SulphurDioxide_solar_azimuth_angle','SulphurDioxide_solar_zenith_angle',
            'SulphurDioxide_SO2_column_number_density_15km']

cogroup = ['CarbonMonoxide_CO_column_number_density','CarbonMonoxide_H2O_column_number_density',
           'CarbonMonoxide_cloud_height','CarbonMonoxide_sensor_altitude',
           'CarbonMonoxide_sensor_azimuth_angle','CarbonMonoxide_sensor_zenith_angle',
           'CarbonMonoxide_solar_azimuth_angle','CarbonMonoxide_solar_zenith_angle']

# Excluded from the dataset (dropped from the test set below); presumably these
# columns exceed the 20% missing-value threshold applied to the training set.
no2group = ['NitrogenDioxide_NO2_column_number_density','NitrogenDioxide_tropospheric_NO2_column_number_density',
            'NitrogenDioxide_stratospheric_NO2_column_number_density','NitrogenDioxide_NO2_slant_column_number_density',
            'NitrogenDioxide_tropopause_pressure','NitrogenDioxide_absorbing_aerosol_index',
            'NitrogenDioxide_cloud_fraction','NitrogenDioxide_sensor_altitude',
            'NitrogenDioxide_sensor_azimuth_angle','NitrogenDioxide_sensor_zenith_angle',
            'NitrogenDioxide_solar_azimuth_angle','NitrogenDioxide_solar_zenith_angle']

ch2ogroup = ['Formaldehyde_tropospheric_HCHO_column_number_density','Formaldehyde_tropospheric_HCHO_column_number_density_amf',
             'Formaldehyde_HCHO_slant_column_number_density','Formaldehyde_cloud_fraction',
             'Formaldehyde_solar_zenith_angle','Formaldehyde_solar_azimuth_angle',
             'Formaldehyde_sensor_zenith_angle','Formaldehyde_sensor_azimuth_angle']

# The original list repeated sensor_zenith_angle where the other groups have
# solar_azimuth_angle.
# NOTE(review): confirm the solar_azimuth_angle name against the CSV header.
uvindex = ['UvAerosolIndex_absorbing_aerosol_index','UvAerosolIndex_sensor_altitude',
           'UvAerosolIndex_sensor_azimuth_angle','UvAerosolIndex_sensor_zenith_angle',
           'UvAerosolIndex_solar_azimuth_angle','UvAerosolIndex_solar_zenith_angle']

o3group = ['Ozone_O3_column_number_density','Ozone_O3_column_number_density_amf',
           'Ozone_O3_slant_column_number_density','Ozone_O3_effective_temperature',
           'Ozone_cloud_fraction','Ozone_sensor_azimuth_angle','Ozone_sensor_zenith_angle',
           'Ozone_solar_azimuth_angle','Ozone_solar_zenith_angle']

# Excluded from the dataset (dropped from the test set below); presumably these
# columns exceed the 20% missing-value threshold applied to the training set.
uvlayer = ['UvAerosolLayerHeight_aerosol_height','UvAerosolLayerHeight_aerosol_pressure',
           'UvAerosolLayerHeight_aerosol_optical_depth','UvAerosolLayerHeight_sensor_zenith_angle',
           'UvAerosolLayerHeight_sensor_azimuth_angle','UvAerosolLayerHeight_solar_azimuth_angle',
           'UvAerosolLayerHeight_solar_zenith_angle']

cloud = ['Cloud_cloud_fraction','Cloud_cloud_top_pressure','Cloud_cloud_top_height',
         'Cloud_cloud_base_pressure','Cloud_cloud_base_height','Cloud_cloud_optical_depth',
         'Cloud_surface_albedo','Cloud_sensor_azimuth_angle','Cloud_sensor_zenith_angle',
         'Cloud_solar_azimuth_angle','Cloud_solar_zenith_angle']

# Load the raw training and test tables from the local 'emission' directory.
trainset, testset = (
    pd.read_csv(csv_path)
    for csv_path in ('emission/train.csv', 'emission/test.csv')
)

In [38]:
# Percentage of missing values per column. The denominator is the training
# frame itself; the original referenced `dataset`, which this notebook never
# defines and which therefore fails on a fresh kernel.
a = trainset.isnull().sum() / len(trainset) * 100
variables = trainset.columns
# Keep columns with at most 20% missing values. A boolean mask is used instead
# of integer keys on a string-labelled Series (deprecated positional fallback).
variable = a.index[a <= 20].tolist()
trainset = trainset[variable]
# The string identifier is not a feature and must go before taking column means.
trainset = trainset.drop(['ID_LAT_LON_YEAR_WEEK'], axis=1)
# Impute the remaining gaps with each column's mean.
trainset = trainset.fillna(trainset.mean())
# Discard rows whose target exceeds 600.
trainset = trainset.drop(trainset[trainset['emission'] > 600].index)
trainset

Unnamed: 0,latitude,longitude,year,week_no,SulphurDioxide_SO2_column_number_density,SulphurDioxide_SO2_column_number_density_amf,SulphurDioxide_SO2_slant_column_number_density,SulphurDioxide_cloud_fraction,SulphurDioxide_sensor_azimuth_angle,SulphurDioxide_sensor_zenith_angle,...,Cloud_cloud_top_height,Cloud_cloud_base_pressure,Cloud_cloud_base_height,Cloud_cloud_optical_depth,Cloud_surface_albedo,Cloud_sensor_azimuth_angle,Cloud_sensor_zenith_angle,Cloud_solar_azimuth_angle,Cloud_solar_zenith_angle,emission
0,-0.510,29.290,2019,0,-0.000108,0.603019,-0.000065,0.255668,-98.593887,50.843559,...,3664.436218,61085.809570,2615.120483,15.568533,0.272292,-12.628986,35.632416,-138.786423,30.752140,3.750994
1,-0.510,29.290,2019,1,0.000021,0.728214,0.000014,0.130988,16.592861,39.137194,...,3651.190311,66969.478735,3174.572424,8.690601,0.256830,30.359375,39.557633,-145.183930,27.251779,4.025176
2,-0.510,29.290,2019,2,0.000514,0.748199,0.000385,0.110018,72.795837,52.868816,...,4216.986492,60068.894448,3516.282669,21.103410,0.251101,15.377883,30.401823,-142.519545,26.193296,4.231381
3,-0.510,29.290,2019,3,0.000048,0.834848,0.000035,0.158418,-7.925870,37.436189,...,5228.507736,51064.547339,4180.973322,15.386899,0.262043,-11.293399,24.380357,-132.665828,28.829155,4.305286
4,-0.510,29.290,2019,4,-0.000079,0.676296,-0.000048,0.121164,4.121269,35.515587,...,3980.598120,63751.125781,3355.710107,8.114694,0.235847,38.532263,37.392979,-141.509805,22.204612,4.347317
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79018,-3.299,30.301,2021,48,0.000284,1.195643,0.000340,0.191313,72.820518,55.988022,...,5459.185355,60657.101913,4590.879504,20.245954,0.304797,-35.140368,40.113533,-129.935508,32.095214,29.404171
79019,-3.299,30.301,2021,49,0.000083,1.130868,0.000063,0.177222,-12.856753,19.435339,...,5606.449457,60168.191528,4659.130378,6.104610,0.314015,4.667058,47.528435,-134.252871,30.771469,29.186497
79020,-3.299,30.301,2021,50,0.000048,0.834848,0.000035,0.158418,-7.925870,37.436189,...,6222.646776,56596.027209,5222.646823,14.817885,0.288058,-0.340922,35.328098,-134.731723,30.716166,29.131205
79021,-3.299,30.301,2021,51,-0.000034,0.879397,-0.000028,0.184209,-100.344827,32.599393,...,7896.456885,46533.348194,6946.858022,32.594768,0.274047,8.427699,48.295652,-139.447849,29.112868,28.125792


In [39]:
# Remove the identifier and the NO2 / UV-aerosol-layer-height column groups in
# one pass, then fill the remaining gaps with each column's own mean.
testset = (
    testset
    .drop(columns=['ID_LAT_LON_YEAR_WEEK', *no2group, *uvlayer])
    .pipe(lambda frame: frame.fillna(frame.mean()))
)
testset

Unnamed: 0,latitude,longitude,year,week_no,SulphurDioxide_SO2_column_number_density,SulphurDioxide_SO2_column_number_density_amf,SulphurDioxide_SO2_slant_column_number_density,SulphurDioxide_cloud_fraction,SulphurDioxide_sensor_azimuth_angle,SulphurDioxide_sensor_zenith_angle,...,Cloud_cloud_top_pressure,Cloud_cloud_top_height,Cloud_cloud_base_pressure,Cloud_cloud_base_height,Cloud_cloud_optical_depth,Cloud_surface_albedo,Cloud_sensor_azimuth_angle,Cloud_sensor_zenith_angle,Cloud_solar_azimuth_angle,Cloud_solar_zenith_angle
0,-0.510,29.290,2022,0,0.000014,0.792570,0.000009,0.139218,9.298052,36.174307,...,36022.027344,8472.313477,41047.937500,7472.313477,7.935617,0.240773,-100.113792,33.697044,-133.047546,33.779583
1,-0.510,29.290,2022,1,0.000456,0.691164,0.000316,0.000000,76.239196,15.600607,...,48539.737242,6476.147323,54915.708579,5476.147161,11.448437,0.293119,-30.510319,42.402593,-138.632822,31.012380
2,-0.510,29.290,2022,2,0.000161,0.605107,0.000106,0.079870,-42.055341,39.889060,...,34133.080469,8984.795703,39006.093750,7984.795703,10.753179,0.267130,39.087361,45.936480,-144.784988,26.743361
3,-0.510,29.290,2022,3,0.000350,0.696917,0.000243,0.201028,72.169566,58.862543,...,50854.991076,6014.724059,57646.368368,5014.724115,11.764556,0.304679,-24.465127,42.140419,-135.027891,29.604774
4,-0.510,29.290,2022,4,-0.000317,0.580527,-0.000184,0.204352,76.190865,15.646016,...,46594.685145,6849.280477,52896.541873,5849.280394,13.065317,0.284221,-12.907850,30.122641,-135.500119,26.276807
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24348,-3.299,30.301,2022,44,-0.000618,0.745549,-0.000461,0.234492,72.306198,61.114494,...,48839.430415,6260.120033,55483.459980,5260.120056,30.398508,0.180046,-25.528588,45.284576,-116.521412,29.992562
24349,-3.299,30.301,2022,45,0.000014,0.792570,0.000009,0.139218,9.298052,36.174307,...,47042.694849,6678.843299,53589.917383,5678.951521,19.223844,0.177833,-13.380005,43.770351,-122.405759,29.017975
24350,-3.299,30.301,2022,46,0.000014,0.792570,0.000009,0.139218,9.298052,36.174307,...,55337.148173,5336.282475,62646.761340,4336.282491,13.801194,0.219471,-5.072065,33.226455,-124.530639,30.187472
24351,-3.299,30.301,2022,47,0.000071,1.003805,0.000077,0.205077,74.327427,38.215228,...,44813.691428,7188.578533,50728.313991,6188.578464,27.887489,0.247275,-0.668714,45.885617,-129.006797,30.427455


In [40]:
# Feature columns are defined by the training frame. The test frame must carry
# exactly the same columns; it is re-ordered to match so a column-order
# difference cannot silently misalign features between fit and predict.
feature_columns = trainset.columns.drop('emission')
missing_in_test = feature_columns.difference(testset.columns)
unexpected_in_test = testset.columns.difference(feature_columns)
if len(missing_in_test) or len(unexpected_in_test):
    raise ValueError(
        'train/test feature mismatch: missing in test = %s, unexpected in test = %s'
        % (list(missing_in_test), list(unexpected_in_test))
    )

x_train = trainset[feature_columns].values
y_train = trainset['emission'].values
x_test = testset[feature_columns].values

# Fit the scaler on the training features only and reuse it for the test set.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Derive the input width from the data rather than hard-coding it, so the model
# stays consistent if the missing-value threshold keeps a different column set.
n_inputs, n_outputs = x_train.shape[1], 1
model = get_model(n_inputs, n_outputs)
history = model.fit(x_train, y_train, verbose=0, epochs=100, batch_size=100)

y_pred = model.predict(x_test)



In [44]:
# Re-read the raw test file to recover the ID column that was dropped during
# preprocessing, attach the predictions, and keep only the two output columns.
testset = pd.read_csv('emission/test.csv').assign(emission=pd.DataFrame(y_pred))
output = testset.loc[:, ['ID_LAT_LON_YEAR_WEEK', 'emission']]
output

Unnamed: 0,ID_LAT_LON_YEAR_WEEK,emission
0,ID_-0.510_29.290_2022_00,6.331381
1,ID_-0.510_29.290_2022_01,12.373186
2,ID_-0.510_29.290_2022_02,15.297417
3,ID_-0.510_29.290_2022_03,0.902306
4,ID_-0.510_29.290_2022_04,-2.770660
...,...,...
24348,ID_-3.299_30.301_2022_44,35.073257
24349,ID_-3.299_30.301_2022_45,36.624901
24350,ID_-3.299_30.301_2022_46,16.665634
24351,ID_-3.299_30.301_2022_47,22.881989


In [46]:
# Write the output table to disk (overwriting any existing file), without the index.
output.to_csv('output.csv', index=False, mode='w')