In [98]:
import pandas as pd
import numpy as np
import warnings
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
from math import radians, sin, cos, sqrt, atan2
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import GridSearchCV
from scipy.spatial.distance import pdist, squareform
import dask.array as da
from sklearn.cluster import MiniBatchKMeans
from sklearn.metrics import silhouette_score
from math import radians, sin, cos, sqrt, atan2
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from joblib import Parallel, delayed
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.preprocessing import StandardScaler


#Extra settings
# Remove pandas' display truncation: every column, every row and the full
# width of each cell are rendered when a frame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)
# NOTE(review): this suppresses every warning for the whole session,
# including deprecation / chained-assignment warnings raised by the cells
# below — consider narrowing the filter.
warnings.filterwarnings("ignore")

In [99]:
#Loading train and test data
# The read is wrapped so a missing file produces a short message, but the
# error is re-raised: without train_df / test_df every later cell would fail
# with an unrelated-looking NameError.
try:
    train_df=pd.read_csv('/home/anuragverma/Desktop/Kaggle/GeoAI Ground-level NO2 _Zindi/Datasets/Train.csv')
    test_df=pd.read_csv('/home/anuragverma/Desktop/Kaggle/GeoAI Ground-level NO2 _Zindi/Datasets/Test.csv')
except FileNotFoundError:
    print('File not loaded')
    raise

# Shapes are reported only after both files were read successfully.
print("Train df shape: " ,train_df.shape)
print("Test df shape: ", test_df.shape)

Train df shape:  (86584, 14)
Test df shape:  (6576, 13)


In [100]:

def Prep_linear(df1, numeric_columns=None, period=30):
    """Impute missing values in selected numeric columns of a copy of ``df1``.

    For each column a gap-free helper series is built with linear
    interpolation (remaining edge NaNs are back/forward filled), its additive
    trend is extracted with ``seasonal_decompose`` and that trend is used to
    fill only the positions that are NaN in the original column. Anything
    still missing afterwards is replaced by the column mean.

    The decomposition works on the rows in their current order, using a
    window of ``period`` rows.
    NOTE(review): confirm the frame is ordered the way this assumes.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Input frame; it is not modified.
    numeric_columns : list of str, optional
        Columns to impute. Defaults to the original hard-coded list, which
        includes the target ``GT_NO2`` (so missing targets are imputed too).
    period : int, optional
        Period passed to ``seasonal_decompose`` (default 30).

    Returns
    -------
    pandas.DataFrame
        Copy of ``df1`` with the listed columns imputed.

    Raises
    ------
    KeyError
        If a listed column is not present in ``df1``.
    """
    if numeric_columns is None:
        numeric_columns = ['Precipitation','LST','AAI','CloudFraction','TropopausePressure','GT_NO2']

    df = df1.copy()

    for col in numeric_columns:
        # Helper series without gaps: interpolate inside, then pad the ends.
        # .bfill()/.ffill() replace the deprecated fillna(method=...) calls.
        data_interpolated = df[col].interpolate(method='linear').bfill().ffill()

        # Extract the trend component of the gap-free series
        decomposition = seasonal_decompose(data_interpolated, model='additive', period=period)

        # The moving-average trend is NaN at both ends; pad those as well
        trend = decomposition.trend.bfill().ffill()

        # Keep observed values, take the trend only where the original is NaN
        filled = df[col].combine_first(trend)

        # Assign the result back explicitly: an in-place fillna on df[col]
        # operates on a temporary and is not guaranteed to reach df.
        df[col] = filled.fillna(filled.mean())

    return df

def Prep_linear_test(df1, numeric_columns=None, period=30):
    """Impute missing values in the test-set feature columns of a copy of ``df1``.

    Same procedure as used for the training features, but the default column
    list has no ``GT_NO2`` (the test frame has no target): linear
    interpolation builds a gap-free helper series, ``seasonal_decompose``
    extracts its additive trend, and the trend fills only the positions that
    are NaN in the original column. Anything still missing is replaced by the
    column mean.

    The decomposition works on the rows in their current order, using a
    window of ``period`` rows.
    NOTE(review): confirm the frame is ordered the way this assumes.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Input frame; it is not modified.
    numeric_columns : list of str, optional
        Columns to impute. Defaults to the original hard-coded feature list.
    period : int, optional
        Period passed to ``seasonal_decompose`` (default 30).

    Returns
    -------
    pandas.DataFrame
        Copy of ``df1`` with the listed columns imputed.

    Raises
    ------
    KeyError
        If a listed column is not present in ``df1``.
    """
    if numeric_columns is None:
        numeric_columns = ['Precipitation','LST','AAI','CloudFraction','TropopausePressure']

    df = df1.copy()

    for col in numeric_columns:
        # Helper series without gaps: interpolate inside, then pad the ends.
        # .bfill()/.ffill() replace the deprecated fillna(method=...) calls.
        data_interpolated = df[col].interpolate(method='linear').bfill().ffill()

        # Extract the trend component of the gap-free series
        decomposition = seasonal_decompose(data_interpolated, model='additive', period=period)

        # The moving-average trend is NaN at both ends; pad those as well
        trend = decomposition.trend.bfill().ffill()

        # Keep observed values, take the trend only where the original is NaN
        filled = df[col].combine_first(trend)

        # Assign the result back explicitly: an in-place fillna on df[col]
        # operates on a temporary and is not guaranteed to reach df.
        df[col] = filled.fillna(filled.mean())

    return df

# Second approach: quadratic spline interpolation (order=2)



def Prep_spline(df1, numeric_columns=None, period=30):
    """Impute missing values in the NO2 satellite columns of a copy of ``df1``.

    For each column a gap-free helper series is built with order-2 spline
    interpolation (remaining edge NaNs are back/forward filled), its additive
    trend is extracted with ``seasonal_decompose`` and that trend is used to
    fill only the positions that are NaN in the original column. Anything
    still missing afterwards is replaced by the column mean.

    The decomposition works on the rows in their current order, using a
    window of ``period`` rows.
    NOTE(review): confirm the frame is ordered the way this assumes.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Input frame; it is not modified.
    numeric_columns : list of str, optional
        Columns to impute. Defaults to ``NO2_strat``, ``NO2_total`` and
        ``NO2_trop``.
    period : int, optional
        Period passed to ``seasonal_decompose`` (default 30).

    Returns
    -------
    pandas.DataFrame
        Copy of ``df1`` with the listed columns imputed.

    Raises
    ------
    KeyError
        If a listed column is not present in ``df1``.
    """
    if numeric_columns is None:
        numeric_columns = ['NO2_strat','NO2_total','NO2_trop']

    df = df1.copy()

    for col in numeric_columns:
        # Helper series without gaps: spline-interpolate inside, then pad the
        # ends. .bfill()/.ffill() replace the deprecated fillna(method=...).
        data_interpolated = df[col].interpolate(method='spline',order=2).bfill().ffill()

        # Extract the trend component of the gap-free series
        decomposition = seasonal_decompose(data_interpolated, model='additive', period=period)

        # The moving-average trend is NaN at both ends; pad those as well
        trend = decomposition.trend.bfill().ffill()

        # Keep observed values, take the trend only where the original is NaN
        filled = df[col].combine_first(trend)

        # Assign the result back explicitly: an in-place fillna on df[col]
        # operates on a temporary and is not guaranteed to reach df.
        df[col] = filled.fillna(filled.mean())

    return df

In [101]:
# Inputs for the model that scored 10.72 once missing values were filled.
# Each frame goes through the linear-interpolation imputer, then the spline
# imputer, and is finally reduced to its numeric columns.
train_model3_df_prep_mix = (
    Prep_spline(Prep_linear(train_df))
    .select_dtypes(include=['number'])
)
test_model3_df_prep_mix = (
    Prep_spline(Prep_linear_test(test_df))
    .select_dtypes(include=['number'])
)

# Split the training frame into target ('GT_NO2') and features
train_model3_df_prep_mix_GT_NO2_mix = train_model3_df_prep_mix['GT_NO2']
train_model3_df_prep_mix = train_model3_df_prep_mix.drop(columns='GT_NO2')


In [102]:
# Fit a plain linear regression on the imputed training features
model3_mix = LinearRegression().fit(
    train_model3_df_prep_mix,
    train_model3_df_prep_mix_GT_NO2_mix,
)

# Score the imputed test features
y_pred3_mix = model3_mix.predict(test_model3_df_prep_mix)

# Pair each prediction with its 'ID_Zindi' (both share the default row index)
y_pred3_mix_df = pd.DataFrame({'GT_NO2': y_pred3_mix})
model3_mix_result_df = pd.concat([test_df['ID_Zindi'], y_pred3_mix_df], axis=1)

# Write the submission file
model3_mix_result_df.to_csv(
    '/home/anuragverma/Desktop/Kaggle/GeoAI Ground-level NO2 _Zindi/model3_mix.csv',
    index=False,
)
#Best till now: 10.72134503

In [103]:
# Summary statistics of the imputed training features, one row per column
train_model3_df_prep_mix.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
LAT,86584.0,45.421456,0.225409,44.924694,45.249544,45.478996,45.601232,45.889734
LON,86584.0,10.014272,1.056637,8.736497,9.195325,9.611738,10.683357,12.590682
Precipitation,86584.0,2.585528,7.617394,0.0,0.0,0.0,0.0,135.396805
LST,86584.0,295.794353,11.539179,253.2,286.507764,296.40025,305.02,327.84
AAI,86584.0,-1.25516,0.702738,-5.196266,-1.743094,-1.334197,-0.845278,2.14302
CloudFraction,86584.0,0.238371,0.274434,0.0,0.03064,0.112237,0.372591,1.0
NO2_strat,86584.0,4.5e-05,1.2e-05,1.3e-05,3.6e-05,4.6e-05,5.5e-05,7.3e-05
NO2_total,86584.0,0.000157,9.6e-05,-1.2e-05,0.000103,0.000131,0.000175,0.002047
NO2_trop,86584.0,0.000104,7.4e-05,-2.5e-05,6e-05,8.9e-05,0.000123,0.001097
TropopausePressure,86584.0,16760.555697,3016.010268,8614.349685,14433.000528,16711.85407,19259.039003,24449.00715


In [104]:
#Checking skewness for all cols.
# Skewness is computed once and reused for both the selection and the report.
train_skew_values = train_model3_df_prep_mix.skew()
train_high_skew = train_skew_values[train_skew_values.abs() > 0.5]

# LON is excluded from the columns to transform. Filtering (instead of
# list.remove) does not fail when LON is not above the threshold.
Skewed_cols = [name for name in train_high_skew.index if name != 'LON']

print(train_high_skew)
print(Skewed_cols)

LON              0.911077
Precipitation    4.569618
CloudFraction    1.236881
NO2_total        4.169193
NO2_trop         2.935214
dtype: float64
['Precipitation', 'CloudFraction', 'NO2_total', 'NO2_trop']


In [105]:
#GT_NO2 is also skewed
# Skewness of the raw target column (pandas skips NaNs by default)
train_df['GT_NO2'].skew()

1.507939283863649

In [106]:
# Preview the imputed (not yet transformed) training features
train_model3_df_prep_mix.head(5)

Unnamed: 0,LAT,LON,Precipitation,LST,AAI,CloudFraction,NO2_strat,NO2_total,NO2_trop,TropopausePressure
0,45.601585,11.903551,0.0,280.097333,0.230527,0.559117,2.4e-05,0.000117,0.000131,14440.82126
1,45.371005,11.84083,3.047342,280.097333,-0.074006,0.869309,2.4e-05,0.000127,0.000131,14441.79815
2,45.045825,12.060869,0.0,280.097333,0.02447,0.67416,2.4e-05,8.6e-05,0.000131,14437.38294
3,45.104075,11.553241,1.200467,280.097333,-0.010442,0.920054,2.4e-05,0.000124,0.000131,14440.83831
4,45.038758,11.790152,1.274564,280.097333,-0.176178,0.747464,2.4e-05,0.000116,0.000131,14438.79037


In [107]:
# Working copy for the skewness treatment; train_model3_df_prep_mix itself
# stays untransformed so before/after comparisons remain possible.
train_model3_df_prep_mix_skew_trt=train_model3_df_prep_mix.copy()

In [108]:
# Log treatment for positively skewed columns. Because some columns have a
# minimum of 0 (or below), a small offset (0.0001) is added before the log.

# Function to apply log when the minimum is <= 0 as well as > 0
def log_transfrom_trt(series, offset=None):
    """Return ``log(series + offset)``.

    Parameters
    ----------
    series : pandas.Series
        Values to transform.
    offset : float, optional
        Shift added before taking the log. When omitted it is derived from
        ``series`` itself: ``abs(min) + 0.0001`` if the minimum is negative,
        otherwise ``0.0001``. Pass the offset obtained on the training data
        when transforming another dataset so both get the identical
        transformation; a data-derived offset differs whenever the minima
        differ.

    Returns
    -------
    pandas.Series
        The log-transformed values.
    """
    if offset is None:
        lowest = series.min()
        # Shift negative data so the smallest value maps to log(0.0001)
        offset = abs(lowest) + 0.0001 if lowest < 0 else 0.0001
    return np.log(series + offset)





In [109]:
# Log-transform the skewed training columns.
# Values are read from the untransformed frame (train_model3_df_prep_mix), so
# re-running this cell gives the same result instead of applying the log a
# second time to already-transformed data.
for col in ['Precipitation', 'CloudFraction', 'NO2_total', 'NO2_trop']:
    train_model3_df_prep_mix_skew_trt[col] = log_transfrom_trt(train_model3_df_prep_mix[col])

In [110]:
# Skewness per column before and after the log treatment (computed once each)
train_skew_raw = train_model3_df_prep_mix.skew()
train_skew_logged = train_model3_df_prep_mix_skew_trt.skew()
for col in Skewed_cols:
    print('Before:',col,train_skew_raw[col])
    print('After:',col,train_skew_logged[col])
    print('\n')

Before: Precipitation 4.569618429259263
After: Precipitation 1.5171822647199633


Before: CloudFraction 1.2368812312528774
After: CloudFraction -1.4714216645089138


Before: NO2_total 4.169192733648738
After: NO2_total 1.6688966196282646


Before: NO2_trop 2.935213957795029
After: NO2_trop 1.3232412318536575




In [111]:
# The log made CloudFraction worse (skew 1.23 -> -1.47), so discard it and
# apply a square-root transform to the original values instead.
train_model3_df_prep_mix_skew_trt['CloudFraction'] = np.sqrt(train_model3_df_prep_mix['CloudFraction'])

In [112]:
# With the square root, CloudFraction skewness drops to 0.47
train_skew_original = train_model3_df_prep_mix.skew()
train_skew_treated = train_model3_df_prep_mix_skew_trt.skew()
for col in Skewed_cols:
    print('Before:',col,train_skew_original[col])
    print('After:',col,train_skew_treated[col])
    print('\n')

# Summary: Precipitation, NO2_total and NO2_trop got the log treatment and
# CloudFraction got the sqrt treatment in train. The same is done for test.

Before: Precipitation 4.569618429259263
After: Precipitation 1.5171822647199633


Before: CloudFraction 1.2368812312528774
After: CloudFraction 0.4707422286390351


Before: NO2_total 4.169192733648738
After: NO2_total 1.6688966196282646


Before: NO2_trop 2.935213957795029
After: NO2_trop 1.3232412318536575




In [113]:
# Working copy of the test features for the skewness treatment, plus a report
# of the test columns whose absolute skewness exceeds 0.5
test_model3_df_prep_mix_skew_trt = test_model3_df_prep_mix.copy()
test_skew_values = test_model3_df_prep_mix.skew()
print(test_skew_values[test_skew_values.abs() > 0.5])


LAT              0.839399
Precipitation    4.409900
AAI              0.557857
CloudFraction    1.295619
NO2_total        5.237289
NO2_trop         3.275638
dtype: float64


In [114]:
# Apply to test the same treatment as train: log for Precipitation, NO2_total
# and NO2_trop, square root for CloudFraction.
#
# The log offset is derived from the TRAIN column minimum (same rule as
# log_transfrom_trt), not from the test data: an offset recomputed on test
# would differ whenever the two minima differ, so the model would see
# differently transformed features at prediction time.
# Values are read from the untransformed test frame, so re-running this cell
# is idempotent.
LOG_EPS = 0.0001
for col in ['Precipitation', 'NO2_total', 'NO2_trop']:
    train_min = train_model3_df_prep_mix[col].min()
    train_offset = abs(train_min) + LOG_EPS if train_min < 0 else LOG_EPS
    # A test value below the train minimum would make the log argument <= 0;
    # floor it at LOG_EPS, the lower bound of the shifted training values.
    shifted = (test_model3_df_prep_mix[col] + train_offset).clip(lower=LOG_EPS)
    test_model3_df_prep_mix_skew_trt[col] = np.log(shifted)

test_model3_df_prep_mix_skew_trt['CloudFraction'] = np.sqrt(test_model3_df_prep_mix['CloudFraction'])

In [115]:

# Skewness of the test columns before and after treatment (computed once each)
test_skew_original = test_model3_df_prep_mix.skew()
test_skew_treated = test_model3_df_prep_mix_skew_trt.skew()
for col in Skewed_cols:
    print('Before:',col,test_skew_original[col])
    print('After:',col,test_skew_treated[col])
    print('\n')



Before: Precipitation 4.409900124327067
After: Precipitation 1.5310861116506305


Before: CloudFraction 1.29561885475446
After: CloudFraction 0.44339351502902985


Before: NO2_total 5.237289466539959
After: NO2_total 1.987697663090459


Before: NO2_trop 3.275637690439222
After: NO2_trop 1.37284394791529




In [116]:
# Precipitation, CloudFraction, NO2_total and NO2_trop were treated for
# skewness in both train and test. The dependent variable GT_NO2 is skewed as
# well, so it gets a square-root transform. It is taken from the raw
# train_df, so missing targets are still NaN here.
gt_no2_raw = train_df['GT_NO2'].copy()
print('Before treatement',gt_no2_raw.skew())
train_model3_df_prep_mix_skew_trt_GT_NO2 = np.sqrt(gt_no2_raw)
print('After treatement',train_model3_df_prep_mix_skew_trt_GT_NO2.skew())

Before treatement 1.507939283863649
After treatement 0.5905968870235135


In [117]:
#End of treating skewness

In [118]:
# Preview the test features after the skewness treatment
test_model3_df_prep_mix_skew_trt.head(5)

Unnamed: 0,LAT,LON,Precipitation,LST,AAI,CloudFraction,NO2_strat,NO2_total,NO2_trop,TropopausePressure
0,45.289376,11.642394,1.18712,279.369667,-0.313361,0.878326,2.4e-05,-8.649012,-8.406815,14440.02819
1,45.836941,12.510362,-9.21034,279.369667,-0.229512,0.631038,2.3e-05,-8.421883,-8.406815,14434.0479
2,45.582894,8.842165,-9.21034,282.98,-0.470822,0.392039,2.3e-05,-8.213392,-8.26659,14427.42478
3,45.131947,10.015742,0.656551,279.369667,0.132952,0.87001,2.4e-05,-7.912877,-8.406815,14443.09006
4,45.186329,9.146666,-9.21034,279.369667,-0.198272,0.823928,2.3e-05,-8.298058,-8.406815,14440.8584


In [119]:
# Preview the square-root-transformed target
train_model3_df_prep_mix_skew_trt_GT_NO2.head(5)

0    5.567764
1    6.480741
2    5.567764
3    5.477226
4    7.615773
Name: GT_NO2, dtype: float64

In [120]:
# NOTE(review): identical to the previous cell — candidate for removal
train_model3_df_prep_mix_skew_trt_GT_NO2.head(5)

0    5.567764
1    6.480741
2    5.567764
3    5.477226
4    7.615773
Name: GT_NO2, dtype: float64

In [121]:
# Check the container type of the transformed target
type(train_model3_df_prep_mix_skew_trt_GT_NO2)

pandas.core.series.Series

In [122]:
# Preview the training features after the skewness treatment
train_model3_df_prep_mix_skew_trt.head(5)

Unnamed: 0,LAT,LON,Precipitation,LST,AAI,CloudFraction,NO2_strat,NO2_total,NO2_trop,TropopausePressure
0,45.601585,11.903551,-9.21034,280.097333,0.230527,0.747741,2.4e-05,-8.382225,-8.268476,14440.82126
1,45.371005,11.84083,1.114303,280.097333,-0.074006,0.932367,2.4e-05,-8.339466,-8.268476,14441.79815
2,45.045825,12.060869,-9.21034,280.097333,0.02447,0.821073,2.4e-05,-8.526739,-8.268476,14437.38294
3,45.104075,11.553241,0.182794,280.097333,-0.010442,0.959194,2.4e-05,-8.352103,-8.268476,14440.83831
4,45.038758,11.790152,0.242682,280.097333,-0.176178,0.86456,2.4e-05,-8.386604,-8.268476,14438.79037


In [123]:
# Preview the test features after the skewness treatment (before scaling)
test_model3_df_prep_mix_skew_trt.head(5)

Unnamed: 0,LAT,LON,Precipitation,LST,AAI,CloudFraction,NO2_strat,NO2_total,NO2_trop,TropopausePressure
0,45.289376,11.642394,1.18712,279.369667,-0.313361,0.878326,2.4e-05,-8.649012,-8.406815,14440.02819
1,45.836941,12.510362,-9.21034,279.369667,-0.229512,0.631038,2.3e-05,-8.421883,-8.406815,14434.0479
2,45.582894,8.842165,-9.21034,282.98,-0.470822,0.392039,2.3e-05,-8.213392,-8.26659,14427.42478
3,45.131947,10.015742,0.656551,279.369667,0.132952,0.87001,2.4e-05,-7.912877,-8.406815,14443.09006
4,45.186329,9.146666,-9.21034,279.369667,-0.198272,0.823928,2.3e-05,-8.298058,-8.406815,14440.8584


In [124]:
# Feature columns to standardise; LAT and LON are not in the list and are
# therefore left on their original scale.
Columns_to_rescale=['Precipitation','LST','AAI','CloudFraction','NO2_strat','NO2_total','NO2_trop','TropopausePressure']
# One scaler instance shared by train (fit) and test (transform only)
scaler = StandardScaler()

In [125]:
# Fit the scaler on the training columns and overwrite them with the scaled
# values. NOTE(review): not idempotent — re-running this cell re-fits the
# scaler on the already-scaled columns.
train_model3_df_prep_mix_skew_trt[Columns_to_rescale]=scaler.fit_transform(train_model3_df_prep_mix_skew_trt[Columns_to_rescale])

In [126]:
# Scale the test columns with the statistics learned on train (transform
# only, no refit). NOTE(review): not idempotent — re-running scales the
# already-scaled values again.
test_model3_df_prep_mix_skew_trt[Columns_to_rescale]=scaler.transform(test_model3_df_prep_mix_skew_trt[Columns_to_rescale])

In [127]:
# Preview the scaled test features
test_model3_df_prep_mix_skew_trt.head(5)

Unnamed: 0,LAT,LON,Precipitation,LST,AAI,CloudFraction,NO2_strat,NO2_total,NO2_trop,TropopausePressure
0,45.289376,11.642394,1.781691,-1.423392,1.340193,1.695411,-1.813458,-1.45161,0.036426,-0.769407
1,45.836941,12.510362,-0.503912,-1.423392,1.459512,0.823726,-1.865311,-0.600748,0.036426,-0.77139
2,45.582894,8.842165,-0.503912,-1.110515,1.116124,-0.01874,-1.908523,0.180295,0.579517,-0.773586
3,45.131947,10.015742,1.665059,-1.423392,1.975304,1.666098,-1.8221,1.306072,0.036426,-0.768392
4,45.186329,9.146666,-0.503912,-1.423392,1.503967,1.503661,-1.856669,-0.136878,0.036426,-0.769132


In [128]:
# Preview the scaled training features
train_model3_df_prep_mix_skew_trt.head(5)

Unnamed: 0,LAT,LON,Precipitation,LST,AAI,CloudFraction,NO2_strat,NO2_total,NO2_trop,TropopausePressure
0,45.601585,11.903551,-0.503912,-1.360332,2.114154,1.235104,-1.830742,-0.452184,0.572214,-0.769145
1,45.371005,11.84083,1.765684,-1.360332,1.680799,1.885906,-1.8221,-0.291999,0.572214,-0.768821
2,45.045825,12.060869,-0.503912,-1.360332,1.820932,1.493595,-1.813458,-0.993555,0.572214,-0.770285
3,45.104075,11.553241,1.560917,-1.360332,1.771252,1.98047,-1.804815,-0.339339,0.572214,-0.769139
4,45.038758,11.790152,1.574082,-1.360332,1.535407,1.646886,-1.804815,-0.468586,0.572214,-0.769818


In [129]:
# The sqrt-transformed target comes from the raw train_df, so it still
# contains nulls. Attach it to the feature frame (aligned on the row index)
# so rows can be split by target availability.
train_model3_df_prep_mix_skew_trt['GT_NO2']=train_model3_df_prep_mix_skew_trt_GT_NO2

In [130]:
# Partition the training rows by whether the target is known
target_is_known = train_model3_df_prep_mix_skew_trt['GT_NO2'].notna()
train_model3_df_prep_mix_skew_trt_NO2_not_Na = train_model3_df_prep_mix_skew_trt[target_is_known]
train_model3_df_prep_mix_skew_trt_NO2_NA = train_model3_df_prep_mix_skew_trt[~target_is_known]

In [131]:
# For the rows with a known target: keep the target aside, leave features only
train_model3_df_prep_mix_skew_trt_NO2_not_Na_GT_NO2 = train_model3_df_prep_mix_skew_trt_NO2_not_Na.loc[:, 'GT_NO2']
train_model3_df_prep_mix_skew_trt_NO2_not_Na = train_model3_df_prep_mix_skew_trt_NO2_not_Na.drop(columns=['GT_NO2'])

In [132]:
# Rows with a missing target: drop the (all-NaN) target column so only the
# features remain for prediction.
train_model3_df_prep_mix_skew_trt_NO2_NA=train_model3_df_prep_mix_skew_trt_NO2_NA.drop('GT_NO2',axis=1)

In [133]:
# Fit a linear regression on the rows whose (sqrt-scale) target is known and
# use it to predict the target for the rows where it is missing.
# NOTE(review): the final model is later trained on these predictions using
# the same features — confirm this adds information rather than simply
# dropping the rows with a missing target.
model_fill_na = LinearRegression().fit(
    train_model3_df_prep_mix_skew_trt_NO2_not_Na,
    train_model3_df_prep_mix_skew_trt_NO2_not_Na_GT_NO2,
)
y_pred_fill_na = model_fill_na.predict(train_model3_df_prep_mix_skew_trt_NO2_NA)

In [134]:
# Put the observed target back next to its features
train_model3_df_prep_mix_skew_trt_NO2_not_Na['GT_NO2']=train_model3_df_prep_mix_skew_trt_NO2_not_Na_GT_NO2

In [135]:
# Use the model predictions as the target for the rows where it was missing
train_model3_df_prep_mix_skew_trt_NO2_NA['GT_NO2']=y_pred_fill_na

In [136]:
# Non-null count per column for the rows with an observed target
train_model3_df_prep_mix_skew_trt_NO2_not_Na.count()

LAT                   82051
LON                   82051
Precipitation         82051
LST                   82051
AAI                   82051
CloudFraction         82051
NO2_strat             82051
NO2_total             82051
NO2_trop              82051
TropopausePressure    82051
GT_NO2                82051
dtype: int64

In [137]:
# Stack the observed-target rows and the imputed-target rows back together.
# ignore_index=True renumbers the rows, and the imputed rows now come last,
# so the row order no longer matches train_df.
train_model3_df_prep_mix_skew_trt = pd.concat([train_model3_df_prep_mix_skew_trt_NO2_not_Na, train_model3_df_prep_mix_skew_trt_NO2_NA], ignore_index=True)

In [138]:
# Preview the recombined training frame (features + sqrt-scale target)
train_model3_df_prep_mix_skew_trt.head(5)

Unnamed: 0,LAT,LON,Precipitation,LST,AAI,CloudFraction,NO2_strat,NO2_total,NO2_trop,TropopausePressure,GT_NO2
0,45.601585,11.903551,-0.503912,-1.360332,2.114154,1.235104,-1.830742,-0.452184,0.572214,-0.769145,5.567764
1,45.371005,11.84083,1.765684,-1.360332,1.680799,1.885906,-1.8221,-0.291999,0.572214,-0.768821,6.480741
2,45.045825,12.060869,-0.503912,-1.360332,1.820932,1.493595,-1.813458,-0.993555,0.572214,-0.770285,5.567764
3,45.104075,11.553241,1.560917,-1.360332,1.771252,1.98047,-1.804815,-0.339339,0.572214,-0.769139,5.477226
4,45.038758,11.790152,1.574082,-1.360332,1.535407,1.646886,-1.804815,-0.468586,0.572214,-0.769818,7.615773


In [139]:
# Split the recombined frame into the final target vector and feature matrix
train_model3_df_prep_mix_skew_trt_GT_NO2 = train_model3_df_prep_mix_skew_trt.loc[:, 'GT_NO2']
train_model3_df_prep_mix_skew_trt = train_model3_df_prep_mix_skew_trt.drop(columns=['GT_NO2'])

In [142]:
# Non-null count per feature column of the final training matrix
train_model3_df_prep_mix_skew_trt.count()

LAT                   86584
LON                   86584
Precipitation         86584
LST                   86584
AAI                   86584
CloudFraction         86584
NO2_strat             86584
NO2_total             86584
NO2_trop              86584
TropopausePressure    86584
dtype: int64

In [140]:
#Creating model 4
# Linear regression on the skew-treated, scaled features; the target is on
# the square-root scale.
model4 = LinearRegression()
model4.fit(train_model3_df_prep_mix_skew_trt, train_model3_df_prep_mix_skew_trt_GT_NO2)

# Predict sqrt(GT_NO2) for the test dataset
y_pred4 = model4.predict(test_model3_df_prep_mix_skew_trt)

# Pair each prediction with its 'ID_Zindi'
y_pred4_df = pd.DataFrame(y_pred4, columns=['GT_NO2'])
model4_result_df = pd.concat([test_df['ID_Zindi'], y_pred4_df], axis=1)

# Back-transform to the original scale. A linear model can output a negative
# value on the sqrt scale; squaring it directly would turn it into a
# spuriously large positive prediction, so clip at 0 first.
model4_result_df['GT_NO2']=model4_result_df['GT_NO2'].clip(lower=0)**2
model4_result_df.to_csv('/home/anuragverma/Desktop/Kaggle/GeoAI Ground-level NO2 _Zindi/model4.csv', index=False)

#Best till now :10.66262869


In [141]:
# Preview the submission (predictions already squared back to the GT_NO2 scale)
model4_result_df.head(5)

Unnamed: 0,ID_Zindi,GT_NO2
0,ID_2MYNQS,25.156304
1,ID_P4U5WU,33.625701
2,ID_U4KWPK,36.380869
3,ID_QGSNTZ,35.057258
4,ID_GHSZ6K,29.005561


In [None]:
# 	ID_Zindi	GT_NO2
# 0	ID_2MYNQS	25.156304
# 1	ID_P4U5WU	33.625701
# 2	ID_U4KWPK	36.380869
# 3	ID_QGSNTZ	35.057258
# 4	ID_GHSZ6K	29.005561

In [143]:
# Preview the final training feature matrix (target column already removed)
train_model3_df_prep_mix_skew_trt.head(5)

Unnamed: 0,LAT,LON,Precipitation,LST,AAI,CloudFraction,NO2_strat,NO2_total,NO2_trop,TropopausePressure
0,45.601585,11.903551,-0.503912,-1.360332,2.114154,1.235104,-1.830742,-0.452184,0.572214,-0.769145
1,45.371005,11.84083,1.765684,-1.360332,1.680799,1.885906,-1.8221,-0.291999,0.572214,-0.768821
2,45.045825,12.060869,-0.503912,-1.360332,1.820932,1.493595,-1.813458,-0.993555,0.572214,-0.770285
3,45.104075,11.553241,1.560917,-1.360332,1.771252,1.98047,-1.804815,-0.339339,0.572214,-0.769139
4,45.038758,11.790152,1.574082,-1.360332,1.535407,1.646886,-1.804815,-0.468586,0.572214,-0.769818
