# Merge PROMICE with S3_extract data
Processes the S3 data and merges it with the PROMICE data.

In [None]:
import pandas as pd
import numpy as np

# Load the S3 extract (adjust the path to wherever the S3_extract data lives).
s3_extract = pd.read_csv('./data/OLCI/S3_extract.csv')
full_size = len(s3_extract)

# Drop fully identical rows, keeping the first occurrence of each, and report
# how many rows were removed.
s3_extract = s3_extract.drop_duplicates()
print(f'Removed duplicates: {full_size - len(s3_extract)}')

# Keep only the columns used downstream: the station/time keys, the auxiliary
# columns, and the 21 band reflectance columns Oa01_reflectance through
# Oa21_reflectance (in that order).
s3_extract = s3_extract[
    [
        'station', 'year', 'month', 'minute', 'dayofyear', 'hour',
        'sza', 'vza', 'saa', 'vaa', 'slope', 'ndsi', 'ndbi',
        'humidity', 'total_columnar_water_vapour', 'total_ozone',
        *(f'Oa{band:02d}_reflectance' for band in range(1, 22)),
    ]
]

# The PROMICE data is only available at hourly intervals, so the hour in the s3_extract data has to be corrected to match (the correction below is currently commented out).
#s3_extract['hour'] = s3_extract['hour'] - 1 if s3_extract['minute'].astype('int64') < 30 else (s3_extract['hour'] + 1) 

# Round the listed columns to two decimals. Columns not named here
# (e.g. total_ozone and the station/time keys) are left untouched.
s3_extract = s3_extract.round(
    dict.fromkeys(
        [
            'sza', 'vza', 'saa', 'vaa', 'slope', 'ndsi', 'ndbi', 'humidity',
            'total_columnar_water_vapour',
            *(f'Oa{band:02d}_reflectance' for band in range(1, 22)),
        ],
        2,
    )
)

# Shorten the band column names by dropping the '_reflectance' suffix
# (Oa01_reflectance -> Oa01, ..., Oa21_reflectance -> Oa21). Every other
# column keeps its current name, so it needs no entry in the mapping.
s3_extract.rename(
    columns={
        f'Oa{band:02d}_reflectance': f'Oa{band:02d}' for band in range(1, 22)
    },
    inplace=True,
)

# Merge the S3 samples with the PROMICE data. Both tables are joined on the
# same key columns, so a single `on` list is used. pd.merge defaults to an
# inner join, so only rows with matching keys in both tables are kept.
OLCI_with_PROMICE = pd.merge(
    left=s3_extract,
    right=pd.read_csv('data/PROMICE/PROMICE.csv'),
    on=['year', 'month', 'dayofyear', 'hour', 'station'],
)

# Write the merged table to CSV without the row index.
OLCI_with_PROMICE.to_csv('data/OLCI/S3_extract_with_PROMICE.csv', index=False)