In [None]:
!pip install scikit-learn==1.0
!pip install xgboost==1.4.2
!pip install catboost==0.26.1
!pip install pandas==1.3.3
!pip install radiant-mlhub==0.3.0
!pip install rasterio==1.2.8
!pip install numpy==1.21.2
!pip install pathlib==1.0.1
!pip install tqdm==4.62.3
!pip install joblib==1.0.1
!pip install matplotlib==3.4.3
!pip install Pillow==8.3.2
!pip install torch==1.9.1
!pip install plotly==5.3.1


In [None]:
import warnings
warnings.filterwarnings('ignore')
# warnings.filterwarnings('RuntimeWarning')

from radiant_mlhub import Collection
import tarfile
import os
from pathlib import Path
import json
from tqdm import tqdm

import datetime
import rasterio
import numpy as np
import pandas as pd
from collections import ChainMap
from collections import Counter
import gc
import os
from joblib import Parallel,delayed
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image, ImageDraw
import pandas as pd
import rasterio
import math
gc.collect()

9

In [None]:
def get_date_format(month,day):
    '''
        Builds the zero-padded 'month_MM_day_DD' label for a month/day pair.

        Returns the string 'nan' when either argument stringifies to 'nan'
        (e.g. a float NaN produced by a missing datetime).
    '''
    if 'nan' in (str(month), str(day)):
        return 'nan'

    def two_digits(value):
        # Values below 10 get a leading zero; larger values are used as-is.
        prefix = '' if value >= 10 else '0'
        return prefix + str(int(value))

    return f'month_{two_digits(month)}_day_{two_digits(day)}'

            
def attain_field_labels(tile_df):
    '''
        Reads the field-id raster of a tile.

        tile_df : metadata rows for a single tile; the first row whose 'asset'
                  column equals 'field_ids' supplies the raster 'file_path'.

        Returns band 1 of that raster as read by rasterio.
        Raises IndexError when tile_df has no 'field_ids' row.
    '''
    field_id_path = tile_df[tile_df['asset']=='field_ids']['file_path'].values[0]

    # The context manager closes the dataset handle once the band is read;
    # without it every processed tile leaves an open file behind.
    with rasterio.open(field_id_path) as field_id_src:
        return field_id_src.read(1)

def get_bands(tile_df,date_time,band):
    '''
        Loads one band of a tile for one acquisition date as a 256x256 array.

        tile_df   : metadata rows with 'dates', 'asset' and 'file_path' columns.
        date_time : value matched against the 'dates' column.
        band      : value matched against the 'asset' column.

        Returns band 1 of the first matching raster reshaped to (256, 256).
        When the raster cannot be located, opened or reshaped, a (256, 256)
        array filled with NaN is returned instead (best-effort behaviour).
    '''
    try:
        raster_path = tile_df[(tile_df['dates']==date_time) & (tile_df['asset']==band)]['file_path'].values[0]
        # Close the dataset as soon as the band has been read.
        with rasterio.open(raster_path) as source:
            band_array = source.read(1)
        return np.reshape(band_array, (256, 256))
    except Exception:
        # `Exception` (not a bare except) keeps the NaN fallback for missing or
        # unreadable rasters while letting KeyboardInterrupt/SystemExit through.
        return np.full((256, 256), np.nan)


def get_aggregate_values(field_id_array, verbose=False):
    '''
        Computes the size of every field present in a field-id raster.

        field_id_array : array holding one field id per pixel; id 0 is skipped.
        verbose        : when True, print one line per field with its pixel
                         count divided by 65536. Off by default because one
                         line per field across parallel workers floods the
                         notebook output.

        Returns a dict mapping each non-zero field id to
        1000 * pixel_count / 65536.
    '''
    # 65536 = 256 * 256, the tile size get_bands reshapes rasters to.
    pixels_per_tile = 65536

    # One pass yields every id with its pixel count. This also handles a field
    # that covers the whole tile (no 0 pixels), which previously raised
    # IndexError.
    field_ids, pixel_counts = np.unique(field_id_array, return_counts=True)

    field_dictionary = {}
    for fields, pixel_count in zip(field_ids, pixel_counts):
        if fields == 0:
            continue
        pixel_count = int(pixel_count)
        if verbose:
            print('Field id ', fields, 'percentage ',pixel_count/65536)
        field_dictionary[fields] = (1000*pixel_count)/pixels_per_tile

    return field_dictionary
            
        
    

In [None]:
# Load the tile metadata and derive calendar columns from the 'datetime' column.
competition_train_df = pd.read_csv('train_data_sentinel2.csv')
competition_train_df = competition_train_df.assign(
    month=lambda frame: pd.to_datetime(frame['datetime']).dt.month.values,
    day=lambda frame: pd.to_datetime(frame['datetime']).dt.day.values,
)

# 'dates' holds the 'month_MM_day_DD' label of each row ('nan' when month/day is missing).
competition_train_df['dates'] = competition_train_df.apply(
    lambda row: get_date_format(row['month'], row['day']),
    axis=1,
)

# Distinct labels in order of first appearance, without the 'nan' placeholder.
unique_dates = np.array(
    [label for label in competition_train_df['dates'].unique() if 'nan' not in label]
)

print(f'Length of unique dates {len(unique_dates)}')

Length of unique dates 76


In [None]:
# Map each distinct raw 'datetime' value to its 'month_MM_day_DD' label, ordered by label.
# NOTE(review): zip pairs the two arrays by order of first appearance, which assumes every
# distinct datetime yields a distinct label — confirm for this dataset.
date_dict = dict(
    sorted(
        zip(competition_train_df['datetime'].dropna().unique(), unique_dates),
        key=lambda pair: pair[1],
    )
)

# Labels in sorted order, the tile ids to process, and the raw datetimes in the same order.
date_order_to_consider = np.array([*date_dict.values()])
tile_ids_train = competition_train_df['tile_id'].unique()
date2parse = list(date_dict)

In [None]:
def tile_id_fetch(tile_ids,date_specific,band,regular_update_field_dict):
    '''
        Computes the per-field size dictionary of a single tile.

        tile_ids : id of the tile, matched against competition_train_df['tile_id'].
        date_specific, band, regular_update_field_dict : accepted but not used
            by this function.

        Returns the dictionary produced by get_aggregate_values for the tile's
        field-id raster, or an empty dict for tile 1951.
    '''
    # NOTE(review): tile 1951 is skipped without explanation — presumably a
    # known-bad tile; confirm.
    if tile_ids == 1951:
        return {}

    rows_for_tile = competition_train_df[competition_train_df['tile_id']==tile_ids]
    return get_aggregate_values(attain_field_labels(rows_for_tile))


In [None]:


bands = ['B01']

for band in bands:
    overall_date_dictionary = {}

    # tile_id_fetch ignores both the date and the band, so a single pass over
    # the first date is enough to collect every field's size.
    for date_specific in tqdm(date_order_to_consider[:1]):

        regular_update_field_dict = {}

        # One task per tile; each returns {field_id: size} for that tile.
        tile_field_aggregate = Parallel(n_jobs=-1, timeout=100000, backend="multiprocessing", verbose=1)(
            delayed(tile_id_fetch)(
                tile_ids=tile_id,
                date_specific=date_specific,
                band=band,
                regular_update_field_dict=regular_update_field_dict,
            )
            for tile_id in tile_ids_train
        )

        # Merge the per-tile dictionaries. Updating from the last tile to the
        # first keeps, for a field id seen in several tiles, the value from the
        # earliest tile — the same result (and key order) as
        # dict(ChainMap(*tile_field_aggregate)), without a chained lookup per key.
        # NOTE(review): a field spanning several tiles therefore keeps only one
        # tile's share rather than the sum — confirm this is intended.
        for tile_fields in reversed(tile_field_aggregate):
            regular_update_field_dict.update(tile_fields)
        print(len(regular_update_field_dict))

        overall_date_dictionary['size_of_field'] = regular_update_field_dict

        gc.collect()

    df = pd.DataFrame(overall_date_dictionary).reset_index().rename({'index':'Field_id'},axis=1)




  0%|          | 0/1 [00:00<?, ?it/s][Parallel(n_jobs=-1)]: Using backend MultiprocessingBackend with 32 concurrent workers.


Field id  114 percentage  0.0005645751953125
Field id  6149 percentage  0.0006256103515625
Field id  25291 percentage  0.0093231201171875
Field id  5901 percentage  0.0133209228515625
Field id  1640 percentage  6.103515625e-05
Field id  231 percentage  0.0001373291015625
Field id  14643 percentage  0.0020294189453125
Field id  4881 percentage  0.0456390380859375
Field id  4130 percentage  0.0021514892578125
Field id  4696 percentage  0.0008087158203125
Field id  246 percentage  0.0029144287109375
Field id  35013 percentage  0.0005035400390625
Field id  25186 percentage  0.0078887939453125
Field id  4542 percentage  1.52587890625e-05
Field id  11012 percentage  0.002685546875
Field id  5466 percentage  0.0136260986328125
Field id  3338 percentage  0.0175628662109375
Field id  296 percentage  0.0013427734375
Field id  5362 percentage  0.00311279296875
Field id  1934 percentage  0.032196044921875
Field id  25046 percentage  0.0030517578125
Field id  11928 percentage  0.0160064697265625
Fi

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Field id  36231 percentage  0.0020904541015625
Field id  16764 percentage  0.002044677734375
Field id  67617 percentage  0.0153045654296875
Field id  96264 percentage  0.05828857421875
Field id  890 percentage  0.0005950927734375
Field id  81142 percentage  0.000244140625
Field id  115051 percentage  0.0059356689453125
Field id  35053 percentage  0.0135345458984375
Field id  112646 percentage  0.0443267822265625
Field id  100960 percentage  0.0103607177734375
Field id  48735 percentage  0.002227783203125
Field id  91473 percentage  0.016937255859375
Field id  78315 percentage  0.002960205078125
Field id  23179 percentage  0.009796142578125
Field id  21422 percentage  0.0005950927734375
Field id  75548 percentage  0.0127105712890625
Field id  98744 percentage  0.0020294189453125
Field id  69795 percentage  0.003082275390625
Field id  41091 percentage  0.0140533447265625
Field id  111278 percentage  0.000457763671875
Field id  94108 percentage  0.0017547607421875
Field id  30291 percenta

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Field id  61085 percentage  0.04620361328125
Field id  60161 percentage  0.0059661865234375
Field id  73787 percentage  0.000274658203125
Field id  19529 percentage  0.006866455078125
Field id  49405 percentage  0.00091552734375
Field id  65569 percentage  0.0026397705078125
Field id  49238 percentage  0.00201416015625
Field id  106062 percentage  0.0263519287109375
Field id  12308 percentage  0.0156097412109375
Field id  91721 percentage  0.031585693359375
Field id  48691 percentage  0.028656005859375
Field id  10847 percentage  0.0052490234375
Field id  12474 percentage  0.0149688720703125
Field id  5542 percentage  0.0046539306640625
Field id  43157 percentage  0.0012969970703125
Field id  33376 percentage  0.0050506591796875
Field id  64099 percentage  0.003021240234375
Field id  110267 percentage  0.0059356689453125
Field id  14086 percentage  0.0002593994140625
Field id  60349 percentage  0.01275634765625
Field id  62583 percentage  0.022613525390625
Field id  118824 percentage  

[Parallel(n_jobs=-1)]: Done 386 tasks      | elapsed:    8.3s


Field id  89255 percentage  0.0010223388671875
Field id  11065 percentage  0.0150909423828125
Field id  23851 percentage  0.0011138916015625
Field id  33904 percentage  0.000946044921875
Field id  110788 percentage  0.0210113525390625
Field id  16020 percentage  4.57763671875e-05
Field id  8262 percentage  0.0010833740234375
Field id  53287 percentage  0.00079345703125
Field id  90252 percentage  0.000885009765625
Field id  91074 percentage  0.00054931640625
Field id  22417 percentage  0.0578765869140625
Field id  27992 percentage  0.042572021484375
Field id  53145 percentage  0.0024871826171875
Field id  1868 percentage  0.0021820068359375
Field id  51674 percentage  0.007965087890625
Field id  51711 percentage  0.0599517822265625
Field id  57833 percentage  0.0007781982421875
Field id  38411 percentage  0.0348052978515625
Field id  111674 percentage  0.0189208984375
Field id  57727 percentage  0.0106353759765625
Field id  86615 percentage  0.0024566650390625
Field id  1143 percentage

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Field id  59472 percentage  0.008453369140625
Field id  114973 percentage  0.0007171630859375
Field id  84722 percentage  0.0041961669921875
Field id  39355 percentage  0.0032196044921875
Field id  107323 percentage  0.002349853515625
Field id  98732 percentage  0.0012359619140625
Field id  86137 percentage  0.000213623046875
Field id  66199 percentage  0.030609130859375
Field id  21737 percentage  0.0045928955078125
Field id  55377 percentage  0.082000732421875
Field id  104368 percentage  0.00018310546875
Field id  72869 percentage  0.0051422119140625
Field id  18793 percentage  0.0011138916015625
Field id  80777 percentage  0.0062713623046875
Field id  68035 percentage  0.00457763671875
Field id  90356 percentage  0.034149169921875
Field id  105572 percentage  0.00848388671875
Field id  61306 percentage  6.103515625e-05
Field id  118202 percentage  0.0013885498046875
Field id  108226 percentage  0.0052642822265625
Field id  69468 percentage  0.00347900390625
Field id  70292 percenta

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Field id  114995 percentage  0.000885009765625
Field id  20376 percentage  0.001708984375
Field id  37447 percentage  0.00146484375
Field id  107158 percentage  0.0015411376953125
Field id  84485 percentage  0.0172882080078125
Field id  45342 percentage  0.0003204345703125
Field id  84627 percentage  0.0044708251953125
Field id  115975 percentage  0.0041656494140625
Field id  89419 percentage  0.003753662109375
Field id  3287 percentage  0.0005035400390625
Field id  11339 percentage  0.02911376953125
Field id  101302 percentage  0.0151824951171875
Field id  16512 percentage  0.0028839111328125
Field id  107373 percentage  0.0005340576171875
Field id  95476 percentage  0.010101318359375
Field id  54978 percentage  9.1552734375e-05
Field id  61386 percentage  0.0365447998046875
Field id  39106 percentage  0.00982666015625
Field id  28190 percentage  0.0042724609375
Field id  106894 percentage  0.018096923828125
Field id  119638 percentage  0.0089111328125
Field id  45704 percentage  0.00

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Field id  99686 percentage  0.0028076171875
Field id  66625 percentage  0.0030517578125
Field id  79952 percentage  0.037139892578125
Field id  71032 percentage  0.026580810546875
Field id  115148 percentage  0.00439453125
Field id  97721 percentage  0.00396728515625
Field id  53022 percentage  0.001708984375
Field id  10537 percentage  0.000946044921875
Field id  101299 percentage  0.006378173828125
Field id  115781 percentage  0.0105438232421875
Field id  65612 percentage  0.0025482177734375
Field id  80658 percentage  0.0161285400390625
Field id  41858 percentage  0.003936767578125
Field id  17571 percentage  0.0052337646484375
Field id  40339 percentage  0.0046844482421875
Field id  18200 percentage  0.002288818359375
Field id  24739 percentage  0.02825927734375
Field id  94908 percentage  0.0004425048828125
Field id  12239 percentage  0.0022735595703125
Field id  66662 percentage  0.007049560546875
Field id  121192 percentage  0.0320587158203125
Field id  119806 percentage  0.0026

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Field id  71401 percentage  0.00274658203125
Field id  86029 percentage  0.0016326904296875
Field id  81302 percentage  0.0004119873046875
Field id  2846 percentage  0.0223846435546875
Field id  54334 percentage  0.001129150390625
Field id  75505 percentage  0.0015106201171875
Field id  59410 percentage  0.0069580078125
Field id  18421 percentage  0.002197265625
Field id  72283 percentage  0.0060272216796875
Field id  87982 percentage  0.0009613037109375
Field id  67521 percentage  0.0032501220703125
Field id  19571 percentage  9.1552734375e-05
Field id  46061 percentage  0.001129150390625
Field id  119852 percentage  0.0450897216796875
Field id  82692 percentage  0.01055908203125
Field id  42838 percentage  0.0067901611328125
Field id  81718 percentage  0.0015411376953125
Field id  120261 percentage  0.021240234375
Field id  2191 percentage  0.000213623046875
Field id  113319 percentage  0.00018310546875
Field id  3743 percentage  0.0092315673828125
Field id  114826 percentage  0.0010

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Field id  3483 percentage  0.0203094482421875
Field id  107275 percentage  4.57763671875e-05
Field id  68934 percentage  0.00177001953125
Field id  57307 percentage  0.017333984375
Field id  115749 percentage  0.0009613037109375
Field id  120975 percentage  0.001495361328125
Field id  69960 percentage  0.0090484619140625
Field id  86550 percentage  0.000823974609375
Field id  65151 percentage  0.00689697265625
Field id  29747 percentage  0.0060577392578125
Field id  105980 percentage  0.0059967041015625
Field id  83465 percentage  0.00238037109375
Field id  69509 percentage  0.0013427734375
Field id  20457 percentage  0.01092529296875
Field id  79851 percentage  0.0731201171875
Field id  73362 percentage  0.0054779052734375
Field id  117372 percentage  0.0033416748046875
Field id  24790 percentage  0.0028076171875
Field id  100054 percentage  0.0009918212890625
Field id  10300 percentage  0.0356597900390625
Field id  3494 percentage  0.0036163330078125
Field id  89375 percentage  0.004

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Field id  109491 percentage  0.00213623046875
Field id  42756 percentage  0.005096435546875
Field id  89456 percentage  0.0589752197265625
Field id  37101 percentage  0.023468017578125
Field id  11425 percentage  0.0023040771484375
Field id  53250 percentage  0.00567626953125
Field id  68674 percentage  0.0021514892578125
Field id  3540 percentage  0.024993896484375
Field id  1965 percentage  0.0035247802734375
Field id  73197 percentage  0.011932373046875
Field id  14599 percentage  0.0337371826171875
Field id  15649 percentage  0.0006866455078125
Field id  21480 percentage  0.005126953125
Field id  32687 percentage  0.002105712890625
Field id  21973 percentage  0.005889892578125
Field id  108804 percentage  0.01708984375
Field id  67973 percentage  0.001190185546875
Field id  80242 percentage  0.001708984375
Field id  45626 percentage  0.0028839111328125
Field id  23172 percentage  0.00439453125
Field id  8359 percentage  0.05615234375
Field id  67023 percentage  0.0003662109375
Fiel

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Field id  56209 percentage  0.0110626220703125
Field id  76367 percentage  0.0235748291015625
Field id  120381 percentage  0.0007476806640625
Field id  9925 percentage  0.00738525390625
Field id  70302 percentage  0.0067291259765625
Field id  110711 percentage  0.01678466796875
Field id  33686 percentage  0.0184478759765625
Field id  11443 percentage  0.0035400390625
Field id  49580 percentage  0.0375213623046875
Field id  38551 percentage  0.0017547607421875
Field id  82267 percentage  0.0056304931640625
Field id  99050 percentage  0.0035247802734375
Field id  67481 percentage  0.006988525390625
Field id  36673 percentage  0.090728759765625
Field id  67638 percentage  0.0830535888671875
Field id  76668 percentage  0.0039825439453125
Field id  75970 percentage  0.001708984375
Field id  54218 percentage  0.0120849609375
Field id  9769 percentage  0.0929412841796875
Field id  84639 percentage  0.009124755859375
Field id  102319 percentage  0.0047149658203125
Field id  22523 percentage  0

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Field id  64289 percentage  0.0031890869140625
Field id  108185 percentage  0.0028228759765625
Field id  32457 percentage  0.0006866455078125
Field id  119668 percentage  0.000457763671875
Field id  103055 percentage  0.0422515869140625
Field id  83193 percentage  0.006378173828125
Field id  92839 percentage  0.042236328125
Field id  81247 percentage  0.0552520751953125
Field id  29983 percentage  0.034942626953125
Field id  63165 percentage  0.0048675537109375
Field id  119143 percentage  0.0005950927734375
Field id  47034 percentage  0.04840087890625
Field id  61675 percentage  0.0558624267578125
Field id  64043 percentage  0.0035400390625
Field id  8890 percentage  0.0065460205078125
Field id  120205 percentage  0.0026397705078125
Field id  30398 percentage  0.000274658203125
Field id  39654 percentage  0.0023193359375
Field id  86881 percentage  0.0018157958984375
Field id  113747 percentage  0.0098724365234375
Field id  119957 percentage  0.0024871826171875
Field id  109730 percen

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Field id  37442 percentage  0.02142333984375
Field id  103712 percentage  0.011810302734375
Field id  23461 percentage  0.014129638671875
Field id  44098 percentage  0.0013427734375
Field id  63046 percentage  0.001678466796875
Field id  92100 percentage  0.0098876953125
Field id  39599 percentage  0.00048828125
Field id  76277 percentage  0.01953125
Field id  74521 percentage  0.0010986328125
Field id  69400 percentage  0.004608154296875
Field id  79492 percentage  0.0104827880859375
Field id  81963 percentage  0.001922607421875
Field id  105958 percentage  0.008697509765625
Field id  64190 percentage  0.0174407958984375
Field id  120140 percentage  0.01788330078125
Field id  38930 percentage  0.0166015625
Field id  50803 percentage  0.0029144287109375
Field id  90772 percentage  0.0221710205078125
Field id  87441 percentage  0.00018310546875
Field id  24848 percentage  0.0034332275390625
Field id  66861 percentage  0.0071868896484375
Field id  45648 percentage  0.0076751708984375
Fie

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Field id  28107 percentage  0.00604248046875
Field id  18982 percentage  0.04473876953125
Field id  67413 percentage  0.001373291015625
Field id  106120 percentage  0.0277252197265625
Field id  30348 percentage  0.008209228515625
Field id  113844 percentage  0.0319366455078125
Field id  42196 percentage  7.62939453125e-05
Field id  61163 percentage  0.031402587890625
Field id  27002 percentage  0.0066070556640625
Field id  12801 percentage  0.002532958984375
Field id  10466 percentage  0.0051422119140625
Field id  74149 percentage  0.003204345703125
Field id  48565 percentage  0.00390625
Field id  119089 percentage  0.0040130615234375
Field id  99840 percentage  0.0034637451171875
Field id  29740 percentage  0.0078277587890625
Field id  27342 percentage  0.063690185546875
Field id  45635 percentage  0.0174407958984375
Field id  118662 percentage  0.001800537109375
Field id  109378 percentage  0.035064697265625
Field id  61184 percentage  0.003509521484375
Field id  50044 percentage  0.

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Field id  3691 percentage  0.004425048828125
Field id  120230 percentage  0.0023651123046875
Field id  57750 percentage  0.05615234375
Field id  73020 percentage  0.0056915283203125
Field id  46 percentage  0.01763916015625
Field id  57796 percentage  0.0033111572265625
Field id  107682 percentage  0.0010528564453125
Field id  10301 percentage  0.0012664794921875
Field id  68872 percentage  0.001617431640625
Field id  34148 percentage  0.008819580078125
Field id  19818 percentage  0.00244140625
Field id  56509 percentage  0.0487060546875
Field id  103907 percentage  0.001373291015625
Field id  103073 percentage  0.06475830078125
Field id  94354 percentage  0.00030517578125
Field id  114104 percentage  0.001922607421875
Field id  14744 percentage  0.0037078857421875
Field id  22692 percentage  0.0009765625
Field id  95462 percentage  0.005859375
Field id  120813 percentage  0.0114593505859375
Field id  105413 percentage  0.0019683837890625
Field id  44878 percentage  0.01171875
Field id

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Field id  103941 percentage  0.0233001708984375
Field id  63050 percentage  0.004730224609375
Field id  46268 percentage  0.0086212158203125
Field id  51431 percentage  0.001678466796875
Field id  67247 percentage  0.0202484130859375
Field id  495 percentage  0.0001220703125
Field id  76084 percentage  0.026763916015625
Field id  86288 percentage  0.000152587890625
Field id  7508 percentage  0.00091552734375
Field id  10739 percentage  0.0047760009765625
Field id  102753 percentage  0.0066680908203125
Field id  25913 percentage  0.0113525390625
Field id  58527 percentage  0.0013427734375
Field id  114955 percentage  0.04937744140625
Field id  104036 percentage  0.0001220703125
Field id  8350 percentage  0.0030059814453125
Field id  838 percentage  0.0064239501953125
Field id  119531 percentage  0.001861572265625
Field id  55159 percentage  0.0037994384765625
Field id  78524 percentage  0.0042724609375
Field id  5322 percentage  0.078765869140625
Field id  64801 percentage  0.0280914306

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Field id  94843 percentage  0.0336456298828125
Field id  57027 percentage  0.0030975341796875
Field id  105181 percentage  0.00164794921875
Field id  21708 percentage  0.00128173828125
Field id  110026 percentage  0.0042266845703125
Field id  14104 percentage  0.018890380859375
Field id  5483 percentage  0.0026397705078125
Field id  32837 percentage  0.021728515625
Field id  87718 percentage  0.001007080078125
Field id  53966 percentage  0.000640869140625
Field id  115018 percentage  0.0003814697265625
Field id  101466 percentage  0.2445526123046875
Field id  15798 percentage  1.52587890625e-05
Field id  108379 percentage  0.008148193359375
Field id  63581 percentage  0.0008087158203125
Field id  48383 percentage  0.007232666015625
Field id  103791 percentage  0.00091552734375
Field id  92060 percentage  0.0027618408203125
Field id  98040 percentage  0.00189208984375
Field id  23631 percentage  0.009613037109375
Field id  92438 percentage  0.004669189453125
Field id  56442 percentage  

[Parallel(n_jobs=-1)]: Done 2386 tasks      | elapsed:   49.9s


Field id  109665 percentage  0.0044403076171875
Field id  101535 percentage  0.05401611328125
Field id  1546 percentage  0.0085601806640625
Field id  20518 percentage  0.002777099609375
Field id  42026 percentage  0.0064849853515625
Field id  82836 percentage  0.005584716796875
Field id  88295 percentage  4.57763671875e-05
Field id  49154 percentage  0.001953125
Field id  103691 percentage  0.026153564453125
Field id  65262 percentage  0.0089874267578125
Field id  8592 percentage  0.002777099609375
Field id  10133 percentage  0.0020904541015625
Field id  119064 percentage  0.003875732421875
Field id  12088 percentage  0.0009765625
Field id  100169 percentage  0.0021820068359375
Field id  56130 percentage  0.0442352294921875
Field id  119899 percentage  0.0249786376953125
Field id  15489 percentage  0.00048828125
Field id  117244 percentage  0.0051116943359375
Field id  24031 percentage  0.0088653564453125
Field id  67775 percentage  0.0012969970703125
Field id  116935 percentage  0.007

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Field id  33348 percentage  0.0306549072265625
Field id  98426 percentage  0.001495361328125
Field id  112887 percentage  0.002197265625
Field id  92591 percentage  0.0013427734375
Field id  104466 percentage  0.0027008056640625
Field id  110578 percentage  0.02105712890625
Field id  2843 percentage  0.004486083984375
Field id  97858 percentage  0.082733154296875
Field id  24323 percentage  0.0003814697265625
Field id  2746 percentage  0.0009307861328125
Field id  85889 percentage  0.0026702880859375
Field id  27978 percentage  3.0517578125e-05
Field id  111400 percentage  0.0055694580078125
Field id  37131 percentage  0.003021240234375
Field id  29618 percentage  0.0520477294921875
Field id  72628 percentage  0.0043182373046875
Field id  114153 percentage  0.00018310546875
Field id  2653 percentage  0.0005645751953125
Field id  94608 percentage  0.01446533203125
Field id  52035 percentage  0.0091705322265625
Field id  102513 percentage  0.0003509521484375
Field id  110634 percentage  

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 1/1 [01:27<00:00, 87.66s/it]

87070





In [None]:
# Preview the first rows of the per-field size table.
df.head(10)

Unnamed: 0,Field_id,size_of_field
0,1,9.490967
1,2,0.350952
2,3,11.871338
3,4,2.304077
4,6,2.685547
5,7,6.393433
6,8,45.471191
7,9,18.539429
8,12,0.244141
9,13,1.144409


In [None]:
# Write the per-field size table to disk without the row index.
df.to_csv(
    '/root/sentinel2_updated/Train_data_prep/merged_train_data/size_of_field_train.csv',
    index=False,
)

In [None]:
# Summary statistics of the field sizes (1000 * pixel count / 65536 per field).
df['size_of_field'].describe()

count    87070.000000
mean        11.527811
std         20.558929
min          0.015259
25%          1.510620
50%          3.906250
75%         12.039185
max        509.521484
Name: size_of_field, dtype: float64