In [None]:
# Colab only: mount Google Drive at /content/drive so the CSV exports stored there can be read.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
cd drive/MyDrive/Landsat7_time_series

/content/drive/.shortcut-targets-by-id/1T0ZHz2hBMlKL9gRA9kSrX6BB7GGwmKw0/Landsat7_time_series


In [None]:
!pip install sktime[all_extras]

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sktime[all_extras]
  Downloading sktime-0.12.1-py3-none-any.whl (6.8 MB)
[K     |████████████████████████████████| 6.8 MB 4.3 MB/s 
[?25hCollecting numba>=0.53
  Downloading numba-0.55.2-cp37-cp37m-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 37.8 MB/s 
[?25hCollecting statsmodels>=0.12.1
  Downloading statsmodels-0.13.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.8 MB)
[K     |████████████████████████████████| 9.8 MB 49.9 MB/s 
[?25hCollecting deprecated>=1.2.13
  Downloading Deprecated-1.2.13-py2.py3-none-any.whl (9.6 kB)
Collecting pyod>=0.8.0
  Downloading pyod-1.0.2.tar.gz (122 kB)
[K     |████████████████████████████████| 122 kB 61.7 MB/s 
[?25hCollecting stumpy>=1.5.1
  Downloading stumpy-1.11.1-py3-none-any.whl (136 kB)
[K     |████████████████████████████████| 136 kB 60.5 MB/s 
[?25h

In [None]:
import pandas as pd
import datetime

In [None]:
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [12, 12]
plt.rcParams['figure.dpi'] = 72

**Utility Functions**

In [None]:
def read(name):
  """Load a CSV export and attach a readable acquisition time.

  name: anything ``pd.read_csv`` accepts (path or file-like object). The data
  must contain a ``timestamp`` column expressed in epoch milliseconds.

  Returns the loaded DataFrame with an extra ``DOY`` column holding the
  ``timestamp`` values converted to datetimes. Despite the name, ``DOY`` is a
  full timestamp, not a day-of-year number.
  """
  frame = pd.read_csv(name)
  readable_time = pd.to_datetime(frame['timestamp'], unit='ms')
  frame['DOY'] = readable_time
  return frame

In [None]:
def sort_group_NDVI(df, threshold,id):
  """Average NDVI per month and keep only series with enough observed months.

  df: frame with ``DOY`` (datetime), ``timestamp``, ``NDVI`` and the grouping
      column (``osm_id`` or ``.geo``).
  threshold: minimum number of distinct months a group must have to be kept.
  id: truthy -> group by ``osm_id``; falsy -> group by ``.geo``.

  Returns ``(monthly, counts)``:
    monthly: one row per (month start, group) with the mean NDVI, restricted
             to groups that reach ``threshold`` months.
    counts:  one row per kept group with its month count in ``count``.
  """
  key = 'osm_id' if id else '.geo'
  ordered = df.sort_values([key, 'timestamp'])
  # "MS" bins every observation to the first day of its month.
  monthly = (ordered
             .groupby([pd.Grouper(key='DOY', freq="MS"), key])['NDVI']
             .mean()
             .reset_index(name='NDVI'))
  months_per_group = monthly.groupby([key])['DOY'].count().reset_index(name='count')
  enough_months = months_per_group[months_per_group['count'] >= threshold]
  keep_rows = monthly[key].isin(enough_months[key].values)
  return monthly[keep_rows], enough_months

In [None]:
def interpolate(df,id, start='2018-01-01', end='2020-12-31'):
  """Put every group on a complete monthly grid and fill the missing NDVI values.

  df: monthly frame with ``DOY`` (month-start datetimes), ``NDVI`` and the
      grouping column; each (group, ``DOY``) pair must be unique.
  id: truthy -> groups are ``osm_id``; falsy -> groups are ``.geo``.
  start, end: bounds of the month-start grid. The defaults give the 36 months
      of 2018-2020.

  Returns a new frame with one row per (group, month). Gaps inside a series
  are linearly interpolated; leading and trailing gaps take the nearest
  observed value (``limit_direction='both'``).
  """
  key = 'osm_id' if id else '.geo'
  rng = pd.date_range(start, end, freq = 'MS')
  mux = pd.MultiIndex.from_product([df[key].unique(), rng], names=[key,'DOY'])
  df = df.set_index([key,'DOY']).reindex(mux).reset_index()
  # transform (not apply) keeps the result aligned with df's own index on every
  # pandas version; on pandas >= 2.0 apply prepends the group key to the index
  # and the assignment below fails.
  df['NDVI'] = (df.groupby(key)['NDVI']
                   .transform(lambda x: x.interpolate(method='linear', limit_direction='both')))
  return df

In [None]:
def F1_4(preds,y,t,label):
  """Print precision, recall and F1 of class ``t`` (one-vs-rest).

  preds: predicted class ids, indexable by position.
  y:     true class ids, indexable by position; its length drives the loop.
  t:     the class id treated as "positive".
  label: display name used in the printed lines.

  Any metric whose denominator is zero is reported as 0. Nothing is returned.
  """
  tp = fp = fn = 0
  for idx in range(len(y)):
    predicted_t = preds[idx] == t
    actual_t = y[idx] == t
    if predicted_t and actual_t:
      tp += 1
    elif predicted_t:
      fp += 1
    elif actual_t:
      fn += 1
  pr = tp/(fp+tp) if fp+tp != 0 else 0
  re = tp/(fn+tp) if fn+tp != 0 else 0
  f1 = 2*pr*re/(pr+re) if pr+re != 0 else 0
  print("Precision for ",label,": ", pr)
  print("Recall for ",label,": ", re)
  print("F1 Score for ",label,": ", f1)

def pred_and_report(model,X,y,labels):
  """Predict with ``model`` on ``X`` and print accuracy plus per-class metrics.

  model:  any object exposing ``predict(X)``.
  X:      the samples to classify; ``len(X)`` is the accuracy denominator.
  y:      true class ids, indexable by position.
  labels: display names; position ``k`` names class id ``k``.
  """
  preds = model.predict(X)
  # Number of positions where the prediction agrees with the ground truth.
  correct = sum(1 for pos in range(len(y)) if preds[pos] == y[pos])
  print("Accuracy: ", correct/len(X))
  for class_id in range(len(labels)):
    F1_4(preds, y, class_id, labels[class_id])

**Labels**

In [None]:
# Row indices into labels.csv, one list per land-cover class. Later cells turn an
# index into an osm id through `orig` (e.g. `[orig[i] for i in farm]`).
# NOTE(review): `noisy` shares indices with `farm` and `mixed`; `mixed`, `grass` and
# `noisy` are not referenced by the visible cells — confirm whether they are still needed.
farm = [9,10,11,12,22,23,28,29,30,31,32,33,34,35,36,37,38,40,41,43,44,46,48,49,50,51,52,54,60,61,62,63,64,66,67,69,70,71,72,73,75,76,77,79,81,82,84,85,86]
forest = [1,2,68,96,97]
mixed = [18,19,24,45,78,80]
grass = [88,89,90,91,95]
noisy = [29,24,30,32,36,38,40,44,46,48,51,52]

In [None]:

# The 'label' column of labels.csv holds osm ids, one per row.
label_help = pd.read_csv('labels.csv')
print(label_help)

# orig: row index -> osm id
orig = label_help.to_dict()['label']
# labels: osm id -> row index (the inverse of orig)
labels = {osm_id: row for row, osm_id in orig.items()}
print(orig)

print(labels)

    system:index      label                                    .geo
0              0    1587712  {"type":"MultiPoint","coordinates":[]}
1              1  150926108  {"type":"MultiPoint","coordinates":[]}
2              2  150926112  {"type":"MultiPoint","coordinates":[]}
3              3  343122095  {"type":"MultiPoint","coordinates":[]}
4              4  385133710  {"type":"MultiPoint","coordinates":[]}
..           ...        ...                                     ...
93            93  419912251  {"type":"MultiPoint","coordinates":[]}
94            94  419913475  {"type":"MultiPoint","coordinates":[]}
95            95  419918719  {"type":"MultiPoint","coordinates":[]}
96            96  423594858  {"type":"MultiPoint","coordinates":[]}
97            97  423734337  {"type":"MultiPoint","coordinates":[]}

[98 rows x 3 columns]
{0: 1587712, 1: 150926108, 2: 150926112, 3: 343122095, 4: 385133710, 5: 517715920, 6: 518297895, 7: 519689360, 8: 184103301, 9: 751346174, 10: 751346381, 11: 752

# **Pre-Processing**

## Read pixels

In [None]:
# Quick check of read() on a separate pixel export.
# NOTE(review): `test_pixels` is not referenced by any later visible cell.
test_pixels = read("/content/drive/MyDrive/Pixels/pixels.csv")
print(test_pixels.head())


                                  system:index   B1   B2   B3    B4    B5  \
0  LE07_141048_20010125_00000000000000000001_0  554  672  757  1405  1745   
1  LE07_141048_20010125_00000000000000000001_1  511  649  694  1318  1556   
2  LE07_141048_20010125_00000000000000000001_2  554  601  673  1289  1394   
3  LE07_141048_20010125_00000000000000000001_3  511  601  631  1260  1421   
4  LE07_141048_20010125_00000000000000000001_4  533  626  736  1376  1691   

     B6    B7      NDBI      NDVI      NDWI                  SENSING_TIME  \
0  2980  1101  0.107937  0.299723 -0.352913  2001-01-25T04:41:39.6653809Z   
1  2985   908  0.082811  0.310139 -0.340112  2001-01-25T04:41:39.6653809Z   
2  2975   824  0.039135  0.313965 -0.364021  2001-01-25T04:41:39.6653809Z   
3  2970   796  0.060052  0.332628 -0.354111  2001-01-25T04:41:39.6653809Z   
4  2985   990  0.102706  0.303030 -0.374625  2001-01-25T04:41:39.6653809Z   

      timestamp                                               .geo  \
0  9

In [None]:
# Load all labelled pixels, then keep the rows whose osm_id belongs to a farm polygon.
pixels = read('all_pixel.csv')
print(pixels.head())
farm_ids = [orig[i] for i in farm]    # osm ids of the farm polygons
forest_ids = [orig[i] for i in forest]    # osm ids of the forest polygons (used when building forest_pixels)
farm_pixels = pixels[pixels['osm_id'].isin(farm_ids)]
print(len(farm_pixels))
farm_pixels.head()

                                  system:index   B1   B2   B3    B4    B5  \
0  LE07_139042_20181009_00000000000000000054_0  643  741  560  2950  1516   
1  LE07_139042_20181009_00000000000000000054_1  611  741  560  2992  1441   
2  LE07_139042_20181009_00000000000000000054_2  675  809  560  3033  1594   
3  LE07_139042_20181009_00000000000000000054_3  513  741  622  2908  1517   
4  LE07_139042_20181009_00000000000000000054_4  642  741  590  3076  1517   

     B6   B7      NDBI      NDVI      NDWI                  SENSING_TIME  \
0  2980  681 -0.321093  0.680912 -0.598483  2018-10-09T04:33:43.9554309Z   
1  2980  603 -0.349876  0.684685 -0.603000  2018-10-09T04:33:43.9554309Z   
2  2980  681 -0.311001  0.688283 -0.578865  2018-10-09T04:33:43.9554309Z   
3  2980  642 -0.314350  0.647592 -0.593861  2018-10-09T04:33:43.9554309Z   
4  2980  721 -0.339430  0.678123 -0.611737  2018-10-09T04:33:43.9554309Z   

      osm_id     timestamp                                               .geo  \

Unnamed: 0,system:index,B1,B2,B3,B4,B5,B6,B7,NDBI,NDVI,NDWI,SENSING_TIME,osm_id,timestamp,.geo,DOY
213202,LE07_143049_20180106_00000000000000000046_0,795,930,990,3845,2259,3015,1098,-0.25983,0.590486,-0.610471,2018-01-06T05:05:59.5245116Z,696251987,1515215000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2018-01-06 05:05:59.520
213203,LE07_143049_20180106_00000000000000000046_1,861,1101,1365,3127,2970,3025,1923,-0.02575,0.392253,-0.479186,2018-01-06T05:05:59.5245116Z,696251987,1515215000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2018-01-06 05:05:59.520
213204,LE07_143049_20180106_00000000000000000046_2,828,1033,1303,3442,3026,3020,1866,-0.064317,0.45079,-0.538324,2018-01-06T05:05:59.5245116Z,696251987,1515215000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2018-01-06 05:05:59.520
213205,LE07_143049_20180106_00000000000000000046_3,828,1033,1209,3442,2615,3025,1468,-0.136536,0.480112,-0.538324,2018-01-06T05:05:59.5245116Z,696251987,1515215000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2018-01-06 05:05:59.520
213206,LE07_143049_20180106_00000000000000000046_4,861,1101,1522,2992,3381,3034,2293,0.061039,0.325654,-0.462008,2018-01-06T05:05:59.5245116Z,696251987,1515215000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2018-01-06 05:05:59.520


In [None]:
# Barren-land pixels come from their own export file.
barren_pixels = read('L7_barren_pixel.csv')
print(len(barren_pixels))
barren_pixels.head()

1218825


Unnamed: 0,system:index,B1,B2,B3,B4,B5,B6,B7,NDBI,NDVI,NDWI,SENSING_TIME,osm_id,timestamp,.geo,DOY
0,LE07_139041_20180110_00000000000000001655_0,473,654,641,2089,2240,2913,1285,0.034881,0.530403,-0.52315,2018-01-10T04:38:01.6291177Z,546617071,1515559000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2018-01-10 04:38:01.630000128
1,LE07_139041_20180110_00000000000000001655_1,473,608,641,2054,2112,2913,1187,0.013922,0.524304,-0.543201,2018-01-10T04:38:01.6291177Z,546617071,1515559000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2018-01-10 04:38:01.630000128
2,LE07_139041_20180110_00000000000000001655_2,473,654,600,2089,1953,2908,1025,-0.033647,0.553737,-0.52315,2018-01-10T04:38:01.6291177Z,546617071,1515559000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2018-01-10 04:38:01.630000128
3,LE07_139041_20180110_00000000000000001655_3,516,608,600,2054,1794,2908,927,-0.067568,0.547852,-0.543201,2018-01-10T04:38:01.6291177Z,546617071,1515559000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2018-01-10 04:38:01.630000128
4,LE07_139041_20180110_00000000000000001655_4,473,608,600,2054,1666,2908,894,-0.104301,0.547852,-0.543201,2018-01-10T04:38:01.6291177Z,546617071,1515559000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2018-01-10 04:38:01.630000128


In [None]:
# Water pixels come from their own export file.
water_pixels = read('L7_water_pixel.csv')
print(len(water_pixels))
water_pixels.head()

1006986


Unnamed: 0,system:index,B1,B2,B3,B4,B5,B6,B7,NDBI,NDVI,NDWI,SENSING_TIME,osm_id,timestamp,.geo,DOY
0,LE07_139044_20201030_00000000000000000072_0,417,507,277,436,216,2703,60,-0.337423,0.223001,0.075292,2020-10-30T04:00:10.1892406Z,44582115,1604030000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2020-10-30 04:00:10.188999936
1,LE07_139044_20201030_00000000000000000072_1,454,467,312,436,172,2924,106,-0.434211,0.165775,0.03433,2020-10-30T04:00:10.1892406Z,44582115,1604030000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2020-10-30 04:00:10.188999936
2,LE07_139044_20201030_00000000000000000072_2,454,426,312,436,172,2939,106,-0.434211,0.165775,-0.011601,2020-10-30T04:00:10.1892406Z,44582115,1604030000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2020-10-30 04:00:10.188999936
3,LE07_139044_20201030_00000000000000000072_3,453,467,348,436,172,2995,106,-0.434211,0.112245,0.03433,2020-10-30T04:00:10.1892406Z,44582115,1604030000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2020-10-30 04:00:10.188999936
4,LE07_139044_20201030_00000000000000000072_4,453,467,312,436,172,2955,13,-0.434211,0.165775,0.03433,2020-10-30T04:00:10.1892406Z,44582115,1604030000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2020-10-30 04:00:10.188999936


In [None]:
# Forest pixels = forest-labelled rows of `pixels` plus the additional forest export.
forest_pixels = pixels[pixels['osm_id'].isin(forest_ids)]
temp = read('all_geo_fpixel_L7.csv')
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat is the
# equivalent call (row-wise, original indices kept).
forest_pixels = pd.concat([forest_pixels, temp])
print(len(forest_pixels))

6681825


## Sorting Groups by NDVI

**Univariate (NDVI)**

In [None]:
# Monthly mean NDVI per farm pixel ('.geo'); keep pixels observed in at least 24 distinct months.
pixels_farm_L7, vals_pixels_farm_L7 = sort_group_NDVI(farm_pixels,24,False)
print(len(pixels_farm_L7))
print(pixels_farm_L7)
print(vals_pixels_farm_L7)

665560
               DOY                                               .geo  \
0       2018-01-01  {"geodesic":false,"type":"Point","coordinates"...   
1       2018-01-01  {"geodesic":false,"type":"Point","coordinates"...   
2       2018-01-01  {"geodesic":false,"type":"Point","coordinates"...   
3       2018-01-01  {"geodesic":false,"type":"Point","coordinates"...   
4       2018-01-01  {"geodesic":false,"type":"Point","coordinates"...   
...            ...                                                ...   
1285248 2020-12-01  {"geodesic":false,"type":"Point","coordinates"...   
1285249 2020-12-01  {"geodesic":false,"type":"Point","coordinates"...   
1285250 2020-12-01  {"geodesic":false,"type":"Point","coordinates"...   
1285251 2020-12-01  {"geodesic":false,"type":"Point","coordinates"...   
1285252 2020-12-01  {"geodesic":false,"type":"Point","coordinates"...   

             NDVI  
0        0.173840  
1        0.196518  
2        0.196518  
3        0.141655  
4        0.15315

In [None]:
# Monthly mean NDVI per forest pixel ('.geo'). The month threshold here is 16,
# lower than the 24 used for the other classes.
pixels_forest_L7, vals_pixels_forest_L7 = sort_group_NDVI(forest_pixels,16,False)

print("pixels_forest_L7: ",pixels_forest_L7)    # DOY (month start), .geo and mean NDVI per row

print("vals_pixels_forest_L7: ",vals_pixels_forest_L7)    # number of months available per kept .geo




pixels_forest_L7:                 DOY                                               .geo  \
0       2018-01-01  {"geodesic":false,"type":"Point","coordinates"...   
1       2018-01-01  {"geodesic":false,"type":"Point","coordinates"...   
2       2018-01-01  {"geodesic":false,"type":"Point","coordinates"...   
3       2018-01-01  {"geodesic":false,"type":"Point","coordinates"...   
4       2018-01-01  {"geodesic":false,"type":"Point","coordinates"...   
...            ...                                                ...   
4888323 2020-12-01  {"geodesic":false,"type":"Point","coordinates"...   
4888324 2020-12-01  {"geodesic":false,"type":"Point","coordinates"...   
4888325 2020-12-01  {"geodesic":false,"type":"Point","coordinates"...   
4888326 2020-12-01  {"geodesic":false,"type":"Point","coordinates"...   
4888327 2020-12-01  {"geodesic":false,"type":"Point","coordinates"...   

             NDVI  
0        0.193798  
1        0.157088  
2        0.204357  
3        0.167044  
4   

In [None]:
# Monthly mean NDVI per barren pixel; keep pixels with at least 24 months of data.
pixels_bL7, vals_pixels_bL7 = sort_group_NDVI(barren_pixels,24,False)
print(len(pixels_bL7))

171630


In [None]:
# Monthly mean NDVI per water pixel; keep pixels with at least 24 months of data.
pixels_wL7, vals_pixels_wL7 = sort_group_NDVI(water_pixels,24,False)
print(len(pixels_wL7))

19966


In [None]:
# Stack the ('.geo' -> osm_id) pairs of the farm, barren and water pixels into one
# frame indexed by '.geo'. pd.concat replaces DataFrame.append, which was removed in
# pandas 2.0. Forest pixels are not part of this lookup.
geo_to_label = pd.concat(
    [frame[["osm_id", ".geo"]].set_index('.geo')
     for frame in (farm_pixels, barren_pixels, water_pixels)]
)
print(geo_to_label.head())
# DataFrame.to_dict() is column-oriented, so the result is
# {'osm_id': {geo_string: osm_id}}; look up a pixel with geo_to_label['osm_id'][geo].
# Each '.geo' string ends up as a single dictionary key even though it repeats in the frame.
geo_to_label = geo_to_label.to_dict()
geo_to_label.keys()

                                                       osm_id
.geo                                                         
{"geodesic":false,"type":"Point","coordinates":...  696251987
{"geodesic":false,"type":"Point","coordinates":...  696251987
{"geodesic":false,"type":"Point","coordinates":...  696251987
{"geodesic":false,"type":"Point","coordinates":...  696251987
{"geodesic":false,"type":"Point","coordinates":...  696251987


dict_keys(['osm_id'])

**Interpolate**

In [None]:
# Place every pixel on the full monthly grid and fill the missing months.
ifarmpL7 = interpolate(pixels_farm_L7, False)
iforestpL7 = interpolate(pixels_forest_L7, False)
ibarrenpL7 = interpolate(pixels_bL7, False)
iwaterpL7 = interpolate(pixels_wL7, False)
# Row counts, one per line, in farm / forest / barren / water order.
print(*map(len, (ifarmpL7, iforestpL7, ibarrenpL7, iwaterpL7)), sep='\n')

932040
435852
245772
29340


Generating training-ready datasets

In [None]:
print("ungrouped forest: ", iforestpL7)

# Group each class by '.geo': every group is one pixel followed over the monthly grid.
gfarmp = ifarmpL7.groupby('.geo')
gforestp = iforestpL7.groupby('.geo')
gbarrenp = ibarrenpL7.groupby('.geo')
gwaterp = iwaterpL7.groupby('.geo')

# One NDVI array per pixel. The per-group debug prints of the forest loop were
# removed: they emitted one block per pixel and overflowed the cell output.
farm_X = [group['NDVI'].values for _, group in gfarmp]
forest_X = [group['NDVI'].values for _, group in gforestp]
barren_X = [group['NDVI'].values for _, group in gbarrenp]
water_X = [group['NDVI'].values for _, group in gwaterp]
print(len(farm_X))
print(len(forest_X))
print(len(barren_X))
print(len(water_X))


print(forest_X[:10])

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
430537  {"geodesic":false,"type":"Point","coordinates"... 2019-02-01  0.546496
430538  {"geodesic":false,"type":"Point","coordinates"... 2019-03-01  0.562081
430539  {"geodesic":false,"type":"Point","coordinates"... 2019-04-01  0.449116
430540  {"geodesic":false,"type":"Point","coordinates"... 2019-05-01  0.418219
430541  {"geodesic":false,"type":"Point","coordinates"... 2019-06-01  0.435820
430542  {"geodesic":false,"type":"Point","coordinates"... 2019-07-01  0.484795
430543  {"geodesic":false,"type":"Point","coordinates"... 2019-08-01  0.533770
430544  {"geodesic":false,"type":"Point","coordinates"... 2019-09-01  0.582745
430545  {"geodesic":false,"type":"Point","coordinates"... 2019-10-01  0.631720
430546  {"geodesic":false,"type":"Point","coordinates"... 2019-11-01  0.680695
430547  {"geodesic":false,"type":"Point","coordinates"... 2019-12-01  0.632608
430548  {"geodesic":false,"type":"Point","coordinates"... 2020-01-

# Model Training

In [None]:

import numpy as np

# Stack each class into a 3-D array of shape (n_pixels, 1, 36): a single variable
# (NDVI) observed at the 36 monthly steps of the 2018-2020 grid built by interpolate().
farm_X = np.reshape(np.asarray(farm_X), (len(farm_X), 1, 36))
forest_X = np.reshape(np.asarray(forest_X), (len(forest_X), 1, 36))
barren_X = np.reshape(np.asarray(barren_X), (len(barren_X), 1, 36))
water_X = np.reshape(np.asarray(water_X), (len(water_X), 1, 36))

print(forest_X)

[[[0.19379807 0.18272275 0.14811023 ... 0.29877255 0.28129268 0.27551803]]

 [[0.15708809 0.15682371 0.13633201 ... 0.28006002 0.23939393 0.22932066]]

 [[0.2043571  0.14815642 0.14077717 ... 0.31472264 0.27302027 0.30182767]]

 ...

 [[0.40861096 0.39552455 0.28498412 ... 0.7219986  0.64551106 0.54718113]]

 [[0.42066928 0.37923274 0.29547855 ... 0.70801077 0.72118522 0.554168  ]]

 [[0.37684604 0.3703485  0.27521277 ... 0.67210823 0.62897611 0.48575327]]]


In [None]:
import sktime

from sktime.transformations.panel.summarize import RandomIntervalFeatureExtractor
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier
from sktime.utils.slope_and_trend import _slope

from sktime.classification.compose import ComposableTimeSeriesForestClassifier


from sktime.datatypes._panel._convert import (
    from_3d_numpy_to_nested,
)

# Convert the 3-D farm array to sktime's nested DataFrame (one row per pixel; the
# recorded output shows a single `var_0` column holding each pixel's series).
farm_X_nested = from_3d_numpy_to_nested(farm_X)
len(farm_X_nested)

25890

In [None]:
# Same nested conversion for the forest pixels.
forest_X_nested = from_3d_numpy_to_nested(forest_X)
len(forest_X_nested)

12107

In [None]:
# Same nested conversion for the barren pixels.
barren_X_nested = from_3d_numpy_to_nested(barren_X)
len(barren_X_nested)

6827

In [None]:
# Same nested conversion for the water pixels.
water_X_nested = from_3d_numpy_to_nested(water_X)
len(water_X_nested)

815

In [None]:
# 0 for farm pixels, 1 for forest pixels. Lengths follow the head (4/5) / tail (1/5)
# split applied to the nested frames in the next cells.
y_train = [0] * ((4*len(farm_X_nested))//5) + [1] * ((4*len(forest_X_nested))//5)
y_test = [0] * ((len(farm_X_nested))//5) + [1] * ((len(forest_X_nested))//5)

In [None]:
# Training split: the first 4/5 of each class, in stored order (no shuffling).
# NOTE: despite the `_test` suffix these four variables hold the TRAINING portion
# here; they are reassigned to the real test portion in the next cell.
farm_X_test = farm_X_nested.head(4*len(farm_X_nested)//5)
forest_X_test = forest_X_nested.head(4*len(forest_X_nested)//5)
barren_X_test = barren_X_nested.head(4*len(barren_X_nested)//5)
water_X_test = water_X_nested.head(4*len(water_X_nested)//5)
# Only farm and forest are combined; the row order matches y_train (farm first, then forest).
X_train = pd.concat([farm_X_test, forest_X_test], ignore_index=True)
X_train

Unnamed: 0,var_0
0,0 0.174187 1 0.174187 2 0.180514 3...
1,0 0.166203 1 0.166203 2 0.174591 3...
2,0 0.174308 1 0.174308 2 0.174591 3...
3,0 0.162735 1 0.162735 2 0.186563 3...
4,0 0.164157 1 0.164157 2 0.157216 3...
...,...
30392,0 0.450493 1 0.441723 2 0.314187 3...
30393,0 0.477550 1 0.435846 2 0.303028 3...
30394,0 0.467875 1 0.459932 2 0.320518 3...
30395,0 0.476831 1 0.474699 2 0.328447 3...


In [None]:
# Test split: the last 1/5 of each class, in stored order.
farm_X_test = farm_X_nested.tail(len(farm_X_nested)//5)
forest_X_test = forest_X_nested.tail(len(forest_X_nested)//5)
barren_X_test = barren_X_nested.tail(len(barren_X_nested)//5)
water_X_test = water_X_nested.tail(len(water_X_nested)//5)
# Only farm and forest are combined; the row order matches y_test (farm first, then forest).
X_test = pd.concat([farm_X_test, forest_X_test], ignore_index=True)
X_test

Unnamed: 0,var_0
0,0 0.169118 1 0.194697 2 0.151170 3...
1,0 0.185425 1 0.244043 2 0.144904 3...
2,0 0.294294 1 0.346191 2 0.146226 3...
3,0 0.376050 1 0.274332 2 0.174566 3...
4,0 0.186569 1 0.196424 2 0.146710 3...
...,...
7594,0 0.323260 1 0.391179 2 0.307624 3...
7595,0 0.417996 1 0.368847 2 0.274863 3...
7596,0 0.408611 1 0.395525 2 0.284984 3...
7597,0 0.420669 1 0.379233 2 0.295479 3...


# **Non Forest vs Forest**

In [None]:
# 0 = barren (reported as "Non Forest"), 1 = forest. Lengths follow the head (4/5) /
# tail (1/5) split used for X_train / X_test below.
y_train = [0] * ((4*len(barren_X_nested))//5) + [1] * ((4*len(forest_X_nested))//5)
y_test = [0] * ((len(barren_X_nested))//5) + [1] * ((len(forest_X_nested))//5)

In [None]:
# Training split (first 4/5 of each class); farm and water are left out of this experiment.
# NOTE: the `_test` suffix is misleading here — these hold the TRAINING portion.
forest_X_test = forest_X_nested.head(4*len(forest_X_nested)//5)
barren_X_test = barren_X_nested.head(4*len(barren_X_nested)//5)
# Barren first, then forest, matching the order of y_train.
X_train = pd.concat([barren_X_test, forest_X_test], ignore_index=True)
X_train

Unnamed: 0,var_0
0,0 0.131134 1 0.147611 2 0.141589 3...
1,0 0.137333 1 0.144555 2 0.148570 3...
2,0 0.162137 1 0.174472 2 0.176908 3...
3,0 0.112954 1 0.117082 2 0.109699 3...
4,0 0.125956 1 0.136572 2 0.127922 3...
...,...
15141,0 0.450493 1 0.441723 2 0.314187 3...
15142,0 0.477550 1 0.435846 2 0.303028 3...
15143,0 0.467875 1 0.459932 2 0.320518 3...
15144,0 0.476831 1 0.474699 2 0.328447 3...


In [None]:
# Test split (last 1/5 of each class); farm and water are left out of this experiment.
forest_X_test = forest_X_nested.tail(len(forest_X_nested)//5)
barren_X_test = barren_X_nested.tail(len(barren_X_nested)//5)
# Barren first, then forest, matching the order of y_test.
X_test = pd.concat([barren_X_test, forest_X_test], ignore_index=True)
X_test

Unnamed: 0,var_0
0,0 0.259121 1 0.259121 2 0.168049 3...
1,0 0.239922 1 0.239922 2 0.153444 3...
2,0 0.244999 1 0.244999 2 0.175000 3...
3,0 0.241379 1 0.241379 2 0.168049 3...
4,0 0.357977 1 0.320685 2 0.270955 3...
...,...
3781,0 0.323260 1 0.391179 2 0.307624 3...
3782,0 0.417996 1 0.368847 2 0.274863 3...
3783,0 0.408611 1 0.395525 2 0.284984 3...
3784,0 0.420669 1 0.379233 2 0.295479 3...


In [None]:
from sktime.classification.interval_based import TimeSeriesForestClassifier

# Time-series forest on the barren-vs-forest split; seeded so a re-run gives the same model.
tsf_11 = TimeSeriesForestClassifier(n_estimators=200, random_state=42)

# y_train is a plain list, so it is wrapped in a NumPy array before fitting.
tsf_11.fit(X_train, np.array(y_train))

TimeSeriesForestClassifier(random_state=42)

In [None]:
tsf_11.predict(X_test)

array([0, 0, 0, ..., 1, 1, 1])

In [None]:
pred_and_report(tsf_11,X_test,y_test,["Non Forest","Forest"]) 

Accuracy:  0.9973586899101955
Precision for  Non Forest :  0.9927272727272727
Recall for  Non Forest :  1.0
F1 Score for  Non Forest :  0.9963503649635036
Precision for  Forest :  1.0
Recall for  Forest :  0.9958694754233788
F1 Score for  Forest :  0.997930463576159


## **Tabularisation**

In [None]:
# 0 = forest, 1 = water. Lengths follow the head (4/5) / tail (1/5) split used for
# X_train / X_test below.
y_train = [0] * ((4*len(forest_X_nested))//5) + [1] * ((4*len(water_X_nested))//5)
y_test = [0] * ((len(forest_X_nested))//5) + [1] * ((len(water_X_nested))//5)

In [None]:
# Training split (first 4/5 of each class) for forest vs water.
# NOTE: the `_test` suffix is misleading here — these hold the TRAINING portion.
forest_X_test = forest_X_nested.head(4*len(forest_X_nested)//5)
water_X_test = water_X_nested.head(4*len(water_X_nested)//5)
# Forest first, then water, matching the order of y_train.
X_train = pd.concat([forest_X_test, water_X_test], ignore_index=True)
X_train

In [None]:
# Test split (last 1/5 of each class) for forest vs water.
forest_X_test = forest_X_nested.tail(len(forest_X_nested)//5)
water_X_test = water_X_nested.tail(len(water_X_nested)//5)
# Forest first, then water, matching the order of y_test.
X_test = pd.concat([forest_X_test, water_X_test], ignore_index=True)
X_test

In [None]:
from sklearn.ensemble import RandomForestClassifier

from sktime.datatypes._panel._convert import from_nested_to_2d_array

# Convert the nested frames to plain 2-D tables so a scikit-learn estimator can be fitted on them.
X_train_tab = from_nested_to_2d_array(X_train)
X_test_tab = from_nested_to_2d_array(X_test)

In [None]:
X_train_tab.head()

In [None]:
X_test_tab.head()

In [None]:
# Random forest on the tabularised forest-vs-water data; seeded for repeatable results.
tab = RandomForestClassifier(n_estimators=200, random_state=42)
tab.fit(X_train_tab, np.array(y_train))

In [None]:
pred_and_report(tab,X_test_tab,y_test,["Forest","Water"])

In [None]:
# 0 = forest, 1 = barren, 2 = water. Lengths follow the head (4/5) / tail (1/5) split
# used for X_train / X_test below.
y_train = ([0] * ((4*len(forest_X_nested))//5)
           + [1] * ((4*len(barren_X_nested))//5)
           + [2] * ((4*len(water_X_nested))//5))
y_test = ([0] * ((len(forest_X_nested))//5)
          + [1] * ((len(barren_X_nested))//5)
          + [2] * ((len(water_X_nested))//5))

In [None]:
# Training split (first 4/5 of each class) for the three-class problem.
# NOTE: the `_test` suffix is misleading here — these hold the TRAINING portion.
forest_X_test = forest_X_nested.head(4*len(forest_X_nested)//5)
barren_X_test = barren_X_nested.head(4*len(barren_X_nested)//5)
water_X_test = water_X_nested.head(4*len(water_X_nested)//5)
# Forest, barren, water — the same order as y_train.
X_train = pd.concat([forest_X_test, barren_X_test, water_X_test], ignore_index=True)
X_train

In [None]:
# Test split (last 1/5 of each class) for the three-class problem.
forest_X_test = forest_X_nested.tail(len(forest_X_nested)//5)
barren_X_test = barren_X_nested.tail(len(barren_X_nested)//5)
water_X_test = water_X_nested.tail(len(water_X_nested)//5)
# Forest, barren, water — the same order as y_test.
X_test = pd.concat([forest_X_test, barren_X_test, water_X_test], ignore_index=True)
X_test

In [None]:
from sklearn.ensemble import RandomForestClassifier

from sktime.datatypes._panel._convert import from_nested_to_2d_array

# Tabularise the three-class split (overwrites the two-class tables of the same name).
X_train_tab = from_nested_to_2d_array(X_train)
X_test_tab = from_nested_to_2d_array(X_test)

In [None]:
# Random forest on the tabularised three-class data (500 trees here vs 200 in the two-class run).
tab = RandomForestClassifier(n_estimators=500, random_state=42)
tab.fit(X_train_tab, np.array(y_train))

In [None]:
pred_and_report(tab,X_test_tab,y_test,["Forest","Barren","Water"])

## **Feature Extractor**

In [None]:
! pip install numba>=0.54

In [None]:
from sktime.transformations.panel.tsfresh import TSFreshFeatureExtractor

# Extract tsfresh's "minimal" feature set from each training series (X_train is the
# nested three-class frame built above).
transformer = TSFreshFeatureExtractor(default_fc_parameters="minimal")
extracted_features = transformer.fit_transform(X_train)
extracted_features.head()

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest trained on the extracted features instead of the raw monthly values.
tse = RandomForestClassifier(n_estimators=1000, random_state=42)
tse.fit(extracted_features, np.array(y_train))

In [None]:
# Reuse the already-fitted transformer on the test series (transform, not fit_transform).
test_e = transformer.transform(X_test)
pred_and_report(tse,test_e,y_test,["Forest", "Barren", "Water"])

# **Other Classifiers**

In [None]:
#DrCIF
#too slow

from sktime.classification.interval_based import DrCIF

# Very small DrCIF configuration (3 estimators, 2 intervals); the cell header notes it was still too slow.
drcif = DrCIF(n_estimators=3, n_intervals=2, att_subsample_size=2)
drcif.fit(X_train, np.array(y_train))

In [None]:
#TDE
#too slow

from sktime.classification.dictionary_based import TemporalDictionaryEnsemble

# Small Temporal Dictionary Ensemble configuration; the cell header notes it was still too slow.
clf_tde = TemporalDictionaryEnsemble(
    n_parameter_samples=20,
    max_ensemble_size=3,
    randomly_selected_params=5,
)

clf_tde.fit(X_train, np.array(y_train))

In [None]:
#cBOSS
#too slow

from sktime.classification.dictionary_based import ContractableBOSS

# Small ContractableBOSS configuration; fitted and scored (accuracy) on the current split.
clf = ContractableBOSS(n_parameter_samples=20, max_ensemble_size=3)
clf.fit(X_train, np.array(y_train))
clf.score(X_test, np.array(y_test))

In [None]:
#KNN with DTW
#too slow

from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier

# 1-nearest-neighbour with DTW distance. The labels are wrapped in NumPy arrays like in
# every other sktime fit/score call of this notebook; y_train / y_test are plain lists,
# which sktime's classifier input validation is expected to reject.
knn = KNeighborsTimeSeriesClassifier(n_neighbors=1, distance="dtw")
knn.fit(X_train, np.array(y_train))
knn.score(X_test, np.array(y_test))

In [None]:
#Proximity Forest
#too slow


from sktime.classification.distance_based import ProximityForest

# Minimal Proximity Forest (2 trees of depth 2); the cell header notes it was still too slow.
clf_pf = ProximityForest(n_estimators=2, max_depth=2, n_stump_evaluations=1)

clf_pf.fit(X_train, np.array(y_train))

In [None]:
#HIVECOTEV2
#too slow

from sktime.classification.hybrid import HIVECOTEV2
from sktime._contrib.vector_classifiers._rotation_forest import RotationForest

# HIVE-COTE v2 with every component shrunk to a very small configuration; the cell
# header notes it was still too slow.
clf_hv2 = HIVECOTEV2(
    # settings for the "stc" component
    stc_params={
        "estimator": RotationForest(n_estimators=3),
        "n_shapelet_samples": 100,
        "max_shapelets": 10,
        "batch_size": 20,
    },
    # same small DrCIF settings style as the standalone DrCIF cell
    drcif_params={"n_estimators": 2, "n_intervals": 2, "att_subsample_size": 2},
    arsenal_params={"num_kernels": 50, "n_estimators": 3},
    # settings for the "tde" component
    tde_params={
        "n_parameter_samples": 10,
        "max_ensemble_size": 3,
        "randomly_selected_params": 5,
    },
)

clf_hv2.fit(X_train, np.array(y_train))