In [None]:
# Mount Google Drive so files stored there are reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
cd drive/MyDrive/Landsat7_time_series

/content/drive/MyDrive/Landsat7_time_series


In [None]:
!pip install sktime[all_extras]

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import pandas as pd
import datetime

In [None]:
import matplotlib.pyplot as plt
# Notebook-wide plotting defaults: 12 x 12 inch figures at 72 dpi.
plt.rcParams['figure.figsize'] = [12, 12]
plt.rcParams['figure.dpi'] = 72

**Utility Functions**

In [None]:
def read(name):
  """Load a CSV export and add a datetime column named 'DOY'.

  Args:
    name: path (or file-like object) accepted by ``pd.read_csv``. The file must
      contain a 'timestamp' column holding epoch milliseconds.

  Returns:
    The loaded DataFrame with an extra 'DOY' column containing the parsed
    timestamps.
  """
  frame = pd.read_csv(name)
  sensed_at = pd.to_datetime(frame['timestamp'], unit='ms')
  frame['DOY'] = sensed_at
  return frame

In [None]:
def sort_group_NDVI(df, threshold,id):
  """Aggregate NDVI to monthly means per series and drop sparsely observed series.

  Args:
    df: DataFrame with 'DOY' (datetime), 'timestamp', 'NDVI' and the key column
      selected by ``id``.
    threshold: minimum number of monthly rows a series must have to be kept.
    id: truthy -> one series per 'osm_id'; falsy -> one series per '.geo'.
      (The name shadows the builtin but is kept for existing callers.)

  Returns:
    A tuple ``(monthly, kept)``: ``monthly`` holds the month-start 'DOY', the key
    and the mean 'NDVI' for every kept series; ``kept`` holds the key and its
    'count' of monthly rows for the series that reached ``threshold``.

  Side effects:
    Prints the number of kept series.
  """
  # The two original branches differed only in the grouping column.
  key = 'osm_id' if id else '.geo'
  df = df.sort_values([key,'timestamp'])
  # freq="MS" buckets every observation into the first day of its month.
  monthly = df.groupby([pd.Grouper(key='DOY', freq="MS"),key])['NDVI'].mean().reset_index(name='NDVI')
  counts = monthly.groupby([key])['DOY'].count().reset_index(name='count')
  kept = counts[counts['count']>=threshold]
  print(len(kept))
  return monthly[monthly[key].isin(kept[key].values)], kept

In [None]:
def interpolate(df,id, start='2018-01-01', end='2020-12-31'):
  """Put every series on a complete month-start grid and fill gaps linearly.

  Args:
    df: monthly frame with 'DOY' (month-start datetimes), 'NDVI' and the key
      column selected by ``id``.
    id: truthy -> series are keyed by 'osm_id'; falsy -> by '.geo'.
    start, end: bounds of the month-start grid. The defaults reproduce the
      original hard-coded 2018-01 .. 2020-12 range (36 months).

  Returns:
    DataFrame with one row per (key, month) and 'NDVI' interpolated within each
    series; leading and trailing gaps are filled as well
    (``limit_direction='both'``).
  """
  key = 'osm_id' if id else '.geo'
  rng = pd.date_range(start, end, freq = 'MS')
  mux = pd.MultiIndex.from_product([df[key].unique(), rng], names=[key,'DOY'])
  # Reindexing inserts NaN rows for every month a series has no observation.
  df = df.set_index([key,'DOY']).reindex(mux).reset_index()
  # transform keeps the frame's own index, so the result can be assigned back.
  # groupby(...).apply(...) prepends the group key to the index on pandas >= 2.0,
  # which makes the column assignment fail.
  df['NDVI'] = df.groupby(key)['NDVI'].transform(
      lambda s: s.interpolate(method='linear', limit_direction='both'))
  return df

In [None]:
def F1_4(preds,y,t,label):
  """Print one-vs-rest precision, recall and F1 for class ``t``.

  Args:
    preds: indexable sequence of predicted class ids.
    y: indexable sequence of true class ids; its length drives the loop.
    t: the class id treated as "positive".
    label: human-readable class name used in the printed lines.

  Any metric whose denominator is zero is reported as 0.
  """
  tp = fp = fn = 0
  for i in range(len(y)):
    predicted_t = preds[i] == t
    actual_t = y[i] == t
    if predicted_t and actual_t:
      tp += 1
    elif predicted_t:
      fp += 1
    elif actual_t:
      fn += 1
  pr = tp/(fp+tp) if (fp+tp != 0) else 0
  re = tp/(fn+tp) if (fn+tp != 0) else 0
  f1 = 2*pr*re/(pr+re) if (pr+re != 0) else 0
  print("Precision for ",label,": ", pr)
  print("Recall for ",label,": ", re)
  print("F1 Score for ",label,": ", f1)

def pred_and_report(model,X,y,labels):
  """Predict with ``model`` and print accuracy plus per-class precision/recall/F1.

  Args:
    model: fitted estimator exposing ``predict(X)``.
    X: features passed to ``model.predict``.
    y: indexable sequence of true class ids (0 .. len(labels)-1).
    labels: class names; the position in the list is the class id.
  """
  preds = model.predict(X)
  n_samples = len(y)
  n_correct = 0
  for i in range(n_samples):
    if (preds[i] == y[i]):
      n_correct += 1
  # Normalise by the number of labels actually compared (previously len(X)), and
  # report 0 for an empty evaluation set instead of raising ZeroDivisionError,
  # consistent with how F1_4 treats zero denominators.
  accuracy = n_correct/n_samples if n_samples else 0
  print("Accuracy: ", accuracy)
  for class_id, class_name in enumerate(labels):
    F1_4(preds,y,class_id,class_name)

**Labels**

In [None]:
# Hand-picked integer ids per land-cover class. `farm` and `forest` are used
# below as keys into the `orig` lookup built from labels.csv.
# NOTE(review): `mixed`, `grass` and `noisy` are not referenced in the visible part
# of the notebook; several `noisy` ids also appear in `farm` - confirm intent.
farm = [9,10,11,12,22,23,28,29,30,31,32,33,34,35,36,37,38,40,41,43,44,46,48,49,50,51,52,54,60,61,62,63,64,66,67,69,70,71,72,73,75,76,77,79,81,82,84,85,86]
forest = [1,2,68,96,97]
mixed = [18,19,24,45,78,80]
grass = [88,89,90,91,95]
noisy = [29,24,30,32,36,38,40,44,46,48,51,52]

In [None]:
# Build both directions of the lookup stored in labels.csv.
label_help = pd.read_csv('labels.csv')
# orig: row index -> value of the 'label' column.
orig = label_help.to_dict()['label']
# labels: value of the 'label' column -> row index (the last row wins on duplicates).
labels = dict((value, row) for row, value in orig.items())

**Pre-Processing**

In [None]:
# Load all pixel samples, then keep the rows whose osm_id is one of the farm ids.
pixels = read('all_pixel.csv')
# Translate the hand-picked ids into the values stored in the 'label' column of labels.csv.
farm_ids = [orig[i] for i in farm]
forest_ids = [orig[i] for i in forest]
farm_pixels = pixels[pixels['osm_id'].isin(farm_ids)]
print(len(farm_pixels))
farm_pixels.head()

2261109


Unnamed: 0,system:index,B1,B2,B3,B4,B5,B6,B7,NDBI,NDVI,NDWI,SENSING_TIME,osm_id,timestamp,.geo,DOY
213202,LE07_143049_20180106_00000000000000000046_0,795,930,990,3845,2259,3015,1098,-0.25983,0.590486,-0.610471,2018-01-06T05:05:59.5245116Z,696251987,1515215000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2018-01-06 05:05:59.520
213203,LE07_143049_20180106_00000000000000000046_1,861,1101,1365,3127,2970,3025,1923,-0.02575,0.392253,-0.479186,2018-01-06T05:05:59.5245116Z,696251987,1515215000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2018-01-06 05:05:59.520
213204,LE07_143049_20180106_00000000000000000046_2,828,1033,1303,3442,3026,3020,1866,-0.064317,0.45079,-0.538324,2018-01-06T05:05:59.5245116Z,696251987,1515215000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2018-01-06 05:05:59.520
213205,LE07_143049_20180106_00000000000000000046_3,828,1033,1209,3442,2615,3025,1468,-0.136536,0.480112,-0.538324,2018-01-06T05:05:59.5245116Z,696251987,1515215000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2018-01-06 05:05:59.520
213206,LE07_143049_20180106_00000000000000000046_4,861,1101,1522,2992,3381,3034,2293,0.061039,0.325654,-0.462008,2018-01-06T05:05:59.5245116Z,696251987,1515215000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2018-01-06 05:05:59.520


In [None]:
# Barren-land samples come from their own export; every row is used as-is.
barren_pixels = read('L7_barren_pixel.csv')
print(len(barren_pixels))
barren_pixels.head()

1218825


Unnamed: 0,system:index,B1,B2,B3,B4,B5,B6,B7,NDBI,NDVI,NDWI,SENSING_TIME,osm_id,timestamp,.geo,DOY
0,LE07_139041_20180110_00000000000000001655_0,473,654,641,2089,2240,2913,1285,0.034881,0.530403,-0.52315,2018-01-10T04:38:01.6291177Z,546617071,1515559000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2018-01-10 04:38:01.630000128
1,LE07_139041_20180110_00000000000000001655_1,473,608,641,2054,2112,2913,1187,0.013922,0.524304,-0.543201,2018-01-10T04:38:01.6291177Z,546617071,1515559000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2018-01-10 04:38:01.630000128
2,LE07_139041_20180110_00000000000000001655_2,473,654,600,2089,1953,2908,1025,-0.033647,0.553737,-0.52315,2018-01-10T04:38:01.6291177Z,546617071,1515559000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2018-01-10 04:38:01.630000128
3,LE07_139041_20180110_00000000000000001655_3,516,608,600,2054,1794,2908,927,-0.067568,0.547852,-0.543201,2018-01-10T04:38:01.6291177Z,546617071,1515559000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2018-01-10 04:38:01.630000128
4,LE07_139041_20180110_00000000000000001655_4,473,608,600,2054,1666,2908,894,-0.104301,0.547852,-0.543201,2018-01-10T04:38:01.6291177Z,546617071,1515559000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2018-01-10 04:38:01.630000128


In [None]:
# Water samples come from their own export; every row is used as-is.
water_pixels = read('L7_water_pixel.csv')
print(len(water_pixels))
water_pixels.head()

1006986


Unnamed: 0,system:index,B1,B2,B3,B4,B5,B6,B7,NDBI,NDVI,NDWI,SENSING_TIME,osm_id,timestamp,.geo,DOY
0,LE07_139044_20201030_00000000000000000072_0,417,507,277,436,216,2703,60,-0.337423,0.223001,0.075292,2020-10-30T04:00:10.1892406Z,44582115,1604030000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2020-10-30 04:00:10.188999936
1,LE07_139044_20201030_00000000000000000072_1,454,467,312,436,172,2924,106,-0.434211,0.165775,0.03433,2020-10-30T04:00:10.1892406Z,44582115,1604030000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2020-10-30 04:00:10.188999936
2,LE07_139044_20201030_00000000000000000072_2,454,426,312,436,172,2939,106,-0.434211,0.165775,-0.011601,2020-10-30T04:00:10.1892406Z,44582115,1604030000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2020-10-30 04:00:10.188999936
3,LE07_139044_20201030_00000000000000000072_3,453,467,348,436,172,2995,106,-0.434211,0.112245,0.03433,2020-10-30T04:00:10.1892406Z,44582115,1604030000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2020-10-30 04:00:10.188999936
4,LE07_139044_20201030_00000000000000000072_4,453,467,312,436,172,2955,13,-0.434211,0.165775,0.03433,2020-10-30T04:00:10.1892406Z,44582115,1604030000000.0,"{""geodesic"":false,""type"":""Point"",""coordinates""...",2020-10-30 04:00:10.188999936


In [None]:
# Forest samples come from two sources: rows of `pixels` whose osm_id is a forest
# id, plus an additional per-pixel export.
forest_pixels = pixels[pixels['osm_id'].isin(forest_ids)]
temp = read('all_geo_fpixel_L7.csv')
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the same rows in
# the same order and keeps the original index values.
forest_pixels = pd.concat([forest_pixels, temp])
print(len(forest_pixels))

6681825


**Univariate (NDVI)**

In [None]:
# Monthly mean NDVI per pixel geometry (.geo); keep pixels with at least 24 monthly values.
pixels_farm_L7, vals_pixels_farm_L7 = sort_group_NDVI(farm_pixels,24,False)
print(len(pixels_farm_L7))

25890
665560


In [None]:
# Same aggregation for forest pixels, with a lower minimum of 16 monthly values.
pixels_forest_L7, vals_pixels_forest_L7 = sort_group_NDVI(forest_pixels,16,False)
print(len(pixels_forest_L7))

12107
222348


In [None]:
# Monthly mean NDVI per barren pixel; keep pixels with at least 24 monthly values.
pixels_bL7, vals_pixels_bL7 = sort_group_NDVI(barren_pixels,24,False)
print(len(pixels_bL7))

6827
171630


In [None]:
# Monthly mean NDVI per water pixel; keep pixels with at least 24 monthly values.
pixels_wL7, vals_pixels_wL7 = sort_group_NDVI(water_pixels,24,False)
print(len(pixels_wL7))

815
19966


In [None]:
# Map each pixel geometry (.geo) to its osm_id for the farm, barren and water
# samples (forest pixels are not included here).
# DataFrame.append was removed in pandas 2.0; one pd.concat stacks the same rows
# in the same order.
geo_to_label = pd.concat([
    source[["osm_id",".geo"]].set_index('.geo')
    for source in (farm_pixels, barren_pixels, water_pixels)
])
# to_dict() yields {'osm_id': {geo: osm_id}}; repeated geometries keep the last osm_id seen.
geo_to_label = geo_to_label.to_dict()
geo_to_label.keys()

dict_keys(['osm_id'])

In [None]:
# Put every kept pixel on the full monthly grid and fill missing months by
# per-pixel linear interpolation (series keyed by .geo).
ifarmpL7 = interpolate(pixels_farm_L7,False)
iforestpL7 = interpolate(pixels_forest_L7,False)
ibarrenpL7 = interpolate(pixels_bL7,False)
iwaterpL7 = interpolate(pixels_wL7,False)
# Row counts: number of pixels x number of months on the grid.
print(len(ifarmpL7))
print(len(iforestpL7))
print(len(ibarrenpL7))
print(len(iwaterpL7))

932040
435852
245772
29340


In [None]:
# One NDVI vector per pixel: group each interpolated frame by geometry and
# collect the group's NDVI values in row order.
gfarmp = ifarmpL7.groupby('.geo')
gforestp = iforestpL7.groupby('.geo')
gbarrenp = ibarrenpL7.groupby('.geo')
gwaterp = iwaterpL7.groupby('.geo')
farm_X = [frame['NDVI'].values for _, frame in gfarmp]
forest_X = [frame['NDVI'].values for _, frame in gforestp]
barren_X = [frame['NDVI'].values for _, frame in gbarrenp]
water_X = [frame['NDVI'].values for _, frame in gwaterp]
# Number of series per class.
for class_series in (farm_X, forest_X, barren_X, water_X):
  print(len(class_series))

25890
12107
6827
815


In [None]:
import numpy as np

# Stack each class into a 3D array of shape (n_series, 1, 36): one variable (NDVI)
# and 36 time steps, i.e. the 36 months of the 2018-01 .. 2020-12 grid that
# interpolate() builds by default.
farm_X = np.asarray(farm_X).reshape(len(farm_X),1,36)
forest_X = np.asarray(forest_X).reshape(len(forest_X),1,36)
barren_X = np.asarray(barren_X).reshape(len(barren_X),1,36)
water_X = np.asarray(water_X).reshape(len(water_X),1,36)

In [None]:
import sktime

from sktime.transformations.panel.summarize import RandomIntervalFeatureExtractor
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier
from sktime.utils.slope_and_trend import _slope

from sktime.classification.compose import ComposableTimeSeriesForestClassifier


from sktime.datatypes._panel._convert import (
    from_3d_numpy_to_nested,
)

# Convert the 3D array into sktime's nested DataFrame (one row per series).
farm_X_nested = from_3d_numpy_to_nested(farm_X)
len(farm_X_nested)

25890

In [None]:
# Convert the forest 3D array into sktime's nested DataFrame (one row per series).
forest_X_nested = from_3d_numpy_to_nested(forest_X)
len(forest_X_nested)

12107

In [None]:
# Convert the barren 3D array into sktime's nested DataFrame (one row per series).
barren_X_nested = from_3d_numpy_to_nested(barren_X)
len(barren_X_nested)

6827

In [None]:
# Convert the water 3D array into sktime's nested DataFrame (one row per series).
water_X_nested = from_3d_numpy_to_nested(water_X)
len(water_X_nested)

815

In [None]:
# Labels for the farm (0) vs forest (1) experiment: the first 4/5 of each class
# is used for training and the last 1/5 for testing.
# Barren (2) and water (3) are currently left out of this experiment.
y_train = [0] * ((4*len(farm_X_nested))//5) + [1] * ((4*len(forest_X_nested))//5)
y_test = [0] * ((len(farm_X_nested))//5) + [1] * ((len(forest_X_nested))//5)

In [None]:
# Training split: the first 4/5 of each class, concatenated in the same class order as y_train.
# NOTE: despite the *_X_test names, these four frames hold the TRAINING parts;
# the names are reassigned to the real test parts in the following cell.
farm_X_test = farm_X_nested.head(4*len(farm_X_nested)//5)
forest_X_test = forest_X_nested.head(4*len(forest_X_nested)//5)
barren_X_test = barren_X_nested.head(4*len(barren_X_nested)//5)
water_X_test = water_X_nested.head(4*len(water_X_nested)//5)
# Only farm and forest take part in this experiment.
X_train = pd.concat([farm_X_test, forest_X_test], ignore_index=True)
X_train

Unnamed: 0,var_0
0,0 0.174187 1 0.174187 2 0.180514 3...
1,0 0.166203 1 0.166203 2 0.174591 3...
2,0 0.174308 1 0.174308 2 0.174591 3...
3,0 0.162735 1 0.162735 2 0.186563 3...
4,0 0.164157 1 0.164157 2 0.157216 3...
...,...
30392,0 0.450493 1 0.441723 2 0.314187 3...
30393,0 0.477550 1 0.435846 2 0.303028 3...
30394,0 0.467875 1 0.459932 2 0.320518 3...
30395,0 0.476831 1 0.474699 2 0.328447 3...


In [None]:
# Test split: the last 1/5 of each class, concatenated in the same class order as y_test.
farm_X_test = farm_X_nested.tail(len(farm_X_nested)//5)
forest_X_test = forest_X_nested.tail(len(forest_X_nested)//5)
barren_X_test = barren_X_nested.tail(len(barren_X_nested)//5)
water_X_test = water_X_nested.tail(len(water_X_nested)//5)
# Only farm and forest take part in this experiment.
X_test = pd.concat([farm_X_test, forest_X_test], ignore_index=True)
X_test

Unnamed: 0,var_0
0,0 0.169118 1 0.194697 2 0.151170 3...
1,0 0.185425 1 0.244043 2 0.144904 3...
2,0 0.294294 1 0.346191 2 0.146226 3...
3,0 0.376050 1 0.274332 2 0.174566 3...
4,0 0.186569 1 0.196424 2 0.146710 3...
...,...
7594,0 0.323260 1 0.391179 2 0.307624 3...
7595,0 0.417996 1 0.368847 2 0.274863 3...
7596,0 0.408611 1 0.395525 2 0.284984 3...
7597,0 0.420669 1 0.379233 2 0.295479 3...


In [None]:
#Time Series Tree

# Pipeline: summarise random intervals of each series by mean, std and slope, then
# fit a decision tree on those features. Both stages draw random numbers, so both
# are seeded to make the reported metrics reproducible.
steps = [
    (
        "extract",
        RandomIntervalFeatureExtractor(
            n_intervals="sqrt", features=[np.mean, np.std, _slope], random_state=42
        ),
    ),
    ("clf", DecisionTreeClassifier(random_state=42)),
]
time_series_tree = Pipeline(steps)

In [None]:
# Fit the interval-feature pipeline on the current training split.
time_series_tree.fit(X_train, np.array(y_train))

Pipeline(steps=[('extract',
                 RandomIntervalFeatureExtractor(features=[<function mean at 0x7fc4d1379560>,
                                                          <function std at 0x7fc4d13797a0>,
                                                          <function _slope at 0x7fc422b315f0>])),
                ('clf', DecisionTreeClassifier())])

In [None]:
# pred_and_report_4 is not defined in this notebook (NameError on a fresh kernel);
# use the generic reporter with names matching the current encoding 0 = Farm, 1 = Forest.
pred_and_report(time_series_tree,X_test,y_test,["Farm","Forest"])

Accuracy:  0.8792593404185384
Precision for  Farm :  0.9254894506747766
Recall for  Farm :  0.9403244495944381
F1 Score for  Farm :  0.9328479739438644
Precision for  Forest :  0.9417059579939991
Recall for  Forest :  0.9074762494836844
F1 Score for  Forest :  0.9242742953302482
Precision for  Barren :  0.6801218583396801
Recall for  Barren :  0.6542124542124542
F1 Score for  Barren :  0.6669156086631814
Precision for  Water :  0.3
Recall for  Water :  0.4049079754601227
F1 Score for  Water :  0.34464751958224543


In [None]:
#Time Series Forest

from sktime.classification.interval_based import TimeSeriesForestClassifier

# Time series forest with 200 trees, seeded for reproducibility.
tsf = TimeSeriesForestClassifier(n_estimators=200, random_state=42)

tsf.fit(X_train, np.array(y_train))

TimeSeriesForestClassifier(random_state=42)

In [None]:
# pred_and_report_2 is not defined in this notebook (NameError on a fresh kernel);
# use the generic reporter with names matching the encoding 0 = Farm, 1 = Forest.
pred_and_report(tsf,X_test,y_test,["Farm","Forest"])

Accuracy:  0.9994736149493354
Precision for  Farm :  0.9996137504828119
Recall for  Farm :  0.9996137504828119
F1 Score for  Farm :  0.9996137504828119
Precision for  Forest :  0.9991738950846758
Recall for  Forest :  0.9991738950846758
F1 Score for  Forest :  0.9991738950846758


In [None]:
#RISE

from sktime.classification.interval_based import RandomIntervalSpectralEnsemble

# Random Interval Spectral Ensemble with 50 estimators, seeded for reproducibility.
rise = RandomIntervalSpectralEnsemble(n_estimators=50, random_state=42)
rise.fit(X_train, np.array(y_train))

RandomIntervalSpectralEnsemble(n_estimators=50, random_state=42)

In [None]:
# pred_and_report_4 is not defined in this notebook; use the generic reporter
# with names matching the current encoding 0 = Farm, 1 = Forest.
pred_and_report(rise,X_test,y_test,["Farm","Forest"])

Accuracy:  0.8995288703845733
Precision for  Farm :  0.85678517776665
Recall for  Farm :  0.9913093858632677
F1 Score for  Farm :  0.9191512221326887
Precision for  Forest :  0.9932005828071879
Recall for  Forest :  0.8446922759190417
F1 Score for  Forest :  0.9129464285714285
Precision for  Barren :  0.9680957128614157
Recall for  Barren :  0.7113553113553114
F1 Score for  Barren :  0.8201013513513514
Precision for  Water :  0.8243243243243243
Recall for  Water :  0.37423312883435583
F1 Score for  Water :  0.5147679324894515


In [None]:
#Summary

from sktime.classification.feature_based import SummaryClassifier
from sklearn.ensemble import RandomForestClassifier

# Summary-statistics features fed to a 50-tree random forest. Seeded (wrapper and
# forest) so repeated runs report the same metrics.
clf_s = SummaryClassifier(
    estimator=RandomForestClassifier(n_estimators=50, random_state=42),
    random_state=42,
)

clf_s.fit(X_train, np.array(y_train))

SummaryClassifier(estimator=RandomForestClassifier(n_estimators=50))

In [None]:
# pred_and_report_4 is not defined in this notebook; use the generic reporter
# with names matching the current encoding 0 = Farm, 1 = Forest.
pred_and_report(clf_s,X_test,y_test,["Farm","Forest"])

Accuracy:  0.8601950257477813
Precision for  Farm :  0.8857863098700796
Recall for  Farm :  0.8821938972576284
F1 Score for  Farm :  0.8839864537977745
Precision for  Forest :  0.9159240604416893
Recall for  Forest :  0.976456009913259
F1 Score for  Forest :  0.9452219112355058
Precision for  Barren :  0.6722306525037937
Recall for  Barren :  0.6490842490842491
F1 Score for  Barren :  0.6604547148714126
Precision for  Water :  0.4647887323943662
Recall for  Water :  0.20245398773006135
F1 Score for  Water :  0.28205128205128205


In [None]:
#TSFresh

from sktime.classification.feature_based import TSFreshClassifier
from sklearn.ensemble import RandomForestClassifier

# tsfresh "minimal" feature set fed to a 50-tree random forest. Seeded (wrapper and
# forest) so repeated runs report the same metrics.
clf_tsf = TSFreshClassifier(
    default_fc_parameters="minimal",
    estimator=RandomForestClassifier(n_estimators=50, random_state=42),
    random_state=42,
)

clf_tsf.fit(X_train, np.array(y_train))

TSFreshClassifier(default_fc_parameters='minimal',
                  estimator=RandomForestClassifier(n_estimators=50))

In [None]:
# pred_and_report_4 is not defined in this notebook; use the generic reporter
# with names matching the current encoding 0 = Farm, 1 = Forest.
pred_and_report(clf_tsf,X_test,y_test,["Farm","Forest"])

Accuracy:  0.8460611372849787
Precision for  Farm :  0.8799763500197083
Recall for  Farm :  0.8623020471224411
F1 Score for  Farm :  0.8710495513070621
Precision for  Forest :  0.8785740809506127
Recall for  Forest :  0.9772821148285832
F1 Score for  Forest :  0.9253030895580759
Precision for  Barren :  0.662015503875969
Recall for  Barren :  0.6256410256410256
F1 Score for  Barren :  0.6433145009416196
Precision for  Water :  0.5285714285714286
Recall for  Water :  0.22699386503067484
F1 Score for  Water :  0.31759656652360513


In [None]:
#Catch22

from sktime.classification.feature_based import Catch22Classifier


# catch22 features (with outlier normalisation) fed to a 50-tree random forest.
# Seeded (wrapper and forest) so repeated runs report the same metrics.
clf_22 = Catch22Classifier(
    estimator=RandomForestClassifier(n_estimators=50, random_state=42),
    outlier_norm=True,
    random_state=42,
)

clf_22.fit(X_train, np.array(y_train))

Catch22Classifier(estimator=RandomForestClassifier(n_estimators=50),
                  outlier_norm=True)

In [None]:
# pred_and_report_4 is not defined in this notebook; use the generic reporter
# with names matching the current encoding 0 = Farm, 1 = Forest.
pred_and_report(clf_22,X_test,y_test,["Farm","Forest"])

Accuracy:  0.9041306015119974
Precision for  Farm :  0.8796616022099447
Recall for  Farm :  0.9839706450366937
F1 Score for  Farm :  0.9288969917958068
Precision for  Forest :  0.9847094801223242
Recall for  Forest :  0.9310202395704255
F1 Score for  Forest :  0.9571125265392781
Precision for  Barren :  0.8896174863387978
Recall for  Barren :  0.5963369963369963
F1 Score for  Barren :  0.7140350877192984
Precision for  Water :  0.6793893129770993
Recall for  Water :  0.5460122699386503
F1 Score for  Water :  0.6054421768707482


In [None]:
#ROCKET

from sklearn.linear_model import RidgeClassifierCV
from sktime.transformations.panel.rocket import Rocket

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Random convolutional kernels; seeded so the transform is reproducible.
rocket = Rocket(num_kernels=2000, random_state=42)
rocket.fit(X_train)
X_train_transform = rocket.transform(X_train)

# RidgeClassifierCV(normalize=True) was deprecated in scikit-learn 1.0 and removed
# in 1.2. Scaling is now an explicit pipeline step, as the deprecation notice
# recommends; `lclf` still exposes fit/predict.
lclf = make_pipeline(
    StandardScaler(with_mean=False),
    RidgeClassifierCV(alphas=np.logspace(-3, 3, 10)),
)
lclf.fit(X_train_transform, np.array(y_train))

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




RidgeClassifierCV(alphas=array([1.00000000e-03, 4.64158883e-03, 2.15443469e-02, 1.00000000e-01,
       4.64158883e-01, 2.15443469e+00, 1.00000000e+01, 4.64158883e+01,
       2.15443469e+02, 1.00000000e+03]),
                  normalize=True)

In [None]:
# Apply the fitted ROCKET kernels to the test series before scoring.
X_test_transform = rocket.transform(X_test)
# pred_and_report_4 is not defined in this notebook; use the generic reporter
# with names matching the current encoding 0 = Farm, 1 = Forest.
pred_and_report(lclf,X_test_transform,y_test,["Farm","Forest"])

Accuracy:  0.8582228552645995
Precision for  Farm :  0.9163396361041741
Recall for  Farm :  0.9920818848976439
F1 Score for  Farm :  0.9527077151335313
Precision for  Forest :  0.9790940766550522
Recall for  Forest :  0.5803387030152829
F1 Score for  Forest :  0.7287344398340249
Precision for  Barren :  0.6575939031028851
Recall for  Barren :  0.884981684981685
F1 Score for  Barren :  0.7545284197376639
Precision for  Water :  0.3333333333333333
Recall for  Water :  0.50920245398773
F1 Score for  Water :  0.4029126213592233


In [None]:
#Supervised Time Series Forest

from sktime.classification.interval_based import SupervisedTimeSeriesForest

# Supervised time series forest with 50 trees; seeded so repeated runs report the same metrics.
clf_stsf = SupervisedTimeSeriesForest(n_estimators=50, random_state=42)
clf_stsf.fit(X_train, np.array(y_train))

SupervisedTimeSeriesForest(n_estimators=50)

In [None]:
# pred_and_report_4 is not defined in this notebook; use the generic reporter
# with names matching the current encoding 0 = Farm, 1 = Forest.
pred_and_report(clf_stsf,X_test,y_test,["Farm","Forest"])

Accuracy:  0.8348855045469487
Precision for  Farm :  0.7935085007727976
Recall for  Farm :  0.9915025106218617
F1 Score for  Farm :  0.8815247252747251
Precision for  Forest :  0.9972850678733032
Recall for  Forest :  0.4551838083436596
F1 Score for  Forest :  0.6250709018718094
Precision for  Barren :  0.886685552407932
Recall for  Barren :  0.9172161172161172
F1 Score for  Barren :  0.9016924738926898
Precision for  Water :  0.9428571428571428
Recall for  Water :  0.8098159509202454
F1 Score for  Water :  0.8712871287128713


In [None]:
#Arsenal

from sktime.classification.kernel_based import Arsenal

# Arsenal (ensemble of ROCKET classifiers) with 500 kernels per member; seeded so
# repeated runs report the same metrics.
ars=Arsenal(num_kernels=500, random_state=42)
ars.fit(X_train, np.array(y_train))

Arsenal(num_kernels=500)

In [None]:
# pred_and_report_4 is not defined in this notebook; use the generic reporter
# with names matching the current encoding 0 = Farm, 1 = Forest.
pred_and_report(ars,X_test,y_test,["Farm","Forest"])

Accuracy:  0.9116905883641941
Precision for  Farm :  0.9553488372093023
Recall for  Farm :  0.9916956353804558
F1 Score for  Farm :  0.973182981142803
Precision for  Forest :  0.9861963190184049
Recall for  Forest :  0.7967781908302355
F1 Score for  Forest :  0.8814256339958875
Precision for  Barren :  0.7189582071471835
Recall for  Barren :  0.8695970695970696
F1 Score for  Barren :  0.7871352785145888
Precision for  Water :  0.4827586206896552
Recall for  Water :  0.4294478527607362
F1 Score for  Water :  0.45454545454545453


In [None]:
#FreshPRINCE

from sktime.classification.feature_based import FreshPRINCE
from sktime._contrib.vector_classifiers._rotation_forest import RotationForest

# FreshPRINCE on the tsfresh "minimal" feature set with 50 estimators; seeded so
# repeated runs report the same metrics.
rf = FreshPRINCE(
    default_fc_parameters="minimal",
    n_estimators=50,
    random_state=42,
)
rf.fit(X_train, np.array(y_train))

FreshPRINCE(default_fc_parameters='minimal', n_estimators=50)

In [None]:
# pred_and_report_4 is not defined in this notebook; use the generic reporter
# with names matching the current encoding 0 = Farm, 1 = Forest.
pred_and_report(rf,X_test,y_test,["Farm","Forest"])

Accuracy:  0.8472663525802564
Precision for  Farm :  0.8793812414333269
Recall for  Farm :  0.8673232908458864
F1 Score for  Farm :  0.8733106465726787
Precision for  Forest :  0.888093443858327
Recall for  Forest :  0.9735646427096242
F1 Score for  Forest :  0.9288669950738916
Precision for  Barren :  0.6576862123613312
Recall for  Barren :  0.608058608058608
F1 Score for  Barren :  0.6318995051389417
Precision for  Water :  0.5288461538461539
Recall for  Water :  0.3374233128834356
F1 Score for  Water :  0.41198501872659177


**Farm vs Non-Farm**

In [None]:
# Binary labels: farm = 0, everything else (forest, barren, water) = 1.
# The first 4/5 of each class trains, the last 1/5 tests.
y_train = (
    [0] * ((4*len(farm_X_nested))//5)
    + [1] * ((4*len(forest_X_nested))//5)
    + [1] * ((4*len(barren_X_nested))//5)
    + [1] * ((4*len(water_X_nested))//5)
)
y_test = (
    [0] * ((len(farm_X_nested))//5)
    + [1] * ((len(forest_X_nested))//5)
    + [1] * ((len(barren_X_nested))//5)
    + [1] * ((len(water_X_nested))//5)
)

In [None]:
# Training split for farm vs non-farm: the first 4/5 of every class, in label order.
# NOTE: despite the *_X_test names, these frames hold the TRAINING parts here.
farm_X_test = farm_X_nested.head(4*len(farm_X_nested)//5)
forest_X_test = forest_X_nested.head(4*len(forest_X_nested)//5)
barren_X_test = barren_X_nested.head(4*len(barren_X_nested)//5)
water_X_test = water_X_nested.head(4*len(water_X_nested)//5)
X_train = pd.concat([farm_X_test, forest_X_test,barren_X_test,water_X_test], ignore_index=True)
X_train

Unnamed: 0,var_0
0,0 0.174187 1 0.174187 2 0.180514 3...
1,0 0.166203 1 0.166203 2 0.174591 3...
2,0 0.174308 1 0.174308 2 0.174591 3...
3,0 0.162735 1 0.162735 2 0.186563 3...
4,0 0.164157 1 0.164157 2 0.157216 3...
...,...
36505,0 0.272582 1 0.327732 2 0.292255 3...
36506,0 0.345598 1 0.502703 2 0.518893 3...
36507,0 0.462374 1 0.594356 2 0.705277 3...
36508,0 0.273825 1 0.373573 2 0.331202 3...


In [None]:
# Test split for farm vs non-farm: the last 1/5 of every class, in label order.
farm_X_test = farm_X_nested.tail(len(farm_X_nested)//5)
forest_X_test = forest_X_nested.tail(len(forest_X_nested)//5)
barren_X_test = barren_X_nested.tail(len(barren_X_nested)//5)
water_X_test = water_X_nested.tail(len(water_X_nested)//5)
X_test = pd.concat([farm_X_test, forest_X_test, barren_X_test, water_X_test], ignore_index=True)
X_test

Unnamed: 0,var_0
0,0 0.169118 1 0.194697 2 0.151170 3...
1,0 0.185425 1 0.244043 2 0.144904 3...
2,0 0.294294 1 0.346191 2 0.146226 3...
3,0 0.376050 1 0.274332 2 0.174566 3...
4,0 0.186569 1 0.196424 2 0.146710 3...
...,...
9122,0 -0.177267 1 -0.158859 2 -0.081712 3...
9123,0 -0.195121 1 -0.081571 2 -0.062606 3...
9124,0 -0.181384 1 -0.100877 2 -0.040061 3...
9125,0 -0.192886 1 -0.110020 2 -0.032616 3...


In [None]:
from sktime.classification.interval_based import TimeSeriesForestClassifier

# Time series forest (200 trees, seeded) for the farm vs non-farm task.
tsf_11 = TimeSeriesForestClassifier(n_estimators=200, random_state=42)

tsf_11.fit(X_train, np.array(y_train))

TimeSeriesForestClassifier(random_state=42)

In [None]:
# Label order follows the class ids used in y_test: 0 = Farm, 1 = Non-Farm.
pred_and_report(tsf_11,X_test,y_test,["Farm","Non-Farm"])

Accuracy:  0.9613235455242687
Precision for  Farm :  0.958737402548013
Recall for  Farm :  0.973735032831209
F1 Score for  Farm :  0.9661780205039763
Precision for  Non-Farm :  0.9648397104446742
Recall for  Non-Farm :  0.9450493795897695
F1 Score for  Non-Farm :  0.9548420110016631


**Tabularisation**

In [None]:
# Labels for the forest (0) vs water (1) experiment; first 4/5 of each class
# trains, last 1/5 tests.
y_train = [0] * ((4*len(forest_X_nested))//5) + [1] * ((4*len(water_X_nested))//5)
y_test = [0] * ((len(forest_X_nested))//5) + [1] * ((len(water_X_nested))//5)

In [None]:
# Training split for forest vs water: the first 4/5 of each class, in label order.
# NOTE: despite the *_X_test names, these frames hold the TRAINING parts here.
forest_X_test = forest_X_nested.head(4*len(forest_X_nested)//5)
#barren_X_test = barren_X_nested.head(4*len(barren_X_nested)//5)
water_X_test = water_X_nested.head(4*len(water_X_nested)//5)
X_train = pd.concat([forest_X_test, water_X_test], ignore_index=True)
X_train

Unnamed: 0,var_0
0,0 0.193798 1 0.182723 2 0.148110 3...
1,0 0.157088 1 0.156824 2 0.136332 3...
2,0 0.204357 1 0.148156 2 0.140777 3...
3,0 0.167044 1 0.157140 2 0.128880 3...
4,0 0.175127 1 0.173962 2 0.135108 3...
...,...
10332,0 0.272582 1 0.327732 2 0.292255 3...
10333,0 0.345598 1 0.502703 2 0.518893 3...
10334,0 0.462374 1 0.594356 2 0.705277 3...
10335,0 0.273825 1 0.373573 2 0.331202 3...


In [None]:
# Test split for forest vs water: the last 1/5 of each class, in label order.
#farm_X_test = farm_X_nested.tail(len(farm_X_nested)//5)
forest_X_test = forest_X_nested.tail(len(forest_X_nested)//5)
#barren_X_test = barren_X_nested.tail(len(barren_X_nested)//5)
water_X_test = water_X_nested.tail(len(water_X_nested)//5)
X_test = pd.concat([forest_X_test, water_X_test], ignore_index=True)
X_test

Unnamed: 0,var_0
0,0 0.376582 1 0.356354 2 0.278760 3...
1,0 0.366741 1 0.341127 2 0.271832 3...
2,0 0.328367 1 0.327790 2 0.250570 3...
3,0 0.328367 1 0.311539 2 0.256233 3...
4,0 0.353991 1 0.318725 2 0.258820 3...
...,...
2579,0 -0.177267 1 -0.158859 2 -0.081712 3...
2580,0 -0.195121 1 -0.081571 2 -0.062606 3...
2581,0 -0.181384 1 -0.100877 2 -0.040061 3...
2582,0 -0.192886 1 -0.110020 2 -0.032616 3...


In [None]:
from sklearn.ensemble import RandomForestClassifier

from sktime.datatypes._panel._convert import from_nested_to_2d_array

# Flatten the nested series into a plain 2D table with one column per time step,
# so a standard scikit-learn estimator can consume it.
X_train_tab = from_nested_to_2d_array(X_train)
X_test_tab = from_nested_to_2d_array(X_test)

In [None]:
X_train_tab.head()

Unnamed: 0,var_0__0,var_0__1,var_0__2,var_0__3,var_0__4,var_0__5,var_0__6,var_0__7,var_0__8,var_0__9,...,var_0__26,var_0__27,var_0__28,var_0__29,var_0__30,var_0__31,var_0__32,var_0__33,var_0__34,var_0__35
0,0.193798,0.182723,0.14811,0.125535,0.125356,0.191647,0.257937,0.324228,0.390519,0.456809,...,0.188378,0.173056,0.158884,0.170287,0.202409,0.23453,0.266651,0.298773,0.281293,0.275518
1,0.157088,0.156824,0.136332,0.1154,0.120113,0.149435,0.178757,0.208079,0.237401,0.266723,...,0.181508,0.146944,0.141807,0.150245,0.182698,0.215152,0.247606,0.28006,0.239394,0.229321
2,0.204357,0.148156,0.140777,0.129717,0.129761,0.191073,0.252384,0.313695,0.375006,0.436317,...,0.214154,0.168672,0.184915,0.163251,0.201119,0.238987,0.276855,0.314723,0.27302,0.301828
3,0.167044,0.15714,0.12888,0.119801,0.119443,0.154398,0.189354,0.224309,0.259265,0.29422,...,0.182233,0.150858,0.151486,0.156094,0.187084,0.218075,0.249065,0.280055,0.268779,0.249803
4,0.175127,0.173962,0.135108,0.130868,0.139634,0.175769,0.211905,0.24804,0.284175,0.320311,...,0.182527,0.161755,0.156478,0.176969,0.213349,0.249728,0.286108,0.322488,0.272612,0.2606


In [None]:
X_test_tab.head()

Unnamed: 0,var_0__0,var_0__1,var_0__2,var_0__3,var_0__4,var_0__5,var_0__6,var_0__7,var_0__8,var_0__9,...,var_0__26,var_0__27,var_0__28,var_0__29,var_0__30,var_0__31,var_0__32,var_0__33,var_0__34,var_0__35
0,0.376582,0.356354,0.27876,0.405654,0.499667,0.59368,0.60219,0.610701,0.619212,0.627723,...,0.376437,0.474315,0.508389,0.542464,0.576539,0.610614,0.644688,0.678763,0.712838,0.712838
1,0.366741,0.341127,0.271832,0.38173,0.472017,0.562305,0.571958,0.581611,0.591263,0.600916,...,0.329467,0.48067,0.511058,0.541446,0.571834,0.602222,0.632611,0.662999,0.693387,0.693387
2,0.328367,0.32779,0.25057,0.350365,0.456335,0.562305,0.572981,0.583658,0.594334,0.60501,...,0.319352,0.45397,0.488173,0.522375,0.556577,0.59078,0.624982,0.659184,0.693387,0.693387
3,0.328367,0.311539,0.256233,0.333333,0.476793,0.620253,0.61758,0.614906,0.612233,0.60956,...,0.308928,0.460543,0.495857,0.53117,0.566483,0.601797,0.63711,0.672423,0.707736,0.707736
4,0.353991,0.318725,0.25882,0.366717,0.501206,0.635696,0.63449,0.633284,0.632078,0.630872,...,0.316266,0.446972,0.486091,0.525209,0.564328,0.603447,0.642566,0.681684,0.720803,0.720803


In [None]:
# Plain random forest (200 trees, seeded) on the tabularised monthly NDVI values.
tab = RandomForestClassifier(n_estimators=200, random_state=42)
tab.fit(X_train_tab, np.array(y_train))

RandomForestClassifier(n_estimators=200, random_state=42)

In [None]:
# Label order follows the class ids used in y_test: 0 = Forest, 1 = Water.
pred_and_report(tab,X_test_tab,y_test,["Forest","Water"])

Accuracy:  0.9996130030959752
Precision for  Forest :  0.999587118084228
Recall for  Forest :  1.0
F1 Score for  Forest :  0.999793516415445
Precision for  Water :  1.0
Recall for  Water :  0.9938650306748467
F1 Score for  Water :  0.9969230769230769


In [None]:
# Labels for the three-class experiment: forest = 0, barren = 1, water = 2.
# The first 4/5 of each class trains, the last 1/5 tests.
y_train = (
    [0] * ((4*len(forest_X_nested))//5)
    + [1] * ((4*len(barren_X_nested))//5)
    + [2] * ((4*len(water_X_nested))//5)
)
y_test = (
    [0] * ((len(forest_X_nested))//5)
    + [1] * ((len(barren_X_nested))//5)
    + [2] * ((len(water_X_nested))//5)
)

In [None]:
# Training split for forest / barren / water: the first 4/5 of each class, in label order.
# NOTE: despite the *_X_test names, these frames hold the TRAINING parts here.
forest_X_test = forest_X_nested.head(4*len(forest_X_nested)//5)
barren_X_test = barren_X_nested.head(4*len(barren_X_nested)//5)
water_X_test = water_X_nested.head(4*len(water_X_nested)//5)
X_train = pd.concat([forest_X_test, barren_X_test, water_X_test], ignore_index=True)
X_train

Unnamed: 0,var_0
0,0 0.193798 1 0.182723 2 0.148110 3...
1,0 0.157088 1 0.156824 2 0.136332 3...
2,0 0.204357 1 0.148156 2 0.140777 3...
3,0 0.167044 1 0.157140 2 0.128880 3...
4,0 0.175127 1 0.173962 2 0.135108 3...
...,...
15793,0 0.272582 1 0.327732 2 0.292255 3...
15794,0 0.345598 1 0.502703 2 0.518893 3...
15795,0 0.462374 1 0.594356 2 0.705277 3...
15796,0 0.273825 1 0.373573 2 0.331202 3...


In [None]:
# Test split for forest / barren / water: the last 1/5 of each class, in label order.
#farm_X_test = farm_X_nested.tail(len(farm_X_nested)//5)
forest_X_test = forest_X_nested.tail(len(forest_X_nested)//5)
barren_X_test = barren_X_nested.tail(len(barren_X_nested)//5)
water_X_test = water_X_nested.tail(len(water_X_nested)//5)
X_test = pd.concat([forest_X_test, barren_X_test, water_X_test], ignore_index=True)
X_test

Unnamed: 0,var_0
0,0 0.376582 1 0.356354 2 0.278760 3...
1,0 0.366741 1 0.341127 2 0.271832 3...
2,0 0.328367 1 0.327790 2 0.250570 3...
3,0 0.328367 1 0.311539 2 0.256233 3...
4,0 0.353991 1 0.318725 2 0.258820 3...
...,...
3944,0 -0.177267 1 -0.158859 2 -0.081712 3...
3945,0 -0.195121 1 -0.081571 2 -0.062606 3...
3946,0 -0.181384 1 -0.100877 2 -0.040061 3...
3947,0 -0.192886 1 -0.110020 2 -0.032616 3...


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sktime.datatypes._panel._convert import from_nested_to_2d_array

# Flatten the nested sktime panels into plain 2D tables so a standard
# scikit-learn estimator can be trained on them.
X_train_tab, X_test_tab = map(from_nested_to_2d_array, (X_train, X_test))

In [None]:
# Baseline: a random forest trained directly on the flattened series values.
tab = RandomForestClassifier(random_state=42, n_estimators=500)
tab.fit(X=X_train_tab, y=np.asarray(y_train))

RandomForestClassifier(n_estimators=500, random_state=42)

In [None]:
# Score the tabular random forest on the held-out split. The class names are
# listed in label order (0 = Forest, 1 = Barren, 2 = Water).
pred_and_report(tab,X_test_tab,y_test,["Forest","Barren","Water"])

Accuracy:  0.9721448467966574
Precision for  Forest :  0.998345055854365
Recall for  Forest :  0.996695580338703
F1 Score for  Forest :  0.9975196362133113
Precision for  Barren :  0.9380222841225627
Recall for  Barren :  0.9868131868131869
F1 Score for  Barren :  0.9617993573723671
Precision for  Water :  0.8229166666666666
Recall for  Water :  0.48466257668711654
F1 Score for  Water :  0.61003861003861


**Feature Extractor**

In [None]:
! pip install numba>=0.54

In [None]:
from sktime.transformations.panel.tsfresh import TSFreshFeatureExtractor

# Summarise every training series with tsfresh's "minimal" feature set
# (sum, median, mean, length, std, variance, RMS, max, abs-max, min).
transformer = TSFreshFeatureExtractor(default_fc_parameters="minimal")
extracted_features = transformer.fit_transform(X=X_train)
extracted_features.head(5)

Feature Extraction: 100%|██████████| 15798/15798 [00:13<00:00, 1165.95it/s]


Unnamed: 0,var_0__sum_values,var_0__median,var_0__mean,var_0__length,var_0__standard_deviation,var_0__variance,var_0__root_mean_square,var_0__maximum,var_0__absolute_maximum,var_0__minimum
0,8.662253,0.230356,0.240618,36.0,0.082993,0.006888,0.254529,0.456809,0.456809,0.11663
1,7.375985,0.202492,0.204888,36.0,0.066647,0.004442,0.215456,0.417133,0.417133,0.107829
2,8.88601,0.245253,0.246834,36.0,0.083418,0.006959,0.260548,0.450324,0.450324,0.125439
3,7.620635,0.204027,0.211684,36.0,0.072875,0.005311,0.223877,0.44991,0.44991,0.119443
4,8.399212,0.224317,0.233311,36.0,0.079153,0.006265,0.246373,0.478547,0.478547,0.121602


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest trained on the extracted tsfresh summary features instead of
# the raw series values.
tse = RandomForestClassifier(random_state=42, n_estimators=1000)
tse.fit(X=extracted_features, y=np.asarray(y_train))

RandomForestClassifier(n_estimators=1000, random_state=42)

In [None]:
# Reuse the extractor fitted on the training split to featurise the held-out
# series, then score the feature-based random forest on them.
test_e = transformer.transform(X_test)
pred_and_report(tse,test_e,y_test,["Forest", "Barren", "Water"])

Feature Extraction: 100%|██████████| 3949/3949 [00:03<00:00, 1178.89it/s]


Accuracy:  0.9599898708533806
Precision for  Forest :  0.9765372168284789
Recall for  Forest :  0.9971086327963652
F1 Score for  Forest :  0.9867157163294502
Precision for  Barren :  0.9392423159399571
Recall for  Barren :  0.9626373626373627
F1 Score for  Barren :  0.9507959479015919
Precision for  Water :  0.8076923076923077
Recall for  Water :  0.38650306748466257
F1 Score for  Water :  0.5228215767634855


**Other Classifiers**

In [None]:
# DrCIF (interval-based classifier).
# NOTE: marked "too slow" in the original notebook; no result was recorded.

from sktime.classification.interval_based import DrCIF

drcif = DrCIF(
    n_estimators=3,
    n_intervals=2,
    att_subsample_size=2,
    random_state=42,  # seeded like the random-forest baselines for repeatable runs
)
drcif.fit(X_train, np.array(y_train))

In [None]:
# TDE (Temporal Dictionary Ensemble, dictionary-based classifier).
# NOTE: marked "too slow" in the original notebook; no result was recorded.

from sktime.classification.dictionary_based import TemporalDictionaryEnsemble

clf_tde = TemporalDictionaryEnsemble(
    n_parameter_samples=20,
    max_ensemble_size=3,
    randomly_selected_params=5,
    random_state=42,  # seeded like the random-forest baselines for repeatable runs
)

clf_tde.fit(X_train, np.array(y_train))

In [None]:
# cBOSS (Contractable BOSS, dictionary-based classifier).
# NOTE: marked "too slow" in the original notebook; no result was recorded.

from sktime.classification.dictionary_based import ContractableBOSS

clf = ContractableBOSS(
    n_parameter_samples=20,
    max_ensemble_size=3,
    random_state=42,  # seeded like the random-forest baselines for repeatable runs
)
clf.fit(X_train, np.array(y_train))
clf.score(X_test, np.array(y_test))

In [None]:
# 1-nearest-neighbour classifier with dynamic time warping distance.
# NOTE: marked "too slow" in the original notebook; no result was recorded.

from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier

knn = KNeighborsTimeSeriesClassifier(n_neighbors=1, distance="dtw")
# Labels are passed as numpy arrays, as in every other sktime cell here;
# y_train / y_test are plain Python lists, which sktime classifiers may reject.
knn.fit(X_train, np.array(y_train))
knn.score(X_test, np.array(y_test))

In [None]:
# Proximity Forest (distance-based tree ensemble).
# NOTE: marked "too slow" in the original notebook; no result was recorded.


from sktime.classification.distance_based import ProximityForest

clf_pf = ProximityForest(
    n_estimators=2,
    max_depth=2,
    n_stump_evaluations=1,
    random_state=42,  # seeded like the random-forest baselines for repeatable runs
)

clf_pf.fit(X_train, np.array(y_train))

In [None]:
# HIVE-COTE v2 (hybrid meta-ensemble of STC, DrCIF, Arsenal and TDE),
# configured with deliberately small component settings.
# NOTE: marked "too slow" in the original notebook; no result was recorded.

from sktime.classification.hybrid import HIVECOTEV2
# NOTE(review): RotationForest is imported from a private _contrib path that
# may move between sktime releases - confirm against the installed version.
from sktime._contrib.vector_classifiers._rotation_forest import RotationForest

clf_hv2 = HIVECOTEV2(
    stc_params={
        "estimator": RotationForest(n_estimators=3),
        "n_shapelet_samples": 100,
        "max_shapelets": 10,
        "batch_size": 20,
    },
    drcif_params={"n_estimators": 2, "n_intervals": 2, "att_subsample_size": 2},
    arsenal_params={"num_kernels": 50, "n_estimators": 3},
    tde_params={
        "n_parameter_samples": 10,
        "max_ensemble_size": 3,
        "randomly_selected_params": 5,
    },
    random_state=42,  # seeded like the random-forest baselines for repeatable runs
)

clf_hv2.fit(X_train, np.array(y_train))