In [None]:
# Import required Dependencies
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import sklearn
import datetime

# Pre Processing
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from functools import partial
from hyperopt import hp,fmin,tpe,Trials
from hyperopt.pyll.base import scope

# Error Metrics
from sklearn import metrics
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

# Regressors
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import VotingRegressor

# Classifiers
from sklearn import svm
from sklearn import linear_model
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

# DL models
from tensorflow.keras import layers 
from tensorflow.keras.models import Sequential

# Save models
import pickle

#Cross Validation 
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
import optuna

#Using GPU
from numba import jit, cuda 

#Parallel backend processing
from concurrent.futures import ProcessPoolExecutor
from joblib import parallel_backend

In [None]:
import os

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "LU"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure under ``IMAGES_PATH``.

    The file is named ``<fig_id>.<fig_extension>``.

    Parameters
    ----------
    fig_id : str
        Base file name without extension; also echoed in the progress message.
    tight_layout : bool, default True
        When true, ``plt.tight_layout()`` is applied before saving.
    fig_extension : str, default "png"
        Used both as the file suffix and as the ``format`` given to ``savefig``.
    resolution : int, default 300
        DPI given to ``savefig``.
    """
    file_name = "".join((fig_id, ".", fig_extension))
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(
        os.path.join(IMAGES_PATH, file_name),
        format=fig_extension,
        dpi=resolution,
        bbox_inches='tight',
    )

# Ignore useless warnings (see SciPy issue #5998)
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")

In [None]:
# Load the saved sample of data in CSV format (10% of the resampled one)
df = pd.read_csv('LU_data_sample.csv')
#df = all_data.sample(frac =.1)

In [None]:
df.head()

In [None]:
df.info()

In [None]:
df.describe()

In [None]:
df['class'].value_counts()

In [None]:
np.isinf(df).values.sum() 

In [None]:
df.isnull().sum()

# Data Preparation for Model Building

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Creating training and test sets
# Features are every column but the last; the label is the last column
features = df.iloc[:, :-1]
y = df.iloc[:, -1]

# train test split with train_size=80% and test_size=20%
# Split BEFORE scaling so that no statistic of the test rows reaches the preprocessing step
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features, y, train_size=0.80, random_state=101
)

# Rescaling the features: min/max are learned from the training rows only,
# then the same transformation is applied to the test rows
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled copy of the full feature matrix (NumPy array), kept under the name X
# because later cells reference it
X = scaler.transform(features)

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

## Classifiers

### GridSearch/RandomSearch for testing many parameters of each classifier

In [None]:
def set_grid(est, param, cv = 10):
    """Build an (unfitted) exhaustive grid search for ``est``.

    Parameters
    ----------
    est : estimator
        Estimator to tune.
    param : dict
        Passed to ``GridSearchCV`` as ``param_grid``.
    cv : int, default 10
        Passed to ``GridSearchCV`` as ``cv``.

    Returns
    -------
    GridSearchCV
        Configured with accuracy scoring, ``verbose=10`` and ``n_jobs=-1``.
    """
    # No numba decorator here: this function only constructs a scikit-learn
    # object, so there is no numerical code for a JIT compiler to speed up.
    return GridSearchCV(
        estimator=est,
        param_grid=param,
        cv=cv,
        scoring="accuracy",
        verbose=10,
        n_jobs=-1,
    )

def set_rand(est, param, cv = 10):
    """Build an (unfitted) randomized hyper-parameter search for ``est``.

    Parameters
    ----------
    est : estimator
        Estimator to tune.
    param : dict
        Passed to ``RandomizedSearchCV`` as ``param_distributions``.
    cv : int, default 10
        Passed to ``RandomizedSearchCV`` as ``cv``.

    Returns
    -------
    RandomizedSearchCV
        Configured with accuracy scoring, ``verbose=10`` and ``n_jobs=-1``.
    """
    search_options = dict(
        estimator=est,
        param_distributions=param,
        cv=cv,
        scoring="accuracy",
        verbose=10,
        n_jobs=-1,
    )
    return RandomizedSearchCV(**search_options)

def print_results(model_cv, X_test_s, y_test):
    """Summarise a fitted search object.

    Despite its name, this function prints nothing; it returns the summary.

    Returns
    -------
    tuple
        ``(best_score_, best_params_, score on (X_test_s, y_test))`` read from ``model_cv``.
    """
    cv_score = model_cv.best_score_
    chosen_params = model_cv.best_params_
    held_out_score = model_cv.score(X_test_s, y_test)
    return (cv_score, chosen_params, held_out_score)

In [None]:
# Class names used as row/column labels for the confusion-matrix heatmaps below.
# NOTE(review): the list is hard-coded; it is assumed to follow the ascending order
# of the numeric class ids (1..6) so that it lines up with the confusion-matrix
# rows/columns — confirm against the dataset.
# (Deriving it from the data would need df['class'].unique(); `df.class` is not valid
# Python because `class` is a keyword.)
LU = ['Cultivated_area', 'Trees_palms', 'Buildings', 'Roads', 'Water_bodies', 'Aqua_culture']
LU

### KNeighborsClassifier

#### Apply different parameters of the classifier using GridSearch/RandomSearch for getting the best parameter

In [None]:
knn = KNeighborsClassifier()
param_cv = {"n_neighbors" : range(4, 9)}

knn_cv = set_rand(knn, param_cv)
knn_cv.get_params()

In [None]:
with parallel_backend('multiprocessing'):
    knn_cv.fit(X_train, y_train)

In [None]:
print_results(knn_cv, X_test, y_test)

In [None]:
# evaluation: Confusion Matrix
confusion_knn = metrics.confusion_matrix(y_true = y_test, y_pred = knn_cv.predict(X_test))
confusion_knn

In [None]:
df_cm_knn = pd.DataFrame(confusion_knn, index = [i for i in LU],
                  columns = [i for i in LU])
plt.figure(figsize = (10,7))
sns.heatmap(df_cm_knn, annot=True,fmt='d',cmap=plt.cm.Blues)
save_fig("KNeighbors_cm")

In [None]:
# measure accuracy
metrics.accuracy_score(y_true=y_test, y_pred=knn_cv.predict(X_test))

In [None]:
# Save the fitted search object as a pkl file
os.makedirs('models/LU', exist_ok=True)  # make sure the output folder exists
# The with-block closes the file even if pickling fails
with open('models/LU/knn_cv.pkl', 'wb') as model_file:
    pickle.dump(knn_cv, model_file)
#pickled_model = pickle.load(open('models/LU/knn_cv.pkl', 'rb'))
#pickled_model.predict(X_test)

#### Try different parameters of the classifier using hyperparameter optimization (Optuna) to find the best parameters

In [None]:
# no params any more
def optimize_kn(trial,x,y):
    """Optuna objective for k-NN: negative mean accuracy over 10 stratified folds.

    Parameters
    ----------
    trial : optuna trial
        Supplies ``n_neighbors`` (integer in [4, 9]).
    x : array-like
        Feature matrix; converted to a NumPy array.
    y : array-like
        Labels; converted to a NumPy array.

    Returns
    -------
    float
        ``-1 * mean fold accuracy``, so the study has to *minimize* it.
    """
    # The fold indices produced by StratifiedKFold are positions. Index plain
    # arrays rather than pandas objects, whose [] lookup is label/column based.
    x = np.asarray(x)
    y = np.asarray(y)

    n_neighbors = trial.suggest_int('n_neighbors', 4, 9, step=1)
    model = KNeighborsClassifier(n_neighbors=n_neighbors)

    kf = StratifiedKFold(n_splits=10)
    accuracies = []
    for train_idx, test_idx in kf.split(X=x, y=y):
        model.fit(x[train_idx], y[train_idx])
        preds = model.predict(x[test_idx])
        accuracies.append(metrics.accuracy_score(y[test_idx], preds))

    return -1 * np.mean(accuracies)

In [None]:
# Tune on the training split only, so the test split stays unseen until the final evaluation.
# The labels are passed as a NumPy array because the objective indexes them by fold position.
optimization_function_kn=partial(optimize_kn,x=X_train,y=np.asarray(y_train))
study_kn=optuna.create_study(direction='minimize')

In [None]:
print(datetime.datetime.now())
with parallel_backend('multiprocessing'):
    study_kn.optimize(optimization_function_kn, n_trials=10, n_jobs=-1)
print(datetime.datetime.now())

In [None]:
# Get the best trial
best_trial = study_kn.best_trial
best_params = best_trial.params
best_accuracy = -best_trial.value

print("Best Parameters:", best_params)
print("Best Accuracy:", best_accuracy)

In [None]:
# Train a new model on the training split using the best parameters found by the study
kn_model = KNeighborsClassifier(**best_params)
kn_model.fit(X_train, y_train)

In [None]:
# Predict on the held-out test split (evaluated in the cells below)
y_pred = kn_model.predict(X_test)

In [None]:
# Confusion matrix
confusion_kn = metrics.confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(confusion_kn)

In [None]:
df_cm_kn = pd.DataFrame(confusion_kn, index = [i for i in LU],
                  columns = [i for i in LU])
plt.figure(figsize = (10,7))
sns.heatmap(df_cm_kn, annot=True,fmt='d',cmap=plt.cm.Blues)
save_fig("KNeighbors_cm_opt")

In [None]:
# Save the best model
os.makedirs('models/LU', exist_ok=True)  # make sure the output folder exists
with open('models/LU/kn_model.pkl', 'wb') as model_file:
    # Pickle the k-NN model trained above
    pickle.dump(kn_model, model_file)
#pickled_model = pickle.load(open('models/LU/kn_model.pkl', 'rb'))
#pickled_model.predict(X_test)

### DecisionTreeClassifier

#### Apply different parameters of the classifier using GridSearch/RandomSearch for getting the best parameter

In [None]:
dt = DecisionTreeClassifier()
param_cv = {"criterion": ['gini','entropy'], "max_depth" : [15,18,20], "min_samples_split" : [50,80,100]}
dt_cv = set_rand(dt, param_cv)
dt_cv.get_params()

In [None]:
print(datetime.datetime.now())
with parallel_backend('multiprocessing'):
    dt_cv.fit(X_train, y_train)
print(datetime.datetime.now())

In [None]:
print_results(dt_cv, X_test, y_test)

In [None]:
# evaluation: Confusion Matrix
confusion_dt = metrics.confusion_matrix(y_true = y_test, y_pred = dt_cv.predict(X_test))
confusion_dt

In [None]:
df_cm_dt = pd.DataFrame(confusion_dt, index = [i for i in LU],
                  columns = [i for i in LU])
plt.figure(figsize = (10,7))
sns.heatmap(df_cm_dt, annot=True,fmt='d',cmap=plt.cm.Blues)
save_fig("Decision_tree_cm")

In [None]:
# measure accuracy
metrics.accuracy_score(y_true=y_test, y_pred=dt_cv.predict(X_test))

In [None]:
# Save the fitted search object as a pkl file
os.makedirs('models/LU', exist_ok=True)  # make sure the output folder exists
# The with-block closes the file even if pickling fails
with open('models/LU/dt_cv.pkl', 'wb') as model_file:
    pickle.dump(dt_cv, model_file)
#pickled_model = pickle.load(open('models/LU/dt_cv.pkl', 'rb'))
#pickled_model.predict(X_test)

#### Try different parameters of the classifier using hyperparameter optimization (Optuna) to find the best parameters

In [None]:
# no params any more
def optimize_dt(trial,x,y):
    """Optuna objective for a decision tree: negative mean accuracy over 10 stratified folds.

    Parameters
    ----------
    trial : optuna trial
        Supplies ``criterion`` ('gini' or 'entropy'), ``max_depth`` (15..20)
        and ``min_samples_split`` (50..100 in steps of 10).
    x : array-like
        Feature matrix; converted to a NumPy array.
    y : array-like
        Labels; converted to a NumPy array.

    Returns
    -------
    float
        ``-1 * mean fold accuracy``, so the study has to *minimize* it.
    """
    # The fold indices produced by StratifiedKFold are positions. Index plain
    # arrays rather than pandas objects, whose [] lookup is label/column based.
    x = np.asarray(x)
    y = np.asarray(y)

    model = DecisionTreeClassifier(
        criterion=trial.suggest_categorical('criterion', ['gini', 'entropy']),
        max_depth=trial.suggest_int('max_depth', 15, 20, step=1),
        min_samples_split=trial.suggest_int("min_samples_split", 50, 100, step=10),
    )

    kf = StratifiedKFold(n_splits=10)
    accuracies = []
    for train_idx, test_idx in kf.split(X=x, y=y):
        model.fit(x[train_idx], y[train_idx])
        preds = model.predict(x[test_idx])
        accuracies.append(metrics.accuracy_score(y[test_idx], preds))

    return -1 * np.mean(accuracies)

In [None]:
# Tune on the training split only, so the test split stays unseen until the final evaluation.
# The labels are passed as a NumPy array because the objective indexes them by fold position.
optimization_function_dt=partial(optimize_dt,x=X_train,y=np.asarray(y_train))
study_dt=optuna.create_study(direction="minimize")

In [None]:
print(datetime.datetime.now())
with parallel_backend('multiprocessing'):
    study_dt.optimize(optimization_function_dt, n_trials=10, n_jobs=-1)
print(datetime.datetime.now())

In [None]:
# Get the best trial
best_trial = study_dt.best_trial
best_params = best_trial.params
best_accuracy = -best_trial.value

print("Best Parameters:", best_params)
print("Best Accuracy:", best_accuracy)

In [None]:
# Train a new model on the training split using the best parameters found by the study
dt_model = DecisionTreeClassifier(**best_params)
dt_model.fit(X_train, y_train)

In [None]:
# Predict on the held-out test split (evaluated in the cells below)
y_pred = dt_model.predict(X_test)

In [None]:
# Confusion matrix
confusion_dt = metrics.confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(confusion_dt)

In [None]:
df_cm_dt = pd.DataFrame(confusion_dt, index = [i for i in LU],
                  columns = [i for i in LU])
plt.figure(figsize = (10,7))
sns.heatmap(df_cm_dt, annot=True,fmt='d',cmap=plt.cm.Blues)
save_fig("Decision_Tree_cm_opt")

In [None]:
# Save the best model
os.makedirs('models/LU', exist_ok=True)  # make sure the output folder exists
with open('models/LU/dt_model.pkl', 'wb') as model_file:
    pickle.dump(dt_model, model_file)
#pickled_model = pickle.load(open('models/LU/dt_model.pkl', 'rb'))
#pickled_model.predict(X_test)

### RandomForestClassifier

#### Apply different parameters of the classifier using GridSearch/RandomSearch for getting the best parameter

In [None]:
rf = RandomForestClassifier(max_depth=18, min_samples_split=100)
param_cv = {"n_estimators" : [500, 700, 1000]}
rf_cv = set_rand(rf, param_cv)
rf_cv.get_params()

In [None]:
print(datetime.datetime.now())
with parallel_backend('multiprocessing'):
    rf_cv.fit(X_train, y_train)
print(datetime.datetime.now())

In [None]:
print_results(rf_cv, X_test, y_test)

In [None]:
# evaluation: Confusion Matrix
confusion_rf = metrics.confusion_matrix(y_true = y_test, y_pred = rf_cv.predict(X_test))
confusion_rf

In [None]:
df_cm_rf = pd.DataFrame(confusion_rf, index = [i for i in LU],
                  columns = [i for i in LU])
plt.figure(figsize = (10,7))
sns.heatmap(df_cm_rf, annot=True,fmt='d',cmap=plt.cm.Blues)
save_fig("Random_forest_cm")

In [None]:
# measure accuracy
metrics.accuracy_score(y_true=y_test, y_pred=rf_cv.predict(X_test))

In [None]:
# Save the fitted search object as a pkl file
os.makedirs('models/LU', exist_ok=True)  # make sure the output folder exists
# The with-block closes the file even if pickling fails
with open('models/LU/rf_cv.pkl', 'wb') as model_file:
    pickle.dump(rf_cv, model_file)
#pickled_model = pickle.load(open('models/LU/rf_cv.pkl', 'rb'))
#pickled_model.predict(X_test)

#### Try different parameters of the classifier using hyperparameter optimization (Optuna) to find the best parameters

In [None]:
# no params any more
def optimize_rf(trial,x,y):
    """Optuna objective for a random forest: negative mean accuracy over 10 stratified folds.

    Parameters
    ----------
    trial : optuna trial
        Supplies ``criterion`` ('gini' or 'entropy'), ``n_estimators``
        (500..1000 in steps of 100), ``max_depth`` (15..20) and
        ``min_samples_split`` (50..100 in steps of 10).
    x : array-like
        Feature matrix; converted to a NumPy array.
    y : array-like
        Labels; converted to a NumPy array.

    Returns
    -------
    float
        ``-1 * mean fold accuracy``, so the study has to *minimize* it.
    """
    # The fold indices produced by StratifiedKFold are positions. Index plain
    # arrays rather than pandas objects, whose [] lookup is label/column based.
    x = np.asarray(x)
    y = np.asarray(y)

    model = RandomForestClassifier(
        criterion=trial.suggest_categorical('criterion', ['gini', 'entropy']),
        n_estimators=trial.suggest_int('n_estimators', 500, 1000, step=100),
        max_depth=trial.suggest_int('max_depth', 15, 20, step=1),
        min_samples_split=trial.suggest_int("min_samples_split", 50, 100, step=10),
    )

    kf = StratifiedKFold(n_splits=10)
    accuracies = []
    for train_idx, test_idx in kf.split(X=x, y=y):
        model.fit(x[train_idx], y[train_idx])
        preds = model.predict(x[test_idx])
        accuracies.append(metrics.accuracy_score(y[test_idx], preds))

    return -1 * np.mean(accuracies)

In [None]:
def objective(trial):
    """Optuna objective: mean 5-fold CV accuracy of a random forest on the training split.

    Only ``n_estimators`` (500..1100 in steps of 200) is tuned; ``criterion``,
    ``max_depth`` and ``min_samples_split`` are fixed below. The score is returned
    un-negated, so the study has to *maximize* it.
    """
    # Imported locally because the notebook's import cell does not bring in cross_val_score
    from sklearn.model_selection import cross_val_score

    n_estimators=trial.suggest_int('n_estimators',500,1100,step=200)

    model=RandomForestClassifier(
        criterion='entropy',
        n_estimators=n_estimators,
        max_depth=18,
        min_samples_split=80
    )
    return cross_val_score(model, X_train, y_train, n_jobs=-1, cv=5).mean()

In [None]:
print(datetime.datetime.now())
with parallel_backend('multiprocessing'):
    study_rf = optuna.create_study(direction='maximize')
    study_rf.optimize(objective, n_trials=10, n_jobs=-1)
    # best_params only holds n_estimators; repeat the settings that objective()
    # keeps fixed so the final model matches the configuration that was scored
    rf_model = RandomForestClassifier(
        criterion='entropy',
        max_depth=18,
        min_samples_split=80,
        **study_rf.best_params
    )
    rf_model.fit(X_train, y_train)
print(datetime.datetime.now())

In [None]:
# Tune on the training split only, so the test split stays unseen until the final evaluation.
# The labels are passed as a NumPy array because the objective indexes them by fold position.
optimization_function_rf=partial(optimize_rf,x=X_train,y=np.asarray(y_train))
study_rf=optuna.create_study(direction='minimize')

In [None]:
print(datetime.datetime.now())
with parallel_backend('multiprocessing'):
    study_rf.optimize(optimization_function_rf, n_trials=10, n_jobs=-1)
print(datetime.datetime.now())

In [None]:
# Get the best trial
best_trial = study_rf.best_trial
best_params = best_trial.params
best_accuracy = -best_trial.value

print("Best Parameters:", best_params)
print("Best Accuracy:", best_accuracy)

In [None]:
# Train a new model on the training split using the best parameters found by the study
rf_model = RandomForestClassifier(**best_params)
rf_model.fit(X_train, y_train)

In [None]:
# Predict on the held-out test split (evaluated in the cells below)
y_pred = rf_model.predict(X_test)

In [None]:
# Confusion matrix
confusion_rf = metrics.confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(confusion_rf)

In [None]:
df_cm_rf = pd.DataFrame(confusion_rf, index = [i for i in LU],
                  columns = [i for i in LU])
plt.figure(figsize = (10,7))
sns.heatmap(df_cm_rf, annot=True,fmt='d',cmap=plt.cm.Blues)
save_fig("Random_forest_cm_opt")

In [None]:
# Save the best model
os.makedirs('models/LU', exist_ok=True)  # make sure the output folder exists
with open('models/LU/rf_model.pkl', 'wb') as model_file:
    pickle.dump(rf_model, model_file)
#pickled_model = pickle.load(open('models/LU/rf_model.pkl', 'rb'))
#pickled_model.predict(X_test)

### SVM Linear

#### Apply different parameters of the classifier in loops for getting the best parameter using GPU instead of CPU for faster performance

In [None]:
import os
os.add_dll_directory("C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.3\\bin")

In [None]:
import thundersvm
from thundersvm import SVC

In [None]:
def random_search(X_train, X_test, y_train, y_test):
    """Score a linear-kernel thundersvm SVC for a fixed list of C values.

    Each candidate is fitted on ``(X_train, y_train)`` and its accuracy is measured
    on ``(X_test, y_test)``. A candidate that raises is reported on stdout and
    skipped, so the returned list may be shorter than the list of C values.

    Returns
    -------
    list of tuple
        ``(C, gamma, accuracy)`` for every candidate that ran.
    """
    gamma = 0.1  # constant for every candidate; recorded next to each C
    scores = []

    for C in (0.001, 0.005, 0.01, 0.05, 0.5, 1, 3, 5):
        try:
            clf = thundersvm.SVC(gpu_id=0, kernel="linear", C=C, gamma=gamma)
            clf.fit(X_train, y_train)
            accuracy = metrics.accuracy_score(y_true=y_test, y_pred=clf.predict(X_test))
        except Exception as e:
            print(f"Exception in random_search: {e}")
        else:
            scores.append((C, gamma, accuracy))

    return scores

In [None]:
# Call the function to get the results
print(datetime.datetime.now())
svl_random_search = random_search(X_train, X_test, y_train, y_test)
# A bare expression in the middle of a cell displays nothing, so print explicitly
print(svl_random_search)
# Pick the candidate with the highest accuracy (third tuple element)
best_result = max(svl_random_search, key=lambda x: x[2])
best_C, best_gamma, best_performance = best_result

print(f"Best C: {best_C}, Best Gamma: {best_gamma}, Best Performance: {best_performance}")
print(datetime.datetime.now())

In [None]:
# Refit with the parameters selected by the search above instead of hard-coded values
# (the previous C=20 was outside the searched C grid)
svc_lin_gpu = thundersvm.SVC(gpu_id=0, C=best_C, gamma=best_gamma, kernel='linear')
print(datetime.datetime.now())
svc_lin_gpu.fit(X_train, y_train)
print(datetime.datetime.now())
# Predict on the held-out test split
y_pred = svc_lin_gpu.predict(X_test)
print(datetime.datetime.now())

In [None]:
# measure accuracy
metrics.accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Confusion matrix
confusion_svl = metrics.confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(confusion_svl)

In [None]:
# Label the confusion matrix with the class names and plot it as a heatmap
df_cm_svl = pd.DataFrame(confusion_svl, index=list(LU), columns=list(LU))
# Optional CSV export, kept disabled (the stray leading "3" made this line a syntax error)
#df_cm_svl.to_csv('LU_svl.csv', index=False)
plt.figure(figsize = (10,7))
sns.heatmap(df_cm_svl, annot=True,fmt='d',cmap=plt.cm.Blues)
save_fig("SVM_Linear_cm_opt")

In [None]:
# Save the best model
os.makedirs('models/LU', exist_ok=True)  # make sure the output folder exists
with open('models/LU/svl_model.pkl', 'wb') as model_file:
    pickle.dump(svc_lin_gpu, model_file)
#pickled_model = pickle.load(open('models/LU/svl_model.pkl', 'rb'))
#pickled_model.predict(X_test)

#### Apply different parameters of the classifier using GridSearch/RandomSearch for getting the best parameter (CPU)

In [None]:
# Use scikit-learn's SVC explicitly: the bare name SVC was rebound by
# `from thundersvm import SVC` in an earlier cell, and this section is the CPU search
svc_linear = svm.SVC(kernel = "linear")
param_cv = {"C": [0.001, 0.005, 0.01, 0.05, 0.5, 1, 3, 5]}
svc_linear_cv = set_rand(svc_linear, param_cv)
svc_linear_cv.get_params()

In [None]:
print(datetime.datetime.now())
with parallel_backend('multiprocessing'):
    svc_linear_cv.fit(X_train, y_train)
print(datetime.datetime.now())

In [None]:
print_results(svc_linear_cv, X_test, y_test)

In [None]:
# evaluation: Confusion Matrix
confusion_svml = metrics.confusion_matrix(y_true = y_test, y_pred = svc_linear_cv.predict(X_test))
confusion_svml

In [None]:
df_cm_svml = pd.DataFrame(confusion_svml, index = [i for i in LU],
                  columns = [i for i in LU])
plt.figure(figsize = (10,7))
sns.heatmap(df_cm_svml, annot=True,fmt='d',cmap=plt.cm.Blues)
save_fig("svm_linear_cm")

In [None]:
# measure accuracy
metrics.accuracy_score(y_true=y_test, y_pred=svc_linear_cv.predict(X_test))

In [None]:
# Save model as pkl file 
pickle.dump(scv_linear_cv, open('models/LU/svc_linear_cv.pkl', 'wb'))
#pickled_model = pickle.load(open('models/LU/svc_linear_cv.pkl', 'rb'))
#pickled_model.predict(X_test)

### SVM rbf

#### Apply different parameters of the classifier in loops for getting the best parameter using GPU instead of CPU for faster performance

In [None]:
def random_searchrbf(X_train, X_test, y_train, y_test):
    """Score an RBF-kernel thundersvm SVC for every (C, gamma) pair of a fixed grid.

    Each candidate is fitted on ``(X_train, y_train)`` and its accuracy is measured
    on ``(X_test, y_test)``. A candidate that raises is reported on stdout (with the
    parameters that failed) and skipped, so the returned list may hold fewer
    entries than the grid.

    Returns
    -------
    list of tuple
        ``(C, gamma, accuracy)`` for every candidate that ran.
    """
    results = []

    for C in [20, 25, 30, 40]:
        for gamma in [1, 5, 10]:
            try:
                model = thundersvm.SVC(
                    gpu_id=0,
                    kernel="rbf",
                    C=C,
                    gamma=gamma
                )
                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)
                result = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)

                results.append((C, gamma, result))
            except Exception as e:
                # Name this function (not random_search) and the failing combination
                print(f"Exception in random_searchrbf (C={C}, gamma={gamma}): {e}")

    return results

In [None]:
# Run the RBF search; the timestamps printed before and after show how long it took
print(datetime.datetime.now())
svr_random_search = random_searchrbf(X_train, X_test, y_train, y_test)
print(svr_random_search)
# Pick the candidate with the highest accuracy (third tuple element)
best_result = max(svr_random_search, key=lambda x: x[2])
best_C, best_gamma, best_performance = best_result

print(f"Best C: {best_C}, Best Gamma: {best_gamma}, Best Performance: {best_performance}")
print(datetime.datetime.now())

In [None]:
# Refit with the parameters selected by the search above instead of hard-coded values
svc_rbf_gpu = thundersvm.SVC(gpu_id=0, C=best_C, gamma=best_gamma, kernel='rbf')
print(datetime.datetime.now())
svc_rbf_gpu.fit(X_train, y_train)
print(datetime.datetime.now())
# Predict on the held-out test split
y_pred = svc_rbf_gpu.predict(X_test)
print(datetime.datetime.now())

In [None]:
# measure accuracy
metrics.accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Confusion matrix
confusion_svr = metrics.confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(confusion_svr)

In [None]:
df_cm_svr = pd.DataFrame(confusion_svr, index = [i for i in LU],
                  columns = [i for i in LU])
#df_cm_svr.to_csv('LU_svr.csv', index=False)
#df_cm_svr = pd.read_csv('LU_svr.csv')
plt.figure(figsize = (10,7))
sns.heatmap(df_cm_svr,fmt='d',cmap=plt.cm.Blues, annot=True)
save_fig("SVM_rbf_cm_opt")

In [None]:
# Save the best model
os.makedirs('models/LU', exist_ok=True)  # make sure the output folder exists
with open('models/LU/svr_model.pkl', 'wb') as model_file:
    # Pickle the RBF model fitted above
    pickle.dump(svc_rbf_gpu, model_file)
#pickled_model = pickle.load(open('models/LU/svr_model.pkl', 'rb'))
#pickled_model.predict(X_test)

#### Apply different parameters of the classifier using GridSearch/RandomSearch for getting the best parameter

In [None]:
# Use scikit-learn's SVC explicitly: the bare name SVC was rebound by
# `from thundersvm import SVC` in an earlier cell
svc_rb = svm.SVC(kernel = "rbf")
param_cv = {"C" : [10, 20, 30, 40], "gamma" : [0.1, 0.5, 1, 5, 10]}
svc_rb_cv = set_rand(svc_rb, param_cv)
svc_rb_cv.get_params()

In [None]:
print(datetime.datetime.now())
with parallel_backend('multiprocessing'):
    svc_rb_cv.fit(X_train, y_train)
print(datetime.datetime.now())

In [None]:
print_results(svc_rb_cv, X_test, y_test)

In [None]:
# evaluation: Confusion Matrix
confusion_svmr = metrics.confusion_matrix(y_true = y_test, y_pred = svc_rb_cv.predict(X_test))
confusion_svmr

In [None]:
df_cm_svmr = pd.DataFrame(confusion_svmr, index = [i for i in LU],
                  columns = [i for i in LU])
plt.figure(figsize = (10,7))
sns.heatmap(df_cm_svmr, annot=True,fmt='d',cmap=plt.cm.Blues)
save_fig("svm_rbf_cm")

In [None]:
# measure accuracy
metrics.accuracy_score(y_true=y_test, y_pred=svc_rb_cv.predict(X_test))

In [None]:
# Save the fitted search object as a pkl file
os.makedirs('models/LU', exist_ok=True)  # make sure the output folder exists
# The with-block closes the file even if pickling fails
with open('models/LU/svc_rb_cv.pkl', 'wb') as model_file:
    pickle.dump(svc_rb_cv, model_file)
#pickled_model = pickle.load(open('models/LU/svc_rb_cv.pkl', 'rb'))
#pickled_model.predict(X_test)

### LSTM

In [None]:
# Reshape the X shape to fit in LSTM (add one dimension)
train_X = X_train.reshape(X_train.shape[0],X_train.shape[1],1)
test_X = X_test.reshape(X_test.shape[0],X_test.shape[1],1)

In [None]:
# Change Y values to be from 0 to 5 instead of 1 to 6
y_train_mod = y_train - 1
y_test_mod = y_test - 1

In [None]:
print(train_X.shape)
print(test_X.shape)

In [None]:
# Define the LSTM model: a stack of bidirectional LSTM layers, each followed by dropout,
# ending in a softmax layer with one unit per class
n_timesteps = train_X.shape[1]   # one time step per feature column (was hard-coded as 20)
n_classes = len(LU)              # one output unit per class name (was hard-coded as 6)
lstm_units = [16, 32, 64, 64, 32, 16]

model = Sequential()
for depth, units in enumerate(lstm_units):
    is_last = depth == len(lstm_units) - 1
    # Every LSTM but the last returns the full sequence so the next LSTM can consume it
    recurrent = layers.LSTM(units, return_sequences=not is_last)
    if depth == 0:
        # Only the first layer declares the input shape
        model.add(layers.Bidirectional(recurrent, input_shape=(n_timesteps, 1)))
    else:
        model.add(layers.Bidirectional(recurrent))
    model.add(layers.Dropout(0.2))
model.add(layers.Dense(n_classes, activation="softmax"))
# Labels are integer class ids, hence the sparse variant of the loss
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=['accuracy'])
model.summary()

In [None]:
#train 
print(datetime.datetime.now())
history = model.fit(train_X, y_train_mod, batch_size=2048, epochs=100, validation_split=0.1)
print(datetime.datetime.now())

In [None]:
preds = model.predict(test_X)

In [None]:
# Reuse the probabilities computed in the previous cell instead of running predict again
val_pred = np.argmax(preds, axis=-1)

In [None]:
preds.argmax(axis=1)

In [None]:
# evaluation: Confusion Matrix
confusion_lstm = metrics.confusion_matrix(y_true = y_test_mod, y_pred = val_pred)
confusion_lstm

In [None]:
df_cm_lstm = pd.DataFrame(confusion_lstm, index = [i for i in LU],
                  columns = [i for i in LU])
plt.figure(figsize = (10,7))
sns.heatmap(df_cm_lstm, annot=True,fmt='d',cmap=plt.cm.Blues)
save_fig("lstm_rbf_cm")

In [None]:
# measure accuracy
metrics.accuracy_score(y_true=y_test_mod, y_pred=val_pred)

In [None]:
# Save model as pkl file
# NOTE(review): whether a Keras model can be pickled depends on the installed
# TensorFlow/Keras version; the native model.save(...) API may be the safer choice — confirm.
os.makedirs('models/LU', exist_ok=True)  # make sure the output folder exists
# The with-block closes the file even if pickling fails
with open('models/LU/lstm_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)
#pickled_model = pickle.load(open('models/LU/lstm_model.pkl', 'rb'))
#pickled_model.predict(test_X)