# EuroSat Dataset ML Models

In [1]:
from config import Config
from src.colors import bcolors

# Helper object from src.colors; presumably provides terminal color codes — TODO confirm.
# It is not referenced again in the visible cells.
c = bcolors()
# Project configuration object. The cells below read TRAIN_FILE, TRAIN_MS_DIR
# and TEST_MS_DIR from it.
config = Config()

### Load Data
Training and validation samples are read from the training CSV (`config.TRAIN_FILE`).
The manually labeled test samples are read from a separate CSV file (`labels.csv`) that also contains their labels.

In [2]:
from sklearn.preprocessing import LabelEncoder
import pandas as pd

# Training/validation rows come from the CSV named in the project config;
# the test rows come from "labels.csv", resolved against the working directory.
df = pd.read_csv(config.TRAIN_FILE)
test_df = pd.read_csv("labels.csv")

# Fit the label encoder on the 'label' column of the training CSV only.
train_label_values = df["label"].to_numpy()
encoder = LabelEncoder().fit(train_label_values)

The features to extract are selected by passing a list of feature names:

| feature name | description                              |
|--------------|------------------------------------------|
| color_hist   | extracts color histogram                 |
| hog          | extracts Histogram of Oriented Gradients |


In [None]:
from src.dataset import EuroSatMS
from sklearn.model_selection import train_test_split

# Band indices passed to EuroSatMS as `select_chan`
# (presumably the RGB bands of the multispectral images — TODO confirm).
channels = [3, 2, 1]
# Feature extractors to apply; see the table above for the meaning of each name.
fe_methods = ["color_hist", "hog"]
# Fixed seed so the train/validation split is identical on every run.
SPLIT_SEED = 27


def build_dataset(frame, image_dir):
    """Create an EuroSatMS dataset using the notebook-wide settings.

    Every dataset in this notebook shares the same feature extractors, label
    encoder and channel selection; only the rows and the image directory vary.

    Args:
        frame: DataFrame listing the samples that belong to the dataset.
        image_dir: Directory holding the images referenced by ``frame``.

    Returns:
        The constructed ``EuroSatMS`` instance.
    """
    return EuroSatMS(
        frame,
        image_dir,
        feature_extractor=fe_methods,
        encoder=encoder,
        select_chan=channels,
    )


# Stratified 90/10 split of the training CSV; `random_state` makes it reproducible.
train_df, val_df = train_test_split(
    df,
    test_size=0.1,
    stratify=df['label'],
    random_state=SPLIT_SEED,
)

# Validation and training rows share the training image directory;
# the test rows have their own directory.
ds_val = build_dataset(val_df, config.TRAIN_MS_DIR)
ds_test = build_dataset(test_df, config.TEST_MS_DIR)
ds_train = build_dataset(train_df, config.TRAIN_MS_DIR)


[92mPreloading images...[0m

[96mImages:         2700[0m
[96mJobs:           -4 [0m


[94mTime taken:      0 min 17.292309045791626 sec [0m

[92mPreloading images...[0m

[96mImages:         1003[0m
[96mJobs:           -4 [0m


[94mTime taken:      0 min 4.680271863937378 sec [0m

[92mPreloading images...[0m

[96mImages:         24300[0m
[96mJobs:           -4 [0m



In [None]:
# Sanity check on the first training item: print the shape of its first
# element, then its second element (presumably features and encoded label —
# TODO confirm against EuroSatMS.__getitem__).
first_item_head = ds_train[0][0]
first_item_tail = ds_train[0][1]
print(first_item_head.shape)
print(first_item_tail)

## Prepare Data

In [None]:
import numpy as np

# Pull the sample and target arrays out of the dataset objects.
X_train, y_train = ds_train.samples, ds_train.targets
X_test, y_test = ds_test.samples, ds_test.targets

print(f"X_train.shape -> {X_train.shape}\n"
      f"y_train.shape -> {y_train.shape}\n"
      f"X_test.shape  -> {X_test.shape}\n"
      f"y_test.shape  -> {y_test.shape}")

# Stack train and test into one array pair and describe the split as a single
# predefined fold: the training rows come first, the test rows follow.
n_train, n_test = X_train.shape[0], X_test.shape[0]
feats = np.concatenate((X_train, X_test))
labels = np.concatenate((y_train, y_test))
feats_train_idx = np.arange(n_train)
feats_test_idx = np.arange(n_train, n_train + n_test)

# NOTE(review): with this single fold, any search that uses `cv=fold_idx`
# scores its candidates on the test rows; `ds_val` is not used here. Confirm
# that selecting hyperparameters on the test data is intended.
fold_idx = [(feats_train_idx, feats_test_idx)]

## Train Models
### XGBoost

In [None]:
from skopt.space import Integer, Real, Categorical
from xgboost import XGBClassifier, callback
from sklearn.model_selection import StratifiedKFold

import xgboost as xgb
from skopt import BayesSearchCV

# Requires `feats`, `labels`, `fold_idx` (Prepare Data cell) and `ds_val`
# (dataset cell) to be defined.

# Template estimator: BayesSearchCV fits clones of it, so `model` itself stays
# unfitted. Early stopping and its metric are configured on the estimator,
# which is where XGBoost releases that accept `device` expect them.
model = XGBClassifier(
    nthread=-1,
    seed=27,
    device="cuda",
    eval_metric='mlogloss',
    early_stopping_rounds=20,
)

parameter_space = {
    'n_estimators': Integer(400, 500),
    'learning_rate': Real(0.0001, 0.01, prior='log-uniform'),
    'max_depth': Integer(3, 6),
    'subsample': Real(0.3, 0.7),
    'colsample_bytree': Real(0.3, 0.7),
    'booster': Categorical(['gbtree']),
    'reg_lambda': Integer(0, 30),
    'reg_alpha': Integer(0, 30),
    'gamma': Real(0.0, 3.0),
}

# Arguments forwarded to every `XGBClassifier.fit` call made by the search.
# Early stopping needs an evaluation set; the held-out validation split is
# used so that the stopping point is not chosen on the test rows.
# (Assumes ds_val.samples / ds_val.targets have the same layout as the
# ds_train / ds_test arrays — same class, same settings.)
fit_param = {
    'eval_set': [(ds_val.samples, ds_val.targets)],
    'verbose': False,  # suppress per-round evaluation logging
}

clf = BayesSearchCV(estimator=model,
                    search_spaces=parameter_space,
                    cv=fold_idx,
                    n_iter=30,
                    scoring='accuracy',
                    random_state=27,  # reproducible sequence of sampled candidates
                    verbose=4)

# Fit-time arguments are passed here rather than via the BayesSearchCV
# constructor so that they actually reach the estimator's `fit`.
# NOTE: with the default `refit=True`, `clf.best_estimator_` is retrained on
# all of `feats` (train AND test rows), so it must not be used to report
# test accuracy.
clf.fit(feats, y=labels, **fit_param)

# Persist the full search table instead of dumping the raw dict to the output.
results = pd.DataFrame(clf.cv_results_)
results.to_csv("results.csv")

print(clf.best_score_)
print(clf.best_params_)

### SVC

In [None]:
from sklearn.base import clone
from sklearn.metrics import accuracy_score

# Row indices of the training and test parts inside `feats` / `labels`.
train_idx, test_idx = fold_idx[0]

# `model` is only the template given to BayesSearchCV (the search fits clones),
# so it is still unfitted. Train a fresh copy with the best hyperparameters on
# the training rows only, so the test rows stay unseen.
final_model = clone(model).set_params(**clf.best_params_)
final_model.fit(
    feats[train_idx],
    labels[train_idx],
    # Evaluation set for XGBoost (used for early stopping when it is enabled
    # on the estimator); the validation split keeps the test rows out of training.
    eval_set=[(ds_val.samples, ds_val.targets)],
    verbose=False,
)

y_pred = final_model.predict(feats[test_idx])
accuracy = accuracy_score(labels[test_idx], y_pred)
print(f"Model accuracy: {accuracy:.4f}")