# Scraping


In [None]:
#!/usr/bin/env python3

from bs4 import BeautifulSoup
import requests
import os
import re

ROOT_URL = "https://eoportal.org/"

def get_content(soup):
    """Extract the readable text of a parsed page as one space-separated string.

    Every text node of ``soup`` is inspected; a node is kept only when its
    parent tag is not in the blacklist of structural/navigation tags and the
    node is longer than 20 characters. Each kept node is followed by a single
    space in the result.

    Args:
        soup: A parsed document exposing ``find_all`` (e.g. a BeautifulSoup
            object). ``None`` is accepted.

    Returns:
        The concatenated text. An empty string is returned when ``soup`` is
        ``None`` or cannot be searched, so callers can always append the result
        to a string.
    """
    parents_blacklist = frozenset([
        '[document]', 'html', 'head',
        'style', 'script', 'body',
        'div', 'a', 'section', 'tr',
        'td', 'label', 'ul', 'header',
        'aside',
    ])

    if soup is None:
        return ''

    try:
        # `string=True` is the current spelling of the deprecated `text=True`.
        text_nodes = soup.find_all(string=True)
    except (AttributeError, TypeError):
        # Not a searchable document: treat it as a page without content.
        return ''

    pieces = []
    for node in text_nodes:
        parent = getattr(node, 'parent', None)
        if parent is None:
            # Skip detached nodes instead of discarding the whole page.
            continue
        if parent.name not in parents_blacklist and len(node) > 20:
            pieces.append(str(node) + ' ')

    # Join once at the end rather than growing a string inside the loop.
    return ''.join(pieces)

# Return the list of bibliography entries of a single page
def get_bibliography(url, timeout=30):
    """Download ``url`` and return the text of its footnote paragraphs.

    A bibliography entry is recognised as an ``<a>`` tag whose ``name``
    attribute looks like ``foot12)`` and whose direct parent is a ``<p>`` tag;
    the text of that parent paragraph is collected.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the scraper indefinitely.

    Returns:
        A list of strings, one per matching paragraph (possibly empty).
    """
    # Raw string: "\)" is an invalid escape sequence in a normal string literal.
    pattern = re.compile(r"^foot[0-9]+\)$")
    page = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(page.text, 'html.parser')

    biblio_list = []
    for a_tag in soup.find_all('a'):
        name_attr = a_tag.attrs.get("name")
        if name_attr is None:
            continue
        if pattern.match(name_attr) and a_tag.parent.name == "p":
            biblio_list.append(a_tag.parent.text)
    return biblio_list

def get_airborne_sensors_data():
    """Scrape the eoPortal airborne-sensors articles and save text and bibliography.

    Collects the link of every ``<li>`` element on the airborne-sensors index
    page, downloads the linked articles and writes two files into the current
    working directory:

    * ``data_airborne_sensors_content.txt`` - concatenated ``get_content`` output
    * ``data_airborne_sensors_biblio.txt`` - ``str()`` of the list of entries
      returned by ``get_bibliography``
    """
    # -- code to download lists of article from https://eoportal.org/web/eoportal/airborne-sensors
    url = "https://eoportal.org/web/eoportal/airborne-sensors/above"
    request_timeout = 30  # seconds; avoids hanging forever on a stalled connection
    r = requests.get(url, timeout=request_timeout)
    soup = BeautifulSoup(r.text, 'html.parser')

    links = []
    for elem in soup.find_all("li"):
        if not elem.a:
            continue
        link = elem.a.get("href")
        if link is None:
            # <a> without href: nothing to download.
            continue
        if link.startswith(("http://", "https://")):
            links.append(link)
        else:
            # Join without producing "//" when the href already starts with "/".
            links.append(ROOT_URL.rstrip("/") + "/" + link.lstrip("/"))

    content_parts = []
    data_biblio = []
    # The leading links are not articles and are skipped.
    # NOTE(review): the original comment said "first 7 links" while the slice
    # skips 8; the slice is kept as-is - confirm which count is intended.
    for link in links[8:]:
        print(f"Downloading link: {link}")
        page_request = requests.get(link, timeout=request_timeout)
        page_html = BeautifulSoup(page_request.text, 'html.parser')
        page_content = get_content(page_html)
        if page_content:
            # get_content may yield None/empty for an unparsable page.
            content_parts.append(page_content)
        # NOTE: get_bibliography downloads the same page a second time.
        data_biblio += get_bibliography(link)

    with open("data_airborne_sensors_content.txt", "w", encoding="utf-8") as f:
        f.write("".join(content_parts))

    with open("data_airborne_sensors_biblio.txt", "w", encoding="utf-8") as f:
        f.write(str(data_biblio))


def get_missions_database_data():
    """Scrape the eoPortal satellite-missions articles and save their text.

    For every alphabetical index page, the links of ``<li>`` elements whose
    href contains ``/<index>/`` are downloaded and their ``get_content`` output
    is concatenated into ``data_missions_database_content.txt`` in the current
    working directory.
    """
    # A missing comma previously merged "t" and "u" into "tu", so neither
    # index page was scraped.
    alphabet = ["a", "b", "c-missions", "d", "e", "f", "g", "h", "i", "j",
                "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u",
                "v-w-x-y-z"]
    request_timeout = 30  # seconds; avoids hanging forever on a stalled connection

    content_parts = []
    for a in alphabet:
        url = f"https://eoportal.org/web/eoportal/satellite-missions/{a}"
        r = requests.get(url, timeout=request_timeout)
        soup = BeautifulSoup(r.text, 'html.parser')

        links_pages = []
        for elem in soup.find_all("li"):
            href = elem.a.get("href") if elem.a else None
            if not href or f"/{a}/" not in href:
                # No link, <a> without href, or a link outside this index.
                continue
            if href.startswith(("http://", "https://")):
                links_pages.append(href)
            else:
                # Join without producing "//" when the href starts with "/".
                links_pages.append(ROOT_URL.rstrip("/") + "/" + href.lstrip("/"))

        for link in links_pages:
            print(f"Downloading link: {link}")
            page_request = requests.get(link, timeout=request_timeout)
            page_html = BeautifulSoup(page_request.text, 'html.parser')
            page_content = get_content(page_html)
            if page_content:
                # get_content may yield None/empty for an unparsable page.
                content_parts.append(page_content)

    with open("data_missions_database_content.txt", "w", encoding="utf-8") as f:
        f.write("".join(content_parts))
        
if __name__ == "__main__":
    # The scrapers are disabled by default; uncomment the ones to (re)run.
    # get_airborne_sensors_data()
    # get_missions_database_data()
    pass  # keeps the block syntactically valid while both calls are commented out


# Adaptive Tuning

In [None]:
# Mount Google Drive at /content/drive; the data files and the saved model
# used by the cells below live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

# Ask PyTorch to release its cached CUDA memory before training starts.
import torch
torch.cuda.empty_cache()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
%%capture
!pip install -q transformers
!pip install -q datasets

In [None]:
import multiprocessing

import transformers

from datasets import Dataset

from transformers import AutoModelForMaskedLM
from transformers import AutoTokenizer, AutoConfig

from transformers import BertForMaskedLM, DistilBertForMaskedLM
from transformers import BertTokenizer, DistilBertTokenizer
from transformers import RobertaTokenizer, RobertaForMaskedLM

from transformers import Trainer, TrainingArguments
from transformers import DataCollatorForLanguageModeling

from tokenizers import BertWordPieceTokenizer

import re
import pandas as pd

In [None]:
# Read the scraped corpora and split them into sentences.
def _read_sentences(file_path):
    """Return the sentences of the text file at ``file_path``.

    Newlines are replaced by spaces and the text is then split after every
    ``.``, ``!`` or ``?`` that is followed by one or more spaces.
    """
    # The context manager closes the file handle once the text is read.
    with open(file_path, encoding="utf-8") as f:
        raw_text = f.read()
    # str.replace returns a new string: the result must be kept, otherwise the
    # call has no effect. A space (not "") is used so that the words on either
    # side of a line break are not glued together.
    raw_text = raw_text.replace("\n", " ")
    return re.split('(?<=[.!?]) +', raw_text)


sensors_mlm = _read_sentences('/content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/data_airborne_sensors_content.txt')
missions_mlm = _read_sentences('/content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/data_missions_database_content.txt')

# Single list of sentences used for masked-language-model training.
text_mlm = sensors_mlm + missions_mlm

In [None]:
# Create the dataframe: one sentence per row in a single "text" column.
df_mlm = pd.DataFrame(text_mlm, columns=["text"])

# Remove small sentences: keep only rows with more than 4 whitespace-separated words.
has_enough_words = (
    df_mlm["text"]
    .map(lambda sentence: len(str(sentence).split()) > 4)
    .astype(bool)
)
df_mlm = df_mlm.loc[has_enough_words]

In [None]:
from sklearn.model_selection import train_test_split

# Train/Valid Split
# 15% of the sentences are held out for validation; random_state fixes the
# shuffle so the split is the same on every run.
df_train, df_valid = train_test_split(
    df_mlm, test_size=0.15, random_state = 42
)

# Displayed as the cell output: (number of training rows, number of validation rows).
len(df_train), len(df_valid)

(256989, 45352)

In [None]:
# Wrap the "text" column of each split in a Hugging Face Dataset.
# Rows without text were already removed from df_mlm; dropna() is repeated
# here as a safeguard.
train_dataset = Dataset.from_pandas(df_train[['text']].dropna())
valid_dataset = Dataset.from_pandas(df_valid[['text']].dropna())

In [None]:
# Hyper-parameters shared by the tokenization and training cells below.
MAX_SEQ_LEN = 128  # tokens per example; used for padding/truncation in tokenize_function
TRAIN_BATCH_SIZE = 16  # passed as per_device_train_batch_size
EVAL_BATCH_SIZE = 16  # passed as per_device_eval_batch_size
LEARNING_RATE = 2e-5  # passed as learning_rate
LR_WARMUP_STEPS = 100  # passed as warmup_steps
WEIGHT_DECAY = 0.01  # passed as weight_decay

In [None]:
# Reference sizes of the base checkpoints:
#   bert-base-uncased        12-layer, 768-hidden, 12-heads, 109M parameters
#   distilbert-base-uncased   6-layer, 768-hidden, 12-heads,  65M parameters

MODEL = 'distilbert'  # one of: 'distilbert', 'bert', 'roberta', 'scibert'
# Checkpoint to start from. It must belong to the family selected by MODEL.
# (The run logged below reports n_layers=6, dim=768, n_heads=12 for it.)
bert_type = 'distilbert-base-cased'

# Tokenizer/model classes for every supported family.
_MODEL_CLASSES = {
    'distilbert': (DistilBertTokenizer, DistilBertForMaskedLM),
    'bert': (BertTokenizer, BertForMaskedLM),
    'roberta': (RobertaTokenizer, RobertaForMaskedLM),
    'scibert': (AutoTokenizer, AutoModelForMaskedLM),
}

if MODEL not in _MODEL_CLASSES:
    # Fail with a clear message instead of a NameError further down.
    raise ValueError(
        f"Unknown MODEL {MODEL!r}; expected one of {sorted(_MODEL_CLASSES)}"
    )
TokenizerClass, ModelClass = _MODEL_CLASSES[MODEL]

# do_lower_case=False keeps the original casing, matching the cased checkpoint.
# model_max_length is the current name of the deprecated `max_len` argument.
# use_fast is kept for the AutoTokenizer ('scibert') branch.
tokenizer = TokenizerClass.from_pretrained(
    bert_type, use_fast=True, do_lower_case=False, model_max_length=MAX_SEQ_LEN
)
model = ModelClass.from_pretrained(bert_type)

Downloading:   0%|          | 0.00/208k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/411 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/251M [00:00<?, ?B/s]

In [None]:
def tokenize_function(row):
    """Tokenize ``row['text']`` with the notebook-level ``tokenizer``.

    The text is truncated and padded to ``MAX_SEQ_LEN`` tokens and the special
    tokens mask is included in the returned encoding.
    """
    tokenizer_options = {
        'padding': 'max_length',
        'truncation': True,
        'max_length': MAX_SEQ_LEN,
        'return_special_tokens_mask': True,
    }
    return tokenizer(row['text'], **tokenizer_options)

In [None]:
# Columns present before tokenization; they are removed by map() so that only
# the tokenizer outputs remain in the datasets.
column_names = train_dataset.column_names

# Identical map() settings for both splits: batched tokenization spread over
# one worker process per CPU core.
map_options = dict(
    batched=True,
    num_proc=multiprocessing.cpu_count(),
    remove_columns=column_names,
)

train_dataset = train_dataset.map(tokenize_function, **map_options)
valid_dataset = valid_dataset.map(tokenize_function, **map_options)

     

#0:   0%|          | 0/65 [00:00<?, ?ba/s]

 

#1:   0%|          | 0/65 [00:00<?, ?ba/s]

  

#2:   0%|          | 0/65 [00:00<?, ?ba/s]

#3:   0%|          | 0/65 [00:00<?, ?ba/s]

     

#0:   0%|          | 0/12 [00:00<?, ?ba/s]

   

#1:   0%|          | 0/12 [00:00<?, ?ba/s]

#3:   0%|          | 0/12 [00:00<?, ?ba/s]

#2:   0%|          | 0/12 [00:00<?, ?ba/s]

In [None]:
# Collator for masked-language-model training (mlm=True) configured with a
# masking probability of 0.15; it is shared by the training and evaluation
# Trainers below.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer, mlm=True, mlm_probability=0.15
)

In [None]:
# Number of full batches per epoch (the division result is truncated).
# NOTE(review): the training log below reports 32124 optimization steps for 2
# epochs, i.e. 16062 per epoch, so the last partial batch is not counted here.
steps_per_epoch = int(len(train_dataset) / TRAIN_BATCH_SIZE)

training_args = TrainingArguments(
    # NOTE(review): directory is named 'bert-news' although the corpus is
    # remote-sensing text - consider renaming.
    output_dir='./bert-news',
    logging_dir='./LMlogs',             
    num_train_epochs=2,
    do_train=True,
    do_eval=True,
    per_device_train_batch_size=TRAIN_BATCH_SIZE,
    per_device_eval_batch_size=EVAL_BATCH_SIZE,
    warmup_steps=LR_WARMUP_STEPS,
    # NOTE(review): save_strategy='epoch' is also set below, so this step-based
    # value is presumably unused - confirm against the TrainingArguments docs.
    save_steps=steps_per_epoch,
    save_total_limit=3,
    weight_decay=WEIGHT_DECAY,
    learning_rate=LEARNING_RATE, 
    # evaluate_during_training=True,
    # Evaluate and checkpoint once per epoch; the checkpoint with the lowest
    # evaluation loss is requested as the final model.
    evaluation_strategy='epoch',
    save_strategy='epoch',
    load_best_model_at_end=True,
    metric_for_best_model='loss', 
    greater_is_better=False,
    # metric_for_best_model='accuracy',  # Causes an error during training
                                         # unless you pass "compute_metrics" to
                                         # Trainer
    seed=42
)

# Trainer for the masked-LM fine-tuning: the model is trained on train_dataset
# and evaluated on valid_dataset, with batches built by data_collator.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    # compute_metrics=compute_metrics,  # Causes an out of memory error
    tokenizer=tokenizer,
    # prediction_loss_only=True,
)

In [None]:
# Run the masked-LM fine-tuning; the training log and TrainOutput follow as cell output.
trainer.train()

The following columns in the training set  don't have a corresponding argument in `DistilBertForMaskedLM.forward` and have been ignored: special_tokens_mask. If special_tokens_mask are not expected by `DistilBertForMaskedLM.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 256989
  Num Epochs = 2
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 32124


Epoch,Training Loss,Validation Loss
1,2.0439,1.939857
2,1.9614,1.862966


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForMaskedLM.forward` and have been ignored: special_tokens_mask. If special_tokens_mask are not expected by `DistilBertForMaskedLM.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 45352
  Batch size = 16
Saving model checkpoint to ./bert-news/checkpoint-16062
Configuration saved in ./bert-news/checkpoint-16062/config.json
Model weights saved in ./bert-news/checkpoint-16062/pytorch_model.bin
tokenizer config file saved in ./bert-news/checkpoint-16062/tokenizer_config.json
Special tokens file saved in ./bert-news/checkpoint-16062/special_tokens_map.json
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForMaskedLM.forward` and have been ignored: special_tokens_mask. If special_tokens_mask are not expected by `DistilBertForMaskedLM.forward`,  you can safely ignore this message.
***** Running Evaluation ***

TrainOutput(global_step=32124, training_loss=2.07138116338039, metrics={'train_runtime': 8892.5732, 'train_samples_per_second': 57.799, 'train_steps_per_second': 3.612, 'total_flos': 1.7032777106208768e+16, 'train_loss': 2.07138116338039, 'epoch': 2.0})

In [None]:
# Persist the fine-tuned model to Google Drive; the log below shows the model
# config, weights and tokenizer files being written to this directory.
trainer.save_model("/content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/model")

Saving model checkpoint to /content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/model
Configuration saved in /content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/model/config.json
Model weights saved in /content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/model/pytorch_model.bin
tokenizer config file saved in /content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/model/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/model/special_tokens_map.json


## Perplexity Evaluation


In [None]:
import glob
import math

# Directory of the saved model. A glob pattern (e.g. ".../model*") can be used
# to evaluate several saved models in one go.
path = "/content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/model"

model_paths = sorted(glob.glob(path))
if not model_paths:
    # Make a wrong path visible instead of silently evaluating nothing.
    print('No model found at: ', path)

for modelpath in model_paths:
    print('Model: ', modelpath)
    # The checkpoint is cased: keep the casing, as was done for training.
    tokenizer = AutoTokenizer.from_pretrained(modelpath, use_fast=False, do_lower_case=False)
    model = AutoModelForMaskedLM.from_pretrained(modelpath)

    # Evaluation-only Trainer on the already tokenized validation set.
    trainer = Trainer(
        model=model,
        data_collator=data_collator,
        eval_dataset=valid_dataset,
        tokenizer=tokenizer,
    )

    eval_results = trainer.evaluate()

    # Perplexity is the exponential of the evaluation loss.
    # NOTE(review): the loss logged here differs slightly from the one logged
    # during training, presumably because the collator masks tokens at random.
    print('Evaluation results: ', eval_results)
    print(f"Perplexity: {math.exp(eval_results['eval_loss']):.3f}")
    print('----------------\n')

Didn't find file /content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/model/added_tokens.json. We won't load it.
loading file /content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/model/vocab.txt
loading file None
loading file /content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/model/special_tokens_map.json
loading file /content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/model/tokenizer_config.json
loading configuration file /content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/model/config.json
Model config DistilBertConfig {
  "_name_or_path": "/content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/model",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "output_past": true,
  "pad_token_id": 0,
  "qa_dropout": 0.1

Model:  /content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/model


All model checkpoint weights were used when initializing DistilBertForMaskedLM.

All the weights of DistilBertForMaskedLM were initialized from the model checkpoint at /content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use DistilBertForMaskedLM for predictions without further training.
No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForMaskedLM.forward` and have been ignored: special_tokens_mask. If special_tokens_mask are not expected by `DistilBertForMaskedLM.forward`,  you ca

Evaluation results:  {'eval_loss': 1.864656925201416, 'eval_runtime': 267.3784, 'eval_samples_per_second': 169.617, 'eval_steps_per_second': 21.202}
Perplexity: 6.454
----------------



Let's evaluate the original model on the same validation set and compare its perplexity with that of the adaptively tuned model.

In [None]:
# Baseline: the original distilbert-base-cased checkpoint evaluated on the same
# validation set as the fine-tuned model.
# The checkpoint is cased, so the tokenizer must not lower-case the text.
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-cased', use_fast=False, do_lower_case=False)
model = AutoModelForMaskedLM.from_pretrained('distilbert-base-cased')

# Evaluation-only Trainer (a stray "a" after data_collator previously made
# this call a syntax error).
trainer = Trainer(
    model=model,
    data_collator=data_collator,
    eval_dataset=valid_dataset,
    tokenizer=tokenizer,
)

eval_results = trainer.evaluate()

# Perplexity is the exponential of the evaluation loss.
print('Evaluation results: ', eval_results)
print(f"Perplexity: {math.exp(eval_results['eval_loss']):.3f}")
print('----------------\n')

loading configuration file https://huggingface.co/distilbert-base-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/ebe1ea24d11aa664488b8de5b21e33989008ca78f207d4e30ec6350b693f073f.302bfd1b5e031cc1b17796e0b6e5b242ba2045d31d00f97589e12b458ebff27a
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-cased",
  "activation": "gelu",
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "output_past": true,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.18.0",
  "vocab_size": 28996
}

loading file https://huggingface.co/distilbert-base-cased/resolve/main/vocab.txt from cache at /root/.cache/huggingface/transformers/ba377304984dc63e3ede0e23a938bbbf04d5c3835b66d5bb48343aecca188429.4

Evaluation results:  {'eval_loss': 3.5197696685791016, 'eval_runtime': 276.2442, 'eval_samples_per_second': 164.174, 'eval_steps_per_second': 20.522}
Perplexity: 33.777
----------------



In [None]:
#FIRST NEED TO TRAIN ON DOWNSTREAM TASK
# The saved checkpoint is a DistilBERT model, so the DistilBERT classes are
# required; loading it through the BERT classes triggers the
# "model of type distilbert to instantiate a model of type bert" warning and
# builds a model with a different layout than the saved weights.

from transformers import DistilBertForQuestionAnswering
from transformers import DistilBertTokenizer

#Model
model = DistilBertForQuestionAnswering.from_pretrained("/content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/model")

#Tokenizer
tokenizer = DistilBertTokenizer.from_pretrained("/content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/model")

loading configuration file /content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/model/config.json
You are using a model of type distilbert to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Model config BertConfig {
  "_name_or_path": "distilbert-base-cased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "dim": 768,
  "dropout": 0.1,
  "hidden_act": "gelu",
  "hidden_dim": 3072,
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "n_heads": 12,
  "n_layers": 6,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.

In [None]:
from transformers import pipeline

# Two fill-mask pipelines for a side-by-side comparison:
# the model saved on Drive after fine-tuning vs. the original checkpoint.
unmasker_custom = pipeline('fill-mask', model="/content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/model")
unmasker_distilbert = pipeline('fill-mask', model= 'distilbert-base-cased')

In [None]:
# Original checkpoint: predictions for the masked word after "remote".
unmasker_distilbert("remote [MASK] are complicated")

[{'score': 0.11297765374183655,
  'sequence': 'remote sensing are complicated',
  'token': 16986,
  'token_str': 'sensing'},
 {'score': 0.05963169410824776,
  'sequence': 'remote controls are complicated',
  'token': 7451,
  'token_str': 'controls'},
 {'score': 0.03973941504955292,
  'sequence': 'remote locations are complicated',
  'token': 4541,
  'token_str': 'locations'},
 {'score': 0.018463315442204475,
  'sequence': 'remote communications are complicated',
  'token': 6678,
  'token_str': 'communications'},
 {'score': 0.013456997461616993,
  'sequence': 'remote channels are complicated',
  'token': 6412,
  'token_str': 'channels'}]

In [None]:
# Fine-tuned model on the same prompt, for comparison with the cell above.
unmasker_custom("remote [MASK] are complicated")

[{'score': 0.28011277318000793,
  'sequence': 'remote sensing are complicated',
  'token': 16986,
  'token_str': 'sensing'},
 {'score': 0.047650448977947235,
  'sequence': 'remote measurements are complicated',
  'token': 12307,
  'token_str': 'measurements'},
 {'score': 0.03648458793759346,
  'sequence': 'remote locations are complicated',
  'token': 4541,
  'token_str': 'locations'},
 {'score': 0.03406331688165665,
  'sequence': 'remote sensors are complicated',
  'token': 15145,
  'token_str': 'sensors'},
 {'score': 0.026522010564804077,
  'sequence': 'remotes are complicated',
  'token': 1116,
  'token_str': '##s'}]

In [None]:
# Original checkpoint on a longer, mission-related sentence.
unmasker_distilbert(" the mission offered a technological and operational [MASK] to demonstrate the value of autonomous vehicles ")

[{'score': 0.2572871744632721,
  'sequence': 'the mission offered a technological and operational framework to demonstrate the value of autonomous vehicles',
  'token': 8297,
  'token_str': 'framework'},
 {'score': 0.1291230022907257,
  'sequence': 'the mission offered a technological and operational platform to demonstrate the value of autonomous vehicles',
  'token': 3482,
  'token_str': 'platform'},
 {'score': 0.08356285840272903,
  'sequence': 'the mission offered a technological and operational basis to demonstrate the value of autonomous vehicles',
  'token': 3142,
  'token_str': 'basis'},
 {'score': 0.029744407162070274,
  'sequence': 'the mission offered a technological and operational opportunity to demonstrate the value of autonomous vehicles',
  'token': 3767,
  'token_str': 'opportunity'},
 {'score': 0.022460544481873512,
  'sequence': 'the mission offered a technological and operational capability to demonstrate the value of autonomous vehicles',
  'token': 11137,
  'token

In [None]:
# Fine-tuned model with the mask as the last token of the prompt; in the
# output below the top-ranked predictions are punctuation marks.
unmasker_custom("Let's launch a [MASK]")

[{'score': 0.78572016954422,
  'sequence': "Let's launch a.",
  'token': 119,
  'token_str': '.'},
 {'score': 0.18195417523384094,
  'sequence': "Let's launch a!",
  'token': 106,
  'token_str': '!'},
 {'score': 0.015179160982370377,
  'sequence': "Let's launch a?",
  'token': 136,
  'token_str': '?'},
 {'score': 0.00092350784689188,
  'sequence': "Let's launch a ;",
  'token': 132,
  'token_str': ';'},
 {'score': 0.0008175468537956476,
  'sequence': "Let's launch a -",
  'token': 118,
  'token_str': '-'}]

# Upload the model to Hugging Face

In [None]:
%%capture
!pip install huggingface_hub

In [None]:
# Log in to Hugging Face before pushing; the output below shows that the
# access token is stored on the runtime once the login succeeds.
from huggingface_hub import notebook_login

notebook_login()

Login successful
Your token has been saved to /root/.huggingface/token


In [None]:
# Convert the PyTorch model to TensorFlow: load the checkpoint saved at `path`
# (defined in the perplexity-evaluation cell) with from_pt=True and save it
# in a separate directory.
# NOTE(review): presumably requires TensorFlow to be installed in the runtime.
tf_model = transformers.TFDistilBertForMaskedLM.from_pretrained(path, from_pt=True)
tf_model.save_pretrained("/content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/tf_model")

In [None]:
# Convert the TensorFlow model back to PyTorch: load the TF weights saved by
# the previous cell with from_tf=True and save them as a PyTorch model.
tf_model_path = "/content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/tf_model"

# pt_model is the object pushed to the Hub further down.
pt_model = DistilBertForMaskedLM.from_pretrained(tf_model_path, from_tf=True)
pt_model.save_pretrained("/content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/pt_model")

In [None]:
# Reload the tokenizer saved next to the fine-tuned model so that it can be
# pushed to the Hub together with the model (`path` is the saved-model directory).
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path = path)

Didn't find file /content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/model/tokenizer.json. We won't load it.
Didn't find file /content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/model/added_tokens.json. We won't load it.
loading file /content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/model/vocab.txt
loading file None
loading file None
loading file /content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/model/special_tokens_map.json
loading file /content/drive/MyDrive/Colab Notebooks/remote-sensing-bert/model/tokenizer_config.json


In [None]:
# Push the model and its tokenizer to the same Hugging Face Hub repository.
pt_model.push_to_hub("remote-sensing-distilbert-cased")
tokenizer.push_to_hub("remote-sensing-distilbert-cased")

tokenizer config file saved in remote-sensing-distilbert-cased/tokenizer_config.json
Special tokens file saved in remote-sensing-distilbert-cased/special_tokens_map.json
remote: Enforcing permissions...        
remote: Allowed refs: all        
To https://huggingface.co/Chramer/remote-sensing-distilbert-cased
   317a85e..c93cc0f  main -> main



'https://huggingface.co/Chramer/remote-sensing-distilbert-cased/commit/c93cc0fab79adfa4afe9e317d0bc162d9ed0c908'