In [1]:
import requests
import json
import torch
import os
from tqdm import tqdm
import pandas as pd

In [2]:
#import dataset
temp1 = pd.read_excel("./dataset_maker.xlsx", sheet_name="1) target", usecols="U:Y")
temp2 = pd.read_excel("./dataset_maker.xlsx", sheet_name="2) form", usecols="S:W")
temp3 = pd.read_excel("./dataset_maker.xlsx", sheet_name="3) dist", usecols="R:V")
temp4 = pd.read_excel("./dataset_maker.xlsx", sheet_name="4) target & form", usecols="AD:AH")
temp5 = pd.read_excel("./dataset_maker.xlsx", sheet_name="5) target & dist", usecols="AD:AH")
temp6 = pd.read_excel("./dataset_maker.xlsx", sheet_name="6) form & dist", usecols="AB:AF")
temp7 = pd.read_excel("./dataset_maker.xlsx", sheet_name="7) target & form & dist", usecols="AM:AQ")

In [3]:
print(len(temp1),len(temp2),len(temp3),len(temp4),len(temp5),len(temp6),len(temp7))

1000 1000 1000 1000 1000 1000 1000


In [4]:
temp_total = pd.concat([temp1, temp2, temp3, temp4, temp5, temp6, temp7], ignore_index=True)[["Query","(x,y)","Formation","Distance"]]
temp_total = temp_total.astype(str)

def extract_values(row):
    if row=="False":  return False, False
    else:
      row = row[1:-1].split(',')
      return row
      
temp_total[['x', 'y']] = temp_total['(x,y)'].apply(lambda row: pd.Series(extract_values(row)))
temp_total = temp_total.drop(columns=['(x,y)'])
print(temp_total.isna().sum())
temp_total

Query        0
Formation    0
Distance     0
x            0
y            0
dtype: int64


Unnamed: 0,Query,Formation,Distance,x,y
0,The drones are allocated for assess the extent...,False,0,-92,-63
1,The drones must tail the cargo ship as it nav...,False,0,87,56
2,The drones are engaged in the duty of survey a...,False,0,-93,72
3,The drones are dedicated to support firefighte...,False,0,-28,-10
4,The drones are mobilized to achieve capture br...,False,0,38,-7
...,...,...,...,...,...
6995,The drones are enlisted for tail the cargo shi...,grid,3,88,85
6996,The drones are tasked with assist in tracking...,grid,3,65,-35
6997,The drones aim to track the maritime vessel's ...,grid,8,-96,88
6998,The drones are charged with support firefighte...,triangle,6,21,-21


In [5]:
#dataframe shuffle
temp_total = temp_total.sample(frac=1).reset_index(drop=True)
temp_total['ans'] = temp_total[['x','y','Formation','Distance']].values.tolist()
temp_total = temp_total[["Query", "ans"]]
temp_total

Unnamed: 0,Query,ans
0,The objective entails the accomplishment of as...,"[8, 73, False, 0]"
1,The drones must carry out assist in fighting a...,"[21, 70, swarm, 8]"
2,The drones are continuously monitoring the shi...,"[-52, 98, zigzag, 12]"
3,Our mission is centered on align the flight pa...,"[False, False, zigzag, 0]"
4,The drones are on a mission to conduct a geol...,"[26, -30, zigzag, 6]"
...,...,...
6995,We are dedicated to assisting law enforcement ...,"[100, 52, triangle, 11]"
6996,The drones are assigned to arrange the drones...,"[False, False, grid, 7]"
6997,We are obligated to survey a remote mountain p...,"[-28, -17, False, 14]"
6998,The drones are dispatched with the purpose of ...,"[56, -21, False, 0]"


In [6]:
#train and test
train_datasets = temp_total[:int(len(temp_total)*0.7)]
test_datasets = temp_total[int(len(temp_total)*0.7):]

temp_total_query, temp_total_ans = temp_total['Query'].tolist(), temp_total['ans'].tolist()
train_query, train_ans = train_datasets['Query'].tolist(), train_datasets['ans'].tolist()
test_query, test_ans = test_datasets['Query'].tolist(), test_datasets['ans'].tolist()

In [7]:
train_query[0:5]

['The objective entails the accomplishment of assist in tracking a pack of endangered wolves presently occupying (8,73) for this operation.',
 'The drones must carry out assist in fighting a raging wildfire presently based in (21,70) and organize the drones structured in the pattern of throng and align the drones amid the length of 8 as quickly as possible.',
 "The drones are continuously monitoring the ship's movements presently found in (-52,98) and sync up the drones taking the form of wind and align the drones spanning the distance of 12 in support of the task.",
 'Our mission is centered on align the flight paths of the drones molded in the image of wavy during this operation.',
 'The drones are on a mission to  conduct a geological survey of an earthquake-prone region in (26,-30) and configure the drone alignment modeled as meander and arrange the drones across the expanse of 6 using the drones.']

In [8]:
train_ans[0:5]

[['8', '73', 'False', '0'],
 ['21', '70', 'swarm', '8'],
 ['-52', '98', 'zigzag', '12'],
 [False, False, 'zigzag', '0'],
 ['26', '-30', 'zigzag', '6']]

In [24]:
num_formation_classes=5
num_variables = 4
category_mapping = {'False': 0, 'zigzag': 1, 'swarm': 2, 'circle': 3, 'triangle': 4, 'swarm': 5,'grid':6}  # Add more categories as needed

In [27]:
import ast
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizerFast, BertModel
from torch import nn, optim
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler, LabelEncoder

class CustomDataset(Dataset):
    def __init__(self, sentences, labels):
        self.sentences = sentences
        self.labels = labels
        self.tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

    def __len__(self):
        return len(self.sentences)

    def parse_label(self, item):
        try:
            return ast.literal_eval(item)
        except (SyntaxError, ValueError):
            return item

    def custom_encode_category(self, category):
        # You can implement your custom encoding logic here
        return category_mapping.get(category, -1)  # -1 for unknown categories

    def reverse_encode_category(self, encoded_category):
        # Reverse the mapping to get the original category
        reverse_mapping = {v: k for k, v in category_mapping.items()}
        return reverse_mapping.get(encoded_category, 'unknown')
    
    def __getitem__(self, idx):
        sentence = self.sentences[idx]
        label = self.labels[idx]

        # Parse string representations into Python types
        try:
            label = [self.parse_label(item) for item in label]
        except Exception as e:
            print(f"Error parsing label in batch {idx}: {e}")
            print("Label:", label)
            raise

        # Convert numerical variables to float and handle the categorical variable
        label_combined = [float(label[0]), float(label[1]), self.custom_encode_category(label[2]),float(label[3])]

        # Tokenize and encode the sentence
        encoding = self.tokenizer(sentence, return_tensors='pt', padding=True, truncation=True)

        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(label_combined, dtype=torch.float32)
        }


In [29]:
# Create a custom dataset
dataset = CustomDataset(sentences=train_query, labels=train_ans)

# Create a DataLoader
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

In [13]:


# Define the model architecture
class CustomModel(nn.Module):
    def __init__(self):
        super(CustomModel, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.fc = nn.Linear(768, 4)  # Adjust the output size based on the number of variables you want to predict

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        output = self.fc(pooled_output)
        return output

# Instantiate the model and define loss function and optimizer
model = CustomModel()
criterion = nn.MSELoss()  # Adjust the loss function based on your task
optimizer = optim.Adam(model.parameters(), lr=1e-5)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [14]:
# Training loop with tqdm
num_epochs = 5

model.to(device)
model.train()
loss_list = []
for epoch in range(num_epochs):
    total_loss = 0

    with tqdm(total=len(dataloader), desc=f'Epoch {epoch + 1}/{num_epochs}', unit='batch', postfix={'loss': 0.0}) as pbar:
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            # Forward pass
            outputs = model(input_ids, attention_mask)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            average_loss = total_loss / (pbar.n + 1)  # Calculate average loss per batch
            pbar.set_postfix(loss=average_loss)
            pbar.update(1)
        loss_list.append(total_loss)


# Save the trained model
#torch.save(model.state_dict(), 'custom_model.pth')


Epoch 1/5: 100%|██████████| 4900/4900 [40:50<00:00,  2.00batch/s, loss=661]
Epoch 2/5: 100%|██████████| 4900/4900 [37:08<00:00,  2.20batch/s, loss=401]
Epoch 3/5: 100%|██████████| 4900/4900 [36:55<00:00,  2.21batch/s, loss=253]
Epoch 4/5: 100%|██████████| 4900/4900 [37:57<00:00,  2.15batch/s, loss=154]
Epoch 5/5: 100%|██████████| 4900/4900 [37:34<00:00,  2.17batch/s, loss=90.6]


In [15]:
torch.save(model.state_dict(), 'custom_model.pth')

In [62]:
# Testing loop with tqdm
model.eval()
test_loss = 0
test_dataset = CustomDataset(sentences=test_query, labels=test_ans)
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=True)

with torch.no_grad():
    with tqdm(total=len(test_dataloader), desc='Testing', unit='batch', postfix={'loss': 0.0}) as pbar:
        for batch in test_dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            # Forward pass
            outputs = model(input_ids, attention_mask)
            loss = criterion(outputs, labels)

            test_loss += loss.item()
            average_loss = test_loss / (pbar.n + 1)  # Calculate average loss per batch
            pbar.set_postfix(loss=average_loss)
            pbar.update(1)

# Calculate the average test loss
average_test_loss = test_loss / len(test_dataloader)
print(f'Average Test Loss: {average_test_loss}')



Testing: 100%|██████████| 2100/2100 [01:15<00:00, 27.64batch/s, loss=66.5]

Average Test Loss: 66.46966520537889





In [33]:
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

custom_sentence = "The objective entails the accomplishment of assist in tracking a pack of endangered wolves presently occupying (8,73) for this operation."

# Tokenize and convert to tensor
tokenized_input = tokenizer(custom_sentence, return_tensors='pt')
input_ids = tokenized_input['input_ids'].to(device)
attention_mask = tokenized_input['attention_mask'].to(device)

# Make prediction
with torch.no_grad():
    model.eval()
    output = model(input_ids, attention_mask)

# Assuming your model outputs logits, you may want to apply softmax to get probabilities
probabilities = torch.nn.functional.softmax(output, dim=1)

# Get the predicted label
predicted_label = torch.argmax(probabilities).item()

# Print the results
print(f"Custom Sentence: {custom_sentence}")
print(f"Predicted Label: {predicted_label}")
print(f"Class Probabilities: {probabilities}")

Custom Sentence: The objective entails the accomplishment of assist in tracking a pack of endangered wolves presently occupying (8,73) for this operation.
Predicted Label: 1
Class Probabilities: tensor([[1.4586e-27, 1.0000e+00, 5.4999e-33, 9.5251e-33]])


In [61]:
# Load the BERT tokenizer
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

# Input your own custom sentence
custom_sentence = "The objective entails the accomplishment of assist in tracking a pack of endangered wolves presently occupying (8,73) for this operation."

# Tokenize and convert to tensor
tokenized_input = tokenizer(custom_sentence, return_tensors='pt')
input_ids = tokenized_input['input_ids'].to(device)
attention_mask = tokenized_input['attention_mask'].to(device)

# Make prediction
with torch.no_grad():
    model.eval()
    output = model(input_ids, attention_mask)

# Decode the output tensor
decoded_output = output.cpu().numpy()

# Reverse the custom encoding for the category
decoded_category = dataset.reverse_encode_category(int(decoded_output[0, 2]))

# Print the results
print(f"Custom Sentence: {custom_sentence}")
print("Decoded Output - (x, y), dist : ({0:0.2f}, {1:0.2f}), {2:0.2f}".format(
    decoded_output[0, 0],
    decoded_output[0, 1],
    decoded_output[0, 3]))

print(f"Decoded Category: {decoded_category}")
print('[8, 73, False, 0]')

Custom Sentence: The objective entails the accomplishment of assist in tracking a pack of endangered wolves presently occupying (8,73) for this operation.
Decoded Output - (x, y), dist : (12.73, 74.52), 0.79
Decoded Category: False
[8, 73, False, 0]


In [66]:
train_ans[0:5]

[['8', '73', 'False', '0'],
 ['21', '70', 'swarm', '8'],
 ['-52', '98', 'zigzag', '12'],
 [False, False, 'zigzag', '0'],
 ['26', '-30', 'zigzag', '6']]

In [86]:
custom_sentence = ['The objective entails the accomplishment of assist in tracking a pack of endangered wolves presently occupying (8,73) for this operation.',
 'The drones must carry out assist in fighting a raging wildfire presently based in (21,70) and organize the drones structured in the pattern of throng and align the drones amid the length of 8 as quickly as possible.',
 "The drones are continuously monitoring the ship's movements presently found in (-52,98) and sync up the drones taking the form of wind and align the drones spanning the distance of 12 in support of the task.",
 'Our mission is centered on align the flight paths of the drones molded in the image of wavy during this operation.',
 'The drones are on a mission to  conduct a geological survey of an earthquake-prone region in (26,-30) and configure the drone alignment modeled as meander and arrange the drones across the expanse of 6 using the drones.']

custom_ans = [[8, 73, 'False', 0],
 [21, 70, 'swarm', 8],
 [-52, 98, 'zigzag', 12],
 [False, False, 'zigzag', 0],
 [26, -30, 'zigzag', 6]]


with torch.no_grad():
    model.eval()
    for i in range(len(custom_sentence)):
        tokenized_input = tokenizer(custom_sentence[i], return_tensors='pt')
        input_ids = tokenized_input['input_ids'].to(device)
        attention_mask = tokenized_input['attention_mask'].to(device)

        output = model(input_ids, attention_mask)
        decoded_output = output.cpu().numpy()
        print('%-30s%-15s\n'%(custom_ans[i], [round(decoded_output[0,0],2),round(decoded_output[0,1],2),dataset.reverse_encode_category(int(decoded_output[0, 2])),round(decoded_output[0,3],2)]))

[8, 73, 'False', 0]           [12.73, 74.52, 'False', 0.79]

[21, 70, 'swarm', 8]          [22.04, 72.76, 'False', 0.96]

[-52, 98, 'zigzag', 12]       [-37.95, 66.07, 'False', 0.6]

[False, False, 'zigzag', 0]   [-0.06, 0.6, 'circle', 0.1]

[26, -30, 'zigzag', 6]        [28.37, -30.97, 'zigzag', 4.87]



In [90]:
custom_encode = custom_ans.copy()
custom_encode = [[i[0],i[1], dataset.custom_encode_category(i[2]), i[3]] for i in custom_encode]

with torch.no_grad():
    model.eval()
    for i in range(len(custom_sentence)):
        tokenized_input = tokenizer(custom_sentence[i], return_tensors='pt')
        input_ids = tokenized_input['input_ids'].to(device)
        attention_mask = tokenized_input['attention_mask'].to(device)

        output = model(input_ids, attention_mask)
        decoded_output = output.cpu().numpy()
        print('%-30s%-15s\n'%(custom_encode[i], [round(decoded_output[0,0],2),round(decoded_output[0,1],2),round(decoded_output[0,2],2),round(decoded_output[0,3],2)]))

[8, 73, 0, 0]                 [12.73, 74.52, 0.24, 0.79]

[21, 70, 5, 8]                [22.04, 72.76, 0.16, 0.96]

[-52, 98, 1, 12]              [-37.95, 66.07, 0.35, 0.6]

[False, False, 1, 0]          [-0.06, 0.6, 3.8, 0.1]

[26, -30, 1, 6]               [28.37, -30.97, 1.1, 4.87]

