In [None]:
!pip install transformers
!pip install transformers[torch]
!pip install accelerate -U

Collecting accelerate>=0.20.3 (from transformers[torch])
  Downloading accelerate-0.26.1-py3-none-any.whl (270 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m270.9/270.9 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: accelerate
Successfully installed accelerate-0.26.1


In [None]:
import torch
import pandas as pd
import numpy as np
import random
from transformers import BertForSequenceClassification, BertTokenizer, TrainingArguments, Trainer, DataCollatorWithPadding, EarlyStoppingCallback, get_linear_schedule_with_warmup
from google.colab import drive
from torch.optim import SGD
from sklearn.model_selection import train_test_split

In [None]:
# Seed every RNG the notebook draws from (PyTorch, Python's random, NumPy) with one shared value
seed = 30  # arbitrary choice; any integer works
for _seed_rng in (torch.manual_seed, random.seed, np.random.seed):
    _seed_rng(seed)

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Mount Google Drive, then read the address dataset stored on it
drive.mount("/content/drive")
link = "/content/drive/My Drive/large_new_data_copy.csv"
data = pd.read_csv(
    filepath_or_buffer=link,
    encoding="ISO-8859-1",  # decode the file as Latin-1
)
data.head()  # preview the first rows

Mounted at /content/drive


Unnamed: 0,Address,Target
0,"10, Gaurav Apartments, Nahur Road, Behind Asho...",1
1,"Imperial Towers, A2, 1701, Nirmal nagari, Khad...",1
2,"im not yours when address is there, testtttt ...",0
3,"im not yours when address is there, testtttt ...",0
4,"im not yours when address is there, dsfsdfsdfs...",0


In [None]:
# Summarise columns, dtypes and non-null counts to check for missing values
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99758 entries, 0 to 99757
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Address  99758 non-null  object
 1   Target   99758 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 1.5+ MB


In [None]:
# Shuffle all rows. Passing random_state makes the permutation depend only on
# `seed` rather than on how much of NumPy's global RNG earlier cells have used.
data = data.sample(frac=1, random_state=seed)

In [None]:
# Class balance check: number of rows per Target value
data['Target'].value_counts()

0    51012
1    48746
Name: Target, dtype: int64

In [None]:
# Pretrained checkpoint shared by the tokenizer and the classifier
model_name = 'bert-base-uncased'

# WordPiece tokenizer matching the checkpoint
tokenizer = BertTokenizer.from_pretrained(model_name)

# BERT encoder with a sequence-classification head, moved to the selected device
model = BertForSequenceClassification.from_pretrained(model_name)
model = model.to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# 70/30 split. random_state pins the partition to `seed`, and stratifying on the
# label keeps the class ratio the same in both parts.
train_data, test_data = train_test_split(
    data,
    test_size=0.3,
    random_state=seed,
    stratify=data['Target'],
)
train_data.shape, test_data.shape

((69830, 2), (29928, 2))

In [None]:
# Tokenize and encode the data
def tokenize_data(data, tokenizer, max_length=128):
    """Encode the 'Address' column and attach the 'Target' column as labels.

    Args:
        data: DataFrame with an 'Address' (text) and a 'Target' (label) column.
        tokenizer: callable tokenizer; invoked on the list of addresses with
            fixed-length padding, truncation and PyTorch tensor output.
        max_length: length every sequence is padded / truncated to.

    Returns:
        Whatever the tokenizer returned, with an extra 'labels' entry holding
        the targets as a tensor.
    """
    encoding_options = dict(
        padding="max_length",
        truncation=True,
        return_tensors="pt",
        max_length=max_length,
    )
    encoded = tokenizer(data['Address'].tolist(), **encoding_options)
    # Labels ride along with the encodings so one object carries everything
    encoded['labels'] = torch.tensor(data['Target'].tolist())
    return encoded

# Encode the training split first, then the held-out split, with identical settings
train_tokenized_data, test_tokenized_data = (
    tokenize_data(split, tokenizer) for split in (train_data, test_data)
)

In [None]:
# Mini-batch size used by the data loaders (adjust as needed)
batch_size = 5

# Pack the encoded training tensors in the positional order the training loop
# reads them back: (input_ids, attention_mask, labels)
train_dataset = torch.utils.data.TensorDataset(
    *(train_tokenized_data[field] for field in ('input_ids', 'attention_mask', 'labels'))
)

# Training batches are reshuffled on every pass over the data
train_dataloader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

# Step size handed to the optimizer when it is constructed
learning_rate = 2e-5

In [None]:
# Held-out split, packed in the same (input_ids, attention_mask, labels) order
# as the training dataset
test_dataset = torch.utils.data.TensorDataset(
    test_tokenized_data['input_ids'],
    test_tokenized_data['attention_mask'],
    test_tokenized_data['labels']
)
# Iterate over the held-out data itself. This loader previously wrapped
# train_dataset, so the "test" accuracy was measured on the training examples.
# Evaluation order does not matter, so shuffling is switched off.
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Hugging Face training configuration. In the cells shown here only
# `num_train_epochs` is read back (for the step count and the manual training loop).
training_args = TrainingArguments(
    # checkpoints: destination, cadence, and how many to keep
    output_dir="./results",
    save_strategy="steps",
    save_steps=500,
    save_total_limit=2,
    # evaluation cadence
    evaluation_strategy="steps",
    eval_steps=500,
    # batch sizes and number of passes over the data
    per_device_train_batch_size=3,
    per_device_eval_batch_size=3,
    num_train_epochs=6,
    # do not drop dataset columns automatically
    remove_unused_columns=False,
)

In [None]:
from torch.nn import CrossEntropyLoss

# Stand-alone cross-entropy criterion. The training loop in this notebook reads
# `outputs.loss` from the model, so this object is not used there.
loss_fn = CrossEntropyLoss()

# Plain SGD over all model parameters, stepped at `learning_rate`
optimizer = SGD(params=model.parameters(), lr=learning_rate)

In [None]:
# Early-stopping callback: patience of 1 and a minimum improvement threshold of 0.01.
# NOTE(review): nothing in the visible cells receives this object; the manual
# training loop keeps its own patience counter instead - confirm whether it is still needed.
early_stop_callback = EarlyStoppingCallback(early_stopping_patience=1, early_stopping_threshold = 0.01)

In [None]:
# Total optimizer updates = number of epochs x batches per epoch
num_train_steps = training_args.num_train_epochs * len(train_dataloader)

# Learning-rate schedule with 1000 warm-up steps, spanning the whole training run
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=1000,
    num_training_steps=num_train_steps,
)

In [None]:
# Display the total number of scheduled training steps as a sanity check
num_train_steps

83796

In [None]:
# Early-stopping bookkeeping. The monitored quantity is each epoch's average
# *training* loss; this loop does not evaluate a validation split.
patience = 1  # epochs without improvement tolerated before stopping
no_improvement_counter = 0
best_loss = float('inf')

# Training loop
for epoch in range(training_args.num_train_epochs):
    model.train()
    total_loss = 0.0
    for batch in train_dataloader:
        # Each batch is (input_ids, attention_mask, labels), in dataset order
        inputs = batch[0].to(device)
        attention_mask = batch[1].to(device)
        labels = batch[2].to(device)

        # Forward pass; passing labels makes the model return the loss itself
        outputs = model(inputs, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step() # update the learning rate

        total_loss += loss.item()

    avg_loss = total_loss / len(train_dataloader)
    print(f"Epoch {epoch + 1} - Average Loss: {avg_loss}")

    # Early stopping on the epoch's average training loss
    if avg_loss < best_loss:
        best_loss = avg_loss
        no_improvement_counter = 0  # Reset counter
    else:
        no_improvement_counter += 1

    if no_improvement_counter >= patience:
        # The message names the quantity actually monitored (training loss)
        print("Early stopping triggered due to no improvement in training loss for "
              f"{patience} consecutive epochs.")
        break  # End training loop


Epoch 1 - Average Loss: 0.1140029540998464
Epoch 2 - Average Loss: 0.008498021145878042
Epoch 3 - Average Loss: 0.0050798641367400385
Epoch 4 - Average Loss: 0.004514582221427342
Epoch 5 - Average Loss: 0.0036943082000547973
Epoch 6 - Average Loss: 0.003620370466767541


In [None]:
# Accuracy over every batch yielded by test_dataloader
model.eval()  # inference mode
total_preds, total_labels = [], []
with torch.no_grad():  # no gradients needed while scoring
    for input_ids, attention_mask, labels in test_dataloader:
        logits = model(
            input_ids.to(device),
            attention_mask=attention_mask.to(device),
        ).logits

        # Highest-scoring class per example, collected on the CPU as NumPy values
        total_preds.extend(logits.argmax(dim=1).cpu().numpy())
        total_labels.extend(labels.cpu().numpy())

# Fraction of examples whose predicted class equals the label
accuracy = (np.array(total_preds) == np.array(total_labels)).mean()
print("Test Accuracy:", accuracy)


Test Accuracy: 0.999742231132751


In [None]:
import os

# Make sure Drive is mounted before writing to it
drive.mount("/content/drive")

# Single destination folder for weights, config and tokenizer files
save_dir = '/content/drive/My Drive/Address_optimizer/large_new_data'
# torch.save does not create missing parent folders, so create the target first
os.makedirs(save_dir, exist_ok=True)

# Save the fine-tuned weights only (the model's state_dict); optimizer and
# scheduler state are NOT written by this call.
torch.save(model.state_dict(), os.path.join(save_dir, 'pytorch_model.bin'))

# Save the configuration and tokenizer files next to the weights
model.config.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


('/content/drive/My Drive/Address_optimizer/large_new_data/tokenizer_config.json',
 '/content/drive/My Drive/Address_optimizer/large_new_data/special_tokens_map.json',
 '/content/drive/My Drive/Address_optimizer/large_new_data/vocab.txt',
 '/content/drive/My Drive/Address_optimizer/large_new_data/added_tokens.json')

In [None]:
import torch
import numpy as np

def classify_unknown_address(unknown_address, tokenizer, model, device, max_length=128):
    """Classify one address as proper/improper, print the verdict and return it.

    Args:
        unknown_address: address text to classify.
        tokenizer: tokenizer used to encode the text.
        model: sequence-classification model whose output exposes `.logits`.
        device: device the encoded inputs are moved to before the forward pass.
        max_length: length the input is padded / truncated to. Defaults to 128,
            the same value `tokenize_data` uses for the training data; without
            it, "max_length" padding falls back to the tokenizer's own maximum.

    Returns:
        The classification sentence (a string), which is also printed.
    """
    # Encode exactly like the training data: fixed-length padding plus truncation
    encoded_input = tokenizer(
        unknown_address,
        padding="max_length",
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )
    encoded_input = encoded_input.to(device)  # keep inputs on the model's device
    model.eval()

    with torch.no_grad():
        logits = model(**encoded_input).logits

        # Class probabilities via softmax; the prediction is the most likely class
        probabilities = torch.softmax(logits, dim=1)
        predicted_class = torch.argmax(probabilities, dim=1).item()

    # Human-readable name for each class index
    class_to_statement = {
        0: "improper",
        1: "proper"
    }

    # Report the input and the verdict with the probability of the chosen class
    print("Input Address:", unknown_address)
    classification_statement = f"The address is {class_to_statement[predicted_class]} with a probability of {probabilities[0][predicted_class]:.2f}."
    print(classification_statement)

    return classification_statement


In [None]:
# Expected label: proper.
# NOTE: the helper returns the statement string it prints, so `predicted_class`
# ends up holding that sentence rather than a class index.
unknown_address = '3rd Floor, Lakshmi Associates, Gandhi Bazaar Main Road, Above Reliance Trends, Basavanagudi, Bangalore'
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)


Input Address: 3rd Floor, Lakshmi Associates, Gandhi Bazaar Main Road, Above Reliance Trends, Basavanagudi, Bangalore
The address is proper with a probability of 1.00.


In [None]:
# Expected label: proper. The string spells the locality 'Bnadra'
# (other cells in this notebook spell it 'Bandra').
unknown_address = "501, New Friends Building, Junction of KC Marg, Off Bazaar Road, Bnadra West, Mumbai Maharashtra - 400050"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: 501, New Friends Building, Junction of KC Marg, Off Bazaar Road, Bnadra West, Mumbai Maharashtra - 400050
The address is proper with a probability of 1.00.


In [None]:
# Expected label: proper. Unmodified baseline for the two perturbed
# 'University of ...' cells that follow.
unknown_address = "University of Mumbai,Vidya Nagari, Kalina, Santacruz East, Mumbai, Maharashtra 400098"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)


Input Address: University of Mumbai,Vidya Nagari, Kalina, Santacruz East, Mumbai, Maharashtra 400098
The address is proper with a probability of 1.00.


In [None]:
# Expected label: improper - 'Mumbai' in the institution name is replaced by the
# gibberish token 'mnjhuytfdxs' (and 'Kalina' is dropped).
# Author note ('add'): presumably a reminder to add this example to the data - confirm.
unknown_address = "University of mnjhuytfdxs, Vidya Nagari, Santacruz East, Mumbai,Maharashtra 400098"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)


Input Address: University of mnjhuytfdxs, Vidya Nagari, Santacruz East, Mumbai,Maharashtra 400098
The address is proper with a probability of 1.00.


In [None]:
# Expected label: improper - gibberish 'piytr' in the institution name plus an
# injected 'g##585' segment.
unknown_address = "University of piytr, Vidya Nagari, g##585, Santacruz East, Mumbai, Maharashtra 400098"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: University of piytr, Vidya Nagari, g##585, Santacruz East, Mumbai, Maharashtra 400098
The address is proper with a probability of 1.00.


In [None]:
# Expected label: improper - contains the gibberish token 'crftyujnbvg' and a
# seven-digit code ('4000050').
unknown_address = "New Friends , crftyujnbvg on KC Marg, Bandra West ,Mumbai, 4000050"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: New Friends , crftyujnbvg on KC Marg, Bandra West ,Mumbai, 4000050
The address is improper with a probability of 0.79.


In [None]:
# Expected label: proper. Baseline for the perturbed 'Spectrum ...' cells that follow.
unknown_address = "Spectrum Tower, A-102, Wework, New Link Rd, Malad West, Mumbai, Maharashtra 400064"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: Spectrum Tower, A-102, Wework, New Link Rd, Malad West, Mumbai, Maharashtra 400064
The address is proper with a probability of 1.00.


In [None]:
# Expected label: improper - the locality 'Malad' is replaced by the gibberish
# token 'xdrftgbvcfg'.
unknown_address = "Spectrum Tower, A-102, WeWork , New Link Rd, xdrftgbvcfg West, Mumbai, Maharashtra 400064"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: Spectrum Tower, A-102, WeWork , New Link Rd, xdrftgbvcfg West, Mumbai, Maharashtra 400064
The address is proper with a probability of 0.99.


In [None]:
# Expected label: improper - 'Tower' is replaced by the gibberish token 'xdrftgh'
# ('Link' and the state name are also dropped).
unknown_address = "Spectrum xdrftgh, A-102, WeWork , New  Rd, Malad West, Mumbai, 400064"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device) # Author note: add this example to the data with label 0,
# then test again after replacing the improper words.

Input Address: Spectrum xdrftgh, A-102, WeWork , New  Rd, Malad West, Mumbai, 400064
The address is improper with a probability of 1.00.


In [None]:
# Expected label: proper. Baseline for the perturbed 'Shop No 101' cell that follows.
unknown_address = "Shop No 101, 1st Floor Hill Road, Bandra West Next to St Stanislaus School, Mumbai, Maharashtra 400050"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: Shop No 101, 1st Floor Hill Road, Bandra West Next to St Stanislaus School, Mumbai, Maharashtra 400050
The address is proper with a probability of 1.00.


In [None]:
# Expected label: improper - 'Floor' is replaced by 'xderfgh' and
# 'Mumbai, Maharashtra' by 'bvgyhnbvf'.
unknown_address = "Shop No 101, 1st xderfgh Hill Road, Bandra West Next to St Stanislaus School, bvgyhnbvf, 400050"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: Shop No 101, 1st xderfgh Hill Road, Bandra West Next to St Stanislaus School, bvgyhnbvf, 400050
The address is proper with a probability of 1.00.


The example below is not included in the accuracy calculation.

In [None]:
# Unlabelled probe: 'New Link' is replaced by the gibberish token 'mjubvfdef'
# (presumably expected improper - confirm).
unknown_address = "Spectrum Tower, A-102, WeWork , mjubvfdef  Rd, Malad West, Mumbai, 400064"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device) # Author note: add this example to the data with label 0,
# then test again after replacing the improper words.

Input Address: Spectrum Tower, A-102, WeWork , mjubvfdef  Rd, Malad West, Mumbai, 400064
The address is proper with a probability of 0.90.


In [None]:
# Unlabelled probe: contains no gibberish token; it differs from the earlier proper
# 'Spectrum Tower' example only in 'WeWork ,' and the missing state name.
# NOTE(review): presumably expected proper - confirm the intended label.
unknown_address = "Spectrum Tower, A-102, WeWork , New Link Rd, Malad West, Mumbai, 400064"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device) # Author note (same wording as the previous cells): add to the data as 0
# and test by replacing the improper words.

Input Address: Spectrum Tower, A-102, WeWork , New Link Rd, Malad West, Mumbai, 400064
The address is improper with a probability of 0.97.


### Examples used for the accuracy calculation

In [None]:
# Proper case 1 - expected label: proper.
unknown_address = "505, Manuel Gonsalves Rd, Bandra West, Mumbai, Maharashtra 400050"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: 505, Manuel Gonsalves Rd, Bandra West, Mumbai, Maharashtra 400050
The address is proper with a probability of 1.00.


In [None]:
# Proper case 2 - expected label: proper.
unknown_address = "WING-E, 13-14, Off, New Link Rd, opp. Movie Time Cinema, Evershine Nagar, Malad West, Mumbai, Maharashtra 400064"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: WING-E, 13-14, Off, New Link Rd, opp. Movie Time Cinema, Evershine Nagar, Malad West, Mumbai, Maharashtra 400064
The address is proper with a probability of 1.00.


In [None]:
# Improper case 1 - 'Manuel' from proper case 1 is replaced by the gibberish token
# 'zdrtghjui' (the string also ends with a trailing space).
unknown_address = "505, zdrtghjui Gonsalves Rd, Bandra West, Mumbai, Maharashtra 400050 "
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: 505, zdrtghjui Gonsalves Rd, Bandra West, Mumbai, Maharashtra 400050 
The address is proper with a probability of 1.00.


In [None]:
# Improper case 2 - contains the gibberish token 'ertyuiopjhbvc'.
unknown_address = "7, ertyuiopjhbvc, Grant Plaza,Vasai, Mumbai, Maharashtra 347605"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: 7, ertyuiopjhbvc, Grant Plaza,Vasai, Mumbai, Maharashtra 347605
The address is improper with a probability of 1.00.


In [None]:
# Improper case 3 - begins with the gibberish token 'xdftgbhusd'.
unknown_address = "xdftgbhusd, Apollo Bandar, Colaba, Mumbai, Maharashtra 400001"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: xdftgbhusd, Apollo Bandar, Colaba, Mumbai, Maharashtra 400001
The address is proper with a probability of 0.97.


In [None]:
# Improper case 4 - contains the filler token 'abcdefgh' ('Western abcdefgh').
unknown_address = "702 Western abcdefgh, Sir Mathuradas Vasanji Rd Junction, Mumbai, Maharashtra 400069"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: 702 Western abcdefgh, Sir Mathuradas Vasanji Rd Junction, Mumbai, Maharashtra 400069
The address is proper with a probability of 1.00.


In [None]:
# Improper case 5 (author's label).
# NOTE(review): this string contains no gibberish token - it reads as the unperturbed
# form of improper case 4 - so either the label or the address may be wrong; confirm.
unknown_address = "702 Western Express Highway, Sir Mathuradas Vasanji Rd Junction, Mumbai, Maharashtra 400069"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: 702 Western Express Highway, Sir Mathuradas Vasanji Rd Junction, Mumbai, Maharashtra 400069
The address is proper with a probability of 1.00.


In [None]:
# Proper case 3 - expected label: proper.
unknown_address = "Pokharan Rd Number 2, Siddhachal Housing Society, Pawar Nagar, Thane West, Thane, Maharashtra 400610"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: Pokharan Rd Number 2, Siddhachal Housing Society, Pawar Nagar, Thane West, Thane, Maharashtra 400610
The address is proper with a probability of 1.00.


In [None]:
# Improper case 6 - 'Siddhachal Housing Society' from proper case 3 is replaced
# by the filler token 'xyzxyzxyz'.
unknown_address = "Pokharan Rd Number 2, xyzxyzxyz, Pawar Nagar, Thane West, Thane, Maharashtra 400610"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: Pokharan Rd Number 2, xyzxyzxyz, Pawar Nagar, Thane West, Thane, Maharashtra 400610
The address is proper with a probability of 0.99.


In [None]:
# Improper case 7 - contains the gibberish token 'wqsdrtgfthnk'.
unknown_address = "Suresh Complex, S.No-151/12/1, wqsdrtgfthnk - Kharadi Rd, Hadapsar, Pune, Maharashtra 411013"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: Suresh Complex, S.No-151/12/1, wqsdrtgfthnk - Kharadi Rd, Hadapsar, Pune, Maharashtra 411013
The address is proper with a probability of 1.00.


In [None]:
# Proper case 4 - expected label: proper.
unknown_address = "19th Floor, Concorde Tower C, UB City, No.24, Vittal Mallya Road, Bangalore 560001, India"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: 19th Floor, Concorde Tower C, UB City, No.24, Vittal Mallya Road, Bangalore 560001, India
The address is proper with a probability of 1.00.


In [None]:
# Improper case 8 - 'Concorde' from proper case 4 is replaced by the gibberish
# token 'lkjhbgvfcdfvgb&'.
unknown_address = "19th Floor, lkjhbgvfcdfvgb& Tower C, UB City, No.24, Vittal Mallya Road, Bangalore 560001, India"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: 19th Floor, lkjhbgvfcdfvgb& Tower C, UB City, No.24, Vittal Mallya Road, Bangalore 560001, India
The address is proper with a probability of 1.00.


In [None]:
# Improper case 9 - contains the gibberish tokens 'qwertytfghjn' and
# 'nbv$%%^#fcdxfgh' (the string also starts with a space).
unknown_address = " qwertytfghjn Floor, Sameer Rd,Gold's Gym,opp.Bank of nbv$%%^#fcdxfgh, Bandra West, Mumbai, Maharashtra 400050"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address:  qwertytfghjn Floor, Sameer Rd,Gold's Gym,opp.Bank of nbv$%%^#fcdxfgh, Bandra West, Mumbai, Maharashtra 400050
The address is proper with a probability of 0.99.


In [None]:
# Proper case 5 - expected label: proper.
# NOTE(review): this string is identical to the proper case 4 address.
unknown_address = "19th Floor, Concorde Tower C, UB City, No.24, Vittal Mallya Road, Bangalore 560001, India"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: 19th Floor, Concorde Tower C, UB City, No.24, Vittal Mallya Road, Bangalore 560001, India
The address is proper with a probability of 1.00.


In [None]:
# Improper case 10 - same gibberish tokens as improper case 9, with the locality
# ('Bandra West') and the state name removed.
unknown_address = " qwertytfghjn Floor,Sameer Rd,Gold's Gym,opp.Bank of nbv$%%^#fcdxfgh, Mumbai, 400050"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address:  qwertytfghjn Floor,Sameer Rd,Gold's Gym,opp.Bank of nbv$%%^#fcdxfgh, Mumbai, 400050
The address is proper with a probability of 0.98.


In [None]:
# Proper case 6 - expected label: proper; the address mixes Devanagari and Latin script.
unknown_address = "Plot No.C-68, एवेन्यू 3, G Block BKC, Bandra Kurla Complex, Bandra East, Mumbai, Maharashtra 400051"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: Plot No.C-68, एवेन्यू 3, G Block BKC, Bandra Kurla Complex, Bandra East, Mumbai, Maharashtra 400051
The address is proper with a probability of 1.00.


In [None]:
# Proper case 7 - expected label: proper.
unknown_address = "Office no 39, P. M. Road, Next to Santacruz Station, Santacruz (West), Mumbai, Maharashtra 400054"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: Office no 39, P. M. Road, Next to Santacruz Station, Santacruz (West), Mumbai, Maharashtra 400054
The address is proper with a probability of 1.00.


In [None]:
# Improper case 11 - proper case 6 with 'Plot No.C-68' replaced by '#@^&Ljxxz'
# and 'Mumbai' replaced by 'Xr#lls'.
unknown_address = "#@^&Ljxxz, एवेन्यू 3, G Block BKC, Bandra Kurla Complex, Bandra East, Xr#lls, Maharashtra 400051"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: #@^&Ljxxz, एवेन्यू 3, G Block BKC, Bandra Kurla Complex, Bandra East, Xr#lls, Maharashtra 400051
The address is improper with a probability of 0.97.


In [None]:
# Improper case 12 - contains the gibberish tokens 'WzzXYZ%$**' and 'vcdxerftghb'.
unknown_address = "Plot no. 53, WzzXYZ%$** Area, Sector 32, Gurugram, vcdxerftghb 122018"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: Plot no. 53, WzzXYZ%$** Area, Sector 32, Gurugram, vcdxerftghb 122018
The address is improper with a probability of 0.94.


In [None]:
# Improper case 13 - contains the filler token 'aaaaaaa'.
unknown_address = "19, Bandra Kurla Complex, aaaaaaa, Mumbai, Maharashtra 400067"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: 19, Bandra Kurla Complex, aaaaaaa, Mumbai, Maharashtra 400067
The address is proper with a probability of 1.00.


### With threshold

In [None]:
# Minimum probability a predicted class must reach to be kept, keyed by class
# index (0 and 1); read by the thresholded classify_unknown_address.
class_to_threshold = {0: 0.5, 1: 0.7}

In [None]:
import torch
import numpy as np

def classify_unknown_address(unknown_address, tokenizer, model, device, max_length=128, thresholds=None):
    """Classify one address as proper/improper, applying per-class probability thresholds.

    The most probable class is chosen first; if its probability is below the
    threshold configured for that class, the other class is reported instead
    (the flip assumes exactly two classes, 0 and 1).

    Args:
        unknown_address: address text to classify.
        tokenizer: tokenizer used to encode the text.
        model: sequence-classification model whose output exposes `.logits`.
        device: device the encoded inputs are moved to before the forward pass.
        max_length: length the input is padded / truncated to. Defaults to 128,
            the same value `tokenize_data` uses for the training data.
        thresholds: optional mapping {class index: minimum probability}. When
            omitted, the notebook-level `class_to_threshold` is used if it is
            defined; if neither exists, no thresholding is applied.

    Returns:
        The classification sentence (a string), which is also printed. The
        probability shown is that of the class finally reported.
    """
    if thresholds is None:
        # Existing calls pass no thresholds and rely on the notebook-level mapping
        thresholds = globals().get("class_to_threshold")

    # Encode exactly like the training data: fixed-length padding plus truncation
    encoded_input = tokenizer(
        unknown_address,
        padding="max_length",
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )
    encoded_input = encoded_input.to(device)  # keep inputs on the model's device
    model.eval()

    with torch.no_grad():
        logits = model(**encoded_input).logits

        # Class probabilities via softmax; start from the most likely class
        probabilities = torch.softmax(logits, dim=1)
        predicted_class = torch.argmax(probabilities, dim=1).item()

        # If the probability of the predicted class is below its threshold, report the other class
        if thresholds is not None and probabilities[0][predicted_class] < thresholds[predicted_class]:
            predicted_class = 1 - predicted_class

    # Human-readable name for each class index
    class_to_statement = {
        0: "improper",
        1: "proper"
    }

    # Report the input and the verdict with the probability of the reported class
    print("Input Address:", unknown_address)
    classification_statement = f"The address is {class_to_statement[predicted_class]} with a probability of {probabilities[0][predicted_class]:.2f}."
    print(classification_statement)

    return classification_statement


In [None]:
# Proper case 1 (threshold run) - expected label: proper.
unknown_address = "505, Manuel Gonsalves Rd, Bandra West, Mumbai, Maharashtra 400050"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: 505, Manuel Gonsalves Rd, Bandra West, Mumbai, Maharashtra 400050
The address is proper with a probability of 1.00.


In [None]:
# Proper case 2 (threshold run) - expected label: proper.
unknown_address = "WING-E, 13-14, Off, New Link Rd, opp. Movie Time Cinema, Evershine Nagar, Malad West, Mumbai, Maharashtra 400064"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: WING-E, 13-14, Off, New Link Rd, opp. Movie Time Cinema, Evershine Nagar, Malad West, Mumbai, Maharashtra 400064
The address is proper with a probability of 1.00.


In [None]:
# Improper case 1 (threshold run) - 'Manuel' from proper case 1 is replaced by the
# gibberish token 'zdrtghjui' (the string also ends with a trailing space).
unknown_address = "505, zdrtghjui Gonsalves Rd, Bandra West, Mumbai, Maharashtra 400050 "
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: 505, zdrtghjui Gonsalves Rd, Bandra West, Mumbai, Maharashtra 400050 
The address is proper with a probability of 1.00.


In [None]:
# Improper case 2 (threshold run) - contains the gibberish token 'ertyuiopjhbvc'.
unknown_address = "7, ertyuiopjhbvc, Grant Plaza,Vasai, Mumbai, Maharashtra 347605"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: 7, ertyuiopjhbvc, Grant Plaza,Vasai, Mumbai, Maharashtra 347605
The address is improper with a probability of 1.00.


In [None]:
# Improper case 3 (threshold run) - begins with the gibberish token 'xdftgbhusd'.
unknown_address = "xdftgbhusd, Apollo Bandar, Colaba, Mumbai, Maharashtra 400001"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: xdftgbhusd, Apollo Bandar, Colaba, Mumbai, Maharashtra 400001
The address is proper with a probability of 0.97.


In [None]:
# Improper case 4 (threshold run) - the state name is replaced by the gibberish token
# 'cxdertyhnjmk'. This is a different string from the improper case 4 used in the
# earlier, non-threshold section.
unknown_address = "702 Western Express Highway, Sir Mathuradas Vasanji Rd, Junction, Mumbai, cxdertyhnjmk 400069"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: 702 Western Express Highway, Sir Mathuradas Vasanji Rd, Junction, Mumbai, cxdertyhnjmk 400069
The address is proper with a probability of 0.98.


In [None]:
# Improper case 5 (threshold run) - 'Sir Mathuradas Vasanji Rd' is replaced by the
# gibberish token 'cdrfvghyghjuikmj'.
unknown_address = "702 Western Express Highway, cdrfvghyghjuikmj, Junction, Mumbai, Maharashtra 400069"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: 702 Western Express Highway, cdrfvghyghjuikmj, Junction, Mumbai, Maharashtra 400069
The address is proper with a probability of 0.99.


In [None]:
# Proper case 3 (threshold run) - expected label: proper.
unknown_address = "Pokharan Rd Number 2, Siddhachal Housing Society, Pawar Nagar, Thane West, Thane, Maharashtra 400610"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: Pokharan Rd Number 2, Siddhachal Housing Society, Pawar Nagar, Thane West, Thane, Maharashtra 400610
The address is proper with a probability of 1.00.


In [None]:
# Improper case 6 (threshold run) - 'Siddhachal Housing Society' from proper case 3
# is replaced by the gibberish token 'vcdfvbh' (the earlier section used 'xyzxyzxyz').
unknown_address = "Pokharan Rd Number 2, vcdfvbh, Pawar Nagar, Thane West, Thane, Maharashtra 400610"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: Pokharan Rd Number 2, vcdfvbh, Pawar Nagar, Thane West, Thane, Maharashtra 400610
The address is proper with a probability of 1.00.


In [None]:
# Improper case 7 (threshold run) - contains the gibberish token 'wqsdrtgfthnk'.
unknown_address = "Suresh Complex, S.No-151/12/1, wqsdrtgfthnk - Kharadi Rd, Hadapsar, Pune, Maharashtra 411013"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: Suresh Complex, S.No-151/12/1, wqsdrtgfthnk - Kharadi Rd, Hadapsar, Pune, Maharashtra 411013
The address is proper with a probability of 1.00.


In [None]:
# Proper case 4 (threshold run) - expected label: proper.
unknown_address = "19th Floor, Concorde Tower C, UB City, No.24, Vittal Mallya Road, Bangalore 560001, India"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: 19th Floor, Concorde Tower C, UB City, No.24, Vittal Mallya Road, Bangalore 560001, India
The address is proper with a probability of 1.00.


In [None]:
# Improper case 8 (threshold run) - 'Concorde' is replaced by 'ZxxZ&*$#' and
# 'Bangalore' by 'xzsdftvgbh' (relative to proper case 4).
unknown_address = "19th Floor, ZxxZ&*$# Tower C, UB City, No.24, Vittal Mallya Road, xzsdftvgbh 560001, India"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: 19th Floor, ZxxZ&*$# Tower C, UB City, No.24, Vittal Mallya Road, xzsdftvgbh 560001, India
The address is proper with a probability of 1.00.


In [None]:
# Improper case 9 (threshold run) - contains the gibberish tokens 'qwertytfghjn' and
# 'nv$%%^#fcdxfg' (the string also starts with a space).
unknown_address = " qwertytfghjn Floor, Sameer Rd,Gold's Gym,opp.Bank of nv$%%^#fcdxfg, Bandra West, Mumbai, Maharashtra 400050"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address:  qwertytfghjn Floor, Sameer Rd,Gold's Gym,opp.Bank of nv$%%^#fcdxfg, Bandra West, Mumbai, Maharashtra 400050
The address is proper with a probability of 0.99.


In [None]:
# Proper case 5 (threshold run) - expected label: proper.
# NOTE(review): this string is identical to the proper case 4 address.
unknown_address = "19th Floor, Concorde Tower C, UB City, No.24, Vittal Mallya Road, Bangalore 560001, India"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: 19th Floor, Concorde Tower C, UB City, No.24, Vittal Mallya Road, Bangalore 560001, India
The address is proper with a probability of 1.00.


In [None]:
# Improper case 10 (threshold run) - contains the gibberish tokens 'qwertytfghjn' and
# 'nbv$%%^#fcdxfgh'; no locality or state name is given.
unknown_address = " qwertytfghjn Floor,Sameer Rd,Gold's Gym,opp.Bank of nbv$%%^#fcdxfgh, Mumbai, 400050"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address:  qwertytfghjn Floor,Sameer Rd,Gold's Gym,opp.Bank of nbv$%%^#fcdxfgh, Mumbai, 400050
The address is proper with a probability of 0.98.


In [None]:
# Proper case 6 (threshold run) - expected label: proper; the address mixes
# Devanagari and Latin script.
unknown_address = "Plot No.C-68, एवेन्यू 3, G Block BKC, Bandra Kurla Complex, Bandra East, Mumbai, Maharashtra 400051"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: Plot No.C-68, एवेन्यू 3, G Block BKC, Bandra Kurla Complex, Bandra East, Mumbai, Maharashtra 400051
The address is proper with a probability of 1.00.


In [None]:
# Proper case 7 (threshold run) - expected label: proper.
unknown_address = "Office no 39, P. M. Road, Next to Santacruz Station, Santacruz (West), Mumbai, Maharashtra 400054"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: Office no 39, P. M. Road, Next to Santacruz Station, Santacruz (West), Mumbai, Maharashtra 400054
The address is proper with a probability of 1.00.


In [None]:
# Improper case 11 (threshold run) - proper case 6 with 'Plot No.C-68' replaced by
# '#@^&Ljxxz' and 'Mumbai' replaced by 'Xr#lls'.
unknown_address = "#@^&Ljxxz, एवेन्यू 3, G Block BKC, Bandra Kurla Complex, Bandra East, Xr#lls, Maharashtra 400051"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: #@^&Ljxxz, एवेन्यू 3, G Block BKC, Bandra Kurla Complex, Bandra East, Xr#lls, Maharashtra 400051
The address is improper with a probability of 0.97.


In [None]:
# Improper case 12 (threshold run) - contains the filler token 'qwertyuiop'
# (a different string from the improper case 12 used in the earlier section).
unknown_address = "Plot no. 53, qwertyuiop Area, Sector 32, Gurugram, Haryana 122018"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: Plot no. 53, qwertyuiop Area, Sector 32, Gurugram, Haryana 122018
The address is proper with a probability of 1.00.


In [None]:
# Improper case 13 (threshold run) - contains the filler token 'aaaaaaa'.
unknown_address = "19, Bandra Kurla Complex, aaaaaaa, Mumbai, Maharashtra 400067"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: 19, Bandra Kurla Complex, aaaaaaa, Mumbai, Maharashtra 400067
The address is proper with a probability of 1.00.


In [None]:
# Improper (unnumbered) - placeholder parts ('some address', 'some street') and a
# long run of 'b' characters in place of a locality.
unknown_address = "some address, XYZ Company, some street, bbbbbbbbbbbbbbbbbbbbbbbbbb, Mumbai, Maharashtra 400077"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: some address, XYZ Company, some street, bbbbbbbbbbbbbbbbbbbbbbbbbb, Mumbai, Maharashtra 400077
The address is proper with a probability of 1.00.


In [None]:
# Improper (author's label, unnumbered).
# NOTE(review): no gibberish token is visible in this string, so it is unclear what
# makes it improper - confirm the intended label.
unknown_address = "Z Block, Voltas Premises, TB Kadam Marg, Chinchpokli, Mumbai, Maharashtra, 400011"
predicted_class = classify_unknown_address(unknown_address, tokenizer, model, device)

Input Address: Z Block, Voltas Premises, TB Kadam Marg, Chinchpokli, Mumbai, Maharashtra, 400011
The address is proper with a probability of 1.00.
