In [2]:
# Install the necessary libraries
!pip install transformers
!pip install torch
!pip install pandas
!pip install scikit-learn


Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-

In [7]:

# Import the required libraries
import pandas as pd
import torch
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from torch.utils.data import DataLoader, TensorDataset, RandomSampler, SequentialSampler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Read the question/answer corpus and discard any row with a missing value
file_path = '/content/corpsolutions_data.csv'
df = pd.read_csv(file_path).dropna()

# Fit the encoder on the answer column and store the integer class id
# of each answer in a new 'Label' column
label_encoder = LabelEncoder()
df = df.assign(Label=label_encoder.fit_transform(df['Answers ']))

# Persist the fitted encoder so predicted ids can be decoded back to answers later
pd.to_pickle(label_encoder, '/content/label_encoder.pkl')

# Tokenization and encoding helper
def tokenize_and_encode(tokenizer, text, max_length=128):
    """Encode a single text with ``tokenizer.encode_plus``.

    Args:
        tokenizer: Object exposing an ``encode_plus`` method.
        text: The text to encode.
        max_length: Length passed to the tokenizer; truncation is enabled and
            padding is set to ``'max_length'``.

    Returns:
        Whatever ``tokenizer.encode_plus`` returns for these options
        (PyTorch tensors and an attention mask are requested).
    """
    encode_options = dict(
        max_length=max_length,
        truncation=True,
        padding='max_length',
        return_tensors='pt',
        return_attention_mask=True,
    )
    return tokenizer.encode_plus(text, **encode_options)

# Load the pretrained 'bert-base-uncased' tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Encode every question once; labels are read from the same frame in the
# same row order, so they stay aligned with the encodings
encodings = [tokenize_and_encode(tokenizer, question) for question in df['Questions ']]
labels = df['Label'].tolist()

# Concatenate the per-question tensors along dim 0 into single tensors
input_ids = torch.cat([enc['input_ids'] for enc in encodings], dim=0)
attention_masks = torch.cat([enc['attention_mask'] for enc in encodings], dim=0)
labels = torch.tensor(labels)

# Split the data into training and validation sets (10% held out).
# A fixed random_state makes the split identical on every run, so validation
# losses from different runs are computed on the same held-out examples.
train_inputs, val_inputs, train_masks, val_masks, train_labels, val_labels = train_test_split(
    input_ids, attention_masks, labels, test_size=0.1, random_state=42)

# Training batches are drawn in random order
train_data = TensorDataset(train_inputs, train_masks, train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=8)

# Validation batches are read in a fixed, sequential order
val_data = TensorDataset(val_inputs, val_masks, val_labels)
val_sampler = SequentialSampler(val_data)
val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=8)

# Load pre-trained BERT for sequence classification, with one output class
# per label known to the encoder
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=len(label_encoder.classes_))

# Move model to GPU if available (done before the optimizer is created so the
# optimizer is built over the parameters on their final device)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Set up the optimizer.
# torch.optim.AdamW is used because the AdamW class shipped with transformers
# is deprecated in favour of the PyTorch implementation.
# weight_decay=0.0 is passed explicitly to keep the behaviour of the previous
# optimizer (PyTorch's own default would be 0.01).
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5, eps=1e-8, weight_decay=0.0)

# Fine-tune for a fixed number of epochs; after each epoch report the mean
# training loss and the mean loss on the validation set
epochs = 19
for epoch in range(epochs):
    # ---- training pass ----
    model.train()
    total_loss = 0
    for batch in train_dataloader:
        batch_input_ids, batch_attention_masks, batch_labels = (t.to(device) for t in batch)

        # Clear gradients left over from the previous step before the new backward pass
        optimizer.zero_grad()

        # Passing labels makes the model return the loss alongside its outputs
        outputs = model(
            input_ids=batch_input_ids,
            attention_mask=batch_attention_masks,
            labels=batch_labels,
        )
        loss = outputs.loss

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_train_loss = total_loss / len(train_dataloader)
    print(f'Epoch {epoch+1}, Loss: {avg_train_loss}')

    # ---- validation pass (no gradient tracking, no parameter updates) ----
    model.eval()
    eval_loss = 0
    with torch.no_grad():
        for batch in val_dataloader:
            batch_input_ids, batch_attention_masks, batch_labels = (t.to(device) for t in batch)

            outputs = model(
                input_ids=batch_input_ids,
                attention_mask=batch_attention_masks,
                labels=batch_labels,
            )
            eval_loss += outputs.loss.item()

    avg_val_loss = eval_loss / len(val_dataloader)
    print(f'Validation Loss: {avg_val_loss}')

# Write the fine-tuned model and the tokenizer files into the same directory
# so both can later be reloaded from one path
output_dir = '/content/fine_tuned_bert'
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 4.190215996333531
Validation Loss: 4.2250800132751465
Epoch 2, Loss: 4.066236155373709
Validation Loss: 4.25269079208374
Epoch 3, Loss: 4.033226762499128
Validation Loss: 4.253480434417725
Epoch 4, Loss: 3.992753267288208
Validation Loss: 4.300961494445801
Epoch 5, Loss: 3.887515068054199
Validation Loss: 4.3316121101379395
Epoch 6, Loss: 3.8137001650674
Validation Loss: 4.357764720916748
Epoch 7, Loss: 3.7173622335706438
Validation Loss: 4.3844895362854
Epoch 8, Loss: 3.664858818054199
Validation Loss: 4.391770839691162
Epoch 9, Loss: 3.670928886958531
Validation Loss: 4.415860176086426
Epoch 10, Loss: 3.4916917937142506
Validation Loss: 4.325550556182861
Epoch 11, Loss: 3.454824890409197
Validation Loss: 4.447235107421875
Epoch 12, Loss: 3.326647928782872
Validation Loss: 4.320084095001221
Epoch 13, Loss: 3.2426972729819163
Validation Loss: 4.370346546173096
Epoch 14, Loss: 3.2470193249838695
Validation Loss: 4.311803817749023
Epoch 15, Loss: 3.0704819474901472
Validat

('/content/fine_tuned_bert/tokenizer_config.json',
 '/content/fine_tuned_bert/special_tokens_map.json',
 '/content/fine_tuned_bert/vocab.txt',
 '/content/fine_tuned_bert/added_tokens.json')

In [None]:
# Chatbot: answer user questions interactively with the fine-tuned model

In [9]:
# Install the necessary libraries
!pip install transformers
!pip install torch
# Install the necessary libraries
!pip install transformers
!pip install torch
!pip install pandas




In [10]:

# Import the required libraries
import torch
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification

# Directory holding the saved fine-tuned model and its tokenizer files
model_dir = '/content/fine_tuned_bert'

# Restore the classifier and the matching tokenizer from that directory
model = BertForSequenceClassification.from_pretrained(model_dir)
tokenizer = BertTokenizer.from_pretrained(model_dir)

# Restore the pickled label encoder used to turn predicted class ids back into labels.
# NOTE: unpickling executes code from the file; only load pickles you created yourself.
label_encoder = pd.read_pickle('/content/label_encoder.pkl')

def classify_input(user_input):
    """Return the decoded label predicted for ``user_input``.

    The text is tokenized (truncated to at most 128 tokens), run through the
    module-level ``model`` without gradient tracking, and the index of the
    highest logit is mapped back to its original label with the module-level
    ``label_encoder``.

    Args:
        user_input: The text to classify.

    Returns:
        The label that ``label_encoder`` associates with the top-scoring class.
    """
    encoded = tokenizer(user_input, return_tensors='pt', truncation=True, padding=True, max_length=128)

    # Switch to evaluation mode and skip gradient bookkeeping for inference
    model.eval()
    with torch.no_grad():
        scores = model(**encoded).logits
        best_index = scores.argmax(dim=1).item()

    # inverse_transform works on sequences, so wrap the id and unwrap the result
    return label_encoder.inverse_transform([best_index])[0]

def main():
    """Run an interactive console chat loop.

    Each line typed by the user is passed to ``classify_input`` and the
    returned response is printed. The loop ends when the user types ``exit``
    (case-insensitive, surrounding whitespace ignored), or when input is no
    longer available (end of input or a keyboard/kernel interrupt).
    Blank lines are ignored instead of being sent to the model.
    """
    print("Chatbot is ready! Type 'exit' to end the chat.")
    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Input stream closed or the run was interrupted: finish cleanly
            # instead of surfacing a traceback.
            print("\nChatbot: Goodbye!")
            break

        # Nothing typed: prompt again rather than classifying an empty string
        if not user_input:
            continue

        if user_input.lower() == 'exit':
            print("Chatbot: Goodbye!")
            break

        response = classify_input(user_input)
        print(f"Chatbot: {response}")

# Start the interactive chat loop only when this code runs as the main module
if __name__ == '__main__':
    main()


Chatbot is ready! Type 'exit' to end the chat.
You: Does Cube CorpSolutions have any partnerships?
Chatbot: Yes, We form long-term partnerships with specialized service providers to enhance their services and create value for clients.
You: exit
Chatbot: Goodbye!
