In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from torch.utils.data import DataLoader, TensorDataset, random_split
import torch
from tqdm import tqdm

In [None]:
# Load the FIR dataset. Later cells read the 'Description' and 'section' columns for
# training, and 'Bailable', 'Cognizable', 'Court' and 'Punishment' when filling the FIR.
data = pd.read_csv('FIR_DATASET(updated).csv')

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
train_data, test_data = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Tokenizer and classifier both start from the same pretrained checkpoint.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# One output logit per distinct value in the 'section' column
# (dropna=False counts a missing value as its own entry, exactly like len(unique())).
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=data['section'].nunique(dropna=False),
)


In [None]:
# The tokenizer only accepts strings, so force every description to str.
# Rebinding through .assign() builds a new frame instead of writing into the frame
# returned by train_test_split, which avoids pandas' SettingWithCopyWarning and keeps
# this cell safe to re-run.
train_data = train_data.assign(Description=train_data['Description'].astype(str))


In [None]:
# Tokenize every training description in one call; sequences are padded to the
# longest one and truncated to the tokenizer's limit, returned as PyTorch tensors.
train_encodings = tokenizer(
    train_data['Description'].tolist(),
    padding=True,
    truncation=True,
    return_tensors='pt',
)

# Integer class ids: position of each section within the sorted set of sections
# present in the training split.
train_labels = torch.tensor(pd.Categorical(train_data['section']).codes.tolist())

In [None]:
# Tokenize the held-out descriptions. They are cast to str first (as the training
# descriptions are) because the tokenizer rejects non-string values such as NaN.
test_encodings = tokenizer(
    test_data['Description'].astype(str).tolist(),
    padding=True,
    truncation=True,
    return_tensors='pt',
)

# Encode test labels with the TRAINING split's category list so that a given integer
# means the same section in both splits. Building the categories from the test split
# alone shifts the codes whenever the test split is missing a section.
# A section that never occurs in training gets code -1; it can never be predicted,
# so it simply counts as a miss in the accuracy computation.
test_labels = torch.tensor(
    pd.Categorical(
        test_data['section'],
        categories=train_data['section'].astype('category').cat.categories,
    ).codes.tolist()
)


In [None]:
# Each dataset item is the triple (input_ids, attention_mask, label).
train_dataset = TensorDataset(
    train_encodings['input_ids'],
    train_encodings['attention_mask'],
    train_labels,
)

# Mini-batches of 8, reshuffled every epoch.
train_loader = DataLoader(dataset=train_dataset, batch_size=8, shuffle=True)


In [None]:
# Same (input_ids, attention_mask, label) layout as the training dataset.
test_dataset = TensorDataset(
    test_encodings['input_ids'],
    test_encodings['attention_mask'],
    test_labels,
)

# Evaluation batches keep their original order.
test_loader = DataLoader(dataset=test_dataset, batch_size=8, shuffle=False)


In [None]:
# Use PyTorch's own AdamW: the implementation exported by `transformers` is deprecated
# in favour of torch.optim.AdamW. eps and weight_decay are set explicitly to the values
# the transformers optimizer used by default, so the training hyperparameters are
# unchanged by the switch.
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5, eps=1e-6, weight_decay=0.0)


In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Move the model's parameters to the chosen device (also the cell's displayed value).
model.to(device)


In [None]:
# Fine-tune for three passes over the training data.
for epoch in range(3):
    model.train()  # training mode for this epoch
    for batch in tqdm(train_loader, desc=f'Epoch {epoch + 1}'):
        # A batch is (input_ids, attention_mask, labels); move all three to the device.
        input_ids, attention_mask, labels = (tensor.to(device) for tensor in batch)

        # Standard step: clear old gradients, forward with labels so the model
        # returns its loss, backpropagate, update the weights.
        optimizer.zero_grad()
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

In [None]:
# Switch the model to evaluation mode and start with empty accumulators
# for the predicted and the true class ids.
model.eval()
all_preds, all_labels = [], []

In [None]:
# Put the model in eval mode and reset the accumulators inside this cell, so that
# re-running it recomputes the results instead of appending a second copy of every
# prediction to the existing lists.
model.eval()
all_preds = []
all_labels = []

# No gradients are needed for inference.
with torch.no_grad():
    for batch in tqdm(test_loader, desc='Testing'):
        input_ids = batch[0].to(device)
        attention_mask = batch[1].to(device)
        # Labels are only compared on the host, so they never need to visit the device.
        labels = batch[2]

        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        # Predicted class = index of the largest logit in each row.
        preds = torch.argmax(logits, dim=1).cpu().numpy()

        all_preds.extend(preds)
        all_labels.extend(labels.numpy())






In [None]:
# Fraction of test samples whose predicted class id equals the true one.
accuracy = accuracy_score(y_true=all_labels, y_pred=all_preds)
print(f'Test Accuracy: {accuracy:.4f}')


In [None]:
def suggest_section(complaint_description):
    """Predict the section label for a free-text complaint.

    Args:
        complaint_description: The complaint text (a single string).

    Returns:
        The value from the 'section' column that the fine-tuned model considers
        most likely for this complaint.
    """
    # Training labels are the category codes of train_data['section'], i.e. indices
    # into that column's category list. Decoding must use the very same list:
    # data['section'].unique() is in order of first appearance, so indexing it with
    # a category code returns an unrelated section.
    section_labels = train_data['section'].astype('category').cat.categories

    tokenized_description = tokenizer(complaint_description, padding=True, truncation=True, return_tensors='pt')
    input_ids = tokenized_description['input_ids'].to(device)
    attention_mask = tokenized_description['attention_mask'].to(device)

    model.eval()
    with torch.no_grad():
        logits = model(input_ids, attention_mask=attention_mask).logits
        # Restrict the argmax to outputs that correspond to a label seen in training;
        # the classification head may have more outputs than the training split has
        # sections, and such an index could not be decoded.
        predicted_class = torch.argmax(logits[:, :len(section_labels)], dim=1).item()

    # Convert the predicted class back to the original section label
    return section_labels[predicted_class]


In [None]:
# Example usage: classify one sample complaint and show the predicted section.
complaint_description = "The suspect stole my wallet and assaulted me."
suggested_section = suggest_section(complaint_description=complaint_description)
print("Suggested Section:", suggested_section)


In [None]:
def generate_fir(complainant_name, father_name, address, phone_number, email,
                 place_of_occurrence, date_of_occurrence, property_description,
                 accused_description, witness_details, complaint):
    """Build the text of an FIR for one complaint.

    The section is predicted from ``complaint`` with ``suggest_section``; the
    bailable / cognizable / court / punishment details are taken from the first
    dataset row that has that section.

    Args:
        complainant_name: Name of the complainant / informant.
        father_name: Father's or husband's name.
        address: Complainant's address.
        phone_number: Phone number (and fax).
        email: Complainant's email address.
        place_of_occurrence: Where the incident happened.
        date_of_occurrence: Date and hour of the incident.
        property_description: Particulars of the property involved.
        accused_description: Description of the accused.
        witness_details: Details of witnesses, if any.
        complaint: Free-text complaint; also the input to the section classifier.

    Returns:
        The filled-in FIR as a single multi-line string.

    Raises:
        IndexError: If no dataset row carries the predicted section.
    """
    # Predict the section from THIS complaint. Reading a notebook-level
    # `suggested_section` left over from an earlier cell would put another
    # complaint's section (and its legal details) into the report.
    suggested_section = suggest_section(complaint)

    # Fetch additional information from the dataset based on the suggested section
    section_info = data[data['section'] == suggested_section].iloc[0]
    bailable = section_info['Bailable']
    cognizable = section_info['Cognizable']
    court = section_info['Court']
    punishment = section_info['Punishment']

    # The nature of the offence is filled with the model's prediction for the complaint.
    # NOTE(review): this is the section label itself; if the dataset has a descriptive
    # offence column it may be a better source for this field - confirm.
    offense_nature = suggested_section

    # Create the FIR template
    fir_template = f"""Police Station: [Police Station]
District: [District]

1. Personal details of the Complainant / Informant:
(a) Name: {complainant_name}
(b) Father's / Husband's Name: {father_name}
(c) Address: {address}
(d) Phone number & Fax: {phone_number}
(e) Email: {email}

2. Place of Occurrence: {place_of_occurrence}

3. Date and Hour of Occurrence: {date_of_occurrence}

4. Offence:
(a) Nature of the offence: {offense_nature}  
(b) Section: {suggested_section}  
(c) Particulars of the property: {property_description}

5. Description of the accused: {accused_description} 

6. Additional Section Information:
   - Bailable: {bailable}
   - Cognizable: {cognizable}
   - Court: {court}
   - Punishment: {punishment}

7. Details of witnesses (if any): {witness_details}

8. Complaint: {complaint}
"""

    # Return the generated FIR
    return fir_template

In [None]:
# Generate a sample FIR; keyword arguments make clear which value fills which field.
fir_text = generate_fir(
    complainant_name="John Doe",
    father_name="Mr. Doe",
    address="123 Main St",
    phone_number="555-1234",
    email="john@example.com",
    place_of_occurrence="Public Park",
    date_of_occurrence="2024-01-15 15:30:00",
    property_description="Stolen wallet and assault",
    accused_description="Tall person, wearing a black jacket",
    witness_details="Witness A, Witness B",
    complaint="Reporting the theft and assault incident.",
)
print(fir_text)