In [None]:
# Mount Google Drive so that later cells can read the model pickle and the CSV
# stored under /content/drive/MyDrive. This only works inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')  # mount point used by every Drive path in this notebook

Mounted at /content/drive


In [None]:
# Load the fine-tuned classifier that was saved as a pickle on the mounted Drive.
# SECURITY: pickle.load runs arbitrary code embedded in the file, so only open
# pickles you created yourself or otherwise fully trust.
# NOTE(review): later cells call `model(**inputs)` and read `.logits`, so this is
# presumably a Hugging Face sequence-classification model - confirm. The library
# that defines its class must be importable here for unpickling to succeed.
import pickle
with open('/content/drive/MyDrive/ML_Project/mlmodel.pkl', 'rb') as f:
    model = pickle.load(f)

In [None]:
# Import necessary libraries
from transformers import DistilBertTokenizer
import torch

In [None]:
# Load the tokenizer and choose the compute device.
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# CPU-only runtime instead of failing on the first `.to(device)` call.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Keep the unpickled model on the same device as its inputs, and switch it to
# inference mode: a pickled module keeps whatever train/eval flag it was saved
# with, and active dropout would make predictions vary from run to run.
model.to(device)
model.eval()

# Smoke-test the pipeline on a single sentence: disaster prediction first,
# location extraction second.
test_sentence = "There is a cyclone in Florida"

# Tokenize the sentence into PyTorch tensors and move them to the model's device.
test_input = tokenizer(
    test_sentence,
    return_tensors='pt',
    truncation=True,
    padding=True,
)
test_input = {k: v.to(device) for k, v in test_input.items()}

# Forward pass only; no_grad() avoids building an autograd graph that would
# never be used and saves memory.
with torch.no_grad():
    test_output = model(**test_input)
    test_prediction = torch.argmax(test_output.logits, dim=-1)  # index of the highest logit

# Any non-zero class index is reported as a disaster.
# NOTE(review): assumes label 0 means "not a disaster" - confirm against the training labels.
print(f'Test sentence: "{test_sentence}" is {"a disaster" if test_prediction.item() else "not a disaster"}')

# Use spaCy named entity recognition (NER) to pull out the places mentioned in
# the sentence. Only entities labelled GPE (geo-political entities) are kept.
import spacy
nlp = spacy.load('en_core_web_sm')  # small English pipeline; must be installed in the runtime
doc = nlp(test_sentence)
locations = [ent.text for ent in doc.ents if ent.label_ == 'GPE']
print("Disaster Locations:", locations)

In [None]:
# Read the scraped data from the mounted Drive into a DataFrame; the following
# cell iterates over its 'Text' column.
import pandas as pd
path = "/content/drive/MyDrive/ML_Project/fb_scraped.csv"  # CSV location on the mounted Drive
scraped_df = pd.read_csv(path)  # parsed with pandas' default options

In [None]:
# Classify every entry of the 'Text' column and extract the locations it mentions.
# Relies on `model`, `tokenizer`, `device` and `nlp` created in earlier cells.

# Make sure dropout is disabled before scoring (a no-op when the model is
# already in inference mode).
model.eval()

# Skip missing entries and coerce the rest to `str`: pandas reads empty CSV
# fields as float NaN, which neither the tokenizer nor spaCy accepts, so a
# single blank row would otherwise abort the whole loop.
for texts in scraped_df['Text'].dropna().astype(str):
    test_sentence = texts  # current sentence (both names are kept for later cells)

    # Tokenize the sentence into PyTorch tensors and move them to the model's device.
    test_input = tokenizer(
        test_sentence,
        return_tensors='pt',
        truncation=True,
        padding=True,
    )
    test_input = {k: v.to(device) for k, v in test_input.items()}

    # Forward pass only; no_grad() keeps memory flat across many rows because
    # no autograd graph is recorded.
    with torch.no_grad():
        test_output = model(**test_input)
        test_prediction = torch.argmax(test_output.logits, dim=-1)  # index of the highest logit

    # Any non-zero class index is reported as a disaster.
    print(f'Test sentence: "{test_sentence}" is {"a disaster" if test_prediction.item() else "not a disaster"}')

    # NER on the same sentence; only GPE (geo-political) entities are kept.
    doc = nlp(test_sentence)
    locations = [ent.text for ent in doc.ents if ent.label_ == 'GPE']
    print("Disaster Locations:", locations)