In [1]:
import pandas as pd
import numpy as np
import pickle
import os
import ast
import requests
import re
import json
import datetime
from google.cloud import bigquery
from google.oauth2 import service_account

In [2]:
# method to trigger the prediction model
def predict_volcano_eruption(model, earthquake_data):
    """Run the given model on the earthquake features and return its output.

    Parameters
    ----------
    model
        Any object exposing a ``predict`` method.
    earthquake_data
        Feature data handed unchanged to ``model.predict``.

    Returns
    -------
    Whatever ``model.predict(earthquake_data)`` returns.
    """
    return model.predict(earthquake_data)

# Authenticate with BigQuery using a service-account credential JSON file.
# NOTE(review): keep this key file out of version control.
credentials_path = 'big-data-final-project-407314-840759d2dedd.json'
dataset_id = 'earthquakes'
table_id = 'quakes'
project_id = 'big-data-final-project-407314'

# Initialize the BigQuery client.
credentials = service_account.Credentials.from_service_account_file(credentials_path)
client = bigquery.Client(credentials=credentials, project=project_id)
table_ref = f"{project_id}.{dataset_id}.{table_id}"

# Query selecting the seismic activity of the previous day.
# The table name is taken from `table_ref` so the settings above are the
# single source of truth (the rendered SQL is unchanged for the current values).
query = f"""
    SELECT *
    FROM `{table_ref}`
    WHERE TIMESTAMP(t) >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 DAY)
      AND TIMESTAMP(t) < CURRENT_TIMESTAMP()
"""

# Execute the query and convert the result to a DataFrame.
earthquake_data = client.query(query).to_dataframe()

# Transform the streaming-data results to conform with the data used during
# model building: rename the columns to the model's feature names, then keep
# only those features in the expected order. Selecting `new_order` discards
# every other column (t, a, dD, dL, dR, q, ...), so no explicit drops are
# needed and a missing non-feature column no longer raises KeyError.
new_order = ['latitude', 'longitude', 'depth', 'mag']
earthquake_data = earthquake_data.rename(
    columns={'lat': 'latitude', 'lon': 'longitude', 'dep': 'depth', 's': 'mag'}
)[new_order]

# Load the pre-trained model.
# SECURITY: pickle.load can execute arbitrary code; only load model files
# that come from a trusted source.
model_file = r'best_gradient_boosting_model.pkl'
with open(model_file, 'rb') as file:
    model = pickle.load(file)

# Predict volcano eruptions.
if earthquake_data.empty:
    # No earthquakes were recorded in the window. Skip the model call:
    # presumably this is a scikit-learn style estimator, which rejects
    # zero-row input — TODO confirm. An empty array is handled downstream.
    predictions = np.array([], dtype=int)
else:
    predictions = predict_volcano_eruption(model, earthquake_data)
print(predictions)


[1 0 0 1 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1]


In [3]:
# interpreting predictions to Eruption Likely and Eruption Unlikely
def interpret_predictions(predictions):
    """Translate each prediction into a human-readable label.

    A value equal to 1 becomes "Eruption Likely"; anything else becomes
    "Eruption Unlikely". The labels are returned as a list in input order.
    """
    return [
        "Eruption Likely" if value == 1 else "Eruption Unlikely"
        for value in predictions
    ]

# method to calculate the likelihood of an eruption happening
def calculate_eruption_likelihood(interpreted_predictions):
    """Return the percentage of entries labelled "Eruption Likely".

    Parameters
    ----------
    interpreted_predictions
        Sequence of label strings supporting ``.count`` and ``len``.

    Returns
    -------
    The share of "Eruption Likely" entries on a 0-100 scale, or ``0`` when
    the sequence is empty (which also avoids dividing by zero).
    """
    likely = interpreted_predictions.count("Eruption Likely")
    total = len(interpreted_predictions)
    return (likely / total) * 100 if total else 0


# Work with a plain Python list: numpy arrays are converted, anything else
# is passed through untouched.
if isinstance(predictions, np.ndarray):
    model_predictions = predictions.tolist()
else:
    model_predictions = predictions

# Turn the raw predictions into labels, then into a likelihood percentage.
interpreted_predictions = interpret_predictions(model_predictions)
eruption_likelihood_percentage = calculate_eruption_likelihood(interpreted_predictions)

# Build and show the human-readable summary (percentage with two decimals).
message = f"The likelihood of a volcano eruption in Iceland is {eruption_likelihood_percentage:.2f}%."
print(message)


The likelihood of a volcano eruption in Iceland is 70.00%.


##### Of the data sources listed in my project proposal, I used only the Icelandic Meteorological Office, the US Geological Survey (USGS) Earthquake Hazards Program, and the Smithsonian Institution's Global Volcanism Program. Instead of generic Google Cloud Storage, I used BigQuery.