In [1]:
# Environment bootstrap: every line below is a shell escape run outside Python.
# Install a headless Java 8 JDK quietly (-qq) and discard the command's stdout.
!apt-get install openjdk-8-jdk-headless -qq > /dev/null
# Download the Spark 3.2.4 binary distribution built for Hadoop 3.2 from the Apache archive.
!wget -q https://archive.apache.org/dist/spark/spark-3.2.4/spark-3.2.4-bin-hadoop3.2.tgz
# Unpack the tarball into the current working directory.
!tar xf spark-3.2.4-bin-hadoop3.2.tgz
# Install findspark, which is imported by a later cell.
!pip install -q findspark

In [2]:
import os
# Tell the Spark tooling where Java and the Spark distribution live.
# The Spark path is the directory unpacked from the tarball in the first cell;
# the Java path is presumably where the openjdk-8 package installs — verify if
# the base image changes.
os.environ.update(
    {
        "JAVA_HOME": "/usr/lib/jvm/java-8-openjdk-amd64",
        "SPARK_HOME": "/content/spark-3.2.4-bin-hadoop3.2",
    }
)

In [3]:
import findspark
# Initialise findspark so that `pyspark` can be imported by the cells below.
findspark.init()
# As the last expression of the cell, the value returned by find() is echoed as
# the cell output (the saved output shows the Spark home directory).
findspark.find()

'/content/spark-3.2.4-bin-hadoop3.2'

In [4]:
# Importing libraries
from pyspark import SparkContext
import math
from datetime import datetime, timedelta

In [18]:
# Load data
# No cell shown above creates `spark_context`, so obtain it here: getOrCreate()
# returns the already-running SparkContext when there is one and starts a new
# one otherwise. This keeps the cell working after "Restart Kernel -> Run All".
spark_context = SparkContext.getOrCreate()
data_rdd = spark_context.textFile("/content/Earthquakes.txt")  # Replace with your path

# Skip header: take the first line of the file and drop every line equal to it.
header = data_rdd.first()
lines = data_rdd.filter(lambda line: line != header)

# Parse one tab-separated catalogue line into a tuple.
def parse_line(line):
    """Convert one tab-separated line into an earthquake record tuple.

    Columns are read by position: 1 (event id), 2 (date, whose text before the
    first "." is taken as the year), 4 (latitude), 5 (longitude) and
    8 (magnitude; the notebook uses the 'ML' column).

    Args:
        line: One data line of the input file (not the header).

    Returns:
        Tuple ``(year, date, lat, lon, mag, event_id)`` where ``year`` is an
        int, ``lat``/``lon``/``mag`` are floats, and ``date``/``event_id`` are
        the raw strings from the file.

    Raises:
        IndexError: If the line has too few columns.
        ValueError: If the year or a numeric column cannot be converted.
    """
    columns = line.split("\t")
    event_id, date = columns[1], columns[2]
    year_text, _, _ = date.partition(".")
    year = int(year_text)
    # Positions 4, 5 and 8 hold latitude, longitude and magnitude respectively.
    lat, lon, mag = (float(columns[position]) for position in (4, 5, 8))
    return (year, date, lat, lon, mag, event_id)

# Apply parse_line to every non-header line, giving an RDD of
# (year, date, lat, lon, mag, event_id) tuples.
parsed_data = lines.map(parse_line)

In [19]:
# Keep only events whose year lies between 1990 and 2019, both inclusive
# (the year is element 0 of each parsed tuple).
filtered_data = parsed_data.filter(
    lambda record: record[0] >= 1990 and record[0] <= 2019
)

# Collect the ten records with the largest magnitude (element 4 of the tuple).
top_10_earthquakes = filtered_data.top(10, key=lambda record: record[4])

# Haversine formula to compute distance between two points
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Args:
        lat1: Latitude of the first point, in decimal degrees.
        lon1: Longitude of the first point, in decimal degrees.
        lat2: Latitude of the second point, in decimal degrees.
        lon2: Longitude of the second point, in decimal degrees.

    Returns:
        Distance in kilometres on a sphere of radius 6371 km.
    """
    R = 6371  # Earth radius in kilometers
    dLat = math.radians(lat2 - lat1)
    dLon = math.radians(lon2 - lon1)

    a = (math.sin(dLat/2) ** 2 +
         math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
         math.sin(dLon/2) ** 2)

    # Floating-point rounding can leave `a` marginally outside [0, 1] (for
    # example for near-antipodal points), which would make the square roots
    # below raise ValueError. Clamp it to the valid range first.
    a = min(1.0, max(0.0, a))

    distance = 2 * R * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return distance



In [20]:
# Print the candidate foreshocks and aftershocks of each top-ten earthquake.
# NOTE(review): find_shocks is not defined in any cell shown here, so it must
# come from a cell that is missing from this view — confirm it survives a fresh
# "Run All". The saved output lists event 19990817151752 under both headings
# for the 1999.08.17 mainshock, which suggests same-day events are not being
# separated by time of day — verify inside find_shocks.
for quake in top_10_earthquakes:
    foreshocks, aftershocks = find_shocks(quake)

    print(f"\nFor earthquake with Event ID: {quake[5]} on {quake[1]} with magnitude {quake[4]}:")

    # Both lists use the same row layout, so one inner loop prints each in turn.
    for heading, shocks in (("\nPotential Foreshocks:", foreshocks),
                            ("\nPotential Aftershocks:", aftershocks)):
        print(heading)
        for s in shocks:
            print(f"Event ID: {s[5]}, Date: {s[1]}, Magnitude: {s[4]}, Location: ({s[2]}, {s[3]})")



For earthquake with Event ID: 19990817000137 on 1999.08.17 with magnitude 6.7:

Potential Foreshocks:
Event ID: 19990817151752, Date: 1999.08.17, Magnitude: 4.1, Location: (40.75, 29.75)

Potential Aftershocks:
Event ID: 19990818211738, Date: 1999.08.18, Magnitude: 4.0, Location: (40.86, 30.04)
Event ID: 19990817151752, Date: 1999.08.17, Magnitude: 4.1, Location: (40.75, 29.75)

For earthquake with Event ID: 19980627135551 on 1998.06.27 with magnitude 6.3:

Potential Foreshocks:
Event ID: 19980627205016, Date: 1998.06.27, Magnitude: 4.1, Location: (36.84, 35.56)
Event ID: 19980627200307, Date: 1998.06.27, Magnitude: 3.7, Location: (36.97, 35.65)
Event ID: 19980627185439, Date: 1998.06.27, Magnitude: 4.0, Location: (37.01, 35.68)
Event ID: 19980627151218, Date: 1998.06.27, Magnitude: 3.5, Location: (36.97, 35.7)
Event ID: 19980627140737, Date: 1998.06.27, Magnitude: 4.2, Location: (36.81, 35.61)

Potential Aftershocks:
Event ID: 19980628152034, Date: 1998.06.28, Magnitude: 4.1, Locatio