# Earthquake Dashboard
----
*Florian Cywinski,
preprocessEarthquakeData,
Data Science,
Berlin, Germany*
----
https://www.kaggle.com/datasets/usgs/earthquake-database
----
Significant Earthquakes, 1965-2016 <br>
Date, time, and location of all earthquakes with a magnitude of 5.5 or higher
----

In [7]:
import pandas as pd

# Preprocess the earthquake CSV into three JSON files for the dashboard:
# the filtered event list, the number of events per year, and the mean
# magnitude per year.

# 1. Load the raw CSV
df = pd.read_csv('/home/......./........../......../earthquake-dashboard-nextjs/public/data/earthquake-database.csv')
print('Originale CSV-Datei')
display(df.head())

# 2./3. Keep only events with magnitude >= 5.5 and the columns used downstream.
# The explicit .copy() detaches the result from the raw frame, so the column
# assignments below operate on an independent DataFrame and do not trigger
# pandas' SettingWithCopyWarning.
required_columns = ['Date', 'Time', 'Latitude', 'Longitude', 'Magnitude']
df = df.loc[df['Magnitude'] >= 5.5, required_columns].copy()

# 4. Parse the date column; values that cannot be parsed become NaT.
# Parsing as UTC and then dropping the timezone yields naive timestamps.
# NOTE(review): errors='coerce' combined with the dropna below silently
# discards every row whose date does not parse — confirm that no valid rows
# (e.g. timestamps in a format other than the MM/DD/YYYY seen in the sample)
# are lost this way.
df['Date'] = pd.to_datetime(df['Date'], errors='coerce', utc=True)
df['Date'] = df['Date'].dt.tz_localize(None)

# 5. Drop rows with a missing value in any required column
df = df.dropna(subset=required_columns)

# 6. Extra column 'Year', taken from the parsed date before it is turned into
# a string (avoids parsing the formatted text a second time)
df['Year'] = df['Date'].dt.year

# Format the date as an ISO 'YYYY-MM-DD' string for the JSON output
df['Date'] = df['Date'].dt.strftime('%Y-%m-%d')

# 7. Output location inside the public data folder
out_dir = '../public/data/'  # relative path, as seen from the scripts folder

# 8. Filtered events as JSON
filtered_path = out_dir + 'filtered_earthquakes.json'
df.to_json(filtered_path, orient='records', indent=2, force_ascii=False)
print('Gefilterte CSV-Datei')
display(df.head())

# 9. Number of earthquakes per year
earthquakes_per_year = (
    df.groupby('Year')
      .size()
      .reset_index(name='count')
)
epery_path = out_dir + 'earthquakes_per_year.json'
earthquakes_per_year.to_json(epery_path, orient='records', indent=2)
print('Häufigkeit der Erdbeben pro Jahr')
display(earthquakes_per_year.head())

# 10. Mean magnitude per year
avg_magnitude_per_year = (
    df.groupby('Year')['Magnitude']
      .mean()
      .reset_index(name='averageMagnitude')
)
amagy_path = out_dir + 'avg_magnitude_per_year.json'
avg_magnitude_per_year.to_json(amagy_path, orient='records', indent=2)
print('Durchschnittliche Magnitude pro Jahr')
display(avg_magnitude_per_year.head())

print(f"→ JSON-Dateien geschrieben nach {out_dir}")

Originale CSV-Datei


Unnamed: 0,Date,Time,Latitude,Longitude,Type,Depth,Depth Error,Depth Seismic Stations,Magnitude,Magnitude Type,...,Magnitude Seismic Stations,Azimuthal Gap,Horizontal Distance,Horizontal Error,Root Mean Square,ID,Source,Location Source,Magnitude Source,Status
0,01/02/1965,13:44:18,19.246,145.616,Earthquake,131.6,,,6.0,MW,...,,,,,,ISCGEM860706,ISCGEM,ISCGEM,ISCGEM,Automatic
1,01/04/1965,11:29:49,1.863,127.352,Earthquake,80.0,,,5.8,MW,...,,,,,,ISCGEM860737,ISCGEM,ISCGEM,ISCGEM,Automatic
2,01/05/1965,18:05:58,-20.579,-173.972,Earthquake,20.0,,,6.2,MW,...,,,,,,ISCGEM860762,ISCGEM,ISCGEM,ISCGEM,Automatic
3,01/08/1965,18:49:43,-59.076,-23.557,Earthquake,15.0,,,5.8,MW,...,,,,,,ISCGEM860856,ISCGEM,ISCGEM,ISCGEM,Automatic
4,01/09/1965,13:32:50,11.938,126.427,Earthquake,15.0,,,5.8,MW,...,,,,,,ISCGEM860890,ISCGEM,ISCGEM,ISCGEM,Automatic


Gefilterte CSV-Datei


Unnamed: 0,Date,Time,Latitude,Longitude,Magnitude,Year
0,1965-01-02,13:44:18,19.246,145.616,6.0,1965
1,1965-01-04,11:29:49,1.863,127.352,5.8,1965
2,1965-01-05,18:05:58,-20.579,-173.972,6.2,1965
3,1965-01-08,18:49:43,-59.076,-23.557,5.8,1965
4,1965-01-09,13:32:50,11.938,126.427,5.8,1965


Häufigkeit der Erdbeben pro Jahr


Unnamed: 0,Year,count
0,1965,339
1,1966,234
2,1967,255
3,1968,305
4,1969,323


Durchschnittliche Magnitude pro Jahr


Unnamed: 0,Year,averageMagnitude
0,1965,6.014159
1,1966,6.04047
2,1967,6.003922
3,1968,6.078525
4,1969,6.00935


→ JSON-Dateien geschrieben nach ../public/data/
