# [View in nbviewer to see the interactive chart](https://nbviewer.org/github/HighTechnologyFoundation/HTFCloudFellows/blob/Justin-Lin/USGS%20-%20Earthquake/Earthquake_analysis.ipynb)

# Analysis and Visualization using Earthquake Data from [United States Geological Survey (USGS)](https://earthquake.usgs.gov/earthquakes/search/)

## Task 1: Exploratory Data Analysis of Earthquakes with Magnitude > 6.5 (1900-2023)

1. Utilize `sweetviz` to generate an interactive dashboard for visualizing earthquake data.
2. Extract country names from the text data using the `pycountry` package.
3. Create two subplots: one showing earthquake counts for the top ten countries, and one showing earthquake counts per year.

## Task 2: Exploratory Data Analysis of Earthquakes in Taiwan (03/2024-04/2024)

1. Perform date-time processing on the data (split each event timestamp into separate `date` and `time` columns).
2. Utilize various plotting libraries to create maps:
	* `matplotlib` for static images.
	* `plotly` for interactive visualizations.
	* `geopandas` for geospatial analysis and visualization.
	* `pygmt` for high-performance geographic mapping.
---

## Task 1

In [None]:
import pandas as pd
# Show every column when a DataFrame is displayed instead of eliding the middle.
pd.set_option('display.max_columns', None)

# Read the magnitude 6.5+ file and append a `year` column derived from each
# event's `time` value.
df = pd.read_csv('mag6.5+.csv').assign(
    year=lambda quakes: pd.to_datetime(quakes['time']).dt.year
)
df.head()

In [None]:
import sweetviz as sv

# Profile `df` with sweetviz, then write the resulting dashboard to
# report.html.
analyze_report = sv.analyze(source=df)
analyze_report.show_html(filepath='report.html')

<div align=center>
<img src="sweetviz_output.png" width="900"/>
</div>

In [3]:
import pycountry

import functools


@functools.lru_cache(maxsize=None)
def _country_names_longest_first():
    """Return ``(name, lowercase name)`` pairs for every pycountry country.

    The pairs are ordered from the longest name to the shortest so that a
    longer name is tried before any shorter name it contains. The result is
    cached, so the list is built and sorted once rather than on every lookup.
    """
    names = sorted(
        (country.name for country in pycountry.countries),
        key=lambda name: -len(name),
    )
    return tuple((name, name.lower()) for name in names)


def findCountry(stringText):
    """Return the first country name found inside ``stringText``.

    Matching is a case-insensitive substring test against the pycountry
    country names, longest name first. Because it is a substring test, a
    country name embedded in a longer word also counts as a match.

    Parameters
    ----------
    stringText : str
        Free-text place description to search.

    Returns
    -------
    str or None
        The matched pycountry name, ``'Taiwan'`` when no pycountry name
        matches but the text mentions Taiwan, or ``None`` when nothing matches
        or the input is empty or not a string (for example a missing value).
    """
    # Missing values arrive from pandas as float NaN / None; treat them as
    # "no country" instead of failing on ``.lower()``.
    if not isinstance(stringText, str) or not stringText:
        return None

    # Lowercase the haystack once instead of once per candidate name.
    lowered_text = stringText.lower()
    for name, lowered_name in _country_names_longest_first():
        if lowered_name in lowered_text:
            return name

    # Fallback for Taiwan, applied with the same case-insensitive rule as the
    # names above.
    # NOTE(review): presumably needed because pycountry lists Taiwan under a
    # longer official name -- confirm.
    if 'taiwan' in lowered_text:
        return 'Taiwan'
    return None

In [4]:
# Resolve a country for every place description. Iterating over the values
# directly avoids the per-row label lookup `df.place[i]`, which only works
# when `df` still has its default 0..n-1 index.
country_list = [findCountry(place) for place in df['place']]

# Slim frame holding just the columns used for the summary plots.
data = {'year': df['year'], 'country': country_list, 'magnitude': df['mag']}
modified_df = pd.DataFrame(data)

In [None]:
import matplotlib.pyplot as plt
plt.style.use('ggplot')

# Two side-by-side panels: counts per country (left) and per year (right).
fig, axs = plt.subplots(1, 2, figsize=(18, 8))

# Left panel: the ten countries with the most events. Sorting ascending and
# taking the tail puts the largest bar at the top of the horizontal chart.
# (The title and task description both say "top ten", so plot 10 rows.)
country_counts = modified_df['country'].value_counts().sort_values().tail(10)
country_counts.plot(kind='barh', color='tab:blue', ax=axs[0])
axs[0].set_title('Top ten countries with most earthquakes (magnitude 6.5+)\nfrom year 1900 to present')
axs[0].set_ylabel('')

# Right panel: number of events per year, in chronological order.
modified_df['year'].value_counts().sort_index().plot(kind='line', color='tab:blue', ax=axs[1])
axs[1].set_title('Earthquake (magnitude 6.5+) counts from year 1900 to present')
axs[1].tick_params(axis='x', labelrotation=45)
axs[1].set_xlabel('')

# Dashed reference line: total number of events divided by the number of
# distinct years present in the data.
axs[1].axhline(
    y=df.shape[0] / len(df['year'].unique()),
    color='red',
    linestyle='--',
    alpha=0.5,
)

plt.show()

## Task 2

In [None]:
# Read the query export, parse the `time` column, then replace it with two
# formatted string columns: `date` (YYYY/MM/DD) and `time` (HH:MM:SS).
# Finally keep only the columns used below and rename `mag` to `magnitude`.
df = (
    pd.read_csv('query.csv')
    .assign(time=lambda quakes: pd.to_datetime(quakes['time']))
    .assign(
        # `date` is computed first, while `time` still holds parsed timestamps.
        date=lambda quakes: quakes['time'].dt.strftime('%Y/%m/%d'),
        time=lambda quakes: quakes['time'].dt.strftime('%H:%M:%S'),
    )
    .loc[:, ['date', 'time', 'latitude', 'longitude', 'depth', 'mag', 'place']]
    .rename(columns={'mag': 'magnitude'})
)

df.head()

In [None]:
import plotly.express as px
import plotly.io as pio
from plotly.offline import init_notebook_mode

# Set up plotly for rendering inside the notebook.
init_notebook_mode(connected=True)
pio.renderers.default = 'notebook'

# One marker per event, coloured by magnitude; hovering shows the place text
# and the view is fitted to the plotted locations.
fig = px.scatter_geo(
    df,
    lat='latitude',
    lon='longitude',
    color='magnitude',
    hover_name='place',
    fitbounds='locations',
)

fig.update_geos(projection_type='natural earth')

# Centred title reporting the first and last date present in the data.
fig.update_layout(
    title={
        'text': f'Earthquake (magnitude > 2.5) from {min(df.date)} to {max(df.date)}',
        'x': 0.5,
    }
)

fig.show()

In [None]:
import geopandas as gpd

# Keep only the events whose place description mentions Taiwan.
# `na=False` counts a missing place as "no match"; without it the mask would
# contain NaN and the boolean indexing would raise. `regex=False` makes the
# pattern a plain literal substring.
df_Taiwan = df[df['place'].str.contains('Taiwan', na=False, regex=False)]

# Attach point geometries built from longitude/latitude, tagged as EPSG:4326.
gdf = gpd.GeoDataFrame(
    df_Taiwan,
    geometry=gpd.points_from_xy(df_Taiwan['longitude'], df_Taiwan['latitude']),
    crs='EPSG:4326',
)

# Interactive map with points coloured by the `magnitude` column.
gdf.explore('magnitude')

In [None]:
import pygmt

# Map extent: the bounding box of the Taiwan events, widened by one degree on
# every side, in the order [west, east, south, north].
region = [
    df_Taiwan['longitude'].min() - 1,
    df_Taiwan['longitude'].max() + 1,
    df_Taiwan['latitude'].min() - 1,
    df_Taiwan['latitude'].max() + 1,
]

fig = pygmt.Figure()

# Base layer: coastlines with coloured land and water.
fig.coast(
    region=region,
    land="lightgreen",
    water="lightblue",
    shorelines=True,
)

# Frame plus a title giving the first and last event date.
fig.basemap(
    frame=[
        "afg",
        f'+tEarthquake in Taiwan from {min(df_Taiwan.date)} to {max(df_Taiwan.date)}',
    ]
)

# Colour palette spanning the observed depth range; used by the plot and the
# colorbar below.
pygmt.makecpt(
    cmap="plasma",
    series=[df_Taiwan['depth'].min(), df_Taiwan['depth'].max()],
)

# One fixed-size symbol per event, filled according to its depth.
fig.plot(
    x=df_Taiwan['longitude'],
    y=df_Taiwan['latitude'],
    style="c0.3c",
    fill=df_Taiwan['depth'],
    cmap=True,
    pen="black",
)
fig.colorbar(frame=["a", "y+lDepth (km)"])
fig.show()

In [None]:
fig = pygmt.Figure()

# Colour palette for the relief layer.
pygmt.makecpt(
    cmap='etopo1',
    series='-8000/5000/1000',  # elevations from -8000 m up to 5000 m
    continuous=True,
)

# Name of the relief grid to draw.
topo_df_Taiwan = "@earth_relief_30s"

# Shaded relief image over the same region as the previous map.
fig.grdimage(
    grid=topo_df_Taiwan,
    region=region,
    projection='M4i',
    frame=True,
    shading=True,
)

fig.coast(shorelines=True, frame=True)

# Switch to a palette spanning the observed depth range; the event symbols
# and the colorbar below use it.
pygmt.makecpt(
    cmap="jet",
    series=[df_Taiwan['depth'].min(), df_Taiwan['depth'].max()],
)

# One symbol per event: size scales with magnitude, fill encodes depth.
fig.plot(
    x=df_Taiwan['longitude'],
    y=df_Taiwan['latitude'],
    size=df_Taiwan['magnitude'] * 0.1,
    style="cc",
    fill=df_Taiwan['depth'],
    cmap=True,
    pen="black",
)

# Title giving the first and last event date.
fig.basemap(frame=f'+tEarthquake in Taiwan from {min(df_Taiwan.date)} to {max(df_Taiwan.date)}')

fig.colorbar(frame='af+l"Depth (km)"')

fig.show()