# Data Scientist Capstone: Turkey Earthquakes

## Import libraries

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import seaborn as sns
import geopy
from geopy.geocoders import Nominatim
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
from pyhigh import get_elevation
import requests
pd.set_option('display.max_columns', None)

## Remote Request and Plotly Chart Configuration

In [2]:
# Function to make a remote HTTP GET request
def make_remote_request(url, params, timeout=30):
    """
    This function sends an HTTP GET request to a remote URL with query parameters.

    Args:
        url (str): The URL to send the request to.
        params (dict): A dictionary of query parameters to include in the request.
        timeout (float or tuple, optional): Seconds to wait for the server before
            giving up (default=30). Without a timeout, `requests` may wait
            indefinitely on an unresponsive server.

    Returns:
        requests.Response: The response object received from the remote server.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within `timeout`.
    """
    return requests.get(url, params=params, timeout=timeout)

# Function to configure a Plotly chart
def configure_chart(fig, title, x_title, y_title, change_layout=True, sort_axes=True):
    """
    Apply a bold centered title, axis labels and optional styling to a Plotly chart.

    Args:
        fig (plotly.graph_objs.Figure): The Plotly figure to configure (modified in place).
        title (str): The chart title; it is wrapped in <b>...</b> tags.
        x_title (str): The label for the X-axis.
        y_title (str): The label for the Y-axis.
        change_layout (bool, optional): When True, make the plot and paper
            backgrounds fully transparent (default=True).
        sort_axes (bool, optional): When True, order X-axis categories by
            "total descending" (default=True).

    Returns:
        plotly.graph_objs.Figure: The same figure object that was passed in.
    """
    if change_layout:
        transparent = 'rgba(0,0,0,0)'
        fig.update_layout({'plot_bgcolor': transparent, 'paper_bgcolor': transparent})

    # Title: bold text, centered horizontally, placed near the top of the figure.
    title_font = dict(family="Arial", size=18, color='#581738')
    title_spec = dict(text=f'<b>{title}</b>', x=0.5, y=0.95, font=title_font)
    fig.update_layout(title=title_spec)

    fig.update_yaxes(title_text=y_title)
    fig.update_xaxes(title_text=x_title)

    if not sort_axes:
        return fig

    fig.update_xaxes(categoryorder="total descending")
    return fig

# General Overview on Dataset

About Dataset
Turkey Earthquake Dataset (1991-2023)
Description:

This dataset contains information about earthquakes that occurred in Turkey from 1991 to 2023. The data includes the date and time of each earthquake, its magnitude, depth, and the latitude and longitude of the epicenter.
Content:

Each row in the dataset represents a single earthquake event. The columns in the dataset are as follows:

    Time: The date and time of the earthquake.
    Magnitude: The magnitude of the earthquake on the Richter scale.
    Depth: The depth of the earthquake's epicenter in kilometers (will be converted to meters).
    Latitude: The latitude of the earthquake's epicenter.
    Longitude: The longitude of the earthquake's epicenter.

Acknowledgements:

The data was collected from the USGS Earthquake Catalog API.
Usage Policy:

This dataset is provided for educational and research purposes only. It should not be used for commercial purposes.


Source: https://www.kaggle.com/datasets/atasaygin/turkey-earthquakes-1915-2023

# About My Project

In this project, I will analyze seismic data from 1991 to 2023 to answer three questions:

    Can a pattern be found for the largest seismic magnitude over the years?

    Is the pattern of magnitudes constant or variable, and how does it appear?

    Do the depths of the earthquake epicenters remain constant at a certain level or do they vary?

## Download Dataset and Create DataFrame

In [3]:
# Download the dataset from my GitHub repository:
github_url = 'https://raw.githubusercontent.com/AlKhurayyif/Data-Scientist-Capstone/main/earthquakes.csv'

# Read the CSV at that URL into the DataFrame used by the rest of the notebook:
df = pd.read_csv(github_url)

In [4]:
# Display the DataFrame as loaded from the CSV.
df

Unnamed: 0,Time,Magnitude,Place,Latitude,Longitude,Depth
0,2023-09-04 05:26:08.839,4.8,"11 km SSW of Özdere, Turkey",37.9198,27.0744,10.000
1,2023-08-29 04:58:15.305,4.8,,38.9659,32.9248,7.489
2,2023-08-27 03:09:43.247,4.4,"9 km NNW of Émponas, Greece",36.3079,27.8301,69.004
3,2023-08-24 05:35:24.510,4.8,,38.2125,38.1815,10.000
4,2023-08-23 08:19:30.701,4.3,eastern Turkey,39.2741,40.3563,10.000
...,...,...,...,...,...,...
19243,1991-01-02 15:48:50.280,2.7,"6 km NE of Yenice, Turkey",39.7950,28.9700,10.000
19244,1991-01-02 14:02:13.580,2.8,"15 km NNW of Manyas, Turkey",40.1770,27.9120,10.000
19245,1991-01-02 09:35:48.660,2.7,"13 km S of Mudanya, Turkey",40.2550,28.8760,10.000
19246,1991-01-01 20:13:44.900,2.8,"21 km WNW of Yenice, Turkey",39.8160,28.6690,10.000


## Create a Special Function for Calculating Elevation, Depth Below Sea Level and Weights.

In [5]:
def elevation_function(df, col_latitude, col_longitude, batch_size=100):
    '''
    Look up the elevation of every coordinate pair and add it to a copy of the DataFrame.

    Coordinates are sent to the Open Topo Data 'eudem25m' endpoint in batches
    (several locations per request, separated by "|") instead of one request per
    row, which keeps the number of HTTP calls manageable for large DataFrames.

    Parameters:
        df (pandas.DataFrame): The DataFrame containing latitude and longitude columns.
        col_latitude (str): The name of the column containing latitude values.
        col_longitude (str): The name of the column containing longitude values.
        batch_size (int, optional): Number of locations sent per request (default=100).
            NOTE(review): 100 is assumed to be the per-request limit of the
            public API - confirm against the service documentation.

    Returns:
        pandas.DataFrame: A copy of `df` with an additional 'Elevation' column.
            Locations for which the service returns no elevation (None) get 0.

    Raises:
        ValueError: If `batch_size` is smaller than 1.
    '''
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Define the URL for the elevation data service
    url = 'https://api.opentopodata.org/v1/eudem25m'

    # Build one "lat,lon" string per row, in row order
    locations = [
        f"{lat},{lon}" for lat, lon in zip(df[col_latitude], df[col_longitude])
    ]

    # Query the service batch by batch; results are collected in request order
    elevations = []
    for start in range(0, len(locations), batch_size):
        params = {'locations': '|'.join(locations[start:start + batch_size])}
        result = make_remote_request(url, params)
        elevations.extend(entry['elevation'] for entry in result.json()['results'])

    # Handle cases where elevation data is unavailable (None) by replacing with 0
    elevations = [0 if value is None else value for value in elevations]

    # Work on a copy so the caller's DataFrame is left untouched
    df_with_elevations = df.copy()
    df_with_elevations['Elevation'] = elevations

    return df_with_elevations


In [6]:
def depth_below_sea(df, col_elevation, col_depth):
    '''
    Compute each row's position relative to sea level and return it in a new DataFrame.

    Parameters:
        df (pandas.DataFrame): The DataFrame containing elevation and depth columns.
        col_elevation (str): The name of the column containing elevation values.
        col_depth (str): The name of the column containing depth values.

    Returns:
        pandas.DataFrame: A copy of `df` with an additional 'BelowSeaLevel' column
            holding elevation minus depth (the input DataFrame is not modified).
    '''
    # Elevation minus depth: the result is negative when the depth exceeds the elevation
    below_sea_level = df[col_elevation] - df[col_depth]

    # assign() returns a new DataFrame, leaving the caller's object unchanged
    return df.assign(BelowSeaLevel=below_sea_level)

In [7]:
def weighit(x):
    '''
    Return the arithmetic mean of a collection of numeric values.

    Parameters:
        x (pandas.Series or array-like): Values supporting `.sum()` and `len()`.

    Returns:
        The sum of the values divided by the number of values.
    '''
    total = x.sum()
    count = len(x)
    return total / count

## Data preparation

In [8]:
# Drop rows with missing values, then drop the 'Place' column, which is not needed.
# NOTE(review): dropna() runs while 'Place' is still present, so rows whose only
# missing value is 'Place' are removed as well - confirm this is intended.
df = df.dropna().drop(columns='Place')

In [9]:
# Rename the 'Time' column to 'Timestamp'. Renaming by name (instead of
# overwriting df.columns positionally) cannot mislabel columns if their order
# or number ever changes; errors='raise' fails loudly if 'Time' is missing.
df = df.rename(columns={'Time': 'Timestamp'}, errors='raise')

In [10]:
# Parse 'Timestamp' as datetime, then derive calendar and clock components from it:
df.Timestamp = pd.to_datetime(df.Timestamp)
df['Date'] = df.Timestamp.dt.date
df['Year'] = df.Timestamp.dt.year
df['Month'] = df.Timestamp.dt.month
df['Day'] = df.Timestamp.dt.day
df['Hour'] = df.Timestamp.dt.hour
# NOTE(review): 'Munite' looks like a misspelling of 'Minute'; the name is kept
# as is because it is part of the DataFrame's column schema.
df['Munite'] = df.Timestamp.dt.minute
df['Second'] = df.Timestamp.dt.second

In [11]:
# Convert the values in the 'Depth' column from kilometers to meters.
# NOTE: this cell is not idempotent - re-running it multiplies by 1000 again.
df.Depth = df.Depth * 1000

In [12]:
# Replace the index with a fresh 0..n-1 range; drop=True discards the old index
# instead of keeping it as a column:
df = df.reset_index(drop=True)

In [13]:
# Display the prepared DataFrame.
df

Unnamed: 0,Timestamp,Magnitude,Latitude,Longitude,Depth,Date,Year,Month,Day,Hour,Munite,Second
0,2023-09-04 05:26:08.839,4.8,37.9198,27.0744,10000.0,2023-09-04,2023,9,4,5,26,8
1,2023-08-27 03:09:43.247,4.4,36.3079,27.8301,69004.0,2023-08-27,2023,8,27,3,9,43
2,2023-08-23 08:19:30.701,4.3,39.2741,40.3563,10000.0,2023-08-23,2023,8,23,8,19,30
3,2023-08-22 10:17:34.400,4.7,38.4244,36.6825,10457.0,2023-08-22,2023,8,22,10,17,34
4,2023-08-21 15:45:57.264,4.3,36.7359,28.1337,73187.0,2023-08-21,2023,8,21,15,45,57
...,...,...,...,...,...,...,...,...,...,...,...,...
17449,1991-01-03 04:06:23.260,3.0,39.5200,28.8360,10000.0,1991-01-03,1991,1,3,4,6,23
17450,1991-01-02 15:48:50.280,2.7,39.7950,28.9700,10000.0,1991-01-02,1991,1,2,15,48,50
17451,1991-01-02 14:02:13.580,2.8,40.1770,27.9120,10000.0,1991-01-02,1991,1,2,14,2,13
17452,1991-01-02 09:35:48.660,2.7,40.2550,28.8760,10000.0,1991-01-02,1991,1,2,9,35,48


In [14]:
# Count the earthquakes recorded in each year (1991 - 2023); named aggregation
# produces the 'Count' column directly:
earthquake_count = df.groupby('Year').agg(Count=('Timestamp', 'count')).reset_index()
earthquake_count

Unnamed: 0,Year,Count
0,1991,934
1,1992,555
2,1993,2298
3,1994,2603
4,1995,860
5,1996,536
6,1997,285
7,1998,1100
8,1999,1307
9,2000,291


In [15]:
# Find the maximum magnitude recorded in each year (1991 - 2023); named
# aggregation produces the 'MaxMagnitude' column directly:
Magnitude_Max = df.groupby('Year').agg(MaxMagnitude=('Magnitude', 'max')).reset_index()
Magnitude_Max

Unnamed: 0,Year,MaxMagnitude
0,1991,5.5
1,1992,6.7
2,1993,5.3
3,1994,5.5
4,1995,6.4
5,1996,6.2
6,1997,5.7
7,1998,6.3
8,1999,7.6
9,2000,6.0


In [52]:
# Build one table with the yearly earthquake count and maximum magnitude, then
# attach the date and coordinates of every event that reached that maximum:
df_Magnitude = earthquake_count.merge(Magnitude_Max, on='Year')
df_Magnitude = df_Magnitude.merge(
    df[['Year', 'Date', 'Latitude', 'Longitude', 'Magnitude']],
    left_on=['Year', 'MaxMagnitude'],
    right_on=['Year', 'Magnitude'],
).drop(columns='Magnitude')

df_Magnitude

Unnamed: 0,Year,Count,MaxMagnitude,Date,Latitude,Longitude
0,1991,934,5.5,1991-07-24,36.52,44.066
1,1992,555,6.7,1992-03-13,39.71,39.605
2,1993,2298,5.3,1993-08-26,36.736,28.051
3,1994,2603,5.5,1994-05-24,38.664,26.542
4,1995,860,6.4,1995-10-01,38.063,30.134
5,1996,536,6.2,1996-07-20,36.147,27.103
6,1997,285,5.7,1997-01-22,36.25,35.951
7,1998,1100,6.3,1998-06-27,36.878,35.307
8,1999,1307,7.6,1999-08-17,40.748,29.864
9,2000,291,6.0,2000-12-15,38.457,31.351


In [93]:
# Compute the yearly "weight" of the earthquakes, i.e. weighit() applied to the
# magnitudes of each year; named aggregation produces the 'Weighit' column directly:
df_weighit = df.groupby('Year').agg(Weighit=('Magnitude', weighit)).reset_index()
df_weighit

Unnamed: 0,Year,Weighit
0,1991,3.094861
1,1992,3.264144
2,1993,2.874195
3,1994,2.929428
4,1995,3.142674
5,1996,3.266978
6,1997,3.291579
7,1998,2.995909
8,1999,3.218516
9,2000,3.565979


# Q1: Can a pattern be found for the largest seismic magnitude over the years?

In [107]:
# Line chart of the maximum magnitude per year, drawn from df_Magnitude.
fig = px.line(df_Magnitude, x='Year', y='MaxMagnitude',
             labels={'Year': 'Year', 'MaxMagnitude': 'Maximum Magnitude (MaxMagnitude)'},
             title='The Maximum Magnitude in All Years')

# Center the title horizontally (x=0.5) and place it near the top (y=0.95).
fig.update_layout( 
    title=dict(
        text='The Maximum Magnitude in All Years',
        x=0.5,
        y=0.95,
    ),
)

fig.show()

From the analysis, it does not appear that there is a pattern that can be traced from the seismic measurements for the largest magnitude each year.

# Q2: Is the pattern of magnitudes constant or variable, and how does it appear?

In [108]:
# Line chart of the yearly 'Weighit' value, drawn from df_weighit.
fig = px.line(df_weighit, x='Year', y='Weighit',
             labels={'Year': 'Year', 'Weighit': 'Weighit of Magnitude (Richter)'},
             title='The Weighit of Magnitude in All Years')

# Center the title horizontally (x=0.5) and place it near the top (y=0.95).
fig.update_layout( 
    title=dict(
        text='The Weighit of Magnitude in All Years',
        x=0.5,
        y=0.95,
    ),
)

fig.show()

From the analysis, we find that the weight of the magnitude suddenly increased in 2009 and became somewhat constant at a higher level.

In [109]:
# Display the DataFrame.
# NOTE(review): the saved output below already contains 'Elevation' and
# 'BelowSeaLevel', which are only created by the elevation_function and
# depth_below_sea cells further down - the notebook was run out of order.
df

Unnamed: 0,Timestamp,Magnitude,Latitude,Longitude,Depth,Date,Year,Month,Day,Hour,Munite,Second,Elevation,BelowSeaLevel
0,2023-09-04 05:26:08.839,4.8,37.9198,27.0744,10000.0,2023-09-04,2023,9,4,5,26,8,0.000000,-10000.000000
1,2023-08-27 03:09:43.247,4.4,36.3079,27.8301,69004.0,2023-08-27,2023,8,27,3,9,43,0.000000,-69004.000000
2,2023-08-23 08:19:30.701,4.3,39.2741,40.3563,10000.0,2023-08-23,2023,8,23,8,19,30,1331.781372,-8668.218628
3,2023-08-22 10:17:34.400,4.7,38.4244,36.6825,10457.0,2023-08-22,2023,8,22,10,17,34,2026.617798,-8430.382202
4,2023-08-21 15:45:57.264,4.3,36.7359,28.1337,73187.0,2023-08-21,2023,8,21,15,45,57,198.004562,-72988.995438
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17449,1991-01-03 04:06:23.260,3.0,39.5200,28.8360,10000.0,1991-01-03,1991,1,3,4,6,23,907.886047,-9092.113953
17450,1991-01-02 15:48:50.280,2.7,39.7950,28.9700,10000.0,1991-01-02,1991,1,2,15,48,50,805.865417,-9194.134583
17451,1991-01-02 14:02:13.580,2.8,40.1770,27.9120,10000.0,1991-01-02,1991,1,2,14,2,13,13.874000,-9986.126000
17452,1991-01-02 09:35:48.660,2.7,40.2550,28.8760,10000.0,1991-01-02,1991,1,2,9,35,48,93.381340,-9906.618660


In [21]:
# Add an 'Elevation' column by looking up every (Latitude, Longitude) pair with
# elevation_function:
df = elevation_function(df, 'Latitude', 'Longitude')
df

Unnamed: 0,Timestamp,Magnitude,Latitude,Longitude,Depth,Date,Year,Month,Day,Hour,Munite,Second,Elevation
0,2023-09-04 05:26:08.839,4.8,37.9198,27.0744,10000.0,2023-09-04,2023,9,4,5,26,8,0.000000
1,2023-08-27 03:09:43.247,4.4,36.3079,27.8301,69004.0,2023-08-27,2023,8,27,3,9,43,0.000000
2,2023-08-23 08:19:30.701,4.3,39.2741,40.3563,10000.0,2023-08-23,2023,8,23,8,19,30,1331.781372
3,2023-08-22 10:17:34.400,4.7,38.4244,36.6825,10457.0,2023-08-22,2023,8,22,10,17,34,2026.617798
4,2023-08-21 15:45:57.264,4.3,36.7359,28.1337,73187.0,2023-08-21,2023,8,21,15,45,57,198.004562
...,...,...,...,...,...,...,...,...,...,...,...,...,...
17449,1991-01-03 04:06:23.260,3.0,39.5200,28.8360,10000.0,1991-01-03,1991,1,3,4,6,23,907.886047
17450,1991-01-02 15:48:50.280,2.7,39.7950,28.9700,10000.0,1991-01-02,1991,1,2,15,48,50,805.865417
17451,1991-01-02 14:02:13.580,2.8,40.1770,27.9120,10000.0,1991-01-02,1991,1,2,14,2,13,13.874000
17452,1991-01-02 09:35:48.660,2.7,40.2550,28.8760,10000.0,1991-01-02,1991,1,2,9,35,48,93.381340


In [22]:
# Add a 'BelowSeaLevel' column (Elevation minus Depth) with depth_below_sea:
df = depth_below_sea(df, 'Elevation', 'Depth')
df

Unnamed: 0,Timestamp,Magnitude,Latitude,Longitude,Depth,Date,Year,Month,Day,Hour,Munite,Second,Elevation,BelowSeaLevel
0,2023-09-04 05:26:08.839,4.8,37.9198,27.0744,10000.0,2023-09-04,2023,9,4,5,26,8,0.000000,-10000.000000
1,2023-08-27 03:09:43.247,4.4,36.3079,27.8301,69004.0,2023-08-27,2023,8,27,3,9,43,0.000000,-69004.000000
2,2023-08-23 08:19:30.701,4.3,39.2741,40.3563,10000.0,2023-08-23,2023,8,23,8,19,30,1331.781372,-8668.218628
3,2023-08-22 10:17:34.400,4.7,38.4244,36.6825,10457.0,2023-08-22,2023,8,22,10,17,34,2026.617798,-8430.382202
4,2023-08-21 15:45:57.264,4.3,36.7359,28.1337,73187.0,2023-08-21,2023,8,21,15,45,57,198.004562,-72988.995438
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17449,1991-01-03 04:06:23.260,3.0,39.5200,28.8360,10000.0,1991-01-03,1991,1,3,4,6,23,907.886047,-9092.113953
17450,1991-01-02 15:48:50.280,2.7,39.7950,28.9700,10000.0,1991-01-02,1991,1,2,15,48,50,805.865417,-9194.134583
17451,1991-01-02 14:02:13.580,2.8,40.1770,27.9120,10000.0,1991-01-02,1991,1,2,14,2,13,13.874000,-9986.126000
17452,1991-01-02 09:35:48.660,2.7,40.2550,28.8760,10000.0,1991-01-02,1991,1,2,9,35,48,93.381340,-9906.618660


In [96]:
# Find, for each year, the lowest 'BelowSeaLevel' value, i.e. the deepest
# earthquake of that year relative to sea level.
# The string 'min' is used rather than the builtin `min`: passing builtin
# callables to agg() is deprecated in recent pandas, and the string form matches
# the 'count'/'max' aggregations used in the other cells.
df_depth = df.groupby(['Year']).agg({'BelowSeaLevel': 'min'}).reset_index()
df_depth.columns = ['Year', 'MinBelowSeaLevel']
df_depth

Unnamed: 0,Year,MinBelowSeaLevel
0,1991,-159600.0
1,1992,-163900.0
2,1993,-159238.08429
3,1994,-163000.0
4,1995,-168400.0
5,1996,-161300.0
6,1997,-161000.0
7,1998,-159200.0
8,1999,-159500.0
9,2000,-128800.0


# Q3: Do the depths of the earthquake epicenters remain constant at a certain level or do they vary?

In [118]:
# Line chart of the lowest 'BelowSeaLevel' value per year, drawn from df_depth.
fig = px.line(df_depth, x='Year', y='MinBelowSeaLevel',
             labels={'Year': 'Year', 'MinBelowSeaLevel': 'Below Sea Level (meters)'},
             title='The Minimum Depth Below Sea Level in All Years')

# Center the title horizontally (x=0.5) and place it near the top (y=0.95).
fig.update_layout( 
    title=dict(
        text='The Minimum Depth Below Sea Level in All Years',
        x=0.5,
        y=0.95,
    ),
)
# Vertical markers on the years highlighted in the discussion below.
# NOTE(review): the discussion also names 2009, but no marker is drawn for it.
fig.add_vline(x=2014)
fig.add_vline(x=2019)
fig.add_vline(x=2023)

fig.show()

From the analysis, we find that the depth of the earthquake epicenters increases rather suddenly, specifically in the years 2009, 2014, 2019, and 2023. This may indicate the presence of a periodic pattern.

In [115]:
# Map of all earthquakes; each point is colored by its year, treated as a
# discrete category (hence the cast to str), and shows its magnitude on hover.
# The former `color_discrete_map=color_discrete_map` argument was removed: that
# name is never defined in this notebook, so the cell raised a NameError on a
# fresh run. Plotly's default discrete colors are used instead.
fig = px.scatter_mapbox(df,
                        lat="Latitude",
                        lon="Longitude",
                        hover_name='Magnitude',
                        color=df['Year'].astype(str),
                        zoom=8,
                        height=800,
                        width=800)

# Use the OpenStreetMap base map and remove the outer margins.
fig.update_layout(mapbox_style="open-street-map")
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})

# Bold, centered title near the top of the figure.
fig.update_layout(
    title=dict(
        text='<b> Earthquake Map </b>',
        x=0.5,
        y=0.95,
        font=dict(
            size=18,
        )
    ),
)

# Slight transparency so overlapping points stay visible; update_traces returns
# the figure, so the notebook displays it as the cell's result.
fig.update_traces(marker=dict(opacity=0.8))

In [99]:
# Display the yearly count / maximum-magnitude table.
df_Magnitude

Unnamed: 0,Year,Count,MaxMagnitude,Date,Latitude,Longitude
0,1991,934,5.5,1991-07-24,36.52,44.066
1,1992,555,6.7,1992-03-13,39.71,39.605
2,1993,2298,5.3,1993-08-26,36.736,28.051
3,1994,2603,5.5,1994-05-24,38.664,26.542
4,1995,860,6.4,1995-10-01,38.063,30.134
5,1996,536,6.2,1996-07-20,36.147,27.103
6,1997,285,5.7,1997-01-22,36.25,35.951
7,1998,1100,6.3,1998-06-27,36.878,35.307
8,1999,1307,7.6,1999-08-17,40.748,29.864
9,2000,291,6.0,2000-12-15,38.457,31.351


In [114]:
# Map of the strongest earthquake(s) of each year; each point is colored by its
# magnitude, treated as a discrete category (hence the cast to str), and shows
# its year on hover.
# The former `color_discrete_map=color_discrete_map` argument was removed: that
# name is never defined in this notebook, so the cell raised a NameError on a
# fresh run. Plotly's default discrete colors are used instead.
fig = px.scatter_mapbox(df_Magnitude,
                        lat="Latitude",
                        lon="Longitude",
                        hover_name='Year',
                        color=df_Magnitude['MaxMagnitude'].astype(str),
                        zoom=8,
                        height=800,
                        width=800)

# Use the OpenStreetMap base map and remove the outer margins.
fig.update_layout(mapbox_style="open-street-map")
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})

# Bold, centered title near the top of the figure.
fig.update_layout(
    title=dict(
        text='<b>The Max Magnitude of Earthquake Map</b>',
        x=0.5,
        y=0.95,
        font=dict(
            size=18,
        )
    ),
)

# Set opacity to 0.7 for visibility; update_traces returns the figure, so the
# notebook displays it as the cell's result.
fig.update_traces(marker=dict(opacity=0.7))

# Conclusions

In this project, I analyzed seismic data from 1991 to 2023 to answer three questions:

- Can a pattern be found for the largest seismic magnitude over the years?

- Is the pattern of magnitudes constant or variable, and how does it appear?

- Do the depths of the earthquake epicenters remain constant at a certain level or do they vary?

The results of the analysis can be summarized as follows:
- From the analysis, it does not appear that there is a pattern that can be traced from the seismic measurements for the largest magnitude each year.

- From the analysis, we find that the weight of the magnitude suddenly increased in 2009 and became somewhat constant at a higher level.

- From the analysis, we find that the depth of the earthquake epicenters increases rather suddenly, specifically in the years 2009, 2014, 2019, and 2023. This may indicate the presence of a periodic pattern.

# Save the DataFrame to My Folder

In [27]:
# Save the enriched dataset to my folder because elevation_function takes a long
# time to run. A raw string keeps the Windows backslashes literal; the original
# literal relied on '\d', which is an invalid escape sequence.
dataset_path = r'C:\Users\Ibrahim Al-Khurayyif\Data-Scientist-Capstone\dataset_new.csv'

# NOTE: to_csv returns None when it is given a path, so dataset_new is always None.
dataset_new = df.to_csv(dataset_path, index=False)