In [22]:
import numpy as np
import pandas as pd
import os
from sklearn.preprocessing import StandardScaler

In [73]:
# Define the path to your file
file_path = '/Users/halilibrahimkanpak/Documents/Coding/MATAM/matam_data/boun_data.txt'

# Fail fast with an explicit error. Without this, a missing file would skip the
# loading code and the cell would end in a NameError on `standardized_df`
# (or, worse, silently display a stale frame left over from an earlier run).
if not os.path.exists(file_path):
    raise FileNotFoundError(f"Earthquake data file not found: {file_path}")

print("File exists. Proceeding to load the data.")

# Define the column names (adjust according to your file structure).
# The first row of the file is skipped below and replaced by these names.
column_names = [
    "No", "Deprem Kodu", "Olus tarihi", "Olus zamani", "Enlem", "Boylam", "Der(km)",
    "xM", "MD", "ML", "Mw", "Ms", "Mb", "Tip", "Yer"
]

# Load the tab-separated data into a DataFrame with the specified encoding
df = pd.read_csv(file_path, delimiter="\t", names=column_names, skiprows=1, encoding='latin1')

# Combine 'Olus tarihi' and 'Olus zamani' into a single datetime column
df['Datetime'] = pd.to_datetime(df['Olus tarihi'] + ' ' + df['Olus zamani'], format='%Y.%m.%d %H:%M:%S.%f')

# Convert the datetime column to a Unix timestamp (integer seconds)
df['Unix Timestamp'] = df['Datetime'].astype('int64') // 10**9

# NOTE: standardize_numerical_data is defined in a separate cell further down
# the notebook; that cell must be executed before this one on a fresh kernel.
standardized_df = standardize_numerical_data(df)

# Keep only the features used for the distance computation. The magnitude
# columns ('xM', 'MD', 'ML', 'Mw', 'Ms', 'Mb') are deliberately left out here.
standardized_df = standardized_df[['Enlem', 'Boylam', 'Der(km)', 'Unix Timestamp']]

standardized_df.head()

File exists. Proceeding to load the data.


Unnamed: 0,Enlem,Boylam,Der(km),Unix Timestamp
0,1.414971,-0.94304,-0.397189,1.164397
1,-1.495612,0.465453,-0.455676,1.164245
2,0.272376,1.169502,-0.470298,1.16422
3,0.006911,0.75062,-0.455676,1.164197
4,-1.698518,-0.82884,0.090203,1.163653


In [63]:
def standardize_numerical_data(df):
    """
    Return a copy of ``df`` whose numerical columns have been standardized.

    Every column with a numeric dtype is passed through a freshly fitted
    ``StandardScaler``; non-numeric columns are carried over untouched.
    The input DataFrame is NOT modified, so the call can be repeated safely.

    Parameters:
    df (pd.DataFrame): DataFrame containing the data to standardize.

    Returns:
    pd.DataFrame: A new DataFrame with the same columns and index as ``df``.
    """
    # Work on a copy so the caller's frame keeps its raw values.
    standardized = df.copy()

    # Identifying numerical columns
    numerical_cols = standardized.select_dtypes(include=['number']).columns

    # Nothing to scale: the scaler cannot be fitted on zero columns or zero rows,
    # so hand back the unchanged copy instead of raising.
    if standardized.empty or len(numerical_cols) == 0:
        return standardized

    # Creating a scaler object and standardizing the numerical columns
    scaler = StandardScaler()
    standardized[numerical_cols] = scaler.fit_transform(standardized[numerical_cols])

    return standardized

In [76]:
def calculate_distance(row1, row2):
    """
    Euclidean distance between two feature rows.

    Parameters:
    row1 (pd.Series): Feature values of the first row.
    row2 (pd.Series): Feature values of the second row.

    Returns:
    float: Square root of the summed squared element-wise differences.
    """
    # Element-wise difference, squared, then reduced to a single total.
    delta = row1 - row2
    squared_total = np.sum(delta ** 2)
    return np.sqrt(squared_total)

In [83]:
def find_close_earthquakes(df, index, threshold):
    """
    Find earthquakes with distances less than the threshold from a given earthquake.

    The distance is the Euclidean distance over all columns of ``df``, computed
    for every row at once instead of looping row by row. The target row itself
    is never part of the result.

    Parameters:
    df (pd.DataFrame): DataFrame containing earthquake data (numeric columns only).
    index (int): Index label of the earthquake to compare against.
    threshold (float): Distance threshold (strict: distance must be < threshold).

    Returns:
    pd.DataFrame: Rows of ``df`` within the specified distance, keeping their
        original index labels and columns. When nothing qualifies, the result
        is an empty DataFrame that still has the columns of ``df``.

    Raises:
    KeyError: If ``index`` is not a label of ``df``.
    """
    target_earthquake = df.loc[index]

    # Subtracting a row (Series) from the frame aligns on column names, giving
    # the per-feature difference of every earthquake to the target.
    squared_diffs = (df - target_earthquake) ** 2
    distances = np.sqrt(squared_diffs.sum(axis=1))

    # Within the threshold, and not the target earthquake itself.
    is_close = (distances < threshold) & (df.index != index)

    # Copy so the caller gets an independent frame rather than a slice of df.
    return df.loc[is_close].copy()

In [86]:
# Pick the rows labelled 0 and 1 of the standardized frame and measure how far
# apart they are in standardized feature space.
eq1, eq2 = (standardized_df.loc[label] for label in (0, 1))

distance = calculate_distance(eq1, eq2)
print(f"The generalized distance between the two earthquakes is {distance}")

The generalized distance between the two earthquakes is 3.234001538528517


In [90]:
# Reference earthquake (row label) and the distance cut-off, expressed in
# standardized units because the search runs on the standardized frame.
index, threshold = 0, 1.164

close_earthquakes = find_close_earthquakes(
    df=standardized_df,
    index=index,
    threshold=threshold,
)
close_earthquakes.head()

Unnamed: 0,Enlem,Boylam,Der(km),Unix Timestamp
8,0.997812,-0.643395,-0.504415,1.163166
9,1.833822,0.122304,-0.289962,1.16311
10,1.00314,-0.642658,-0.406937,1.163054
11,1.645521,-0.564697,-0.343576,1.162968
13,1.003454,-0.644903,-0.455676,1.162831
