In [8]:
%load_ext autoreload
%autoreload 2
# Packages that we are going to use in this project
import pandas as pd
import numpy as np
from datetime import datetime, timedelta


import sys 
import os

import matplotlib.pyplot as plt
import plotly.express as px

from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf

from scipy.fftpack import fft
from scipy.signal import blackman
from scipy.signal import periodogram

from sklearn.cluster import KMeans
import seaborn as sns; sns.set()
from sklearn.preprocessing import MinMaxScaler

from functions import *


# Make the "df" directory that sits one level above the current working
# directory importable (the import of manage_file right below relies on it).
module_path = os.path.abspath(os.path.join('..'))
# Build the entry with os.path.join so the separator is correct on every OS,
# and test for the exact entry being appended so that re-running this cell
# does not keep adding duplicates to sys.path.
df_module_path = os.path.join(module_path, "df")
if df_module_path not in sys.path:
    sys.path.append(df_module_path)
from manage_file import FILES, getPath

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## STEP 1: Read and view the data

In [36]:
# Read the input data. `file` can be "earth", "moon" or "minable".
# Prefer "minable": it contains both the earth and the moon data,
# with clustering and interpolation already done.
df = read_data(file="minable")

## STEP 2: __Frequency of earthquakes per time period, segmented by magnitude.__

For example:
* period_length = 10 (compare the earthquake counts of consecutive 10-year periods)
* mag_seg = {magnitude < 4 | 4 <= magnitude < 5 | 5 <= magnitude}

In [None]:
######## You may want to change this piece of code ########
# Length of each comparison period, in years.
period_length = 10
###########################################################
# How many earthquakes there are in each period.
# NOTE(review): df is reassigned here, so re-running this cell feeds the
# already-processed frame back into period_calculation — confirm that is safe.
df = period_calculation(df, period_length)

In [None]:
######## You may want to change this piece of code ########
# Magnitude segments: segment id -> [lower bound, upper bound].
# NOTE(review): which bound is inclusive is decided inside
# magnitude_segmentation — confirm in functions.py.
mag_seg = {0: [0,4.6], 1: [4.6,4.9], 2: [4.9,10]}
###########################################################
df = magnitude_segmentation(df, mag_seg)

In [None]:
# Grouped histogram: count of earthquakes in each magnitude segment (MAG_SEG),
# with one bar per PERIOD inside every segment.
fig = px.histogram(df, x="MAG_SEG", color='PERIOD', barmode='group', height=400)
fig.show()

In [None]:
# Histogram of the number of earthquakes in each PERIOD (bars colored by PERIOD).
fig = px.histogram(df, x="PERIOD",color='PERIOD', barmode='group',height=400)
fig.show()

In [43]:
# Histogram of earthquakes grouped by month.
# NOTE(review): bool_mag_seg=True presumably splits the bars by magnitude
# segment and date_off_set=False disables a date shift — confirm in functions.py.
histogram_monthly(df, date_off_set=False, bool_mag_seg=True)

### Earthquake increase by country

In [47]:
# Count the number of earthquakes recorded for each country.
countries_value_counts(df)

No_Country        : 55384
Indonesia         : 31219
Japan             : 17750
Papua New Guinea  : 15428
Philippines       : 13281
Tonga             : 10404
Russia            : 10167
Chile             : 8969
Vanuatu           : 7774
Alaska            : 7455
Solomon Islands   : 5890
Japan region      : 5091
New Zealand       : 5059
Peru              : 4284
China             : 4042
Mexico            : 3943
Fiji              : 3053
India             : 2994
Iran              : 2854
Timor Leste       : 2772
Argentina         : 2712
Northern Mariana Is: 2403
Greece            : 2403
Afghanistan       : 2339
Guam              : 2198
New Caledonia     : 2041
Taiwan            : 1943
Colombia          : 1832
Guatemala         : 1294
Nicaragua         : 1252
Tajikistan        : 1126
Pakistan          : 1102
Turkey            : 1039
Ecuador           : 1009
El Salvador       : 997
Myanmar           : 883
Panama            : 822
Canada            : 728
Wallis and Futuna : 689
Aleutian Islands, A: 6

In [None]:
######## You may want to change this piece of code ########
# Countries to plot. "No_Country" is the label used for events that have no
# country assigned (it appears in the country counts printed earlier).
countries = ["Indonesia", "Japan", "Chile", "No_Country"]
###########################################################
histogram_countries(df, countries)

### Trendline for magnitude segments

In [50]:
# Print the trendline of the whole series and of every magnitude segment.
# The value returned by the call is displayed as the cell result.
trendline_calculations(df)

Trendline for the whole series is               : 10978.3
The proportion of trendline and #eartquakes are : 0.04
Trendline for Mag Segment 2 is              : 1394.8
The proportion of trendline and #eartquakes are : 0.01
Trendline for Mag Segment 1 is              : 5664.3
The proportion of trendline and #eartquakes are : 0.05
Trendline for Mag Segment 0 is              : 3919.2
The proportion of trendline and #eartquakes are : 0.08


{'Mag Segment 2': 1394.8, 'Mag Segment 1': 5664.3, 'Mag Segment 0': 3919.2}

### Clustering

In [None]:
# Compute the clustering and keep the result in df.
# NOTE(review): normalized=False presumably clusters on the unscaled
# features — confirm in functions.py.
df = calculate_clustering(df, normalized=False)

In [None]:
# Keep only the events with no country assigned (Pais == "No_Country")
# and draw them on the map with the clusters option enabled.
df1 = df[df.Pais == "No_Country"]
plot_map(df1, clusters=True)

In [53]:
# Trendline calculations per cluster. Two results are returned; negative_trend
# is used further down through .keys() as the list of clusters to inspect.
# NOTE(review): presumably negative_trend maps cluster id -> trend for the
# clusters whose trend is negative — confirm in functions.py.
series_trend, negative_trend = trendline_calculations(df,cluster=True)

For Cluster 3 the trendline is                 : 884.6
Cluster 3: Proportion of trend and #eartquakes : 0.05
For Cluster 5 the trendline is                 : 761.7
Cluster 5: Proportion of trend and #eartquakes : 0.05
For Cluster 30 the trendline is                 : 753.3
Cluster 30: Proportion of trend and #eartquakes : 0.07
For Cluster 11 the trendline is                 : 661.3
Cluster 11: Proportion of trend and #eartquakes : 0.11
For Cluster 41 the trendline is                 : 650.4
Cluster 41: Proportion of trend and #eartquakes : 0.08
For Cluster 2 the trendline is                 : 575.9
Cluster 2: Proportion of trend and #eartquakes : 0.04
For Cluster 45 the trendline is                 : 539.6
Cluster 45: Proportion of trend and #eartquakes : 0.04
For Cluster 19 the trendline is                 : 516.2
Cluster 19: Proportion of trend and #eartquakes : 0.06
For Cluster 33 the trendline is                 : 496.3
Cluster 33: Proportion of trend and #eartquakes : 0.05
For Clu

In [None]:
# Plot on the map only the clusters listed in negative_trend.
######## You may want to change this piece of code ########
clusters_toplot = list(negative_trend.keys()) # [48,8,46,18] for example
###########################################################
plot_map(df, clusters=True, specific_clusters=clusters_toplot)

In [58]:
# Print a summary of cluster 16: number of rows, count per PERIOD and its trend.
specific_cluster_info(df, 16)

Cantidad de data del cluster 16 es 4113
1973 - 1982     731
1983 - 1992     807
1993 - 2002     689
2003 - 2012     708
2013 - 2022    1178
Name: PERIOD, dtype: int64
Cluster 16 has a trend of 79.5, proportion is 0.02


In [None]:
# Histogram restricted to a hand-picked list of cluster ids.
clusters = [47,43,16]
histogram_cluster(df,clusters)

## Moon correlation

In [31]:
import numpy as np
import pandas as pd

def describe_columns(df, columns, step_quantile=0.25, clusters=None):
    """
    Computes descriptive statistics of each column in a pandas DataFrame.

    Parameters:
        df (pandas.DataFrame): A pandas DataFrame object. It must contain a
            ``cluster_label`` column when ``clusters`` is given.
        columns (list): list of columns to describe.
        step_quantile (float): The step between each quantile to compute.
            Must satisfy ``0 < step_quantile <= 1``. Default is 0.25.
            Quantiles are the multiples of the step that do not exceed 1, so
            0.25 gives 25%, 50%, 75%, 100% and 0.3 gives 30%, 60%, 90%.
        clusters (list, optional): cluster labels of interest. ``None`` or an
            empty collection keeps every row.

    Returns:
        (pandas.DataFrame): One row per statistic (count, mean, std, min, the
            quantiles, max) and one column per entry of ``columns``, rounded
            to 2 decimal places.

    Raises:
        ValueError: If ``step_quantile`` is not in the interval (0, 1].
    """
    if not 0 < step_quantile <= 1:
        raise ValueError(
            f"step_quantile must be in the interval (0, 1], got {step_quantile!r}"
        )

    # Materialise the labels so any iterable (list, dict keys, numpy array)
    # can be tested for emptiness without an ambiguous truth value.
    if clusters is not None:
        clusters = list(clusters)
    if clusters:
        df = df[df.cluster_label.isin(clusters)]

    df = df[columns]

    # Build the quantiles from integer multiples of the step rather than
    # np.arange with a float step: that form accumulates rounding error and,
    # when the step does not divide 1, produces a quantile above 1 that
    # DataFrame.quantile rejects.
    n_steps = int(np.floor(1.0 / step_quantile + 1e-9))
    quantiles = np.round(step_quantile * np.arange(1, n_steps + 1), 10)
    quantiles = np.minimum(quantiles, 1.0)

    stats = {
        'count': df.count(),
        'mean': df.mean(),
        'std': df.std(),
        'min': df.min(),
    }

    for q in quantiles:
        # Integer percentages keep their usual label ("25%"); fractional ones
        # keep up to two decimals ("12.5%") so that labels neither collide nor
        # misreport the quantile.
        stats[f'{round(q * 100, 2):g}%'] = df.quantile(q)

    stats['max'] = df.max()
    stats_df = pd.DataFrame(stats)

    # Round values to 2 decimal places
    stats_df = stats_df.round(2)

    return stats_df.transpose()



In [33]:
# Descriptive statistics in 10% quantile steps for the two interpolated
# columns, restricted to the clusters listed in negative_trend.
describe_columns(df, ["r/km_interpolated", "ill_frac_interpolated"],
                 step_quantile=0.1, clusters= list(negative_trend.keys()) )


Unnamed: 0,r/km_interpolated,ill_frac_interpolated
count,15948.0,15948.0
mean,385452.18,48.6
std,15223.72,35.26
min,356731.3,0.03
10%,364361.47,2.26
20%,369195.84,8.91
30%,373737.52,19.71
40%,379994.86,31.4
50%,386340.84,46.49
60%,393113.86,62.8


In [30]:
# Display the DataFrame (pandas renders a truncated preview of its rows and columns).
df

Unnamed: 0,index,time,year,month,day,latitude,longitude,mag,depth,Pais,...,NewMonth,cluster_label,acum_day,ill_frac,r/km,dec,ra/h,ra/°,ill_frac_interpolated,r/km_interpolated
0,90,1973-01-01 03:46:09,1973,1,1,-9.2140,150.6340,5.3,41.000,Papua New Guinea,...,12,2,1,11.754904,405944.396500,-24.015288,15.836820,237.552301,10.910562,405861.982414
1,91,1973-01-01 05:22:29,1973,1,1,-15.0120,-173.9580,5.0,33.000,Tonga,...,12,3,1,11.754904,405944.396500,-24.015288,15.836820,237.552301,10.550897,405826.876407
2,92,1973-01-01 09:28:57,1973,1,1,-22.1610,-65.7920,4.8,205.000,Argentina,...,12,6,1,11.754904,405944.396500,-24.015288,15.836820,237.552301,9.630703,405737.058477
3,93,1973-01-01 11:42:37,1973,1,1,-35.5130,-16.2110,6.0,33.000,No_Country,...,12,43,1,11.754904,405944.396500,-24.015288,15.836820,237.552301,9.131653,405688.347373
4,94,1973-01-02 00:53:20,1973,1,2,-9.8540,117.4270,5.5,66.000,Indonesia,...,12,12,2,6.378599,405419.628500,-25.256921,16.700503,250.507552,6.236433,405371.503848
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
264384,264474,2022-12-31 18:04:14,2022,12,31,-23.6424,179.5146,4.5,577.986,No_Country,...,12,19,365,60.097395,384711.370474,6.419824,1.326635,19.899520,67.502620,388346.403168
264385,264475,2022-12-31 18:05:56,2022,12,31,-16.3267,-173.5390,4.7,62.895,No_Country,...,12,3,365,60.097395,384711.370474,6.419824,1.326635,19.899520,67.514230,388352.102638
264386,264476,2022-12-31 18:28:42,2022,12,31,-10.2389,66.5344,4.6,10.000,No_Country,...,12,51,365,60.097395,384711.370474,6.419824,1.326635,19.899520,67.669725,388428.430837
264387,264477,2022-12-31 18:54:49,2022,12,31,-52.5784,28.0139,4.5,10.000,No_Country,...,12,42,365,60.097395,384711.370474,6.419824,1.326635,19.899520,67.848100,388515.990345


In [35]:
# Display the DataFrame (pandas renders a truncated preview of its rows and columns).
df

Unnamed: 0,index,time,year,month,day,latitude,longitude,mag,depth,Pais,...,NewMonth,cluster_label,acum_day,ill_frac,r/km,dec,ra/h,ra/°,ill_frac_interpolated,r/km_interpolated
0,90,1973-01-01 03:46:09,1973,1,1,-9.2140,150.6340,5.3,41.000,Papua New Guinea,...,12,2,1,11.754904,405944.396500,-24.015288,15.836820,237.552301,10.910562,405861.982414
1,91,1973-01-01 05:22:29,1973,1,1,-15.0120,-173.9580,5.0,33.000,Tonga,...,12,3,1,11.754904,405944.396500,-24.015288,15.836820,237.552301,10.550897,405826.876407
2,92,1973-01-01 09:28:57,1973,1,1,-22.1610,-65.7920,4.8,205.000,Argentina,...,12,6,1,11.754904,405944.396500,-24.015288,15.836820,237.552301,9.630703,405737.058477
3,93,1973-01-01 11:42:37,1973,1,1,-35.5130,-16.2110,6.0,33.000,No_Country,...,12,43,1,11.754904,405944.396500,-24.015288,15.836820,237.552301,9.131653,405688.347373
4,94,1973-01-02 00:53:20,1973,1,2,-9.8540,117.4270,5.5,66.000,Indonesia,...,12,12,2,6.378599,405419.628500,-25.256921,16.700503,250.507552,6.236433,405371.503848
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
264384,264474,2022-12-31 18:04:14,2022,12,31,-23.6424,179.5146,4.5,577.986,No_Country,...,12,19,365,60.097395,384711.370474,6.419824,1.326635,19.899520,67.502620,388346.403168
264385,264475,2022-12-31 18:05:56,2022,12,31,-16.3267,-173.5390,4.7,62.895,No_Country,...,12,3,365,60.097395,384711.370474,6.419824,1.326635,19.899520,67.514230,388352.102638
264386,264476,2022-12-31 18:28:42,2022,12,31,-10.2389,66.5344,4.6,10.000,No_Country,...,12,51,365,60.097395,384711.370474,6.419824,1.326635,19.899520,67.669725,388428.430837
264387,264477,2022-12-31 18:54:49,2022,12,31,-52.5784,28.0139,4.5,10.000,No_Country,...,12,42,365,60.097395,384711.370474,6.419824,1.326635,19.899520,67.848100,388515.990345
