In [11]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder

import warnings
# NOTE(review): with no category or module argument this suppresses every
# warning for the rest of the kernel session (deprecations included), which
# can hide real problems; consider a narrower filter.
warnings.filterwarnings('ignore')

In [2]:
def import_csv(filePath, **read_csv_kwargs):
    """
    Function for reading a CSV file

    Input:
        filePath <string> : The path to the CSV file.
        **read_csv_kwargs : Optional keyword arguments forwarded unchanged to
            pandas.read_csv (for example sep=';', encoding='latin-1',
            parse_dates=[...]). With none given, pandas' defaults apply,
            exactly as before.

    Returns : data <pd.DataFrame> : The data as a pandas' dataframe.

    Raises : whatever pandas.read_csv raises (e.g. FileNotFoundError when the
        path does not exist); no error is swallowed here.

    """
    # Forwarding the options keeps this helper usable for files that are not
    # plain comma-separated UTF-8 without callers having to bypass it.
    return pd.read_csv(filePath, **read_csv_kwargs)

In [3]:
# Load the earthquake records; the relative path is resolved against the
# kernel's current working directory.
data = import_csv('earthquakes.csv')

## Exploratory Data Analysis 

In [15]:
# EDA helper: prints shape, dtypes and missing-value counts, then plots a correlation heatmap

def EDA(data):
    """
    This function aims to help explore the dataset

    Parameters: data <pandas.DataFrame> : dataset we're studying. It is only
        read; the encoded column used for the correlation is built on a
        separate frame, so the caller's dataframe is left untouched.

    Returns: None

    Prints:
        1 - number of columns and rows
        2 - data type in each column
        3 - number of missing data per column
        4 - correlation between variables (shown as a heatmap)

    Notes:
        - If a 'Location Name' column is present it is label-encoded into
          'Location_Name_Encoded' so it can take part in the correlation;
          if it is absent the encoding step is skipped.
        - Only numeric/boolean columns enter the Pearson correlation; other
          text columns are ignored instead of making `corr` fail.
    """
    row, col = data.shape
    print("The dataset has : ", row, " rows and ", col ," columns.")


    print("\n\n--------------------------------\n\n")
    print("Data Information : ")
    # DataFrame.info() writes its report itself and returns None, so wrapping
    # it in print() would append a stray "None" line to the output.
    data.info()


    print("\n\n--------------------------------\n\n")
    print("Number of null per column:")
    print(data.isna().sum())


    print("\n\n--------------------------------\n\n")
    print("Correlation matrix:")

    # Build the correlation input on a new frame (drop/assign both return
    # copies) so running EDA never adds columns to the caller's dataframe.
    if 'Location Name' in data.columns:
        encoded_location = LabelEncoder().fit_transform(data['Location Name'])
        corr_input = (
            data
            .drop(columns=['Location Name'])
            .assign(Location_Name_Encoded=encoded_location)
        )
    else:
        corr_input = data

    # Restrict to numeric/boolean columns: recent pandas versions raise on
    # non-numeric columns in corr() instead of silently dropping them.
    corr_matrix = (
        corr_input
        .select_dtypes(include=['number', 'bool'])
        .corr(method='pearson')
    )

    # Plot the correlation matrix as a heatmap
    plt.figure(figsize=(10, 8))
    sns.heatmap(corr_matrix, annot=False, fmt='.2f', cmap='coolwarm', cbar=True, square=True)
    plt.title("Correlation Matrix (Pearson Method)")
    plt.tight_layout()
    plt.show()

In [None]:
# Run the exploratory data analysis on the loaded dataset: prints the text
# summaries and renders the correlation heatmap.
EDA(data)