# Task 1 & 2: Downloading & reading the data

In [None]:
# Importing all the required libraries
import pandas    as pd
import numpy     as np
import missingno as msno
import seaborn   as sns
import matplotlib.pyplot as plt
import scipy.stats as ss

# Load the pipe-separated catalogue file into a DataFrame (1st data set: Poland)
polandData = pd.read_csv('Poland.dat', sep='|')

# Task 3: Writing a class

In [None]:
class earthquake:
    """Exploration helpers for an earthquake catalogue stored in a DataFrame.

    The plotting and statistics methods address columns by position:
    2 is used as latitude, 3 as longitude, 9 as magnitude type and 10 as
    magnitude; column 4 is presumably the depth (confirm against the file).

    Attributes:
        dfData: the DataFrame handed to the constructor.
        arr: ``dfData.to_numpy()`` snapshot used for positional access.
        df: working DataFrame filled by build_ANN_df / genToNum / colInterval.
    """

    # Marker and colour used for each magnitude type in the 3D plots.
    _MAGTYPE_STYLES = {
        'MLv': ('*', 'crimson'),
        'M': ('1', 'violet'),
        'mb': ('+', 'blue'),
        'ML': ('s', 'cyan'),
        'Mw': ('x', 'darkred'),
    }

    def __init__(self, dfData):
        self.dfData = dfData
        self.arr = self.dfData.to_numpy()
        # Start with an empty working frame so genToNum / colInterval can be
        # called even when build_ANN_df has not been called yet.
        self.df = pd.DataFrame(index=self.dfData.index)

    # Task 4: Function for missing data
    def missingData(self):
        """Print per-column and total missing-value counts and draw the
        missingno matrix of the data."""
        missing = self.dfData.isna()
        print(missing.sum())
        print(f'The total number of missing values is {missing.values.sum()}')
        msno.matrix(self.dfData, figsize=(15, 5))

    # Task 5: Function to build a DF for ANN
    def build_ANN_df(self, colList):
        """Store and return a new DataFrame holding the columns in colList.

        Repeated names are kept once, at their first position.
        """
        wanted = list(dict.fromkeys(colList))
        # .copy() so later column additions never write into dfData.
        self.df = self.dfData[wanted].copy()
        return self.df

    # Task 5: Function to get a categorical column
    def genToNum(self, col):
        """Add '<col>Num' to the working frame: integer codes produced by
        pandas.factorize for the values of dfData[col]. Returns the frame."""
        self.df[col + 'Num'] = pd.factorize(self.dfData[col])[0]
        return self.df

    # Task 5: Function to categorize data for different intervals
    def colInterval(self, colList):
        """Add '<col>Classified' = value / column maximum for each column.

        Series.max() is used instead of the builtin max(), which returns NaN
        whenever the first element is NaN and would blank the whole column.
        """
        for col in colList:
            self.df[col + 'Classified'] = self.df[col] / self.df[col].max()
        return self.df

    def _draw_event_scatter(self, highlight=None):
        """Draw the shared 3D scatter figure.

        highlight: optional (boolean mask, legend label) pair; the selected
        rows are additionally drawn as black circles.
        """
        x = self.arr[:, 2]
        y = self.arr[:, 3]
        z = self.arr[:, 4]
        mag = self.arr[:, 10]
        mag_types = self.dfData.iloc[:, 9]

        fig = plt.figure(figsize=(15, 15))
        ax = fig.add_subplot(projection='3d')

        # NOTE(review): these points use column 4 as height and column 10
        # (magnitude) as colour, yet the label and colour bar below say
        # "Depth", and the per-type markers use column 10 as height.
        # The intended mapping should be confirmed; data and strings are
        # kept exactly as they were.
        sctt = ax.scatter(x, y, z, c=mag, s=40, marker='.', label="Depth")

        if highlight is not None:
            mask, label = highlight
            if mask.any():
                ax.scatter(x[mask], y[mask], mag[mask], s=50, marker='o',
                           color='black', label=label)

        # One scatter call per magnitude type instead of one per event.
        for type_name, (marker, color) in self._MAGTYPE_STYLES.items():
            mask = (mag_types == type_name).to_numpy()
            if mask.any():
                ax.scatter(x[mask], y[mask], mag[mask], s=50, marker=marker,
                           color=color, label=type_name)

        ax.set_title('3D scatter plot for the Visualization of Earthquake')
        ax.set_xlabel(r'Latitude ($\degree$ degree)', fontsize=15, rotation=150)
        ax.set_ylabel(r'Longitude ($\degree$ degree)', fontsize=15)
        cbar = fig.colorbar(sctt, aspect=3)
        cbar.set_label("Depth (Km)", loc='top')

        # Collapse legend entries that share a label.
        handles, labels = ax.get_legend_handles_labels()
        by_label = dict(zip(labels, handles))
        ax.legend(by_label.values(), by_label.keys())

        plt.show()

    # Task 6: Function to plot the 3D Scatter Plot
    def scatterPlot(self):
        """Show the 3D scatter plot with one marker style per magnitude type."""
        self._draw_event_scatter()

    def measureRep(self, col):
        """Print the most frequent value of dfData[col] and show the 3D
        scatter plot with the rows holding that value drawn in black.

        Rows are selected through `col` itself rather than a fixed column
        position, so the highlight matches the column that was asked for.
        """
        repetition_counts = self.dfData[col].value_counts()
        maxRepValue = repetition_counts.idxmax()
        maxRepCount = repetition_counts.max()
        print(f"The value with the maximum repetition in column '{col}' is '{maxRepValue}' with a count of {maxRepCount}.")

        mask = (self.dfData[col] == maxRepValue).to_numpy()
        self._draw_event_scatter(
            highlight=(mask, "Highlited as it appeared max number of times"))

    def cumulativeNum(self):
        """Return {magnitude: number of events with magnitude >= that value}.

        Keys are in ascending order; NaN magnitudes are ignored. The result
        is also stored in self.cumulativeDict.
        """
        # np.sort returns a sorted copy; sorting the slice in place would
        # reorder column 10 of self.arr and detach magnitudes from their rows.
        mags = np.sort(self.arr[:, 10].astype(float))
        mags = mags[~np.isnan(mags)]
        total = len(mags)

        # In ascending data the first index of a value equals the number of
        # strictly smaller events, so total - index counts events >= value.
        values, first_idx = np.unique(mags, return_index=True)
        self.cumulativeDict = {
            float(v): int(total - i) for v, i in zip(values, first_idx)
        }
        return self.cumulativeDict

    def _cumulative_points(self):
        """Refresh self.m (magnitudes) and self.n (cumulative counts),
        computing the cumulative dictionary first if it does not exist."""
        if not hasattr(self, 'cumulativeDict'):
            self.cumulativeNum()
        self.m = list(self.cumulativeDict)
        self.n = list(self.cumulativeDict.values())

    def plotCumulative(self):
        """Scatter-plot log(N) against magnitude."""
        self._cumulative_points()

        plt.figure(figsize=(10, 8))
        # NOTE(review): np.log is the natural logarithm; confirm whether
        # base 10 was intended for this relationship.
        plt.scatter(self.m, np.log(self.n))
        plt.xlabel('Magnitude')
        plt.ylabel('Log(N)')
        plt.show()

    def linReg(self):
        """Fit log(N) = slope * magnitude + intercept with
        scipy.stats.linregress.

        Stores the inputs in self.xReg / self.yReg and the result as an
        array in self.result; returns the linregress result object.
        """
        self._cumulative_points()
        self.xReg = np.array(self.m)
        self.yReg = np.log(self.n)
        result = ss.linregress(self.xReg, self.yReg)
        self.result = np.array(result)

        return result

    def plotLinReg(self):
        """Plot the (magnitude, log(N)) points together with the fitted line,
        running the regression first if it has not been run yet."""
        if not hasattr(self, 'result'):
            self.linReg()

        fig = plt.figure(figsize=(10, 10))
        ax = fig.add_subplot()

        # self.result[0] is the slope, self.result[1] the intercept.
        y_gutenberg = self.result[0]*self.xReg + self.result[1]

        ax.scatter(self.xReg, self.yReg)
        ax.plot(self.xReg, y_gutenberg, color='black', linestyle='--',
                label='Gutenberg-Richter relationship : y = {} * Magnitude + {}'.format(round(self.result[0], 2), round(self.result[1], 2)))
        ax.set_title('Plot of Magnitude vs log(N) & linear regression ', fontsize=15)
        ax.set_xlabel(r'Magnitude', fontsize=15)
        ax.set_ylabel(r'Log(N)', fontsize=15)
        ax.grid()
        plt.legend(loc='upper right', fontsize=14)
        plt.show()

        
class subearthquake(earthquake):
    """Variant of `earthquake` whose build_ANN_df selects columns by position."""

    # Function which takes list of column numbers and builds a new DF with the same columns
    def build_ANN_df(self, colList):
        """Return a new DataFrame made of the columns at the given positions.

        Every entry of colList must be an integer (booleans are rejected);
        otherwise an error message is printed and None is returned. Repeated
        positions are kept once, at their first occurrence. The result is
        also stored in self.df, as the parent's build_ANN_df does.
        """
        positions = list(colList)

        # Check every entry; a flag reassigned inside a loop would only
        # reflect the last one.
        valid = all(
            isinstance(pos, (int, np.integer)) and not isinstance(pos, bool)
            for pos in positions
        )
        if not valid:
            print("***Error: Please provide a list with column numbers")
            return None

        unique_positions = list(dict.fromkeys(positions))
        self.df = self.dfData.iloc[:, unique_positions].copy()
        return self.df


In [None]:
# Wrap the Poland DataFrame in an instance of the `earthquake` class
poland = earthquake(polandData)

# Task 3 (Part B): Child Class and Inheritance

In [None]:
# Creating an instance of the child class on the same Poland DataFrame
polandChild = subearthquake(polandData)

# Positions (not names) of the columns to copy into the new DataFrame
colNum = [2,3,4,9,10]

# The child's build_ANN_df takes column positions and returns the new DataFrame
ann = polandChild.build_ANN_df(colNum)
ann.head(3)

# Task 4: Visualization of the missing values

In [None]:
# Using method 'missingData' to print the missing-value counts and draw the missingno matrix
poland.missingData()

# Task 5: Building a DF for ANN

In [None]:
# Names of the columns needed in the DF for ANN
col = ['Latitude','Longitude','Depth/km','Magnitude']

# Using method 'build_ANN_df' to build a new DF for the ANN (also kept on the instance as poland.df)
poland_ann = poland.build_ANN_df(col)
poland_ann.head(3)

In [None]:
# Using method 'genToNum' to encode the 'MagType' column as integer codes ('MagTypeNum').
# The column is added to poland.df, which is the same object as poland_ann, so it shows up below.
poland.genToNum('MagType')
poland_ann.head(3)

In [None]:
# For each listed column, 'colInterval' adds a '<name>Classified' column holding value / column maximum
col = ['Latitude','Longitude','Depth/km','Magnitude']
polandCompare = poland.colInterval(col)
polandCompare.head(10)

# Task 6: Generating 3D scatterplot

In [None]:
# Draw the 3D scatter plot with a distinct marker per magnitude type
poland.scatterPlot()

# Task 7: Max repetition and highlighting it on the plot

In [None]:
# Print the most frequent 'MagType' value and redraw the scatter plot with those events highlighted
poland.measureRep('MagType')

# Task 8: Calculate N as the cumulative number of earthquakes

In [None]:
# Build the magnitude -> cumulative count dictionary and print it
x = poland.cumulativeNum()
print(x)

# Task 9: Plotting a fig containing data points of (M,log(N))

In [None]:
# Scatter-plot log(N) against magnitude using the dictionary built in Task 8
poland.plotCumulative()

# Task 10: Performing regression analysis available in SciPy

In [None]:
# Fit a straight line to the (M, log(N)) points with scipy.stats.linregress
poland.linReg()

# Task 11: Plotting the fitted line on the plot of task 9

In [None]:
# Draw the (M, log(N)) points together with the fitted regression line
poland.plotLinReg()