In [1]:
import pandas as pd 
import requests
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats

In [2]:
def fetch_data_from_url(url, timeout=30):
    """Download a web page and return the first HTML table on it.

    Parameters
    ----------
    url : str
        Address of the page to download.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30), so an
        unresponsive host cannot hang the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        The first ``<table>`` found on the page, with its row labels shifted
        to start at 1 instead of 0.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    ValueError
        Raised by ``pd.read_html`` when the page contains no table.
    """
    # Local import keeps this cell self-contained.
    from io import BytesIO

    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error page instead of trying to parse it as data.
    response.raise_for_status()

    # Hand pandas a file-like object; passing the raw markup directly is
    # deprecated in newer pandas releases.
    tables = pd.read_html(BytesIO(response.content))

    # Only the first table on the page is used.
    df = tables[0]
    df.index += 1
    return df

In [3]:
# One catalogue page per year on seismonepal.gov.np; build the addresses from
# the year range instead of listing all of them by hand.
BASE_URL = "http://seismonepal.gov.np/earthquakes"
FIRST_YEAR = 1994
LAST_YEAR = 2019  # inclusive

urls = ["{}/{}".format(BASE_URL, year) for year in range(FIRST_YEAR, LAST_YEAR + 1)]

# Download every yearly table; `result` ends up with one DataFrame per URL,
# in the same (chronological) order as `urls`.
result = []
for url in urls:
    result.append(fetch_data_from_url(url))

In [257]:
# Stack the per-year tables into a single frame. Row labels are kept as-is,
# so they repeat from one yearly table to the next at this point.
df = pd.concat(objs=result)

In [261]:
# Give the combined frame a fresh 0..n-1 index; the previous row labels are
# preserved as a new 'index' column.
df = df.reset_index(drop=False)

In [265]:
# Show the column labels of the combined frame
df.columns

Index(['index', 'Date', 'Time', 'Latitude', 'Longitude', 'Magnitude(ML)',
       'Remarks', 'Epicenter', 'Form'],
      dtype='object')

In [266]:
# Discard the columns that are not needed further down
df = df.drop(columns=['Form', 'Remarks', 'index'])

In [268]:
# Shorten the verbose column labels. Renaming by label (rather than assigning
# a positional list) keeps every new name attached to the intended column
# even if the column order ever changes.
df = df.rename(columns={
    'Latitude': 'Lat',
    'Longitude': 'Long',
    'Magnitude(ML)': 'Magnitude',
})

In [269]:
# Preview the first five rows after renaming
df.head(5)

Unnamed: 0,Date,Time,Lat,Long,Magnitude,Epicenter
0,B.S:2051-8-28 A.D:1994-12-13,Local:11:00UTC:N/A,28.7,82.88,4.6,Rukum
1,B.S:2051-8-27 A.D:1994-12-12,Local:11:00UTC:N/A,29.84,80.69,4.6,Darchula
2,B.S:2051-8-12 A.D:1994-11-27,Local:11:00UTC:N/A,29.72,81.56,4.5,Bajura
3,B.S:2051-8-6 A.D:1994-11-21,Local:11:00UTC:N/A,29.54,81.15,4.2,Bajhang
4,B.S:2051-7-8 A.D:1994-10-24,Local:11:00UTC:N/A,28.92,82.0,4.7,Jajarkot


In [270]:
# Keep only the Gregorian (A.D.) part of each date string.
# "B.S:2051-8-28 A.D:1994-12-13".split(":") gives
# ["B.S", "2051-8-28 A.D", "1994-12-13"], so everything from position 2
# onward is the A.D. date. Each entry stays a one-element list so that
# wrapping `dates` in a DataFrame yields a single column.
dates = [line.split(":")[2:] for line in df.Date]

In [271]:
# Pull the local time out of strings shaped like "Local:11:00UTC:N/A".
# Each entry is a one-element list (e.g. ["11:00"]) so that wrapping `Times`
# in a DataFrame yields a single column.
Times = []
for line in df.Time:
    _, marker, after_local = line.partition("Local:")
    if not marker:
        # No "Local:" tag at all: record an empty row, which becomes a
        # missing value once converted to a DataFrame.
        Times.append([])
        continue
    # Text before "UTC:" is the local time. If the "UTC:" tag is absent the
    # whole remainder is kept instead of raising an IndexError.
    Times.append([after_local.partition("UTC:")[0]])

In [272]:
# Wrap the parsed date and time lists in single-column DataFrames
Dates = pd.DataFrame(data=dates)
Dates.columns = ['Date']
Times = pd.DataFrame(data=Times)
Times.columns = ['Time']

In [273]:
# Build one "<date> <time>" string per row, ready for datetime parsing
Dates['DateTime'] = Dates['Date'].add(' ').add(Times['Time'].map(str))

In [274]:
# Number of parsed rows
len(Dates.index)

965

In [283]:
# Side-by-side frame for eyeballing the combined string ('x') against the
# raw Date text it was derived from ('y')
test = pd.DataFrame({'x': Dates['DateTime'], 'y': df['Date']})

In [286]:
# Parse the combined strings into timestamps; values that cannot be parsed
# become NaT instead of raising
Dates['DateTime'] = pd.to_datetime(Dates.DateTime, errors='coerce')

In [288]:
# Attach the parsed timestamps to the main frame (rows are matched on index label)
df['DateTime'] = Dates.DateTime

In [290]:
# Preview the first twenty rows with the new DateTime column
df.head(n=20)

Unnamed: 0,Date,Time,Lat,Long,Magnitude,Epicenter,DateTime
0,B.S:2051-8-28 A.D:1994-12-13,Local:11:00UTC:N/A,28.7,82.88,4.6,Rukum,1994-12-13 11:00:00
1,B.S:2051-8-27 A.D:1994-12-12,Local:11:00UTC:N/A,29.84,80.69,4.6,Darchula,1994-12-12 11:00:00
2,B.S:2051-8-12 A.D:1994-11-27,Local:11:00UTC:N/A,29.72,81.56,4.5,Bajura,1994-11-27 11:00:00
3,B.S:2051-8-6 A.D:1994-11-21,Local:11:00UTC:N/A,29.54,81.15,4.2,Bajhang,1994-11-21 11:00:00
4,B.S:2051-7-8 A.D:1994-10-24,Local:11:00UTC:N/A,28.92,82.0,4.7,Jajarkot,1994-10-24 11:00:00
5,B.S:2051-7-6 A.D:1994-10-22,Local:11:00UTC:N/A,29.0,82.26,4.6,Jajarkot,1994-10-22 11:00:00
6,B.S:2051-6-10 A.D:1994-09-25,Local:11:00UTC:N/A,28.34,87.35,4.8,Sankhuwasabha,1994-09-25 11:00:00
7,B.S:2051-5-16 A.D:1994-08-31,Local:11:00UTC:N/A,26.09,79.51,6.0,India,1994-08-31 11:00:00
8,B.S:2051-4-3 A.D:1994-07-17,Local:11:00UTC:N/A,29.37,81.52,5.5,Bajura,1994-07-17 11:00:00
9,B.S:2051-3-11 A.D:1994-06-25,Local:05:45UTC:00:00,27.75,86.16,5.1,Dolakha,1994-06-25 05:45:00


In [291]:
# The raw text columns are superseded by DateTime, so remove them
df = df.drop(columns=['Date', 'Time'])

In [294]:
# NOTE(review): the saved output under this cell lists rows in DateTime order,
# and its execution count (294) is higher than that of the sort_values cell
# that follows (293), so the notebook was run out of order. On a fresh
# top-to-bottom run this preview would show the unsorted frame instead.
df.head()

Unnamed: 0,Lat,Long,Magnitude,Epicenter,DateTime
9,27.75,86.16,5.1,Dolakha,1994-06-25 05:45:00
8,29.37,81.52,5.5,Bajura,1994-07-17 11:00:00
7,26.09,79.51,6.0,India,1994-08-31 11:00:00
6,28.34,87.35,4.8,Sankhuwasabha,1994-09-25 11:00:00
5,29.0,82.26,4.6,Jajarkot,1994-10-22 11:00:00


In [293]:
# Order the events chronologically, oldest first; the existing row labels
# travel with their rows
df = df.sort_values('DateTime', ascending=True)

In [297]:
# Drop every row whose epicenter is one of the listed places outside Nepal.
# Only exact matches on the Epicenter text are removed.
check = ['India',"Bangladesh","Bhuj/India","Bhutan","Chamauli/India","Tibet"]

# A single vectorized membership test replaces the nested loops, which
# re-applied the same filter once per remaining row for each place.
df = df[~df.Epicenter.isin(check)]

In [301]:
# Save the cleaned catalogue to disk; with to_csv's defaults the row index
# is written as the first CSV column
df.to_csv(path_or_buf='earthquake.csv')