In [1]:
from io import StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import scipy.stats as stats
import seaborn as sns

In [2]:
#Function to fetch the first HTML table of a page as a DataFrame
def fetch_data_from_url(url, timeout=30):
    """Download a web page and return its first HTML table.

    Parameters
    ----------
    url : str
        Address of the page to download.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout a stalled server would block the notebook
        indefinitely.

    Returns
    -------
    pandas.DataFrame
        The first table parsed from the page, with its row labels shifted
        up by one (a default 0-based index therefore starts at 1).

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (4xx/5xx).
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error page instead of trying to parse it as data.
    response.raise_for_status()
    # read_html expects a path, URL or file-like object; passing literal HTML
    # is deprecated in recent pandas, so wrap the page text in StringIO.
    df_list = pd.read_html(StringIO(response.text))
    # Only the first table on the page is used.
    df = df_list[0]
    df.index += 1
    return df

In [3]:
# One catalogue page per year, 1994 through 2019 inclusive. The URLs differ
# only in the trailing year, so they are generated from the year range.
urls = ["http://seismonepal.gov.np/earthquakes/{}".format(year)
        for year in range(1994, 2020)]

#getting all the data from 1994-2019 in one place
# `result` holds one DataFrame per URL, in the same (chronological) order.
result = [fetch_data_from_url(url) for url in urls]

In [4]:
# Stack the per-year tables vertically into a single DataFrame. Row labels are
# kept as they are in each table, so the same label can occur once per year.
df = pd.concat(objs=result, axis=0)

In [5]:
# Discard the two columns that are not needed for the analysis
df = df.drop(columns=['Form', 'Remarks'])

In [6]:
# Give the remaining six columns short, uniform names (assigned by position)
column_names = ['Date', 'Time', 'Lat', 'Long', 'Magnitude', 'Epicenter']
df.columns = column_names

In [7]:
# Preview the first five rows to confirm the renamed columns
df.head(n=5)

Unnamed: 0,Date,Time,Lat,Long,Magnitude,Epicenter
1,B.S:2051-8-28 A.D:1994-12-13,Local:11:00UTC:N/A,28.7,82.88,4.6,Rukum
2,B.S:2051-8-27 A.D:1994-12-12,Local:11:00UTC:N/A,29.84,80.69,4.6,Darchula
3,B.S:2051-8-12 A.D:1994-11-27,Local:11:00UTC:N/A,29.72,81.56,4.5,Bajura
4,B.S:2051-8-6 A.D:1994-11-21,Local:11:00UTC:N/A,29.54,81.15,4.2,Bajhang
5,B.S:2051-7-8 A.D:1994-10-24,Local:11:00UTC:N/A,28.92,82.0,4.7,Jajarkot


In [17]:
#change the Date column to a proper format(removing the nepali date)
dates = []
for line in df.Date:
    # Entries look like "B.S:2051-8-28 A.D:1994-12-13". Keep only the text
    # after the last "A.D:" marker, i.e. the Gregorian date; the B.S. part is
    # dropped. Anchoring on the marker always produces exactly one value per
    # row, unlike splitting on every ":".
    ad_date = line.rpartition("A.D:")[2].strip()
    # Stored as a one-element row so pd.DataFrame(dates) gives a single column.
    dates.append([ad_date])

[['1994-12-13'],
 ['1994-12-12'],
 ['1994-11-27'],
 ['1994-11-21'],
 ['1994-10-24'],
 ['1994-10-22'],
 ['1994-09-25'],
 ['1994-08-31'],
 ['1994-07-17'],
 ['1994-06-25'],
 ['1995-10-04'],
 ['1995-08-07'],
 ['1995-08-07'],
 ['1995-08-03'],
 ['1995-06-21'],
 ['1995-06-11'],
 ['1995-04-17'],
 ['1995-02-18'],
 ['1995-01-30'],
 ['1995-01-30'],
 ['1995-01-29'],
 ['1995-01-19'],
 ['1996-12-29'],
 ['1996-12-22'],
 ['1996-12-03'],
 ['1996-11-08'],
 ['1996-11-04'],
 ['1996-10-16'],
 ['1996-07-10'],
 ['1996-04-26'],
 ['1996-02-28'],
 ['1997-12-08'],
 ['1997-11-27'],
 ['1997-11-26'],
 ['1997-10-24'],
 ['1997-10-11'],
 ['1997-05-28'],
 ['1997-05-21'],
 ['1997-04-07'],
 ['1997-03-24'],
 ['1997-02-18'],
 ['1997-02-10'],
 ['1997-02-03'],
 ['1997-02-03'],
 ['1997-02-01'],
 ['1997-01-31'],
 ['1997-01-31'],
 ['1997-01-05'],
 ['1997-01-01'],
 ['1998-11-26'],
 ['1998-10-16'],
 ['1998-09-10'],
 ['1998-09-06'],
 ['1998-09-03'],
 ['1998-07-15'],
 ['1998-06-27'],
 ['1998-05-16'],
 ['1998-05-10'],
 ['1998-02-28'

In [10]:
# Change the Time column to standard format
Times = []
for line in df.Time:
    # Entries look like "Local:11:00UTC:N/A": the local time is the text
    # between the "Local:" and "UTC:" markers. partition() never raises when a
    # marker is absent, so a malformed entry yields a string rather than an
    # IndexError or an empty row.
    after_local = line.partition("Local:")[2]
    local_time = after_local.partition("UTC:")[0].strip()
    # Stored as a one-element row so pd.DataFrame(Times) gives a single column.
    Times.append([local_time])

In [12]:
#Convert the time and date lists to single-column pandas DataFrames.
# Naming the column in the constructor (rather than assigning .columns
# afterwards) also works when the lists are empty.
Times = pd.DataFrame(Times, columns=['Time'])
Dates = pd.DataFrame(dates, columns=['Date'])

In [14]:
# Join each date with its time, separated by one space, into a new
# 'DateTime' text column on the Dates frame
time_text = Times['Time'].map(str)
Dates['DateTime'] = Dates['Date'] + ' ' + time_text

In [15]:
# Parse the combined text into datetime values; errors='coerce' turns any
# string that cannot be parsed into NaT instead of raising
parsed_datetimes = pd.to_datetime(Dates['DateTime'], errors='coerce')
Dates['DateTime'] = parsed_datetimes

In [18]:
# Replace the row labels with a fresh 0-based index; the old labels are kept
# in a new 'index' column. Both df and df_1 refer to the resulting frame.
df = df_1 = df.reset_index()

In [19]:
# Merge the datetime column to the main dataframe.
# Column assignment from a Series aligns on row labels, so this relies on df
# and Dates sharing the same index -- presumably the 0-based index produced by
# the preceding reset_index() cell; rows without a matching label get NaT.
df['DateTime'] = Dates['DateTime']

In [20]:
# Remove the raw Date/Time text and the leftover 'index' column now that the
# parsed DateTime column is in place
df = df.drop(columns=['Date', 'Time', 'index'])

In [21]:
# Save the cleaned table to the working directory. index=True is the default,
# spelled out here: the row index is written as the first, unnamed column.
df.to_csv('earthquake.csv', index=True)