In [118]:
import pandas as pd
import numpy as np
from datetime import date

In [57]:
# Load the EM-DAT export. header=1 makes the second line of the file the
# column-name row; the line before it is skipped.
df = pd.read_csv('EMDAT_Indochina.csv', sep=',', encoding='ISO-8859-1',header=1)

In [58]:
# Show each column's non-null count and dtype to spot missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 388 entries, 0 to 387
Data columns (total 19 columns):
Start date                 388 non-null object
End date                   386 non-null object
Country                    386 non-null object
ISO                        386 non-null object
Location                   363 non-null object
Latitude                   55 non-null float64
Longitude                  57 non-null float64
Magnitude value            147 non-null float64
Magnitude scale            386 non-null object
Disaster type              386 non-null object
Disaster subtype           386 non-null object
Associated disaster        386 non-null object
Associated disaster2       386 non-null object
Total deaths               386 non-null float64
Total affected             386 non-null float64
Total damage ('000 US$)    386 non-null float64
insured_losses             386 non-null float64
Disaster name              107 non-null object
Disaster No.               386 non-null object

In [59]:
# List the exact column labels; later cells index the frame by these strings.
print(df.columns.tolist())

['Start date', 'End date', 'Country', 'ISO', 'Location', 'Latitude', 'Longitude', 'Magnitude value', 'Magnitude scale', 'Disaster type', 'Disaster subtype', 'Associated disaster', 'Associated disaster2', 'Total deaths', 'Total affected', "Total damage ('000 US$)", 'insured_losses', 'Disaster name', 'Disaster No.']


In [60]:
# Keep only the rows whose 'Country' is exactly 'Viet Nam'.
vnDf = df[df['Country'] == 'Viet Nam']

In [70]:
# Viet Nam rows whose 'Disaster type' is 'drought', compared case-insensitively.
droughtVnDf = vnDf[vnDf['Disaster type'].str.lower() == 'drought']

In [72]:
# Drought rows across every country in the file (same case-insensitive match).
droughtDf = df[df['Disaster type'].str.lower() == 'drought']

In [74]:
# (rows, columns) of the all-country drought subset.
print(droughtDf.shape)

(28, 19)


In [75]:
# (rows, columns) of the Viet Nam drought subset.
print(droughtVnDf.shape)

(6, 19)


In [77]:
# Narrow the Viet Nam frame to the five columns used below.
# NOTE: this rebinds vnDf; frames sliced from it before this cell (e.g.
# droughtVnDf) still carry all of the original columns.
vnDf = vnDf[['Start date', 'End date', 'Country', 'Location', 'Disaster type']]

In [79]:
# Viet Nam rows whose 'Disaster type' is exactly 'Flood'.
# NOTE(review): this match is case-sensitive, whereas the drought filters
# lower-case the column first -- confirm the data never spells it differently.
floodVnDf = vnDf[vnDf['Disaster type'] == 'Flood']

In [104]:
def str2Date(str, start = True):
    """Parse a 'day/month/year' string into numeric day, month and year.

    The day and/or month fields may be empty (e.g. '//1997' or '/5/1997').
    An empty field is filled with the earliest value (day 1, month 1) when
    the string is a start date and with the latest value (day 31, month 12)
    when it is an end date.

    Parameters
    ----------
    str : str
        Date text with three '/'-separated fields. The name shadows the
        built-in ``str`` inside this function; it is kept so existing
        callers are unaffected.
    start : bool, default True
        True to treat the text as a start date, anything falsy to treat it
        as an end date.

    Returns
    -------
    dict
        ``{'day': int, 'month': int, 'year': int}``.

    Raises
    ------
    IndexError
        If the text has fewer than three '/'-separated fields.
    ValueError
        If a non-empty field is not an integer, or if an end date with day 31
        carries a month outside 1..12.
    """
    # Imported locally so the function works regardless of which notebook
    # cells have been executed.
    import calendar

    parts = str.split('/')
    day = int(parts[0]) if parts[0] != '' else (1 if start else 31)
    month = int(parts[1]) if parts[1] != '' else (1 if start else 12)
    year = int(parts[2])

    if not start and day == 31:
        # Clamp to the real last day of the month. monthrange accounts for
        # leap years, so February yields 29 when appropriate instead of a
        # hard-coded 28.
        day = calendar.monthrange(year, month)[1]
    return {'day': day, 'month': month, 'year': year}

### Calculate the duration of a disaster in days

In [119]:
def calDurationOfDisaster(transaction):
    """Return the number of days between a record's start and end dates.

    ``transaction`` is indexed by 'Start date' and 'End date'. Both values are
    parsed by ``str2Date`` (the first as a start date, the second as an end
    date), and the result is the end date minus the start date, so a record
    that starts and ends on the same day yields 0.
    """
    begin = str2Date(transaction['Start date'], True)
    finish = str2Date(transaction['End date'], False)
    # str2Date returns exactly the keys 'day', 'month' and 'year', which are
    # also the keyword names date() accepts.
    firstDay = date(**begin)
    lastDay = date(**finish)
    return (lastDay - firstDay).days

In [124]:
# Durations (in days) of the first 20 flood records.
# NOTE: the 20 is hard-coded; iloc raises IndexError if floodVnDf has fewer rows.
print([calDurationOfDisaster(floodVnDf.iloc[i]) for i in range(20)])

[30, 0, 0, 30, 29, 30, 0, 0, 18, 30, 3, 6, 4, 3, 10, 105, 28, 43, 5, 7]


In [112]:
# Durations (in days) of the drought records; 6 is the row count printed for
# droughtVnDf above.
print([calDurationOfDisaster(droughtVnDf.iloc[i]) for i in range(6)])

[364, 211, 364, 244, 244, 455]


In [125]:
def getMonthOfDisaster(transaction):
    """Summarise which months a disaster record starts and ends in.

    Parses ``transaction['Start date']`` and ``transaction['End date']`` with
    ``str2Date`` and returns a dict with:

    * 'startMonth' -- month number of the start date
    * 'endMonth'   -- month number of the end date
    * 'inYear'     -- True when both dates fall in the same calendar year
    """
    opening = str2Date(transaction['Start date'], True)
    closing = str2Date(transaction['End date'], False)
    sameYear = opening['year'] == closing['year']
    return dict(startMonth=opening['month'], endMonth=closing['month'], inYear=sameYear)

In [116]:
# Spot-check the helper on the first flood record.
getMonthOfDisaster(floodVnDf.iloc[0])

{'endMonth': 12, 'inYear': True, 'startMonth': 12}

### Disaster statistics by month

In [149]:
def statisticByMonth(data):
    """Count, for each calendar month, how many disaster records cover it.

    Each row of ``data`` is passed to ``getMonthOfDisaster``. Every month from
    the start month to the end month (inclusive) gets one count for that row.
    When the start and end dates fall in different years, the start month
    through December and January through the end month are counted.

    NOTE: only a single year boundary is modelled. A record spanning more than
    two calendar years does not add extra counts for the whole years in
    between.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows must provide 'Start date' and 'End date' in the format that
        ``str2Date`` accepts.

    Returns
    -------
    numpy.ndarray
        Float array of length 12; index 0 is January and index 11 is December.
    """
    statByMonth = np.zeros(12)
    for _, transaction in data.iterrows():
        monthOfDisaster = getMonthOfDisaster(transaction)
        startMonth = monthOfDisaster['startMonth']
        endMonth = monthOfDisaster['endMonth']
        if monthOfDisaster['inYear']:
            # Months are 1-based, so month m lives at index m - 1.
            statByMonth[startMonth - 1:endMonth] += 1
        else:
            # Wraps past December: the tail of the start year plus the head of
            # the end year.
            statByMonth[startMonth - 1:] += 1
            statByMonth[:endMonth] += 1
    # The former `np.delete(statByMonth, 0)` call was removed: np.delete
    # returns a new array and never modifies its argument, so the discarded
    # call had no effect on the returned 12-element result.
    return statByMonth

In [151]:
# Per-month counts for the Viet Nam drought records (index 0 = January).
statDrought = statisticByMonth(droughtVnDf)
print(statDrought)

[4. 4. 3. 3. 5. 5. 4. 4. 4. 4. 4. 6.]


In [152]:
# Per-month counts for the Viet Nam flood records (index 0 = January).
statFlood = statisticByMonth(floodVnDf)
print(statFlood)

[ 1.  0.  0.  1.  2.  3. 13. 14. 23. 32. 20.  8.]
