In [1]:
import  urllib.request
from datetime import datetime
import pandas as pd
import numpy as np
import os
 
def download(province_id):
    url="https://www.star.nesdis.noaa.gov/smcd/emb/vci/VH/get_provinceData.php?country=UKR&provinceID=%d&year1=1981&year2=2017&type=Mean"%province_id
    vhi_url = urllib.request.urlopen(url)
    #delete old files
    del_old_files(province_id)
    out = open('doc/vhi_{0}_{1}.csv'.format(province_id,datetime.strftime(datetime.now(),"%Y.%m.%d_%H%M")),"wb")
    data = vhi_url.read()[230:]
    prevline = vhi_url.readline()
    out.write(data)
    out.close()
    print("VHI %d is downloaded..."%province_id)
def del_old_files(province_id):
    files = os.listdir('doc/')
    old_files = filter(lambda x: x.startswith('vhi_%d_'%province_id), files)
    for file in old_files:
        os.remove('doc/%s'%file)
       
def read_to_frame(directory):
    import re
    allfiles = os.listdir(directory)
    #filter all files
    allfiles = filter(lambda x: x.startswith('vhi_'), allfiles)
    #create empty main frame
    main_frame = pd.DataFrame({'year':[],'week':[],'SMN':[],'SMT':[],'VCI':[],'TCI':[],'VHI':[],'province_id':[]})
    for file in allfiles:
        df = pd.read_table('%s/%s'%(directory, file),
                           names=['year','week','SMN','SMT','VCI','TCI','VHI'],
                           sep='[ ,]+',
                           engine = 'python')
        #get ID with file name
        province_id =  re.search(r'\d+', file).group(0)
        df['province_id'] = setNewProvinceIndex(int(province_id))
        #add main frame to df
        main_frame = pd.concat([main_frame,df], ignore_index=True)
    return main_frame
 
def extremum(frame, year, province_id):
    min = frame[(frame['year'] == year) & (frame['province_id'] == province_id)]['VHI'].min()
    max = frame[(frame['year'] == year) & (frame['province_id'] == province_id)]['VHI'].max()
    print('Minimum VHI for province %s on %s year'%(province_id, year))
    print(frame[(frame['VHI'] == min) & (frame['year'] == year) & (frame['province_id'] == province_id)])
   
    print('Maximum VHI for province %s on %s year'%(province_id, year))
    print(frame[(frame['VHI'] == max) & (frame['year'] == year) & (frame['province_id'] == province_id)])
    print('-------------')
def droughts(frame, province_id):
    print('Droughts VHI < 15')
    print(frame[(frame['VHI'] < 15) & (frame['province_id'] == province_id)])
    print('-------------')
def mod_droughts(frame, province_id):
    print('Moderete droughts VHI < 35')
    print(frame[(frame['VHI'] < 35) & (frame['province_id'] == province_id)])
    print('-------------')
def setNewProvinceIndex(old_index):
    reindex = {
        1:22,
        2:24,
        3:23,
        4:25,
        5:3,
        6:4,
        7:8,
        8:19,
        9:20,
        10:21,
        11:9,
        12:26,
        13:10,
        14:11,
        15:12,
        16:13,
        17:14,
        18:15,
        19:16,
        20:27,
        21:17,
        22:18,
        23:6,
        24:1,
        25:2,
        26:7,
        27:5
            }
    if old_index in reindex:
        return reindex[old_index]
    return np.nan
#for province_id in range(1, 28):
    #download(province_id)
frame = read_to_frame('doc/') 
frame

Unnamed: 0,SMN,SMT,TCI,VCI,VHI,province_id,week,year
0,,,,,,21.0,VHI,CI
1,0.290,288.83,89.45,16.20,52.83,21.0,35,1981
2,0.294,289.84,70.22,27.17,48.70,21.0,36,1981
3,0.291,290.14,53.92,36.19,45.06,21.0,37,1981
4,0.285,289.86,43.06,42.20,42.63,21.0,38,1981
5,0.275,288.79,38.47,46.12,42.30,21.0,39,1981
6,0.258,286.90,40.50,47.06,43.78,21.0,40,1981
7,0.232,284.22,46.78,44.43,45.60,21.0,41,1981
8,0.199,280.66,63.89,38.04,50.97,21.0,42,1981
9,0.158,276.35,78.62,28.35,53.49,21.0,43,1981


In [4]:
frame = read_to_frame('doc/')
frame[(frame['VHI'] < 15) & (frame['province_id'] == 7)]

Unnamed: 0,SMN,SMT,TCI,VCI,VHI,province_id,week,year
31833,0.283,308.65,4.34,24.17,14.26,7.0,27,2007
31834,0.27,308.87,3.74,23.03,13.39,7.0,28,2007
31835,0.261,308.88,3.22,23.44,13.33,7.0,29,2007
31836,0.249,308.81,2.66,23.47,13.06,7.0,30,2007
31837,0.236,308.75,2.75,23.51,13.13,7.0,31,2007
31838,0.223,308.57,2.09,22.92,12.51,7.0,32,2007
31839,0.212,308.29,1.8,21.31,11.55,7.0,33,2007
31840,0.201,307.86,1.65,20.12,10.88,7.0,34,2007
31841,0.194,306.87,1.68,20.45,11.06,7.0,35,2007
31842,0.187,305.11,1.77,22.32,12.05,7.0,36,2007


In [3]:
frame = read_to_frame('doc/')
extremum(frame, 1998, 7)
droughts(frame, 7)
mod_droughts(frame, 7)

Minimum VHI for province 7 on 1998 year
Empty DataFrame
Columns: [SMN, SMT, TCI, VCI, VHI, province_id, week, year]
Index: []
Maximum VHI for province 7 on 1998 year
Empty DataFrame
Columns: [SMN, SMT, TCI, VCI, VHI, province_id, week, year]
Index: []
-------------
Droughts VHI < 15
         SMN     SMT   TCI    VCI    VHI  province_id week  year
31833  0.283  308.65  4.34  24.17  14.26          7.0   27  2007
31834  0.270  308.87  3.74  23.03  13.39          7.0   28  2007
31835  0.261  308.88  3.22  23.44  13.33          7.0   29  2007
31836  0.249  308.81  2.66  23.47  13.06          7.0   30  2007
31837  0.236  308.75  2.75  23.51  13.13          7.0   31  2007
31838  0.223  308.57  2.09  22.92  12.51          7.0   32  2007
31839  0.212  308.29  1.80  21.31  11.55          7.0   33  2007
31840  0.201  307.86  1.65  20.12  10.88          7.0   34  2007
31841  0.194  306.87  1.68  20.45  11.06          7.0   35  2007
31842  0.187  305.11  1.77  22.32  12.05          7.0   36  2007
3