Lab Work №1

In [44]:
import os
import pandas as pd
from datetime import datetime
from urllib.request import urlopen

In [45]:
# Constants
country = 'UKR'
year_1 = str(1981)
year_2 = str(2020)
url = 'https://www.star.nesdis.noaa.gov/smcd/emb/vci/VH/get_TS_admin.php?'

In [46]:
# function which according to the province number downloads the following dataset
def download_data(province_n):
    # creating special url using which we can download specific data we actually need(parametrised)
    # using concatenation operation to add strings
    URL = url + 'country=' + country + '&provinceID=' + str(
        province_n) + '&year1=' + year_1 + '&year2=' + year_2 + '&type=Mean'
    # urlopen allows to open web pages
    open_url = urlopen(URL)
    # using datetime to get data and time
    now = datetime.now()  # current date and time
    # variable that allows to get current date months Year Hour Minute Second method: strftime
    current_date_and_time = now.strftime('%d%m%Y%H%M%S')
    # creating file, where I will save the data
    myFile = open(str(province_n) + ' ' + current_date_and_time + '.csv',
        'w')
    # getting data from the web page
    open_data = str(open_url.read())
    open_data = open_data.split('<pre>')[1]
    open_data = open_data[:open_data.find('</pre>')]
    open_data = open_data.replace('\\n', '\n')
    # putting data i just got by .read() into file, which I created
    myFile.write(open_data)
    # closing the file
    myFile.close()

In [47]:
for i in range(1, 28):
        download_data(i)

In [48]:
# dict to associate indexes with regions
obl_dict = {1: 'Cherkasy', 2: 'Chernihiv', 3: 'Chernivtsi', 4: 'Crimea', 5: 'Dnipropetrovsk', 6: 'Donetsk',
                  7: 'Ivano-Frankivsk', 8: 'Kharkiv', 9: 'Kherson', 10: 'Khmelnylskyy', 11: 'Kiev', 12: 'Kiev_city',
                  13: 'Kirovohrad', 14: 'Luhansk', 15: 'Lviv', 16: 'Mykolayiv', 17: 'Odessa', 18: 'Poltava',
                  19: 'Rivne', 20: 'Sevastopol', 21: 'Sumy', 22: 'Ternopil', 23: 'Transcarpatia', 24: 'Vinnytsya',
                  25: 'Volyn', 26: 'Zaporizhzhya', 27: 'Zhytomir'}

In [49]:
def form_frame_by_id(ID, path='./'):
    """
    function, which creates Dataframe according to the ID
    """
    files_list = os.listdir(path)
    files_list = list(filter(lambda s: s.endswith('.csv'), files_list))
    file_name = list(filter(lambda s: s.split()[0] == str(ID), files_list))[0]
    # opening dataFrame
    df = pd.read_csv(path + file_name, header=0)
    # colums old names
    colums_names = df.columns.values
    # changing old names for the new ones
    df = df.rename(
        columns={colums_names[0]: 'Year', colums_names[1]: 'Week', colums_names[2]: 'NDVI', colums_names[3]: 'BT',
                 colums_names[4]: 'VC', colums_names[5]: 'TC', colums_names[6]: 'VHI'})
    df.drop(columns=[colums_names[7]], inplace=True)
    # returning DataFrame created for our city
    return df


In [50]:
def frame_from_directory(path='./'):
    df = pd.DataFrame()
    for idx, reg in obl_dict.items():
        reg_df = form_frame_by_id(idx, path)
        reg_df['Region'] = reg
        df = df.append(reg_df, ignore_index=True)
    return df
        
df = frame_from_directory()

In [51]:
!ls -la

total 5128
drwxr-xr-x@ 34 aleksander  staff   1088 Nov 26 16:43 [1m[36m.[m[m
drwx------@ 40 aleksander  staff   1280 Nov 26 16:16 [1m[36m..[m[m
drwxr-xr-x  12 aleksander  staff    384 Nov 26 15:53 [1m[36m.git[m[m
-rw-r--r--   1 aleksander  staff   1799 Nov 26 15:53 .gitignore
drwxr-xr-x   3 aleksander  staff     96 Nov 26 16:21 [1m[36m.ipynb_checkpoints[m[m
-rw-r--r--@  1 aleksander  staff  91520 Nov 26 16:42 1 26112020164245.csv
-rw-r--r--@  1 aleksander  staff  91520 Nov 26 16:42 10 26112020164253.csv
-rw-r--r--@  1 aleksander  staff  91520 Nov 26 16:42 11 26112020164254.csv
-rw-r--r--@  1 aleksander  staff  91520 Nov 26 16:42 12 26112020164255.csv
-rw-r--r--@  1 aleksander  staff  91520 Nov 26 16:42 13 26112020164256.csv
-rw-r--r--@  1 aleksander  staff  91520 Nov 26 16:42 14 26112020164257.csv
-rw-r--r--@  1 aleksander  staff  91520 Nov 26 16:42 15 26112020164258.csv
-rw-r--r--@  1 aleksander  staff  91520 Nov 26 16:42 16 26112020164258.csv
-rw-r--r--@

In [52]:
df.to_csv('data.csv', index=False)

In [53]:
df

Unnamed: 0,Year,Week,NDVI,BT,VC,TC,VHI,Region
0,1981,2,-1.0,-1.0,-1.0,-1.0,-1.0,Cherkasy
1,1981,3,-1.0,-1.0,-1.0,-1.0,-1.0,Cherkasy
2,1981,4,-1.0,-1.0,-1.0,-1.0,-1.0,Cherkasy
3,1981,5,-1.0,-1.0,-1.0,-1.0,-1.0,Cherkasy
4,1981,6,-1.0,-1.0,-1.0,-1.0,-1.0,Cherkasy
...,...,...,...,...,...,...,...,...
56128,2020,48,-1.0,-1.0,-1.0,-1.0,-1.0,Zhytomir
56129,2020,49,-1.0,-1.0,-1.0,-1.0,-1.0,Zhytomir
56130,2020,50,-1.0,-1.0,-1.0,-1.0,-1.0,Zhytomir
56131,2020,51,-1.0,-1.0,-1.0,-1.0,-1.0,Zhytomir


In [54]:
# Reindex for convenience
df_reindexed = df.set_index(['Region', 'Year', 'Week'])
df_reindexed

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,NDVI,BT,VC,TC,VHI
Region,Year,Week,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Cherkasy,1981,2,-1.0,-1.0,-1.0,-1.0,-1.0
Cherkasy,1981,3,-1.0,-1.0,-1.0,-1.0,-1.0
Cherkasy,1981,4,-1.0,-1.0,-1.0,-1.0,-1.0
Cherkasy,1981,5,-1.0,-1.0,-1.0,-1.0,-1.0
Cherkasy,1981,6,-1.0,-1.0,-1.0,-1.0,-1.0
...,...,...,...,...,...,...,...
Zhytomir,2020,48,-1.0,-1.0,-1.0,-1.0,-1.0
Zhytomir,2020,49,-1.0,-1.0,-1.0,-1.0,-1.0
Zhytomir,2020,50,-1.0,-1.0,-1.0,-1.0,-1.0
Zhytomir,2020,51,-1.0,-1.0,-1.0,-1.0,-1.0


In [55]:
# Drop future weeks or weeks with no data
no_data_weeks_idx = df_reindexed[df_reindexed['NDVI'] == -1].index
df_reindexed = df_reindexed.drop(no_data_weeks_idx)
df_reindexed

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,NDVI,BT,VC,TC,VHI
Region,Year,Week,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Cherkasy,1981,35,0.255,292.68,13.75,73.72,43.74
Cherkasy,1981,36,0.255,292.31,19.82,63.72,41.77
Cherkasy,1981,37,0.258,291.88,30.21,51.66,40.93
Cherkasy,1981,38,0.259,291.08,39.91,42.64,41.28
Cherkasy,1981,39,0.256,289.75,47.88,37.60,42.74
...,...,...,...,...,...,...,...
Zhytomir,2020,42,0.246,282.87,56.55,21.51,39.03
Zhytomir,2020,43,0.222,281.15,59.03,19.47,39.25
Zhytomir,2020,44,0.204,279.90,62.43,10.08,36.26
Zhytomir,2020,45,0.193,278.66,70.33,5.25,37.79


In [56]:
df_reindexed['VHI'].min(level=['Region', 'Year'])

Region    Year
Cherkasy  1981    40.93
          1982    23.83
          1983    34.32
          1984    30.24
          1985    29.31
                  ...  
Zhytomir  2016    37.18
          2017    39.84
          2018    39.32
          2019    30.24
          2020    36.26
Name: VHI, Length: 1080, dtype: float64

In [57]:
df_reindexed['VHI'].max(level=['Region', 'Year'])

Region    Year
Cherkasy  1981    64.76
          1982    64.58
          1983    47.56
          1984    71.01
          1985    63.66
                  ...  
Zhytomir  2016    56.23
          2017    61.74
          2018    60.82
          2019    60.57
          2020    58.33
Name: VHI, Length: 1080, dtype: float64

In [58]:
df_reindexed.groupby(level=['Region', 'Year']).max()

Unnamed: 0_level_0,Unnamed: 1_level_0,NDVI,BT,VC,TC,VHI
Region,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Cherkasy,1981,0.259,292.68,65.71,73.72,64.76
Cherkasy,1982,0.396,295.43,68.18,89.14,64.58
Cherkasy,1983,0.402,296.21,52.88,76.96,47.56
Cherkasy,1984,0.428,294.34,55.66,90.66,71.01
Cherkasy,1985,0.406,296.62,49.10,92.34,63.66
...,...,...,...,...,...,...
Zhytomir,2016,0.473,297.77,79.44,95.84,56.23
Zhytomir,2017,0.468,296.69,76.80,80.87,61.74
Zhytomir,2018,0.474,296.99,84.50,64.53,60.82
Zhytomir,2019,0.483,296.71,89.24,46.55,60.57


In [59]:
# Get extreme droughts province year
list(set([i[:2] for i in df_reindexed[df_reindexed['VHI'] < 15].index]))

[('Kiev', 2000),
 ('Cherkasy', 2000),
 ('Kherson', 2003),
 ('Crimea', 2012),
 ('Zaporizhzhya', 2007),
 ('Luhansk', 1993),
 ('Kiev_city', 2000),
 ('Odessa', 2007),
 ('Sevastopol', 2000),
 ('Donetsk', 1993),
 ('Mykolayiv', 2007),
 ('Vinnytsya', 2000),
 ('Luhansk', 1986),
 ('Sumy', 1984),
 ('Kherson', 2007),
 ('Kiev_city', 1999),
 ('Crimea', 2007),
 ('Donetsk', 1986),
 ('Volyn', 1994),
 ('Kharkiv', 2000)]

In [60]:
# Get moderate droughts province year
moderate_droughts = df_reindexed[(df_reindexed['VHI'] > 15) & (df_reindexed['VHI'] < 35)].index
moderate_droughts_years = list(set([i[:2] for i in moderate_droughts]))
print(f"{len(moderate_droughts_years) = }")
print(f"{moderate_droughts_years[:5] = }")

len(moderate_droughts_years) = 739
moderate_droughts_years[:5] = [('Cherkasy', 1982), ('Lviv', 1996), ('Khmelnylskyy', 2009), ('Ternopil', 1988), ('Cherkasy', 1991)]
