This notebook tests data queries for my Spyre app.

In [2]:
import pandas as pd
# Load the province reference table.
# - lines/fragments starting with '#' are treated as comments and ignored;
# - fields are separated by any run of commas and/or spaces, which is a
#   regular-expression separator and therefore needs the Python parser engine.
provinces = pd.read_csv(
    '../lab1/ukr_provinces.csv',
    sep='[, ]+',
    comment='#',
    engine='python',
)
provinces

Unnamed: 0,CountryCode,provinceID,minLat,minLon,maxLat,maxLon,newID,province_name
0,UKR,1,48.4658,29.6201,50.2095,32.9217,22,Cherkasy
1,UKR,2,50.3263,30.4749,52.375,33.5088,24,Chernihiv
2,UKR,3,47.7299,24.8985,48.6767,27.5337,23,Chernivtsi
3,UKR,4,44.3821,32.4623,46.1905,36.6457,25,Crimea
4,UKR,5,47.4224,32.9624,49.1688,36.9234,3,Dnipropetrovsk
5,UKR,6,46.8547,36.5451,49.2586,39.073,4,Donetsk
6,UKR,7,47.7256,23.5408,49.5503,25.6406,8,Ivano-Frankivsk
7,UKR,8,48.5145,34.8923,50.4649,38.0916,19,Kharkiv
8,UKR,9,45.9012,31.5029,47.577,35.1345,20,Kherson
9,UKR,10,48.4516,26.1233,50.5936,27.9138,21,Khmelnytskyy


In [20]:
# Name of the province whose provinceID is 11.
# The column is selected by label instead of by position (previously column 7),
# so the lookup keeps returning the name if columns are added or reordered.
# .iat[0] takes the first matching row and raises IndexError if none matches.
provinces.loc[provinces['provinceID'] == 11, 'province_name'].iat[0]

'Kiev'

In [6]:
# Dropdown options for the province selector: one {'label', 'value'} dict per row,
# labelled with the province name and valued with its provinceID.
# Only two columns are needed, so they are read directly as Python lists instead of
# materialising a Series for every row with iterrows().
province_options = [
    {'label': name, 'value': province_id}
    for name, province_id in zip(provinces['province_name'].tolist(),
                                 provinces['provinceID'].tolist())
]
province_options

[{'label': 'Cherkasy', 'value': 1},
 {'label': 'Chernihiv', 'value': 2},
 {'label': 'Chernivtsi', 'value': 3},
 {'label': 'Crimea', 'value': 4},
 {'label': 'Dnipropetrovsk', 'value': 5},
 {'label': 'Donetsk', 'value': 6},
 {'label': 'Ivano-Frankivsk', 'value': 7},
 {'label': 'Kharkiv', 'value': 8},
 {'label': 'Kherson', 'value': 9},
 {'label': 'Khmelnytskyy', 'value': 10},
 {'label': 'Kiev', 'value': 11},
 {'label': 'Kiev(City)', 'value': 12},
 {'label': 'Kirovohrad', 'value': 13},
 {'label': 'Luhansk', 'value': 14},
 {'label': 'Lviv', 'value': 15},
 {'label': 'Mykolayiv', 'value': 16},
 {'label': 'Odessa', 'value': 17},
 {'label': 'Poltava', 'value': 18},
 {'label': 'Rivne', 'value': 19},
 {'label': 'Sevastopol', 'value': 20},
 {'label': 'Sumy', 'value': 21},
 {'label': 'Ternopil', 'value': 22},
 {'label': 'Transcarpathia', 'value': 23},
 {'label': 'Vinnytsya', 'value': 24},
 {'label': 'Volyn', 'value': 25},
 {'label': 'Zaporizhzhya', 'value': 26},
 {'label': 'Zhytomyr', 'value': 27}]

In [7]:
# Dropdown options for the week selectors: weeks 1 through 52, where the label
# and the value of each option are the same integer.
week_options = [dict(label=week, value=week) for week in range(1, 53)]
week_options[-1]

{'label': 52, 'value': 52}

In [12]:
import os
import re
from sys import stdout as out

def read_province_data(data_dir='../lab1/data', pattern='P[0-9]+-[0-9]+-[0-9]+.csv'):
    """Load every per-province CSV file found in ``data_dir`` into one data frame.

    A file is picked up when ``pattern`` matches the beginning of its name
    (``re.match``). The province ID is taken from the first ``P<digits>`` group in
    the file name. Progress messages are written to ``out`` (``sys.stdout``).

    Parameters
    ----------
    data_dir : str
        Folder that holds the province files.
    pattern : str
        Regular expression used to select file names.
        NOTE(review): in the default value the '.' before 'csv' is unescaped and
        the pattern is not anchored at the end, so it is slightly more permissive
        than it looks. The default is left unchanged for compatibility.

    Returns
    -------
    pandas.DataFrame or None
        ``None`` if ``data_dir`` is not an existing folder. Otherwise a frame with
        the columns Year, Week, SMN, SMT, VCI, TCI, VHI and Province, where rows
        keep the index they had in their own file. The frame is empty when no
        file matches.
    """
    if not os.path.isdir(data_dir):  # Exit if the folder does not exist.
        out.write("Folder {} not found!\n".format(data_dir))
        return None

    regexp = re.compile(pattern)
    province_frames = []

    # Sorted so that row order and log output do not depend on the OS listing order.
    for name in sorted(n for n in os.listdir(data_dir) if regexp.match(n)):
        file_path = os.path.join(data_dir, name)
        if not os.path.isfile(file_path):
            continue

        out.write("Reading {}... ".format(name))

        # Detect province ID from the file name. A custom pattern may let through
        # names without a 'P<digits>' part; skip those instead of crashing.
        id_match = re.search('P([0-9]+)', name)
        if id_match is None:
            out.write("no province ID in the file name, skipped\n")
            continue
        province_id = int(id_match.group(1))

        # Read province data frame from the file; fields are separated by any
        # run of commas and/or spaces, and the files carry no header row.
        province_frame = pd.read_csv(file_path, sep='[, ]+', engine='python',
                                     names=['Year', 'Week', 'SMN', 'SMT', 'VCI', 'TCI', 'VHI'])

        # Add column with province_id
        province_frame['Province'] = province_id

        # Collect the frame; everything is concatenated once at the end, which
        # avoids re-copying the accumulated data on every iteration.
        province_frames.append(province_frame)

        out.write("Province ID: {}\n".format(province_id))

    # pd.concat refuses an empty list, so keep returning an empty frame in that case.
    if not province_frames:
        return pd.DataFrame()
    return pd.concat(province_frames)

# Query parameters: the index series to show, the province ID, and the
# inclusive range of week numbers.
series = 'VHI'
province = 26
week_from = 2
week_to = 5

data_frame = read_province_data()

# Row filters, kept as named masks so each condition can be inspected separately.
in_province = data_frame['Province'] == province
in_week_range = (data_frame['Week'] >= week_from) & (data_frame['Week'] <= week_to)

# Keep only the year, the week and the requested series for the matching rows.
result_frame = data_frame.loc[in_province & in_week_range, ['Year', 'Week', series]]
result_frame

Reading P01-180308-213122.csv... Province ID: 1
Reading P02-180308-213122.csv... Province ID: 2
Reading P03-180308-213123.csv... Province ID: 3
Reading P04-180308-213124.csv... Province ID: 4
Reading P05-180308-213124.csv... Province ID: 5
Reading P06-180308-213125.csv... Province ID: 6
Reading P07-180308-213125.csv... Province ID: 7
Reading P08-180308-213126.csv... Province ID: 8
Reading P09-180308-213126.csv... Province ID: 9
Reading P10-180308-213127.csv... Province ID: 10
Reading P11-180308-213127.csv... Province ID: 11
Reading P12-180308-213128.csv... Province ID: 12
Reading P13-180308-213128.csv... Province ID: 13
Reading P14-180308-213129.csv... Province ID: 14
Reading P15-180308-213129.csv... Province ID: 15
Reading P16-180308-213130.csv... Province ID: 16
Reading P17-180308-213131.csv... Province ID: 17
Reading P18-180308-213131.csv... Province ID: 18
Reading P19-180308-213132.csv... Province ID: 19
Reading P20-180308-213132.csv... Province ID: 20
Reading P21-180308-213133.csv

Unnamed: 0,Year,Week,VHI
1,2014,2,39.69
2,2014,3,38.78
3,2014,4,38.5
4,2014,5,38.61
53,2015,2,42.02
54,2015,3,40.96
55,2015,4,41.71
56,2015,5,42.14
105,2016,2,38.95
106,2016,3,38.22
