In [117]:
import pandas as pd
import csv, requests, os, re, itertools, codecs
from tqdm import tqdm
from time import sleep
import simplejson as json

### Load data and get list of all countries

In [121]:
# Source tables (tab-separated). Each one carries English country names.
airrights = pd.read_csv('flight_rights_unicities.tsv', sep='\t')
treaties = pd.read_csv('treaties_unicities.tsv', sep='\t')
schedules = pd.read_csv('allschedules_unicities.tsv', sep='\t')

# Every column, across the three tables, that names a country.
country_columns = (
    treaties.encountry,
    airrights.encountry,
    schedules.encountry_from,
    schedules.encountry_to,
)

# Unique country names, ordered case-insensitively.
countries = sorted(
    {name for column in country_columns for name in column.values.tolist()},
    key=str.lower,
)

### Get dictionary (*pythonic json* XD) of all countries

**Structure**:

+ country treaty:
    * date,
    * status, 
    * url for rada page
    - conditions within the treaty
        * points in English & Ukrainian
        * comment (empty if no comments)        
        - limits on the number of airlines that can operate all routes in the country (scope 'condition')
        or a single route (scope 'point')
            * limit - number of airlines
            * scope - condition or per route
        - flight limits: how many flights can be done within scope        
            * limit,
            * scope - condition in general, per point, per airline            
        - permissions to fly for Ukrainian companies, given according to treaty condition        
            * points in Ukraine and other country
            * permission id, date when received, No of protocol etc
            * airline
            * maximum flight frequency for airline on route
            * how many flights are scheduled per week **! not all schedules are available**
            * flight numbers of scheduled flights (**possible link to google flights???**)

In [122]:
# Value the treaty table uses where no limit should be emitted
# (presumably "unlimited" -- TODO confirm against the data dictionary).
NO_LIMIT = 999


def _contains_any(series, patterns):
    """Return a boolean mask of the rows of `series` that contain any pattern.

    Empty patterns are skipped on purpose: ``Series.str.contains('')`` is True
    for every string, so an empty airline code or city name would otherwise
    turn the filter into "match everything".

    Patterns are passed to ``str.contains`` unchanged (regex mode, as in the
    rest of this notebook).
    """
    mask = pd.Series(False, index=series.index)
    for pattern in patterns:
        if pattern:
            mask |= series.str.contains(pattern)
    return mask


country_dicts = []

for country in countries:
    if country == 'Ukraine':
        continue

    treaties_c = treaties.loc[treaties.encountry == country, :].fillna('')
    airrights_c = airrights.loc[airrights.encountry == country, :].fillna('')
    schedules_c = schedules.loc[((schedules.encountry_from == country) |
                                 (schedules.encountry_to == country)), :].fillna('')

    # Remember which conditions are catch-all ('all') BEFORE the labels are
    # rewritten below. The permission matching further down needs this flag:
    # once 'all' has been replaced by 'other' / a country name, it can no longer
    # be recognised from the label itself.
    # Exact equality is used so that city names that merely contain the letters
    # "all" (e.g. "Tallinn") are not mistaken for the wildcard.
    treaties_c['ua_is_all'] = treaties_c.encities_ua == 'all'
    treaties_c['foreign_is_all'] = treaties_c.encities_foreign == 'all'

    # Put the most specific conditions first and the catch-all ones last,
    # otherwise every permission would be claimed by the catch-all condition.
    treaties_c['sort'] = (treaties_c.ua_is_all.astype(int) +
                          treaties_c.foreign_is_all.astype(int))
    treaties_c.sort_values(by='sort', inplace=True)

    # Human-readable labels for catch-all conditions: 'other' when more specific
    # conditions exist next to it, otherwise the whole country.
    # NOTE(review): `treaties_c.country.iloc[0]` raises IndexError for a country
    # that appears only in the rights/schedule tables and has no treaty row.
    foreign_all = treaties_c.foreign_is_all
    if foreign_all.sum() < len(treaties_c):
        treaties_c.loc[foreign_all, 'encities_foreign'] = 'other'
        treaties_c.loc[treaties_c.cities_foreign.str.contains('всі'), 'cities_foreign'] = 'інші'
    else:
        treaties_c.loc[foreign_all, 'encities_foreign'] = country
        treaties_c.loc[treaties_c.cities_foreign.str.contains('всі'), 'cities_foreign'] = treaties_c.country.iloc[0]

    # Same relabelling for the Ukrainian side.
    ua_all = treaties_c.ua_is_all
    if ua_all.sum() < len(treaties_c):
        treaties_c.loc[ua_all, 'encities_ua'] = 'other'
        treaties_c.loc[treaties_c.cities_ua.str.contains('всі'), 'cities_ua'] = 'інші'
    else:
        treaties_c.loc[ua_all, 'encities_ua'] = 'Ukraine'
        treaties_c.loc[treaties_c.cities_ua.str.contains('всі'), 'cities_ua'] = 'Україна'

    # Country-level record; treaty metadata is taken from the first treaty row.
    country_dict = {
        'encoutry': country,
        'treaty': {
            'conditions': []
        }
    }

    country_dict['treaty']['uacountry'] = treaties_c.country.iloc[0]
    country_dict['treaty']['date'] = treaties_c.date.iloc[0]
    country_dict['treaty']['url'] = treaties_c.url.iloc[0]
    country_dict['treaty']['stage'] = treaties_c.stage.iloc[0]

    # Permissions already claimed by a more specific condition; they must not be
    # assigned again to a later, more general one.
    chosen_ids = []

    for condition in treaties_c.to_dict(orient='records'):

        new_condition = {
            'ua_from': condition['cities_ua'],
            'en_from': condition['encities_ua'],
            'ua_to': condition['cities_foreign'],
            'en_to': condition['encities_foreign'],
            'comment': condition['comments'],
            'flight_limits': [],
            'airline_limits': [],
            'permissions': []
        }

        # Limits carry a scope:
        #   - condition: limit for the condition as a whole
        #   - point:     limit per route / point
        #   - airline:   limit per airline
        # A narrower limit is only listed when it differs from the broader ones.
        if condition['airlines_total'] != NO_LIMIT:
            new_condition['airline_limits'].append({'limit': condition['airlines_total'], 'scope': 'condition'})

        if (condition['airlines_route'] != NO_LIMIT
                and condition['airlines_route'] != condition['airlines_total']):
            new_condition['airline_limits'].append({'limit': condition['airlines_route'], 'scope': 'point'})

        if condition['fw_gen'] != NO_LIMIT:
            new_condition['flight_limits'].append({'limit': condition['fw_gen'], 'scope': 'condition'})

        if (condition['fw_route'] != NO_LIMIT
                and condition['fw_route'] != condition['fw_gen']):
            new_condition['flight_limits'].append({'limit': condition['fw_route'], 'scope': 'point'})

        if (condition['fw_airline'] != NO_LIMIT
                and condition['fw_airline'] != condition['fw_gen']
                and condition['fw_airline'] != condition['fw_route']):
            new_condition['flight_limits'].append({'limit': condition['fw_airline'], 'scope': 'airline'})

        # Catch-all conditions match any city ('.' as a regex); specific ones
        # match on the city label. The wildcard flag comes from the columns
        # captured before relabelling, so it is still available here.
        ua_pattern = '.' if condition['ua_is_all'] else condition['encities_ua']
        foreign_pattern = '.' if condition['foreign_is_all'] else condition['encities_foreign']

        condition_rights = airrights_c.loc[airrights_c.from_encity.str.contains(ua_pattern) &
                                           airrights_c.to_encity.str.contains(foreign_pattern) &
                                           ~airrights_c.right_id.isin(chosen_ids)]

        # Mark these permissions as taken.
        chosen_ids += condition_rights.right_id.values.tolist()

        condition_rights = condition_rights.to_dict(orient='records')

        # Attach to every permission the scheduled flights operated under it:
        # same airline (IATA or ICAO code) on the same city pair, either direction.
        for right in condition_rights:
            endpoints = (right['from_encity'], right['to_encity'])
            right_schedule = schedules_c.loc[
                _contains_any(schedules_c.airline, (right['iata_airline'], right['icao_airline'])) &
                _contains_any(schedules_c.encity_from, endpoints) &
                _contains_any(schedules_c.encity_to, endpoints)
            ]

            flights = right_schedule.flight.unique().tolist()
            if flights:
                # Each `days_week` value is measured by its string length.
                # NOTE(review): duplicates are dropped on the *lengths*, so two
                # different flights with the same number of days count once --
                # confirm this is the intended way to merge both directions.
                days_per_row = right_schedule.days_week.apply(lambda days: len(str(days)))
                right['schedules'] = {
                    # Plain int: numpy integers are not JSON-serialisable.
                    'freq': int(days_per_row.drop_duplicates().sum()),
                    'flights': flights
                }

        new_condition['permissions'] = condition_rights

        country_dict['treaty']['conditions'].append(new_condition)

    # Add the finished country record to the overall structure.
    country_dicts.append(country_dict)

**! Attention ! The code below rewrites json file**
- Use it wisely and comment it out again after use

In [123]:
# Kept commented out on purpose: uncommenting these two lines overwrites
# avia_table.json with the current contents of `country_dicts`.
# NOTE(review): the target is an absolute, machine-specific path.
# with codecs.open('C:/Users/Nadya/Desktop/aviaroutes/avia_table.json', 'w', encoding='utf-8') as f:
#     f.write(json.dumps(country_dicts))

In [124]:
# Show the assembled structure (one dict per country) for a visual check.
country_dicts

[{'encoutry': 'Albania',
  'treaty': {'conditions': [{'airline_limits': [],
     'comment': '',
     'en_from': 'Kiev',
     'en_to': 'Tirana',
     'flight_limits': [{'limit': 7, 'scope': 'condition'}],
     'permissions': [{'airline_name': 'Браво',
       'annul_date': '',
       'annul_reason': '',
       'country': 'Республіка Албанія',
       'date': '02.03.2017',
       'encountry': 'Albania',
       'from_city': 'Київ',
       'from_encity': 'Kiev',
       'given_by': 'Протокол засідання комісії ДАС № 22 від 27.02.2017',
       'given_date': '03.03.2017',
       'iata_airline': '',
       'icao_airline': 'BAY',
       'max_freq': '2',
       'min_freq': 0,
       'right_id': 1859,
       'to_city': 'Тірана',
       'to_encity': 'Tirana',
       'type_of_right': 'Регулярні міжнародні',
       'valid_from': '26.03.2017',
       'valid_till': '26.10.2067'},
      {'airline_name': 'ДАРТ',
       'annul_date': '',
       'annul_reason': '',
       'country': 'Республіка Албанія',
   