## Creating snowflake connection and pushing data 

In [9]:
from sqlalchemy import create_engine
import pandas as pd


engine = create_engine(
    'snowflake://{user}:{password}@{account}/'.format(
        user='',
        password='',
        account='',
        warehouse='COMPUTE_WH',
        database='MY_TEST', 
        schema='NEW'
    )
)
sfOptions = {
    "sfURL": "https://lv50176.switzerland-north.azure.snowflakecomputing.com",
    
    "sfDatabase": "MY_TEST",
    "sfSchema": "NEW",
    "sfWarehouse": "COMPUTE_WH",
}
try:
    connection = engine.connect()
    connection.execute('USE ROLE ACCOUNTADMIN')
    connection.execute('USE DATABASE MY_TEST')
    connection.execute('USE SCHEMA NEW')

    #airports = pd.read_csv('/workspace/Airport_Pipeline/Airport_data/airports.csv')
    #airports.to_sql('airports', con=connection, if_exists='replace',index = False,chunksize=16000)   

    #airport_frequencies = pd.read_csv('/workspace/Airport_Pipeline/Airport_data/airport-frequencies.csv')
    #airport_frequencies.to_sql('airport_frequencies', con=connection, if_exists='replace',index = False,chunksize=16000) 

    #countries = pd.read_csv('/workspace/Airport_Pipeline/Airport_data/countries.csv')
    #countries.to_sql('countries', con=connection, if_exists='replace',index = False,chunksize=16000)

    #airport_comments = pd.read_csv('/workspace/Airport_Pipeline/Airport_data/airport-comments.csv')
    #airport_comments.to_sql('airport_comments', con=connection, if_exists='replace',index = False,chunksize=16000)

    #navaids = pd.read_csv('/workspace/Airport_Pipeline/Airport_data/navaids.csv')
    #navaids.to_sql('navaids', con=connection, if_exists='replace',index = False,chunksize=16000)

    #regions = pd.read_csv('/workspace/Airport_Pipeline/Airport_data/regions.csv')
    #regions.to_sql('regions', con=connection, if_exists='replace',index = False,chunksize=16000)

    #runways = pd.read_csv('/workspace/Airport_Pipeline/Airport_data/runways.csv')
    #runways.to_sql('runways', con=connection, if_exists='replace',index = False,chunksize=16000)

    connection.execute('PUT file://../Airport_Pipeline/Geonames_data/all_countries.csv @~;')
    connection.execute('''COPY INTO geonames FROM @~ FILE_FORMAT = (TYPE = CSV, SKIP_HEADER = 1) ON_ERROR=CONTINUE;''')
    

finally:
    connection.close()
    engine.dispose()
    
    

### 1. Basic Cleaning and Creating Views in Snowflake 

In [1]:
from sqlalchemy import create_engine
import pandas as pd


engine = create_engine(
    'snowflake://{user}:{password}@{account}/'.format(
        user='GEORGE9042',
        password='George9042',
        account='plninim-tg58176',
        warehouse='COMPUTE_WH',
        database='MY_TEST', 
        schema='NEW'
    )
)
try:
    connection = engine.connect()
    connection.execute('USE ROLE ACCOUNTADMIN')
    connection.execute('USE DATABASE MY_TEST')
    connection.execute('USE SCHEMA NEW')
    clean_continent = '''UPDATE airports SET CONTINENT = COALESCE(CONTINENT, 'NA')'''
    clean_country = '''UPDATE airports SET iso_country = COALESCE(iso_country, 'NA')'''

    query_continent = '''select * from airport_per_continent'''
    query_continent = pd.read_sql(query_continent,connection)

    query_avg_air_ele = '''select * from AVG_AIRPORT_ELEVATION_COUNTRY'''
    query_avg_air_ele = pd.read_sql(query_avg_air_ele,connection)

    min_max_ele = '''select * from MAX_MIN_ELEVATION''' 
    min_max_ele= pd.read_sql(min_max_ele,connection)
    
    query_country='''select * from airport_per_country '''
    query_country = pd.read_sql(query_country,connection)

    total_population_country = '''select * from total_population_country'''
    total_population_country = pd.read_sql(total_population_country,connection)

    city_towns_with_airport = '''select * from city_towns_with_airport'''
    city_towns_with_airport = pd.read_sql(city_towns_with_airport,connection)

    city_towns_with_geoname = '''select * from city_towns_with_geoname'''
    city_towns_with_geoname = pd.read_sql(city_towns_with_geoname,connection)

    min_max_avg_city_elevation = '''select * from min_max_avg_city_elevation'''
    min_max_avg_city_elevation = pd.read_sql(min_max_avg_city_elevation,connection)

    lowest_elevation_city = '''select * from lowest_elevated_city'''
    lowest_elevation_city = pd.read_sql(lowest_elevation_city,connection)

    highest_elevation_city = '''select * from highest_elevated_city'''
    highest_elevation_city = pd.read_sql(highest_elevation_city,connection)

    print(highest_elevation_city)
    
finally:
    connection.close()
    engine.dispose()

  connection.execute('USE ROLE ACCOUNTADMIN')


           name  elevation
0  GreenSite HQ     3200.0


### 1.1 How many airports, airfields and heliports exist in each country?

In [2]:
print(query_country)

    iso_country  airports  heliport  airfield
0            US      1496      7820     14860
1            IR        63        84       106
2            TM         7        24        16
3            VA         0         1         0
4            IN       128       273       174
..          ...       ...       ...       ...
240          GG         2         0         0
241          IE        12         5        83
242          SZ         2         0        14
243          GM         1         0         0
244          GW         1         0         6

[245 rows x 4 columns]


### 1.2 How many airports, airfields and heliports exist in each continent?

In [3]:
print(query_continent)

  continent  airports  heliport  airfield
0        EU      1065      1714      5679
1        AN         5         9        25
2        SA       434      2152      7312
3        AS      1483      5712      2587
4        NA      2487      8805     17931
5        AF       517       209      3056
6        OC       353       479      2829


### 2. What is the average elevation of the airports, airfields and heliports in each country? 

In [4]:
print(query_avg_air_ele)

    iso_country  avg_airport_elevation  avg_airfield_elevation   
0            JP             211.122642              418.512500  \
1            US             956.863636             1356.796806   
2            TZ            2414.500000             3389.411392   
3            KG            3177.833333             3882.882353   
4            KH              53.750000              148.250000   
..          ...                    ...                     ...   
240          GG             313.000000                     NaN   
241          IE             198.333333              200.029412   
242          SZ            1583.500000             1150.000000   
243          GM              95.000000                     NaN   
244          GW             129.000000              166.600000   

     avg_heliport_elevation  
0                734.636060  
1               1068.861724  
2                       NaN  
3               9452.000000  
4                203.000000  
..                      ...

### 3. What is the estimated population of each country?

In [5]:
print(total_population_country)

     total_population country_code
0              629735           HR
1             2253257           HU
2              910500           IQ
3                4866           JE
4              887412           JP
..                ...          ...
246                 0           PN
247             38556           ME
248            620329           MQ
249            332188           MU
250             51900           MV

[251 rows x 2 columns]


### 4. How many cities/towns/settlements in each country?

In [6]:
print(city_towns_with_geoname)

     city_towns_using_geoname country_code
0                     1191956           US
1                      417409           IN
2                      256697           ID
3                      208489           CA
4                      197747           NO
..                        ...          ...
246                        51           BL
247                        49           IO
248                        30           PN
249                        22           CC
250                         6           BV

[251 rows x 2 columns]


### 5. What is the min, max and average elevation of the cities per country?

In [7]:
print(min_max_avg_city_elevation)

     minimum_elevation  maximum_elevation  average_elevation country_code
0                  0.0             2655.0            1124.93           AD
1                  0.0             1676.0             423.74           AE
2                294.0             6513.0            2242.33           AF
3                365.0              565.0             465.00           AG
4                 11.0             2083.0            1029.63           AL
..                 ...                ...                ...          ...
221                2.0             3079.0             873.54           YE
222                0.0             3275.0             843.79           ZA
223             1035.0             1384.0            1251.18           ZM
224              230.0             2991.0            1247.21           ZW
225            -4800.0             3189.0              55.54         None

[226 rows x 4 columns]


### 6. Which are the highest and lowest elevated cities in the world with populations > 100000? 

In [8]:
print(lowest_elevation_city)

print(highest_elevation_city)

              name  elevation
0  Town of Babylon        0.0
           name  elevation
0  GreenSite HQ     3200.0


### 7. Which are the highest and lowest elevated airports, airfields and heliports on the planet?

In [9]:
print(min_max_ele)

             type  highest_elevation  lowest_elevation
0   seaplane_base               6534                -1
1     balloonport               5624                 4
2  medium_airport              14472             -1266
3        heliport              17372              -117
4   small_airport              16200              -210
5   large_airport              10860               -11
6          closed              14809              -223


## Creating Snowflake Tables

In [None]:
create or replace TABLE MY_TEST.NEW.AIRPORT_COMMENTS (
	ID NUMBER(38,0),
	threadRef FLOAT,
	airportRef NUMBER(38,0),
	airportIdent VARCHAR(16777216),
	date VARCHAR(16777216),
	memberNickname VARCHAR(16777216),
	subject VARCHAR(16777216),
	body VARCHAR(16777216),
    PRIMARY KEY (ID)
);

create or replace TABLE MY_TEST.NEW.AIRPORTS (
	ID NUMBER(38,0) NOT NULL,
	IDENT VARCHAR(16777216),
	TYPE VARCHAR(16777216),
	NAME VARCHAR(16777216),
	LATITUDE_DEG FLOAT,
	LONGITUDE_DEG FLOAT,
	ELEVATION_FT NUMBER(38,0),
	CONTINENT VARCHAR(16777216),
	ISO_COUNTRY VARCHAR(16777216),
	ISO_REGION VARCHAR(16777216),
	MUNICIPALITY VARCHAR(16777216),
	SCHEDULED_SERVICE VARCHAR(16777216),
	GPS_CODE VARCHAR(16777216),
	IATA_CODE VARCHAR(16777216),
	LOCAL_CODE VARCHAR(16777216),
	HOME_LINK VARCHAR(16777216),
	WIKIPEDIA_LINK VARCHAR(16777216),
	KEYWORDS VARCHAR(16777216),
	primary key (ID),
	foreign key (ISO_REGION) references MY_TEST.NEW.REGIONS(CODE)
);

create or replace TABLE MY_TEST.NEW.RUNWAYS (
	ID NUMBER(38,0) NOT NULL,
	AIRPORT_REF NUMBER(38,0),
	AIRPORT_IDENT VARCHAR(16777216),
	LENGTH_FT FLOAT,
	WIDTH_FT FLOAT,
	SURFACE VARCHAR(16777216),
	LIGHTED BOOLEAN,
	CLOSED BOOLEAN,
	LE_IDENT VARCHAR(16777216),
	LE_LATITUDE_DEG FLOAT,
	LE_LONGITUDE_DEG FLOAT,
	LE_ELEVATION_FT NUMBER(38,0),
	LE_HEADING_DEGT FLOAT,
	LE_DISPLACED_THRESHOLD_FT NUMBER(38,0),
	HE_IDENT VARCHAR(16777216),
	HE_LATITUDE_DEG FLOAT,
	HE_LONGITUDE_DEG FLOAT,
	HE_ELEVATION_FT NUMBER(38,0),
	HE_HEADING_DEGT FLOAT,
	HE_DISPLACED_THRESHOLD_FT NUMBER(38,0),
	primary key (ID),
	foreign key (AIRPORT_REF) references MY_TEST.NEW.AIRPORTS(ID)
);

create or replace TABLE MY_TEST.NEW.REGIONS (
	CODE VARCHAR(16777216) NOT NULL,
	LOCAL_CODE VARCHAR(16777216),
	ISO_COUNTRY VARCHAR(16777216),
	NAME VARCHAR(16777216),
	CONTINENT VARCHAR(16777216),
	primary key (CODE)
);

create or replace TABLE MY_TEST.NEW.NAVAIDS (
	ID_NAV NUMBER(38,0) NOT NULL,
	FILENAME VARCHAR(16777216),
	IDENT VARCHAR(16777216),
	NAME VARCHAR(16777216),
	TYPE VARCHAR(16777216),
	FREQUENCY_KHZ FLOAT,
	LATITUDE_DEG FLOAT,
	LONGITUDE_DEG FLOAT,
	ELEVATION_FT NUMBER(38,0),
	ISO_COUNTRY VARCHAR(16777216),
	DME_FREQUENCY_KHZ FLOAT,
	DME_CHANNEL NUMBER(38,0),
	DME_LATITUDE_DEG FLOAT,
	DME_LONGITUDE_DEG FLOAT,
	DME_ELEVATION_FT NUMBER(38,0),
	SLAVED_VARIATION_DEG FLOAT,
	MAGNETIC_VARIATION_DEG FLOAT,
	USAGE_TYPE VARCHAR(16777216),
	POWER FLOAT,
	ASSOCIATED_AIRPORT VARCHAR(16777216),
	ASSOCIATED_AIRPORT_IDENT VARCHAR(16777216),
	primary key (ID_NAV)
);

create or replace TABLE MY_TEST.NEW.COUNTRIES (
	ID NUMBER(38,0),
	CODE VARCHAR(16777216),
	NAME VARCHAR(16777216),
	CONTINENT VARCHAR(16777216),
	WIKIPEDIA_LINK VARCHAR(16777216),
	KEYWORDS VARCHAR(16777216)
	primary key (ID)
);

create or replace TABLE MY_TEST.NEW.AIRPORT_COMMENTS (
	ID NUMBER(38,0),
	threadRef FLOAT,
	airportRef NUMBER(38,0),
	airportIdent VARCHAR(16777216),
	DATE VARCHAR(16777216),
	memberNickname VARCHAR(16777216),
	SUBJECT VARCHAR(16777216),
	BODY VARCHAR(16777216)\
	PRIMARY KEY (ID)
);


### Views used in snowflake


In [None]:
airport_per_continent = '''create or replace view AIRPORT_PER_CONTINENT as
    -- comment = '<comment>'
    SELECT continent,    
       COUNT(CASE WHEN type IN ('seaplane_base','medium_airport', 'large_airport') THEN 1 END) AS airports,
       COUNT(CASE WHEN type = 'heliport' THEN 1 END) AS heliport,
       COUNT(CASE WHEN type = 'small_airport' THEN 1 END) AS airfield       
       FROM airports
       GROUP BY continent;'''

airport_per_country = '''create or replace view AIRPORT_PER_COUNTRY as
SELECT iso_country,
/* assumumes the small airport is an airfield*/
       COUNT(CASE WHEN type IN ('seaplane_base','medium_airport', 'large_airport') THEN 1 END) AS airports,
       COUNT(CASE WHEN type = 'heliport' THEN 1 END) AS heliport,
       COUNT(CASE WHEN type = 'small_airport' THEN 1 END) AS airfield       
       FROM airports
       GROUP BY iso_country;'''


min_max_elevation = '''create or replace view MY_TEST.NEW.MAX_MIN_ELEVATION(
	TYPE,
	HIGHEST_ELEVATION,
	LOWEST_ELEVATION
) as
SELECT type,
       MAX(elevation_ft) AS highest_elevation,
       MIN(elevation_ft) AS lowest_elevation
FROM airports
GROUP BY type;'''

avg_airport_elevation = '''create or replace view MY_TEST.NEW.AVG_AIRPORT_ELEVATION_COUNTRY(
	ISO_COUNTRY,
	AVG_AIRPORT_ELEVATION,
	AVG_AIRFIELD_ELEVATION,
	AVG_HELIPORT_ELEVATION
) as
SELECT iso_country,
       AVG(CASE WHEN type IN ('seaplane_base','medium_airport', 'large_airport') THEN elevation_ft END) AS avg_airport_elevation,
       AVG(CASE WHEN type = 'small_airport' THEN elevation_ft END) AS avg_airfield_elevation,
       AVG(CASE WHEN type = 'heliport' THEN elevation_ft END) AS avg_heliport_elevation
FROM airports
GROUP BY iso_country;'''

total_population_country = '''create or replace view MY_TEST.NEW.TOTAL_POPULATION_COUNTRY(
	TOTAL_POPULATION,
	COUNTRY_CODE
) as 
select sum(population) as total_population, country_code from geonames group by country_code;'''

city_towns_with_airport = '''create or replace view MY_TEST.NEW.CITY_TOWNS_WITH_AIRPORT(
	CITY_TOWNS_USING_AIRPORT,
	ISO_COUNTRY
) as
select count(distinct(municipality)) as city_towns_using_airport, iso_country from airports group by iso_country order by city_towns_using_airport desc;'''

city_towns_with_geoname = '''create or replace view MY_TEST.NEW.CITY_TOWNS_WITH_GEONAME(
	CITY_TOWNS_USING_GEONAME,
	COUNTRY_CODE
) as
select count(distinct(name)) as city_towns_using_geoname, country_code from geonames group by country_code order by city_towns_using_geoname desc;'''


min_max_avg_city_elevation = '''create or replace view MY_TEST.NEW.MIN_MAX_AVG_CITY_ELEVATION(
	MINIMUM_ELEVATION,
	MAXIMUM_ELEVATION,
	AVERAGE_ELEVATION,
	COUNTRY_CODE
) as 
select min(elevation) as minimum_elevation,
max(elevation) as maximum_elevation,
ROUND(avg(elevation), 2)as average_elevation,
country_code
from geonames
WHERE elevation IS NOT NULL
group by country_code 
order by country_code;'''