# Soluciones SQL y Pandas

## Ejercicios

1. Utiliza un conector de SQLAlchemy para cargar la base de datos `flights.db`.
2. Una vez cargados los datos realice las siguientes sentandias SQL con Pandas:
 * ```SELECT * FROM airports WHERE latitude > 20 AND longitude > 20```
 * ```SELECT city FROM airports WHERE country = 'Russia' OR country = 'China'```
 * ```SELECT source, COUNT(*) FROM routes GROUP BY source```
 * ```SELECT country, MIN(latitude), MAX(latitude) FROM airports GROUP BY country```
 * ```SELECT airline, airline_id  FROM routes ORDER BY source ASC, dest DESC```


In [38]:
from sqlalchemy import create_engine
import pandas as pd
import numpy as np

db = create_engine('sqlite:///flights.db')
airports = pd.read_sql('select * from airports;', con = db).replace('\\N', np.nan)
airlines = pd.read_sql('select * from airlines;', con = db).replace('\\N', np.nan)
routes = pd.read_sql('select * from routes;', con = db).replace('\\N', np.nan)
airports.info()
airlines.info()
routes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8107 entries, 0 to 8106
Data columns (total 13 columns):
index        8107 non-null int64
id           8107 non-null object
name         8107 non-null object
city         8107 non-null object
country      8107 non-null object
code         5880 non-null object
icao         6785 non-null object
latitude     8107 non-null object
longitude    8107 non-null object
altitude     8107 non-null object
offset       8107 non-null object
dst          8107 non-null object
timezone     8015 non-null object
dtypes: int64(1), object(12)
memory usage: 823.4+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6048 entries, 0 to 6047
Data columns (total 9 columns):
index       6048 non-null int64
id          6048 non-null object
name        6048 non-null object
alias       138 non-null object
iata        1460 non-null object
icao        5796 non-null object
callsign    5302 non-null object
country     6030 non-null object
active      6048 non-null objec

In [39]:
# SELECT * FROM airports WHERE latitude > 20 AND longitude > 20
airports[(pd.to_numeric(airports['latitude']) > 20) & (pd.to_numeric(airports['longitude']) > 20)]

Unnamed: 0,index,id,name,city,country,code,icao,latitude,longitude,altitude,offset,dst,timezone
408,408,411,Amari,Armari Air Force Base,Estonia,,EEEI,59.260286,24.208467,65,2,E,Europe/Tallinn
409,409,412,Kardla,Kardla,Estonia,KDL,EEKA,58.990756,22.830733,18,2,E,Europe/Tallinn
410,410,413,Kuressaare,Kuressaare,Estonia,URE,EEKE,58.229883,22.509494,14,2,E,Europe/Tallinn
411,411,414,Parnu,Parnu,Estonia,EPU,EEPU,58.419044,24.472819,47,2,E,Europe/Tallinn
412,412,415,Tallinn,Tallinn-ulemiste International,Estonia,TLL,EETN,59.413317,24.832844,131,2,E,Europe/Tallinn
413,413,416,Tartu,Tartu,Estonia,TAY,EETU,58.307461,26.690428,219,2,E,Europe/Tallinn
414,414,417,Enontekio,Enontekio,Finland,ENF,EFET,68.362586,23.424322,1005,2,E,Europe/Helsinki
415,415,418,Eura,Eura,Finland,,EFEU,61.116112,22.201389,259,2,E,Europe/Helsinki
416,416,419,Halli,Halli,Finland,KEV,EFHA,61.85605,24.7866,479,2,E,Europe/Helsinki
417,417,420,Helsinki Malmi,Helsinki,Finland,HEM,EFHF,60.254558,25.042828,57,2,E,Europe/Helsinki


In [40]:
# SELECT city FROM airports WHERE country = 'Russia' OR country = 'China'
airports[(airports['country'] == 'Russia') | (airports['country'] == 'China')]

Unnamed: 0,index,id,name,city,country,code,icao,latitude,longitude,altitude,offset,dst,timezone
2847,2847,2923,Yakutsk,Yakutsk,Russia,YKS,UEEE,62.09325,129.770672,325,10,N,Asia/Yakutsk
2848,2848,2925,Mirny,Mirnyj,Russia,MJZ,UERR,62.534689,114.038928,1156,10,N,Asia/Yakutsk
2849,2849,2926,Ignatyevo,Blagoveschensk,Russia,BQS,UHBB,50.425394,127.412478,638,10,N,Asia/Yakutsk
2850,2850,2927,Novy,Khabarovsk,Russia,KHV,UHHH,48.528044,135.188361,244,11,N,Asia/Vladivostok
2852,2852,2929,Provideniya Bay,Provideniya Bay,Russia,PVS,UHMD,64.378139,-173.243306,71,12,N,Asia/Magadan
2853,2853,2930,Sokol,Magadan,Russia,GDX,UHMM,59.910989,150.720439,574,12,N,Asia/Magadan
2854,2854,2931,Pevek,Pevek,Russia,PWE,UHMP,69.783283,170.597006,11,12,N,
2855,2855,2932,Yelizovo,Petropavlovsk,Russia,PKC,UHPP,53.167889,158.453669,131,12,N,Asia/Magadan
2856,2856,2933,Khomutovo,Yuzhno-sakhalinsk,Russia,UUS,UHSS,46.888672,142.717531,59,11,N,Asia/Vladivostok
2857,2857,2934,Knevichi,Vladivostok,Russia,VVO,UHWW,43.398953,132.148017,46,11,N,Asia/Vladivostok


In [41]:
# SELECT source, COUNT(*) FROM routes GROUP BY source
routes.groupby('source').size()

source
AAE      9
AAL     20
AAN      2
AAQ      3
AAR      8
AAT      2
AAX      1
AAY      1
ABA      4
ABB      2
ABD      6
ABE     13
ABI      2
ABJ     50
ABL      4
ABM      1
ABQ     42
ABR      1
ABS      1
ABT      3
ABV     30
ABX      4
ABY      4
ABZ     41
ACA      8
ACC     54
ACE    116
ACH      2
ACI      2
ACK      6
      ... 
ZGS      6
ZGU      3
ZHA     19
ZHY      2
ZIG      2
ZIH      9
ZKE      2
ZKG      2
ZLO      7
ZLT      2
ZMT      1
ZNE      2
ZNZ     11
ZOS      1
ZPB      2
ZQN      7
ZQW      5
ZQZ      2
ZRH    247
ZRJ      2
ZSA      4
ZSE      3
ZSJ      3
ZTB      2
ZTH     16
ZUH     60
ZUM      2
ZVK      3
ZYI     15
ZYL      4
Length: 3409, dtype: int64

In [42]:
# SELECT country, MIN(latitude), MAX(latitude) FROM airports GROUP BY country
airports.groupby('country')['latitude'].agg(['min', 'max'])

Unnamed: 0_level_0,min,max
country,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,30.969167,37.1211
Albania,41.414742,41.414742
Algeria,21.375,36.822225
American Samoa,-14.184444,-14.331
Angola,-10.721956,-9.771944
Anguilla,18.204834,18.204834
Antarctica,-62.190833,-89.999997
Antigua and Barbuda,17.136749,17.6358
Argentina,-22.150556,-54.843278
Armenia,40.122114,41.04845


In [43]:
# SELECT airline, airline_id  FROM routes ORDER BY source ASC, dest DESC
routes.sort_values(by=['source', 'dest'], ascending=[True, False])[['airline', 'airline_id']]

Unnamed: 0,airline,airline_id
10045,AH,794
67439,ZI,21
10044,AH,794
10043,AH,794
67438,ZI,21
10042,AH,794
10041,AH,794
10040,AH,794
10039,AH,794
22174,DX,1954
