# Data Exploration all'interno del Database (SQL Server) con 'SQLAlchemy'

## 1. Connessione al database

In [22]:
import pyodbc
from sqlalchemy import create_engine
import pandas as pd

In [24]:
# Build the SQLAlchemy engine for the local SQL Server Express instance.
# - Raw string: the backslash between host and instance name must reach
#   SQLAlchemy literally ('\S' is not a valid Python escape sequence).
# - Query string: parameters are separated by '&' (not a second '?') and the
#   spaces in the ODBC driver name are URL-encoded as '+'.
engine = create_engine(
    r'mssql+pyodbc://LAPTOP-3P1FOCLU\SQLEXPRESS/covid_19'
    '?driver=SQL+Server+Native+Client+11.0&Trusted_Connection=yes'
)

In [26]:
# Open a connection from the engine; the query cells below execute through it.
connection = engine.connect()

In [32]:
# List the tables of the connected database through the Inspector API.
# Engine.table_names() is deprecated since SQLAlchemy 1.4 and removed in 2.0;
# inspect(engine).get_table_names() is available on 1.x and 2.0 alike.
from sqlalchemy import inspect

inspect(engine).get_table_names()

['covid_deaths', 'covid_vaccinations']

## 2. Testare la connessione

In [33]:
# Smoke-test the connection: load the full covid_deaths table into a DataFrame.
data = pd.read_sql_query(sql='SELECT * FROM covid_deaths', con=connection)

In [34]:
# Preview 10 randomly chosen rows (no random_state is set, so the selection
# differs on every run).
data.sample(n=10)

Unnamed: 0,iso_code,continent,location,day_of_year,population,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,...,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million
21240,AUT,Europe,Austria,2021-03-27,9006400.0,533786.0,3498.0,3192.286,9231.0,31.0,...,2.839,107.0,508.0,56404.0,1651.0,183314.0,,,,
70854,VEN,South America,Venezuela,2020-06-08,28435943.0,2473.0,96.0,115.857,22.0,0.0,...,0.025,121.0,,,,,,,,
9072,AND,Europe,Andorra,2021-03-25,77265.0,11687.0,49.0,42.0,114.0,0.0,...,1.849,116.0,,,,,,,,
61765,PRT,Europe,Portugal,2021-02-13,10196707.0,784079.0,2856.0,3167.571,15183.0,149.0,...,17.218,48.0,803.0,78751.0,4850.0,475644.0,,,,
37912,BHR,Asia,Bahrain,2020-12-31,1701583.0,92675.0,250.0,229.286,352.0,0.0,...,0.168,113.0,,,,,,,,
54630,POL,Europe,Poland,2021-05-10,37846605.0,2835083.0,2031.0,4189.571,70034.0,22.0,...,7.281,66.0,,,15925.0,420778.0,,,,
58041,SAU,Asia,Saudi Arabia,2020-03-04,34813867.0,1.0,0.0,,,,...,,,,,,,,,,
92424,LUX,Europe,Luxembourg,2021-04-15,625976.0,64549.0,215.0,195.571,785.0,1.0,...,5.249,99.0,32.0,5112.0,110.0,175726.0,,,,
4968,ARM,Asia,Armenia,2020-12-17,2963234.0,151392.0,1174.0,878.857,2581.0,25.0,...,7.955,86.0,,,,,,,,
13270,ESP,Europe,Spain,2020-11-29,46754783.0,1628208.0,0.0,10211.143,44668.0,0.0,...,6.261,8.0,,,,,375361.0,8028.0,4293932.0,91839.0


## 3. Elaborare una "Reflection"

In [36]:
from sqlalchemy import Table, MetaData

In [37]:
# Initialize a MetaData object (it is passed to Table() in the next cell).
metadata = MetaData()

In [38]:
# Reflect the existing 'covid_deaths' table: its column definitions are read
# from the database instead of being declared here.
# autoload_with alone is enough (it implies autoload); the separate
# autoload=True flag is deprecated since SQLAlchemy 1.4 and removed in 2.0.
covid_deaths = Table('covid_deaths', metadata, autoload_with=engine)

In [40]:
# List the names of the reflected columns (.c is the short alias of .columns).
covid_deaths.c.keys()

['iso_code',
 'continent',
 'location',
 'day_of_year',
 'population',
 'total_cases',
 'new_cases',
 'new_cases_smoothed',
 'total_deaths',
 'new_deaths',
 'new_deaths_smoothed',
 'total_cases_per_million',
 'new_cases_per_million',
 'new_cases_smoothed_per_million',
 'total_deaths_per_million',
 'new_deaths_per_million',
 'new_deaths_smoothed_per_million',
 'reproduction_rate',
 'icu_patients',
 'icu_patients_per_million',
 'hosp_patients',
 'hosp_patients_per_million',
 'weekly_icu_admissions',
 'weekly_icu_admissions_per_million',
 'weekly_hosp_admissions',
 'weekly_hosp_admissions_per_million']

In [43]:
# Show the string representation of the reflected Table object.
repr(covid_deaths)

"Table('covid_deaths', MetaData(bind=None), Column('iso_code', NVARCHAR(length=255), table=<covid_deaths>), Column('continent', NVARCHAR(length=255), table=<covid_deaths>), Column('location', NVARCHAR(length=255), table=<covid_deaths>), Column('day_of_year', DATETIME(), table=<covid_deaths>), Column('population', FLOAT(precision=53), table=<covid_deaths>), Column('total_cases', FLOAT(precision=53), table=<covid_deaths>), Column('new_cases', FLOAT(precision=53), table=<covid_deaths>), Column('new_cases_smoothed', FLOAT(precision=53), table=<covid_deaths>), Column('total_deaths', INTEGER(), table=<covid_deaths>), Column('new_deaths', INTEGER(), table=<covid_deaths>), Column('new_deaths_smoothed', FLOAT(precision=53), table=<covid_deaths>), Column('total_cases_per_million', FLOAT(precision=53), table=<covid_deaths>), Column('new_cases_per_million', FLOAT(precision=53), table=<covid_deaths>), Column('new_cases_smoothed_per_million', FLOAT(precision=53), table=<covid_deaths>), Column('total

## 4. Prime query 

In [46]:
# Simple query written as raw SQL.
# The string is wrapped in text() at execution time: passing a plain str to
# Connection.execute() is deprecated in SQLAlchemy 1.4 and rejected in 2.0.
# `statement` itself stays a str.
from sqlalchemy import text

statement = 'SELECT TOP(10) * FROM covid_deaths'
results = connection.execute(text(statement)).fetchall()
results

[('OWID_EUN', None, 'European Union', datetime.datetime(2021, 2, 7, 0, 0), 444919060.0, 20418116.0, 68143.0, 112026.0, 493667, 1425, 3220.714, 45891.754, 153.158, 251.79, '1109,566', '3,203', 7.239, None, None, None, None, None, None, None, None, None),
 ('OWID_EUN', None, 'European Union', datetime.datetime(2021, 2, 8, 0, 0), 444919060.0, 20502181.0, 84065.0, 106419.286, 496659, 2992, 3161.571, 46080.698, 188.944, 239.188, '1116,291', '6,725', 7.106, None, None, None, None, None, None, None, None, None),
 ('OWID_EUN', None, 'European Union', datetime.datetime(2021, 2, 9, 0, 0), 444919060.0, 20593535.0, 91354.0, 102879.857, 500809, 4150, 3112.714, 46286.026, 205.327, 231.233, '1125,618', '9,328', 6.996, None, None, None, None, None, None, None, None, None),
 ('OWID_EUN', None, 'European Union', datetime.datetime(2021, 2, 10, 0, 0), 444919060.0, 20709189.0, 115654.0, 99781.571, 504216, 3407, 3053.857, 46545.97, 259.944, 224.269, '1133,276', '7,658', 6.864, None, None, None, None, None, 

In [50]:
# Simple query built with SQLAlchemy's expression language instead of raw SQL.
from sqlalchemy import select  # kept so any other cell relying on this name still works

# Table.select() yields SELECT <all columns> FROM covid_deaths. Unlike the
# legacy list form select([table]) (removed in SQLAlchemy 2.0), it is accepted
# by 1.x and 2.0 alike.
statement_2 = covid_deaths.select()
# Only the first 10 rows of the result are fetched.
results_2 = connection.execute(statement_2).fetchmany(size=10)
results_2

[('OWID_EUN', None, 'European Union', datetime.datetime(2021, 2, 7, 0, 0), 444919060.0, 20418116.0, 68143.0, 112026.0, 493667, 1425, 3220.714, 45891.754, 153.158, 251.79, '1109,566', '3,203', 7.239, None, None, None, None, None, None, None, None, None),
 ('OWID_EUN', None, 'European Union', datetime.datetime(2021, 2, 8, 0, 0), 444919060.0, 20502181.0, 84065.0, 106419.286, 496659, 2992, 3161.571, 46080.698, 188.944, 239.188, '1116,291', '6,725', 7.106, None, None, None, None, None, None, None, None, None),
 ('OWID_EUN', None, 'European Union', datetime.datetime(2021, 2, 9, 0, 0), 444919060.0, 20593535.0, 91354.0, 102879.857, 500809, 4150, 3112.714, 46286.026, 205.327, 231.233, '1125,618', '9,328', 6.996, None, None, None, None, None, None, None, None, None),
 ('OWID_EUN', None, 'European Union', datetime.datetime(2021, 2, 10, 0, 0), 444919060.0, 20709189.0, 115654.0, 99781.571, 504216, 3407, 3053.857, 46545.97, 259.944, 224.269, '1133,276', '7,658', 6.864, None, None, None, None, None, 

In [52]:
# Pick the first row of the raw-SQL result list.
first_row = results[0]

# Read one column of that row by attribute. This is the same access style the
# loops further down use (row.location); string-key access (row['location'])
# is no longer supported on rows in SQLAlchemy 2.0.
first_row.location

'European Union'

In [54]:
# Cumulative deaths per day in Italy.
# Table.select() replaces the legacy select([table]) list form, which was
# removed in SQLAlchemy 2.0; the generated query is the same.
statement_3 = covid_deaths.select().where(covid_deaths.columns.location == 'Italy')
result_3 = connection.execute(statement_3)

# Rows expose their columns as attributes.
for result in result_3:
    print(result.location, result.day_of_year, result.total_deaths)

Italy 2020-01-31 00:00:00 None
Italy 2020-02-01 00:00:00 None
Italy 2020-02-02 00:00:00 None
Italy 2020-02-03 00:00:00 None
Italy 2020-02-04 00:00:00 None
Italy 2020-02-05 00:00:00 None
Italy 2020-02-06 00:00:00 None
Italy 2020-02-07 00:00:00 None
Italy 2020-02-08 00:00:00 None
Italy 2020-02-09 00:00:00 None
Italy 2020-02-10 00:00:00 None
Italy 2020-02-11 00:00:00 None
Italy 2020-02-12 00:00:00 None
Italy 2020-02-13 00:00:00 None
Italy 2020-02-14 00:00:00 None
Italy 2020-02-15 00:00:00 None
Italy 2020-02-16 00:00:00 None
Italy 2020-02-17 00:00:00 None
Italy 2020-02-18 00:00:00 None
Italy 2020-02-19 00:00:00 None
Italy 2020-02-20 00:00:00 None
Italy 2020-02-21 00:00:00 1
Italy 2020-02-22 00:00:00 2
Italy 2020-02-23 00:00:00 3
Italy 2020-02-24 00:00:00 7
Italy 2020-02-25 00:00:00 10
Italy 2020-02-26 00:00:00 12
Italy 2020-02-27 00:00:00 17
Italy 2020-02-28 00:00:00 21
Italy 2020-02-29 00:00:00 29
Italy 2020-03-01 00:00:00 34
Italy 2020-03-02 00:00:00 52
Italy 2020-03-03 00:00:00 79
Italy

Italy 2021-01-10 00:00:00 78755
Italy 2021-01-11 00:00:00 79203
Italy 2021-01-12 00:00:00 79819
Italy 2021-01-13 00:00:00 80326
Italy 2021-01-14 00:00:00 80848
Italy 2021-01-15 00:00:00 81325
Italy 2021-01-16 00:00:00 81800
Italy 2021-01-17 00:00:00 82177
Italy 2021-01-18 00:00:00 82554
Italy 2021-01-19 00:00:00 83157
Italy 2021-01-20 00:00:00 83681
Italy 2021-01-21 00:00:00 84202
Italy 2021-01-22 00:00:00 84674
Italy 2021-01-23 00:00:00 85162
Italy 2021-01-24 00:00:00 85461
Italy 2021-01-25 00:00:00 85881
Italy 2021-01-26 00:00:00 86422
Italy 2021-01-27 00:00:00 86889
Italy 2021-01-28 00:00:00 87381
Italy 2021-01-29 00:00:00 87858
Italy 2021-01-30 00:00:00 88279
Italy 2021-01-31 00:00:00 88516
Italy 2021-02-01 00:00:00 88845
Italy 2021-02-02 00:00:00 89344
Italy 2021-02-03 00:00:00 89820
Italy 2021-02-04 00:00:00 90241
Italy 2021-02-05 00:00:00 90618
Italy 2021-02-06 00:00:00 91003
Italy 2021-02-07 00:00:00 91273
Italy 2021-02-08 00:00:00 91580
Italy 2021-02-09 00:00:00 92002
Italy 20

In [61]:
# Cumulative deaths per day in Italy, Argentina and the United States
# (the printed column is total_deaths).
import sqlalchemy

states = ['Italy', 'Argentina', 'United States']
# in_() renders an SQL IN (...) filter over the listed locations.
# Table.select() replaces the legacy select([table]) list form, which was
# removed in SQLAlchemy 2.0; the generated query is the same.
statement_4 = covid_deaths.select().where(covid_deaths.columns.location.in_(states))

for state in connection.execute(statement_4):
    print(state.location, state.day_of_year, state.total_deaths)

Argentina 2021-01-22 00:00:00 46575
Argentina 2021-01-23 00:00:00 46737
Argentina 2021-01-24 00:00:00 46827
Argentina 2021-01-25 00:00:00 47034
Argentina 2021-01-26 00:00:00 47253
Argentina 2021-01-27 00:00:00 47435
Argentina 2021-01-28 00:00:00 47601
Argentina 2021-01-29 00:00:00 47775
Argentina 2021-01-30 00:00:00 47931
Argentina 2021-01-31 00:00:00 47974
Argentina 2021-02-01 00:00:00 48249
Argentina 2021-02-02 00:00:00 48426
Argentina 2021-02-03 00:00:00 48539
Argentina 2021-02-04 00:00:00 48700
Argentina 2021-02-05 00:00:00 48985
Argentina 2021-02-06 00:00:00 49110
Argentina 2021-02-07 00:00:00 49171
Argentina 2021-02-08 00:00:00 49398
Argentina 2021-02-09 00:00:00 49566
Argentina 2021-02-10 00:00:00 49674
Argentina 2021-02-11 00:00:00 49874
Argentina 2021-02-12 00:00:00 50029
Argentina 2021-02-13 00:00:00 50188
Argentina 2021-02-14 00:00:00 50236
Argentina 2021-02-15 00:00:00 50327
Argentina 2021-02-16 00:00:00 50432
Argentina 2021-02-17 00:00:00 50616
Argentina 2021-02-18 00:00:0

Argentina 2020-08-20 00:00:00 6517
Argentina 2020-08-21 00:00:00 6730
Argentina 2020-08-22 00:00:00 6848
Argentina 2020-08-23 00:00:00 6985
Argentina 2020-08-24 00:00:00 7366
Argentina 2020-08-25 00:00:00 7563
Argentina 2020-08-26 00:00:00 7839
Argentina 2020-08-27 00:00:00 8050
Argentina 2020-08-28 00:00:00 8271
Argentina 2020-08-29 00:00:00 8353
Argentina 2020-08-30 00:00:00 8457
Argentina 2020-08-31 00:00:00 8660
Argentina 2020-09-01 00:00:00 8919
Argentina 2020-09-02 00:00:00 9118
Argentina 2020-09-03 00:00:00 9361
Argentina 2020-09-04 00:00:00 9623
Argentina 2020-09-05 00:00:00 9739
Argentina 2020-09-06 00:00:00 9859
Argentina 2020-09-07 00:00:00 10129
Argentina 2020-09-08 00:00:00 10405
Argentina 2020-09-09 00:00:00 10658
Argentina 2020-09-10 00:00:00 10907
Argentina 2020-09-11 00:00:00 11148
Argentina 2020-09-12 00:00:00 11263
Argentina 2020-09-13 00:00:00 11352
Argentina 2020-09-14 00:00:00 11667
Argentina 2020-09-15 00:00:00 11852
Argentina 2020-09-16 00:00:00 12116
Argentina 

United States 2020-08-02 00:00:00 155694
United States 2020-08-03 00:00:00 156276
United States 2020-08-04 00:00:00 157558
United States 2020-08-05 00:00:00 158997
United States 2020-08-06 00:00:00 160242
United States 2020-08-07 00:00:00 161479
United States 2020-08-08 00:00:00 162557
United States 2020-08-09 00:00:00 163081
United States 2020-08-10 00:00:00 163716
United States 2020-08-11 00:00:00 164736
United States 2020-08-12 00:00:00 166234
United States 2020-08-13 00:00:00 167302
United States 2020-08-14 00:00:00 168639
United States 2020-08-15 00:00:00 169639
United States 2020-08-16 00:00:00 170257
United States 2020-08-17 00:00:00 170750
United States 2020-08-18 00:00:00 171977
United States 2020-08-19 00:00:00 173315
United States 2020-08-20 00:00:00 174419
United States 2020-08-21 00:00:00 175510
United States 2020-08-22 00:00:00 176419
United States 2020-08-23 00:00:00 176964
United States 2020-08-24 00:00:00 177439
United States 2020-08-25 00:00:00 178674
United States 20

United States 2021-03-11 00:00:00 532712
United States 2021-03-12 00:00:00 534280
United States 2021-03-13 00:00:00 535280
United States 2021-03-14 00:00:00 535862
United States 2021-03-15 00:00:00 536619
United States 2021-03-16 00:00:00 537799
United States 2021-03-17 00:00:00 539035
United States 2021-03-18 00:00:00 540699
United States 2021-03-19 00:00:00 541848
United States 2021-03-20 00:00:00 542594
United States 2021-03-21 00:00:00 543035
United States 2021-03-22 00:00:00 543754
United States 2021-03-23 00:00:00 544631
United States 2021-03-24 00:00:00 546140
United States 2021-03-25 00:00:00 547551
United States 2021-03-26 00:00:00 548703
United States 2021-03-27 00:00:00 549491
United States 2021-03-28 00:00:00 550015
United States 2021-03-29 00:00:00 550729
United States 2021-03-30 00:00:00 551616
United States 2021-03-31 00:00:00 552709
United States 2021-04-01 00:00:00 553785
United States 2021-04-02 00:00:00 554738
United States 2021-04-03 00:00:00 555476
United States 20

Italy 2020-11-05 00:00:00 40192
Italy 2020-11-06 00:00:00 40638
Italy 2020-11-07 00:00:00 41063
Italy 2020-11-08 00:00:00 41394
Italy 2020-11-09 00:00:00 41750
Italy 2020-11-10 00:00:00 42330
Italy 2020-11-11 00:00:00 42953
Italy 2020-11-12 00:00:00 43589
Italy 2020-11-13 00:00:00 44139
Italy 2020-11-14 00:00:00 44683
Italy 2020-11-15 00:00:00 45229
Italy 2020-11-16 00:00:00 45733
Italy 2020-11-17 00:00:00 46464
Italy 2020-11-18 00:00:00 47217
Italy 2020-11-19 00:00:00 47870
Italy 2020-11-20 00:00:00 48569
Italy 2020-11-21 00:00:00 49261
Italy 2020-11-22 00:00:00 49823
Italy 2020-11-23 00:00:00 50453
Italy 2020-11-24 00:00:00 51306
Italy 2020-11-25 00:00:00 52028
Italy 2020-11-26 00:00:00 52850
Italy 2020-11-27 00:00:00 53677
Italy 2020-11-28 00:00:00 54363
Italy 2020-11-29 00:00:00 54904
Italy 2020-11-30 00:00:00 55576
Italy 2020-12-01 00:00:00 56361
Italy 2020-12-02 00:00:00 57045
Italy 2020-12-03 00:00:00 58038
Italy 2020-12-04 00:00:00 58852
Italy 2020-12-05 00:00:00 59514
Italy 20

In [64]:
# Days with at least 500 new deaths in Italy (the filter is >= 500).
from sqlalchemy import and_

# Both conditions must hold: the row is for Italy AND new_deaths >= 500.
# Table.select() replaces the legacy select([table]) list form, which was
# removed in SQLAlchemy 2.0; the generated query is the same.
statement_5 = covid_deaths.select().where(
    and_(
        covid_deaths.columns.location == 'Italy',
        covid_deaths.columns.new_deaths >= 500,
    )
)

for result in connection.execute(statement_5):
    print(result.location, result.day_of_year, result.new_deaths)

Italy 2020-03-20 00:00:00 627
Italy 2020-03-21 00:00:00 793
Italy 2020-03-22 00:00:00 651
Italy 2020-03-23 00:00:00 601
Italy 2020-03-24 00:00:00 743
Italy 2020-03-25 00:00:00 683
Italy 2020-03-26 00:00:00 712
Italy 2020-03-27 00:00:00 919
Italy 2020-03-28 00:00:00 889
Italy 2020-03-29 00:00:00 756
Italy 2020-03-30 00:00:00 812
Italy 2020-03-31 00:00:00 837
Italy 2020-04-01 00:00:00 727
Italy 2020-04-02 00:00:00 760
Italy 2020-04-03 00:00:00 766
Italy 2020-04-04 00:00:00 681
Italy 2020-04-05 00:00:00 525
Italy 2020-04-06 00:00:00 636
Italy 2020-04-07 00:00:00 604
Italy 2020-04-08 00:00:00 542
Italy 2020-04-09 00:00:00 610
Italy 2020-04-10 00:00:00 570
Italy 2020-04-11 00:00:00 619
Italy 2020-04-13 00:00:00 566
Italy 2020-04-14 00:00:00 602
Italy 2020-04-15 00:00:00 578
Italy 2020-04-16 00:00:00 525
Italy 2020-04-17 00:00:00 575
Italy 2020-04-21 00:00:00 534
Italy 2020-11-10 00:00:00 580
Italy 2020-11-11 00:00:00 623
Italy 2020-11-12 00:00:00 636
Italy 2020-11-13 00:00:00 550
Italy 2020