# Air quality analysis in Barcelona

## 1. Data import and cleaning

In [1]:
## import libraries

import pandas as pd 
import sqlalchemy 
import datetime 

In [2]:
raw_data = pd.read_csv('air_quality_Nov2017.csv')

In [3]:
raw_data.head()

Unnamed: 0,Station,Air Quality,Longitude,Latitude,O3 Hour,O3 Quality,O3 Value,NO2 Hour,NO2 Quality,NO2 Value,PM10 Hour,PM10 Quality,PM10 Value,Generated,Date Time
0,Barcelona - Sants,Good,2.1331,41.3788,,,,0h,Good,84.0,,,,01/11/2018 0:00,1541027104
1,Barcelona - Eixample,Moderate,2.1538,41.3853,0h,Good,1.0,0h,Moderate,113.0,0h,Good,36.0,01/11/2018 0:00,1541027104
2,Barcelona - Gràcia,Good,2.1534,41.3987,0h,Good,10.0,0h,Good,73.0,,,,01/11/2018 0:00,1541027104
3,Barcelona - Ciutadella,Good,2.1874,41.3864,0h,Good,2.0,0h,Good,86.0,,,,01/11/2018 0:00,1541027104
4,Barcelona - Vall Hebron,Good,2.148,41.4261,0h,Good,7.0,0h,Good,69.0,,,,01/11/2018 0:00,1541027104


We remove the columns that are not needed for our analysis. In this case, the columns that are needed are: 
- Station
- Air Quality 
- O3 Quality
- 03 Value
- NO2 Quality
- NO2 Value
- PM10 Quality
- PM10 Value 
- Generated 
- Date Time 

In [4]:
print(raw_data.columns.tolist())

['Station', 'Air Quality', 'Longitude', 'Latitude', 'O3 Hour', 'O3 Quality', 'O3 Value', 'NO2 Hour', 'NO2 Quality', 'NO2 Value', 'PM10 Hour', 'PM10 Quality', 'PM10 Value', 'Generated', 'Date Time']


In [5]:
raw_data_selected = raw_data.copy()

raw_data_selected = raw_data_selected[['Station', 'Air Quality', 'O3 Quality', 'O3 Value', 'NO2 Quality', 'NO2 Value', 'PM10 Quality', 'PM10 Value', 'Generated']]

raw_data_selected.head()

Unnamed: 0,Station,Air Quality,O3 Quality,O3 Value,NO2 Quality,NO2 Value,PM10 Quality,PM10 Value,Generated
0,Barcelona - Sants,Good,,,Good,84.0,,,01/11/2018 0:00
1,Barcelona - Eixample,Moderate,Good,1.0,Moderate,113.0,Good,36.0,01/11/2018 0:00
2,Barcelona - Gràcia,Good,Good,10.0,Good,73.0,,,01/11/2018 0:00
3,Barcelona - Ciutadella,Good,Good,2.0,Good,86.0,,,01/11/2018 0:00
4,Barcelona - Vall Hebron,Good,Good,7.0,Good,69.0,,,01/11/2018 0:00


### Analyzing the type of each attribute of the dataset

In [6]:

raw_data_selected.dtypes

Station          object
Air Quality      object
O3 Quality       object
O3 Value        float64
NO2 Quality      object
NO2 Value       float64
PM10 Quality     object
PM10 Value      float64
Generated        object
dtype: object

### Changing type of date to DateTime

In [7]:
raw_data_selected['date_corrected'] = pd.to_datetime(raw_data_selected["Generated"])

raw_data_selected.set_index("date_corrected", inplace=True) # Inplace reassign dataset to indexed dataset. Is as if dataset_NEW=dataset_OLD

raw_data_selected = raw_data_selected.drop(["Generated"], axis=1)

In [16]:
raw_data_selected.index

DatetimeIndex(['2018-01-11 00:00:00', '2018-01-11 00:00:00',
               '2018-01-11 00:00:00', '2018-01-11 00:00:00',
               '2018-01-11 00:00:00', '2018-01-11 00:00:00',
               '2018-01-11 00:00:00', '2018-01-11 00:00:00',
               '2018-01-11 01:00:00', '2018-01-11 01:00:00',
               ...
               '2018-11-30 22:00:00', '2018-11-30 22:00:00',
               '2018-11-30 23:00:00', '2018-11-30 23:00:00',
               '2018-11-30 23:00:00', '2018-11-30 23:00:00',
               '2018-11-30 23:00:00', '2018-11-30 23:00:00',
               '2018-11-30 23:00:00', '2018-11-30 23:00:00'],
              dtype='datetime64[ns]', name='date_corrected', length=5744, freq=None)

In [19]:
raw_data_selected

Unnamed: 0_level_0,Station,Air Quality,O3 Quality,O3 Value,NO2 Quality,NO2 Value,PM10 Quality,PM10 Value
date_corrected,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-01-11 00:00:00,Barcelona - Sants,Good,,,Good,84.0,,
2018-01-11 00:00:00,Barcelona - Eixample,Moderate,Good,1.0,Moderate,113.0,Good,36.0
2018-01-11 00:00:00,Barcelona - Gràcia,Good,Good,10.0,Good,73.0,,
2018-01-11 00:00:00,Barcelona - Ciutadella,Good,Good,2.0,Good,86.0,,
2018-01-11 00:00:00,Barcelona - Vall Hebron,Good,Good,7.0,Good,69.0,,
2018-01-11 00:00:00,Barcelona - Palau Reial,Good,Good,11.0,Good,57.0,Good,23.0
2018-01-11 00:00:00,Barcelona - Poblenou,Good,,,Good,86.0,Good,32.0
2018-01-11 00:00:00,Barcelona - Observ Fabra,Good,Good,58.0,Good,3.0,Good,25.0
2018-01-11 01:00:00,Barcelona - Sants,Good,,,Good,62.0,,
2018-01-11 01:00:00,Barcelona - Eixample,Good,Good,6.0,Good,80.0,Good,35.0


### General description in Barcelona

In [20]:
raw_data_selected.describe()

Unnamed: 0,O3 Value,NO2 Value,PM10 Value
count,4101.0,5460.0,3647.0
mean,34.082907,35.740293,16.590074
std,22.960687,22.357262,8.065424
min,1.0,1.0,2.0
25%,14.0,17.0,10.0
50%,34.0,33.0,15.0
75%,52.0,52.0,22.0
max,100.0,117.0,44.0


From this table we can say that we have missing values in the general table. However, a general description of Barcelona can be done. The average values for November 2018 in Barcelona are: 
- O3: 34 ug/cm3
- NO2: 35.74 ug/cm3
- PM10: 16.59 ug/cm3

In average terms, Barcelona had a very low qualitative name for O3, very low for NO2 and low for PM10. But, what is the situation in Barcelona? 

### Where has the maximum value of pollution happened in Barcelona? 

In [28]:
# For O3

raw_data_selected[raw_data_selected['O3 Value']==raw_data_selected['O3 Value'].max()]

Unnamed: 0_level_0,Station,Air Quality,O3 Quality,O3 Value,NO2 Quality,NO2 Value,PM10 Quality,PM10 Value
date_corrected,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-11-29 22:00:00,Barcelona - Observ Fabra,Good,Good,100.0,Good,4.0,Good,13.0


In [29]:
# For NO2

raw_data_selected[raw_data_selected['NO2 Value']==raw_data_selected['NO2 Value'].max()]

Unnamed: 0_level_0,Station,Air Quality,O3 Quality,O3 Value,NO2 Quality,NO2 Value,PM10 Quality,PM10 Value
date_corrected,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-11-22 11:00:00,Barcelona - Gràcia,Moderate,Good,3.0,Moderate,117.0,Good,12.0


In [31]:
# For PM10
raw_data_selected[raw_data_selected['NO2 Value']==raw_data_selected['NO2 Value'].max()]

Unnamed: 0_level_0,Station,Air Quality,O3 Quality,O3 Value,NO2 Quality,NO2 Value,PM10 Quality,PM10 Value
date_corrected,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-11-22 11:00:00,Barcelona - Gràcia,Moderate,Good,3.0,Moderate,117.0,Good,12.0


### Missing values

There are several ways to deal with missing values, in this case NaN values.
First of all, we need to know how many NaN values we have in our dataset. 

In [9]:
raw_data_selected.isna().sum()

Station            0
Air Quality        0
O3 Quality      1476
O3 Value        1643
NO2 Quality       55
NO2 Value        284
PM10 Quality    2022
PM10 Value      2097
dtype: int64

#### Method 1: dropping all the NaN values of the entire dataset. 

In this case we have removed all the rows that have a "NaN" value there. If we perform an analysis of what we have done, we can see the total amount of data we have lost 

In [10]:
before_rows = raw_data_selected.shape[0]
print(f"Number of rows before dropping all the NaN values: {before_rows}")

Number of rows before dropping all the NaN values: 5744


In [11]:
clean_air_quality =  raw_data_selected.copy()
clean_air_quality = raw_data_selected.dropna()
clean_air_quality.head()

Unnamed: 0_level_0,Station,Air Quality,O3 Quality,O3 Value,NO2 Quality,NO2 Value,PM10 Quality,PM10 Value
date_corrected,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-01-11 00:00:00,Barcelona - Eixample,Moderate,Good,1.0,Moderate,113.0,Good,36.0
2018-01-11 00:00:00,Barcelona - Palau Reial,Good,Good,11.0,Good,57.0,Good,23.0
2018-01-11 00:00:00,Barcelona - Observ Fabra,Good,Good,58.0,Good,3.0,Good,25.0
2018-01-11 01:00:00,Barcelona - Eixample,Good,Good,6.0,Good,80.0,Good,35.0
2018-01-11 01:00:00,Barcelona - Palau Reial,Good,Good,27.0,Good,38.0,Good,24.0


In [12]:
after_rows = clean_air_quality.shape[0]
print (f"Number of rows after dropping all the NaN values: {after_rows}")

Number of rows after dropping all the NaN values: 2853


In [13]:
data_lost_perc = ((before_rows - after_rows)/before_rows)*100
print(f"The percentage of data that is NaN and has been removed is {data_lost_perc}")


The percentage of data that is NaN and has been removed is 50.330779944289695


If we do it like this, we remove half of the data. 

#### Method 2: Instead of NaN removal, values interpolation 

#### Method 3: Instead of NaN removal, choose the last value of each station 

## Database labeling 

In [55]:
labels = {"Barcelona - Ciutadella":1, "Barcelona - Eixample":2, "Barcelona - Gràcia":3, "Barcelona - Palau Reial":4, 
         "Barcelona - Poblenou": 5, "Barcelona - Sants": 6, "Barcelona - Vall Hebron": 7, 
         "Barcelona - Observ Fabra":8  }

In [38]:
raw_data_selected.head()

Unnamed: 0_level_0,Station,Air Quality,O3 Quality,O3 Value,NO2 Quality,NO2 Value,PM10 Quality,PM10 Value
date_corrected,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-01-11,Barcelona - Sants,Good,,,Good,84.0,,
2018-01-11,Barcelona - Eixample,Moderate,Good,1.0,Moderate,113.0,Good,36.0
2018-01-11,Barcelona - Gràcia,Good,Good,10.0,Good,73.0,,
2018-01-11,Barcelona - Ciutadella,Good,Good,2.0,Good,86.0,,
2018-01-11,Barcelona - Vall Hebron,Good,Good,7.0,Good,69.0,,


In [64]:
station_ID=[labels[i] for i in raw_data_selected["Station"]]

raw_data_selected["Station_ID"] = station_ID

raw_data_selected.head()

Unnamed: 0_level_0,Station,Air Quality,O3 Quality,O3 Value,NO2 Quality,NO2 Value,PM10 Quality,PM10 Value,Station_ID
date_corrected,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2018-01-11,Barcelona - Sants,Good,,,Good,84.0,,,6
2018-01-11,Barcelona - Eixample,Moderate,Good,1.0,Moderate,113.0,Good,36.0,2
2018-01-11,Barcelona - Gràcia,Good,Good,10.0,Good,73.0,,,3
2018-01-11,Barcelona - Ciutadella,Good,Good,2.0,Good,86.0,,,1
2018-01-11,Barcelona - Vall Hebron,Good,Good,7.0,Good,69.0,,,7


In [65]:
raw_data_selected = raw_data_selected.drop("Station", axis = 1)

In [68]:
raw_data_selected.head()

Unnamed: 0_level_0,Air Quality,O3 Quality,O3 Value,NO2 Quality,NO2 Value,PM10 Quality,PM10 Value,Station_ID
date_corrected,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-01-11,Good,,,Good,84.0,,,6
2018-01-11,Moderate,Good,1.0,Moderate,113.0,Good,36.0,2
2018-01-11,Good,Good,10.0,Good,73.0,,,3
2018-01-11,Good,Good,2.0,Good,86.0,,,1
2018-01-11,Good,Good,7.0,Good,69.0,,,7


In [69]:
news_columns = ["Station_ID", 'Air Quality', 'O3 Quality', 'O3 Value', 'NO2 Quality', 'NO2 Value', 'PM10 Quality', 'PM10 Value']

In [83]:
Barcelona_air_quality = raw_data_selected.reindex(columns=news_columns)

Barcelona_air_quality.head()

Unnamed: 0_level_0,Station_ID,Air Quality,O3 Quality,O3 Value,NO2 Quality,NO2 Value,PM10 Quality,PM10 Value
date_corrected,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-01-11,6,Good,,,Good,84.0,,
2018-01-11,2,Moderate,Good,1.0,Moderate,113.0,Good,36.0
2018-01-11,3,Good,Good,10.0,Good,73.0,,
2018-01-11,1,Good,Good,2.0,Good,86.0,,
2018-01-11,7,Good,Good,7.0,Good,69.0,,


### Secondary table for Station ID

In [77]:
station_info = pd.read_csv('air_stations_Nov2017.csv')
station_info

Unnamed: 0,Station,Longitude,Latitude,Ubication,District Name,Neighborhood Name
0,Barcelona - Ciutadella,2.1874,41.3864,Parc de la Ciutadella,Ciutat Vella,"Sant Pere, Santa Caterina i la Ribera"
1,Barcelona - Eixample,2.1538,41.3853,Av. Roma - c/ Comte Urgell,Eixample,la Nova Esquerra de l'Eixample
2,Barcelona - Gràcia,2.1534,41.3987,Plaça Gal·la Placídia (Via Augusta - Travesser...,Gracia,la Vila de Gracia
3,Barcelona - Palau Reial,2.1151,41.3875,c/ John Maynard Keynes - c/ de Jordi Girona,Les Corts,Pedralbes
4,Barcelona - Poblenou,2.2045,41.4039,Plaça Josep Trueta (Pujades - Lope de Vega),Sant Marti,el Poblenou
5,Barcelona - Sants,2.1331,41.3788,Jardins de Can Mantega (Joan Güell - Violant d...,Sants-Montjuic,Sants
6,Barcelona - Vall Hebron,2.148,41.4261,Parc de la Vall d’Hebron - c/ Martí Codolar - ...,Horta-Guinardo,la Vall d'Hebron
7,Barcelona – Observ,2.1211,41.4176,"Ctra Observatori Fabra, 27",Sarrià-Sant Gervasi,"Vallvidrera, el Tibidabo i les Planes"


In [79]:
station_ID_num= [1,2,3,4,5,6,7,8]

station_info['station_ID']= station_ID_num
station_info

Unnamed: 0,Station,Longitude,Latitude,Ubication,District Name,Neighborhood Name,station_ID
0,Barcelona - Ciutadella,2.1874,41.3864,Parc de la Ciutadella,Ciutat Vella,"Sant Pere, Santa Caterina i la Ribera",1
1,Barcelona - Eixample,2.1538,41.3853,Av. Roma - c/ Comte Urgell,Eixample,la Nova Esquerra de l'Eixample,2
2,Barcelona - Gràcia,2.1534,41.3987,Plaça Gal·la Placídia (Via Augusta - Travesser...,Gracia,la Vila de Gracia,3
3,Barcelona - Palau Reial,2.1151,41.3875,c/ John Maynard Keynes - c/ de Jordi Girona,Les Corts,Pedralbes,4
4,Barcelona - Poblenou,2.2045,41.4039,Plaça Josep Trueta (Pujades - Lope de Vega),Sant Marti,el Poblenou,5
5,Barcelona - Sants,2.1331,41.3788,Jardins de Can Mantega (Joan Güell - Violant d...,Sants-Montjuic,Sants,6
6,Barcelona - Vall Hebron,2.148,41.4261,Parc de la Vall d’Hebron - c/ Martí Codolar - ...,Horta-Guinardo,la Vall d'Hebron,7
7,Barcelona – Observ,2.1211,41.4176,"Ctra Observatori Fabra, 27",Sarrià-Sant Gervasi,"Vallvidrera, el Tibidabo i les Planes",8


In [82]:
station_info.set_index("station_ID")

Unnamed: 0_level_0,Station,Longitude,Latitude,Ubication,District Name,Neighborhood Name
station_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,Barcelona - Ciutadella,2.1874,41.3864,Parc de la Ciutadella,Ciutat Vella,"Sant Pere, Santa Caterina i la Ribera"
2,Barcelona - Eixample,2.1538,41.3853,Av. Roma - c/ Comte Urgell,Eixample,la Nova Esquerra de l'Eixample
3,Barcelona - Gràcia,2.1534,41.3987,Plaça Gal·la Placídia (Via Augusta - Travesser...,Gracia,la Vila de Gracia
4,Barcelona - Palau Reial,2.1151,41.3875,c/ John Maynard Keynes - c/ de Jordi Girona,Les Corts,Pedralbes
5,Barcelona - Poblenou,2.2045,41.4039,Plaça Josep Trueta (Pujades - Lope de Vega),Sant Marti,el Poblenou
6,Barcelona - Sants,2.1331,41.3788,Jardins de Can Mantega (Joan Güell - Violant d...,Sants-Montjuic,Sants
7,Barcelona - Vall Hebron,2.148,41.4261,Parc de la Vall d’Hebron - c/ Martí Codolar - ...,Horta-Guinardo,la Vall d'Hebron
8,Barcelona – Observ,2.1211,41.4176,"Ctra Observatori Fabra, 27",Sarrià-Sant Gervasi,"Vallvidrera, el Tibidabo i les Planes"


## Data splitting per station

Stations we have in Barcelona: 
1. Barcelona - Ciutadella 
2. Barcelona - Eixample
3. Barcelona - Gràcia 
4. Barcelona - Palau Reial
5. Barcelona - Poblenou
6. Barcelona - Sants
7. Barcelona - Vall Hebron
8. Barcelona - Vallvidrera, el Tibidabo i les Planes

#### BCN- Ciutadella 

In [33]:
BCN_Ciutadella = clean_air_quality[clean_air_quality["Station"]=="Barcelona - Ciutadella"]

BCN_Ciutadella.head()

Unnamed: 0_level_0,Station,Air Quality,O3 Quality,O3 Value,NO2 Quality,NO2 Value,PM10 Quality,PM10 Value
date_corrected,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1


#### BCN-Eixample

In [34]:
#BCN_Sants = clean_air_quality[clean_air_quality["Station"] == "Barcelona - Sants"]

#BCN_Sants

#BCN_Eixample = 

#clean_air_quality.head()

BCN_Eixample = clean_air_quality[clean_air_quality["Station"]=="Barcelona - Eixample"]

BCN_Eixample.head()

Unnamed: 0_level_0,Station,Air Quality,O3 Quality,O3 Value,NO2 Quality,NO2 Value,PM10 Quality,PM10 Value
date_corrected,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-01-11 00:00:00,Barcelona - Eixample,Moderate,Good,1.0,Moderate,113.0,Good,36.0
2018-01-11 01:00:00,Barcelona - Eixample,Good,Good,6.0,Good,80.0,Good,35.0
2018-01-11 02:00:00,Barcelona - Eixample,Good,Good,14.0,Good,64.0,Good,34.0
2018-01-11 03:00:00,Barcelona - Eixample,Good,Good,32.0,Good,44.0,Good,35.0
2018-01-11 04:00:00,Barcelona - Eixample,Good,Good,43.0,Good,34.0,Good,34.0


In [36]:
station_links = pd.read_csv('air_stations_Nov2017.csv')
station_links

Unnamed: 0,Station,Longitude,Latitude,Ubication,District Name,Neighborhood Name
0,Barcelona - Ciutadella,2.1874,41.3864,Parc de la Ciutadella,Ciutat Vella,"Sant Pere, Santa Caterina i la Ribera"
1,Barcelona - Eixample,2.1538,41.3853,Av. Roma - c/ Comte Urgell,Eixample,la Nova Esquerra de l'Eixample
2,Barcelona - Gràcia,2.1534,41.3987,Plaça Gal·la Placídia (Via Augusta - Travesser...,Gracia,la Vila de Gracia
3,Barcelona - Palau Reial,2.1151,41.3875,c/ John Maynard Keynes - c/ de Jordi Girona,Les Corts,Pedralbes
4,Barcelona - Poblenou,2.2045,41.4039,Plaça Josep Trueta (Pujades - Lope de Vega),Sant Marti,el Poblenou
5,Barcelona - Sants,2.1331,41.3788,Jardins de Can Mantega (Joan Güell - Violant d...,Sants-Montjuic,Sants
6,Barcelona - Vall Hebron,2.148,41.4261,Parc de la Vall d’Hebron - c/ Martí Codolar - ...,Horta-Guinardo,la Vall d'Hebron
7,"Barcelona - Vallvidrera, el Tibidabo i les Planes",2.1211,41.4176,"Ctra Observatori Fabra, 27",Sarrià-Sant Gervasi,"Vallvidrera, el Tibidabo i les Planes"


## 2. Database connection

In [None]:
## Import libraries

from sqlalchemy import create_engine


In [None]:
driver = 'mysql+pymysql:'
user = 'owner'
password = 'ir0n-h4ck'
ip = '104.197.101.244'
database = 'project-week-2-barcelona'


In [None]:
connection_string = f'{driver}//{user}:{password}@{ip}/{database}'

In [None]:
engine = create_engine(connection_string)

In [None]:
#query = """
 #       SELECT * FROM authors
#"""

#query = """
#        CREATE TABLE kerim_table
#        SELECT * FROM authors
#"""
#df_db = pd.read_sql(query, engine)

In [None]:
#df_db = pd.read_sql(query, engine)

#print(query)


