In [None]:
### ALL CODE BELOW (AS A UNIT) WORKS ONLY WITH THE LOCAL INSTANCE IN SQL CONNECTIONS (HOME OF MYSQL WORKBENCH)!
### This complete file NEEDS to be run before adding updated information to the SQL database tables in the future
### (for example: updated population, weather, or airport and flight information).
### Note: getting updated information requires running the separate files as needed (e.g. the weather file to get current weather info).

# IMPORTANT:
### RUN ALL CELLS ONLY AFTER YOU HAVE RUN THE SQL QUERY (= HAVE DELETED AND RECREATED THE SQL DATABASE AND TABLES) AND WANT
### THE BASE INFORMATION TO WHICH UPDATES CAN BE ADDED IN THE FUTURE.

In [None]:
# Placeholder input: replace City1, City2, ... with actual city names.
cities = [
    "City1",
    "City2",
    "City3",
]

In [5]:
# install if needed:
# !pip install sqlalchemy
# !pip install pymysql

# libraries used:
import get_connection_string as gcs
import get_city_data as gcd
import get_population_data as gpd
import get_weather_data as gwd
import get_airport_codes as gac
import get_flight_arrivals as gfa
import pandas as pd
import requests

In [6]:
# Connection string returned by the project's get_connection_string helper.
# It is passed as `con` to every pandas to_sql / read_sql call below.
# NOTE(review): presumably a SQLAlchemy URL for the local MySQL instance
# (see the header note and the sqlalchemy/pymysql install hints) — confirm
# in get_connection_string.
connection_string = gcs.get_connection_string()

## Get_city information

In [10]:
# Collect the per-city data for the configured `cities` via the project helper.
# Later cells read its `country` column to fill the `countries` table and write
# the remaining columns (plus the looked-up country key) to the `cities` table.
city_scraping_df=gcd.get_city_data(cities)

In [11]:
# Collect population data for the same `cities` via the project helper.
# The result is merged with the `cities` table on `city` further down, and
# its `population` and `year` columns are written to the `population` table.
city_pop_scraping_df=gpd.get_population_data(cities)

### Countries

In [14]:
# One row per distinct country name found in the scraped city data,
# in order of first appearance.
countries_df = pd.DataFrame({"country": city_scraping_df["country"].unique()})

In [15]:
# Append the distinct countries to the `countries` table; the frame's index
# is not written as a column.
countries_df.to_sql(
    name="countries",
    con=connection_string,
    if_exists="append",
    index=False,
)

1

In [19]:
# Read the whole `countries` table back so the frame carries every column the
# database holds for it.
# NOTE(review): presumably this is where an auto-generated country key comes
# from — confirm against the table schema.
countries_from_sql = pd.read_sql(sql="countries", con=connection_string)

### Cities

In [22]:
# Attach the columns of the `countries` table to every scraped city by
# matching on the country name (left join: every city row is kept).
#
# validate="many_to_one" makes pandas raise a MergeError if a country name
# occurs more than once in `countries_from_sql`. That happens when this
# notebook is run against a database that was not recreated first (every
# write below appends), and without the check each affected city row would
# be silently duplicated and then written to the `cities` table.
merged_countries_df = city_scraping_df.merge(
    countries_from_sql,
    on="country",
    how="left",
    validate="many_to_one",
)

In [24]:
# The country name is only needed for the lookup above; drop it so the frame
# holds just the columns written to the `cities` table.
cities_df = merged_countries_df.drop(columns="country")

In [26]:
# Append the city rows to the `cities` table; the frame's index is not
# written as a column.
cities_df.to_sql(
    name="cities",
    con=connection_string,
    if_exists="append",
    index=False,
)

9

### Population

In [9]:
# Read the whole `cities` table back. Later cells use its `city`, `city_id`,
# `country_id`, `latitude` and `longitude` columns.
cities_from_sql = pd.read_sql(sql="cities", con=connection_string)

In [None]:
# Attach the scraped population figures to the stored cities by city name
# (left join: cities without population data are kept, with missing values).
#
# validate="one_to_many" makes pandas raise a MergeError if a city name
# occurs more than once in `cities_from_sql` (e.g. the notebook was run twice
# against the same database, since every write appends). Without the check
# each population row would be silently duplicated for the repeated city.
merged_population = cities_from_sql.merge(
    city_pop_scraping_df,
    on="city",
    how="left",
    validate="one_to_many",
)

In [None]:
# Keep only the three columns written to the `population` table.
population_df = merged_population.loc[:, ["city_id", "population", "year"]]

In [None]:
# Append the population rows to the `population` table; the frame's index is
# not written as a column.
population_df.to_sql(
    name="population",
    con=connection_string,
    if_exists="append",
    index=False,
)

3

In [None]:
# Read the `population` table back, e.g. to inspect what was stored.
# No later cell visible in this notebook uses this frame.
population_from_sql = pd.read_sql(sql="population", con=connection_string)

## Get_weather

In [None]:
# Rebinds `cities` (first defined as the hand-written list at the top of the
# notebook) to the city names stored in the `cities` table, so the weather
# lookup below uses what is actually in the database.
cities = cities_from_sql["city"].tolist()

In [None]:
# Fetch weather data for the city names read back from SQL via the project
# helper. Later cells merge the result with the `cities` table on `city` and
# convert its `forecast_time` column to datetimes.
city_weather_df = gwd.get_weather_data(cities)


In [None]:
# Attach the stored city columns (including `city_id`) to every weather row
# by matching on the city name (left join: every weather row is kept).
#
# validate="many_to_one" makes pandas raise a MergeError if a city name
# occurs more than once in `cities_from_sql` (e.g. the notebook was run twice
# against the same database, since every write appends). Without the check
# each weather row for that city would be silently duplicated.
merged_weather_df = city_weather_df.merge(
    cities_from_sql,
    on="city",
    how="left",
    validate="many_to_one",
)

In [None]:
# Remove the city columns that were only needed for the lookup; what remains
# is written to the `weather` table.
weather_df = merged_weather_df.drop(
    labels=["city", "country_id", "latitude", "longitude"],
    axis="columns",
)

In [None]:
# Convert the forecast timestamps to pandas datetimes before writing to SQL.
weather_df["forecast_time"] = weather_df["forecast_time"].pipe(pd.to_datetime)

In [None]:
# Append the weather rows to the `weather` table; the frame's index is not
# written as a column.
weather_df.to_sql(
    name="weather",
    con=connection_string,
    if_exists="append",
    index=False,
)

120

In [None]:
# Read the `weather` table back, e.g. to inspect what was stored.
# No later cell visible in this notebook uses this frame.
weather_from_sql = pd.read_sql(sql="weather", con=connection_string)

## Get_airports

In [None]:
# Two parallel lists (same order as the rows of `cities_from_sql`) holding
# the stored coordinates of each city, used for the airport lookup below.
city_latitude, city_longitude = (
    cities_from_sql[column].tolist() for column in ("latitude", "longitude")
)

In [None]:
# Look up airports for the stored city coordinates via the project helper.
# The result is merged back onto the `cities` table on `latitude` and
# `longitude` in the next cell.
# NOTE(review): that merge only matches if the helper returns the input
# coordinates unchanged in those two columns — confirm in get_airport_codes.
airports_df = gac.get_airport_codes(city_latitude, city_longitude)

In [None]:
# Attach the stored city columns (including `city_id`) to every airport row
# by matching on the exact coordinate pair (left join: every airport row is
# kept).
#
# validate="many_to_one" makes pandas raise a MergeError if a coordinate pair
# occurs more than once in `cities_from_sql` (e.g. the notebook was run twice
# against the same database, since every write appends). Without the check
# each airport row for that city would be silently duplicated.
merged_airport_df = pd.merge(
    airports_df,
    cities_from_sql,
    on=["latitude", "longitude"],
    how="left",
    validate="many_to_one",
)

In [None]:
# Remove the city columns that were only needed for the lookup; what remains
# is written to the `airports` table.
airport_df = merged_airport_df.drop(
    labels=["latitude", "longitude", "city", "country_id"],
    axis="columns",
)

In [None]:
# Append the airport rows to the `airports` table; the frame's index is not
# written as a column.
airport_df.to_sql(
    name="airports",
    con=connection_string,
    if_exists="append",
    index=False,
)

8

In [None]:
# Read the whole `airports` table back. Later cells use its `icao_code`
# column to fetch flights and to join the flights onto the stored airports.
airports_from_sql = pd.read_sql(sql="airports", con=connection_string)

In [None]:
# ICAO codes to query for arrivals: each code once, without missing values.
# The same airport can be stored for more than one city; without
# de-duplication its flights would be fetched once per stored row and then
# multiplied again when the flights are merged back onto `airports_from_sql`.
# Missing codes are dropped because they cannot identify an airport.
airport_icao = (
    airports_from_sql["icao_code"]
    .dropna()
    .drop_duplicates()
    .to_list()
)

## Get_flights

In [None]:
# Fetch flight arrivals for the stored airports' ICAO codes via the project
# helper. Later cells merge the result onto the `airports` table on
# `icao_code` and convert its `arrival_times` column to datetimes.
flights_df = gfa.get_flight_arrivals(airport_icao)

In [None]:
# Attach the stored airport columns to every flight row by matching on the
# ICAO code (left join: every flight row is kept).
merged_flights_df = pd.merge(
    flights_df,
    airports_from_sql,
    how="left",
    on="icao_code",
)

In [None]:
# Remove the airport columns that were only needed for the lookup; what
# remains is written to the `flights` table.
# Note: this rebinds `flights_df` to a frame without `icao_code`, so the
# merge cell above cannot be re-run after this one without first re-running
# the cell that fetches the flights.
flights_df = merged_flights_df.drop(
    labels=["icao_code", "iata_code", "airport_name", "city_name", "city_id"],
    axis="columns",
)

In [None]:
# Convert the arrival timestamps to pandas datetimes before writing to SQL.
flights_df["arrival_times"] = flights_df["arrival_times"].pipe(pd.to_datetime)

In [None]:
# Append the flight rows to the `flights` table; the frame's index is not
# written as a column.
flights_df.to_sql(
    name="flights",
    con=connection_string,
    if_exists="append",
    index=False,
)

210

In [None]:
# Read the `flights` table back, e.g. to inspect what was stored.
# No later cell visible in this notebook uses this frame.
flights_from_sql = pd.read_sql(sql="flights", con=connection_string)