## Collecting data with APIs

We use the API Ninjas City API, which provides useful statistics about many cities around the world: https://api-ninjas.com/api/city

In [3]:
import pandas as pd
import requests
import json

In [4]:
from API_keys import city_data_key as key

# Query settings for the API Ninjas city endpoint.
country = "DE"
min_population = "500000"
limit = "5"

# Pass the filters via `params` so requests builds and URL-encodes the query
# string, set a timeout so the cell cannot hang indefinitely, and fail loudly
# on an HTTP error instead of handing an error payload to the following cells.
cities = requests.get(
    "https://api.api-ninjas.com/v1/city",
    params={"country": country, "min_population": min_population, "limit": limit},
    headers={'X-Api-Key': key},
    timeout=10,
)
cities.raise_for_status()
cities_json = cities.json()
cities_json

[{'name': 'Berlin',
  'latitude': 52.5167,
  'longitude': 13.3833,
  'country': 'DE',
  'population': 3644826,
  'is_capital': True},
 {'name': 'Hamburg',
  'latitude': 53.55,
  'longitude': 10.0,
  'country': 'DE',
  'population': 1841179,
  'is_capital': False},
 {'name': 'Munich',
  'latitude': 48.1372,
  'longitude': 11.5755,
  'country': 'DE',
  'population': 1471508,
  'is_capital': False},
 {'name': 'Cologne',
  'latitude': 50.9422,
  'longitude': 6.9578,
  'country': 'DE',
  'population': 1085664,
  'is_capital': False},
 {'name': 'Frankfurt',
  'latitude': 50.1136,
  'longitude': 8.6797,
  'country': 'DE',
  'population': 753056,
  'is_capital': False}]

In [5]:
from IPython.display import JSON

In [6]:
# Show the response through IPython's JSON display object: it looks nicer and
# makes the nested dictionaries and lists easier to navigate.
JSON(cities_json)

<IPython.core.display.JSON object>

## Cleaning the city data

In [8]:
def clean_cities(df):
    """Return a cleaned copy of the raw city table.

    The ``latitude``, ``longitude`` and ``population`` columns are converted
    with ``pd.to_numeric`` and the ``is_capital`` column is dropped. The frame
    passed in is left unmodified, so the function is safe to re-run and to use
    inside a ``.pipe`` chain.

    Parameters
    ----------
    df : pandas.DataFrame
        City table containing at least the columns ``latitude``,
        ``longitude``, ``population`` and ``is_capital``.

    Returns
    -------
    pandas.DataFrame
        A new frame with numeric measure columns and without ``is_capital``.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    numeric_columns = ['latitude', 'longitude', 'population']

    # assign() builds a new DataFrame, so the caller's frame is never mutated
    # (the previous item assignment converted the columns of the input itself).
    converted = {measure: pd.to_numeric(df[measure]) for measure in numeric_columns}
    cleaned = df.assign(**converted).drop(["is_capital"], axis=1)

    print("The city data has been cleaned!")
    return cleaned

## Renaming and reshaping the DataFrame

In [9]:
def rename_reshape(df):
    """Rename the name/country columns and add a three-letter city code.

    ``name`` becomes ``city_name`` and ``country`` becomes ``country_code``.
    A ``city_code`` column is added holding the first three characters of
    ``city_name`` in upper case. A new DataFrame is returned.
    """
    column_names = {'name': 'city_name', 'country': 'country_code'}
    renamed = df.rename(columns=column_names)
    # Slice the prefix and upper-case it in one chained string operation.
    return renamed.assign(city_code=renamed['city_name'].str[:3].str.upper())

In [10]:
# Build the city table: load the JSON records into a DataFrame, clean it,
# then rename the columns and add the city code.
cities_df = rename_reshape(clean_cities(pd.DataFrame(cities_json)))
cities_df

The city data has been cleaned!


Unnamed: 0,city_name,latitude,longitude,country_code,population,city_code
0,Berlin,52.5167,13.3833,DE,3644826,BER
1,Hamburg,53.55,10.0,DE,1841179,HAM
2,Munich,48.1372,11.5755,DE,1471508,MUN
3,Cologne,50.9422,6.9578,DE,1085664,COL
4,Frankfurt,50.1136,8.6797,DE,753056,FRA


In [11]:
# Make the city codes consistent with airport IATA codes for the upcoming steps.
# The codes are looked up by city name rather than assigned by row position, so
# the result does not depend on the order or number of rows the API returned;
# cities without an entry in the mapping keep their current code.
iata_codes = {
    'Berlin': 'BER',
    'Hamburg': 'HAM',
    'Munich': 'MUC',
    'Cologne': 'CGN',
    'Frankfurt': 'FRA',
}
cities_df['city_code'] = cities_df['city_name'].map(iata_codes).fillna(cities_df['city_code'])