##### Introducing Pandas
We gather data about US cities from this Wikipedia page:
    https://en.wikipedia.org/wiki/List_of_United_States_cities_by_population

In [1]:
import pandas as pd

In [2]:
# Wikipedia page that lists US cities by population
url = 'https://en.wikipedia.org/wiki/List_of_United_States_cities_by_population'
# Parse the tables found on that page; individual tables are picked out by index below
tables = pd.read_html(io=url)

In [3]:
# Peek at the first parsed table (index 0); it is not the city data we want
tables[0]

Unnamed: 0,0
0,Map all coordinates using: OpenStreetMap
1,Download coordinates as: KML · GPX


In [4]:
# The table at index 4 holds the city data; preview its first two rows
tables[4].head(2)

Unnamed: 0,2018rank,City,State[c],2018estimate,2010Census,Change,2016 land area,2016 land area.1,2016 population density,2016 population density.1,Location
0,1,New York[d],New York,8398748,8175133,+2.74%,301.5 sq mi,780.9 km2,"28,317/sq mi","10,933/km2",40°39′49″N 73°56′19″W﻿ / ﻿40.6635°N 73.9387°W
1,2,Los Angeles,California,3990456,3792621,+5.22%,468.7 sq mi,"1,213.9 km2","8,484/sq mi","3,276/km2",34°01′10″N 118°24′39″W﻿ / ﻿34.0194°N 118.4108°W


In [5]:
# Keep the city table (index 4 of the parsed tables) under its own name, cities_table
cities_table = tables[4]
# cities_table is a pandas DataFrame; display its first two rows
cities_table.head(2)

Unnamed: 0,2018rank,City,State[c],2018estimate,2010Census,Change,2016 land area,2016 land area.1,2016 population density,2016 population density.1,Location
0,1,New York[d],New York,8398748,8175133,+2.74%,301.5 sq mi,780.9 km2,"28,317/sq mi","10,933/km2",40°39′49″N 73°56′19″W﻿ / ﻿40.6635°N 73.9387°W
1,2,Los Angeles,California,3990456,3792621,+5.22%,468.7 sq mi,"1,213.9 km2","8,484/sq mi","3,276/km2",34°01′10″N 118°24′39″W﻿ / ﻿34.0194°N 118.4108°W


In [6]:
# Turn the DataFrame into a list holding one dictionary per row
cities = cities_table.to_dict(orient='records')
# Display the first two city dictionaries
cities[:2]

[{'2018rank': 1,
  'City': 'New York[d]',
  'State[c]': 'New York',
  '2018estimate': 8398748,
  '2010Census': 8175133,
  'Change': '+2.74%',
  '2016 land area': '301.5\xa0sq\xa0mi',
  '2016 land area.1': '780.9\xa0km2',
  '2016 population density': '28,317/sq\xa0mi',
  '2016 population density.1': '10,933/km2',
  'Location': '40°39′49″N 73°56′19″W\ufeff / \ufeff40.6635°N 73.9387°W'},
 {'2018rank': 2,
  'City': 'Los Angeles',
  'State[c]': 'California',
  '2018estimate': 3990456,
  '2010Census': 3792621,
  'Change': '+5.22%',
  '2016 land area': '468.7\xa0sq\xa0mi',
  '2016 land area.1': '1,213.9\xa0km2',
  '2016 population density': '8,484/sq\xa0mi',
  '2016 population density.1': '3,276/km2',
  'Location': '34°01′10″N 118°24′39″W\ufeff / \ufeff34.0194°N 118.4108°W'}]

In [9]:
import plotly.graph_objects as go

# Go through each city dictionary in our list and select only the data we want
# to plot: the 2018 population estimate and the city name, each in its own list.
populations = [each_city['2018estimate'] for each_city in cities]
city_names = [each_city['City'] for each_city in cities]

# One marker per city; the city name is shown when hovering over a marker.
# Only y is supplied, so each marker's x position is its position in the list.
scatter = go.Scatter(y = populations, hovertext = city_names, mode = 'markers')

# Give the figure a title and axis labels so it can be read on its own.
figure_layout = {
    'title': {'text': 'US cities: 2018 population estimate'},
    'xaxis': {'title': {'text': 'Position of city in table'}},
    'yaxis': {'title': {'text': '2018 population estimate'}},
}
go.Figure(scatter, layout = figure_layout)