In [2]:
import numpy as np
import pandas as pd 
import plotly.express as px



### US Map

- Data Sources: https://simplemaps.com/data/us-cities

In [3]:
# Read the US cities workbook, then discard every row that has a missing value
# in any column (not only the four columns used below).
df = pd.read_excel("uscities.xlsx")
df = df.dropna()

# Keep an independent copy of just the columns the map needs, so later edits
# to us_raw_map never touch df.
us_raw_map = df.loc[:, ['city', 'lat', 'lng', 'population']].copy()
us_raw_map.head(10)

Unnamed: 0,city,lat,lng,population
0,New York,40.6943,-73.9249,18908608.0
1,Los Angeles,34.1141,-118.4068,11922389.0
2,Chicago,41.8375,-87.6866,8497759.0
3,Miami,25.784,-80.2101,6080145.0
4,Houston,29.786,-95.3885,5970127.0
5,Dallas,32.7935,-96.7667,5830932.0
6,Philadelphia,40.0077,-75.1339,5683533.0
7,Atlanta,33.7628,-84.422,5180179.0
8,Washington,38.9047,-77.0163,5116378.0
9,Boston,42.3188,-71.0852,4328315.0


In [4]:
def transform_to_integer(float):
    """Convert a numeric value to ``int``, discarding the fractional part.

    The conversion truncates toward zero, so ``-73.9`` becomes ``-73``
    rather than ``-74``.

    Args:
        float: The number to convert. The name shadows the built-in
            ``float`` inside this function only; it is kept so existing
            keyword callers keep working.

    Returns:
        The truncated value as a built-in ``int``.
    """
    truncated = int(float)
    return truncated

# Convert lat/lng to integer grid coordinates.
# astype truncates toward zero exactly like int() (e.g. -73.9 -> -73), so the
# values match a row-by-row int() conversion without a Python call per row.
us_raw_map['lat'] = us_raw_map['lat'].astype('int64')
us_raw_map['lng'] = us_raw_map['lng'].astype('int64')
# Shift every longitude by |min(lng)|; with the negative longitudes of the US
# data this makes the westernmost cell 0.
us_raw_map['lng'] = us_raw_map['lng'] + np.abs(us_raw_map['lng'].min())

# Set the first five rows aside: their populations dominate the min-max scaling
# and would squeeze every other city into the lowest bin.
# NOTE(review): this relies on the sheet being sorted by population, largest
# first, as the head() preview suggests -- confirm against the data source.
# .copy() makes both pieces independent frames, so the column assignments below
# no longer raise SettingWithCopyWarning.
top_5_city_in_us = us_raw_map.iloc[:5].copy()
top_5_city_in_us['population_level'] = 10
us_raw_map = us_raw_map.iloc[5:].copy()

# Min-max scale the remaining populations to [0, 1], then bucket them into five
# equal-width levels numbered 1..5.
pop_min = us_raw_map['population'].min()
pop_max = us_raw_map['population'].max()
us_raw_map['scale_population'] = (us_raw_map['population'] - pop_min) / (pop_max - pop_min)
us_raw_map['population_level'] = pd.cut(us_raw_map['scale_population'], bins=5, labels=False) + 1

# Put the five set-aside rows back on top (their scale_population stays NaN)
# and renumber the index from 0.
us_raw_map = pd.concat([top_5_city_in_us, us_raw_map], axis=0, ignore_index=True)

# Several cities can fall into the same integer (lat, lng) cell: sum their
# levels per cell, then rescale the sums into ten equal-width levels 1..10.
aggregate_us_raw_map = (
    us_raw_map
    .groupby(['lat', 'lng'])['population_level']
    .sum()
    .reset_index(name='population_level')
)
aggregate_us_raw_map['scale_population'] = pd.cut(aggregate_us_raw_map['population_level'], bins=10, labels=False) + 1
aggregate_us_raw_map

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_5_city_in_us['population_level'] = 10


Unnamed: 0,lat,lng,population_level,scale_population
0,24,43,7,1
1,24,44,3,1
2,25,27,6,1
3,25,43,6,1
4,25,44,86,3
...,...,...,...,...
885,48,29,5,1
886,48,30,3,1
887,48,31,4,1
888,48,32,1,1


In [7]:
# Blank 60 x 100 raster: rows are indexed by integer latitude, columns by the
# shifted integer longitude.
us_map = np.zeros((60, 100))

# Fill every aggregated cell in one indexed assignment. Columns are moved 35
# places to the right of the shifted longitude.
grid_rows = aggregate_us_raw_map['lat'].to_numpy()
grid_cols = aggregate_us_raw_map['lng'].to_numpy() + 35
us_map[grid_rows, grid_cols] = aggregate_us_raw_map['scale_population'].to_numpy()

# Row 0 holds the lowest latitude; flip vertically so higher latitudes are
# drawn at the top of the image.
us_map_flipped = np.flipud(us_map)

# simple visualization
fig = px.imshow(us_map_flipped, color_continuous_scale=[(0.00, 'white')])
fig.show()

### Japan Map

- Data Sources: https://simplemaps.com/data/world-cities

In [13]:
# Read the Japan cities workbook and drop every row that has a missing value in
# any column. NOTE: this rebinds `df`, which previously held the US sheet.
df = pd.read_excel("jpcities.xlsx").dropna()
jp_raw_map = df[['city', 'lat', 'lng', 'population']].copy()

# Convert lat/lng to integer grid coordinates.
# astype truncates toward zero exactly like int(), without a Python call per row.
jp_raw_map['lat'] = jp_raw_map['lat'].astype('int64')
jp_raw_map['lng'] = jp_raw_map['lng'].astype('int64')
# Move longitudes 100 degrees toward zero so they become small column indices.
jp_raw_map['lng'] = jp_raw_map['lng'] - 100

# Min-max scale the populations to [0, 1], then bucket them into five
# equal-width levels numbered 1..5. Unlike the US pipeline, no cities are set
# aside before scaling.
pop_min = jp_raw_map['population'].min()
pop_max = jp_raw_map['population'].max()
jp_raw_map['scale_population'] = (jp_raw_map['population'] - pop_min) / (pop_max - pop_min)
jp_raw_map['population_level'] = pd.cut(jp_raw_map['scale_population'], bins=5, labels=False) + 1

# dropna() leaves gaps in the index; renumber it from 0.
jp_raw_map = jp_raw_map.reset_index(drop=True)

# Several cities can fall into the same integer (lat, lng) cell: sum their
# levels per cell, then rescale the sums into ten equal-width levels 1..10.
aggregate_jp_raw_map = (
    jp_raw_map
    .groupby(['lat', 'lng'])['population_level']
    .sum()
    .reset_index(name='population_level')
)
aggregate_jp_raw_map['scale_population'] = pd.cut(aggregate_jp_raw_map['population_level'], bins=10, labels=False) + 1
aggregate_jp_raw_map.head(10)

Unnamed: 0,lat,lng,population_level,scale_population
0,26,27,1,1
1,31,30,1,1
2,31,31,1,1
3,32,29,1,1
4,32,30,1,1
5,33,29,1,1
6,33,30,2,2
7,33,31,1,1
8,33,32,2,2
9,33,33,1,1


In [15]:
# Blank 60 x 100 raster: rows are indexed by integer latitude, columns by the
# shifted integer longitude.
jp_map = np.zeros((60, 100))

# Fill every aggregated cell in one indexed assignment. Columns are moved 20
# places to the left of the shifted longitude.
grid_rows = aggregate_jp_raw_map['lat'].to_numpy()
grid_cols = aggregate_jp_raw_map['lng'].to_numpy() - 20
jp_map[grid_rows, grid_cols] = aggregate_jp_raw_map['scale_population'].to_numpy()

# Row 0 holds the lowest latitude; flip vertically so higher latitudes are
# drawn at the top of the image.
jp_map_flipped = np.flipud(jp_map)

# simple visualization
fig = px.imshow(jp_map_flipped, color_continuous_scale=[(0.00, 'white')])
fig.show()

### World Map

In [12]:
# Overlay the two flipped rasters by adding them cell by cell. Both were
# created as 60 x 100 arrays, so the shapes line up.
world_map = np.add(jp_map_flipped, us_map_flipped)

fig = px.imshow(world_map)
fig.show()

### Loading Map Data

In [6]:
# Load the stored grid from the "US_MAP" sheet as a plain NumPy array.
# NOTE: this rebinds `us_map`, replacing the array rasterized from the city
# data earlier in the notebook.
us_map = pd.read_excel(
    "MAP.xlsx",
    sheet_name="US_MAP",
).to_numpy()

# Quick visual check of the loaded grid.
fig = px.imshow(us_map)
fig.show()

us_map

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [8]:
# Load the stored grid from the "JP_MAP" sheet as a plain NumPy array.
# NOTE: this rebinds `jp_map`, replacing the array rasterized from the city
# data earlier in the notebook.
jp_map = pd.read_excel(
    "MAP.xlsx",
    sheet_name="JP_MAP",
).to_numpy()

# Quick visual check of the loaded grid.
fig = px.imshow(jp_map)
fig.show()

jp_map

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])