In [1]:
import pandas as pd

In [5]:
# Relative location of the scraped CSV that the rest of the notebook works on.
density_csv_path = '../scraped_csvs/warsaw_map_population_density.csv'
frame = pd.read_csv(density_csv_path)

In [6]:
# Preview the first five rows, transposed so that each column reads as a row.
frame.head(n=5).transpose()

Unnamed: 0,0,1,2,3,4
y,7490046.0,7490046.0,7490046.0,7490046.0,7490046.0
x,5799703.0,5797703.0,5795703.0,5793703.0,5791703.0
r,0.0,0.0,0.0,0.0,0.08311688
g,0.0,0.0,0.0,0.0,0.06717596
b,0.0,0.0,0.0,0.0,0.02304558
lon,20.85397,20.85403,20.85409,20.85415,20.85421
lat,52.33069,52.31271,52.29474,52.27676,52.25879


In [7]:
# Add the three channel columns into a single 'sum' column
# (r, g, b are presumably colour channels of the scraped map; confirm with the scraper).
frame['sum'] = frame['r'].add(frame['g']).add(frame['b'])

In [8]:
# Show the (rows, columns) dimensions of the frame.
frame.shape

(210, 8)

In [11]:
import numpy as np
import matplotlib.pyplot as plt
import griddata

# Pull the coordinate and value columns used for gridding out of the frame.
x = frame['lon']   # longitude of each sample
y = frame['lat']   # latitude of each sample
z = frame['sum']   # the 'sum' column of the frame; this is the value that gets gridded

# NOTE(review): 60x20 inches is very large; the global default is kept because
# other cells may rely on it.
plt.rcParams["figure.figsize"] = [60, 20]
plt.ion()   # pyplot interactive mode on

# Diagnostic scatter plots.  Each view gets its own axes; drawing all three on
# the implicit current axes would overlay the points and let every title and
# label overwrite the previous one.
fig_scatter, (ax_xz, ax_yz, ax_xy) = plt.subplots(1, 3)

# x vs z cross-section.
ax_xz.plot(x, z, '.')
ax_xz.set_title('X vs Z=F(X,Y=constant)')
ax_xz.set_xlabel('X')
ax_xz.set_ylabel('Z')

# y vs z cross-section.
ax_yz.plot(y, z, '.')
ax_yz.set_title('Y vs Z=F(Y,X=constant)')
ax_yz.set_xlabel('Y')
ax_yz.set_ylabel('Z')

# Sample locations (x vs y).  Showing z over this plane requires gridding the
# samples first, which is done below.
ax_xy.plot(x, y, '.')
ax_xy.set_title('X vs Y')
ax_xy.set_xlabel('X')
ax_xy.set_ylabel('Y')

# Gridding: binsize is the width and height of one grid cell (bin), in the
# units of x and y.  See the griddata routine's docstring for the exact
# meaning of the three returned values.
binsize = 0.008
grid, bins, binloc = griddata.griddata(x, y, z, binsize=binsize)

# Colour-scale limits for the gridded values, ignoring the NaNs in the grid.
zmin = np.nanmin(grid)
zmax = np.nanmax(grid)

# Colormap built from the 'jet' segment data with 2048 levels; values below
# vmin are rendered fully transparent.
palette = plt.matplotlib.colors.LinearSegmentedColormap('jet3', plt.cm.datad['jet'], 2048)
palette.set_under(alpha=0.0)

# Both images are stretched over the bounding box of the sample coordinates.
extent = (x.min(), x.max(), y.min(), y.max())

# The gridded results go on a separate figure so they do not sit on top of the
# scatter plots above.
fig_grid, (ax_grid, ax_bins) = plt.subplots(1, 2)

# Left panel: the gridded z values as an image.
im_grid = ax_grid.imshow(grid, extent=extent, cmap=palette, origin='lower',
                         vmin=zmin, vmax=zmax, aspect='auto', interpolation='bilinear')
ax_grid.set_xlabel('X values')
ax_grid.set_ylabel('Y values')
ax_grid.set_title('Z = F(X, Y)')
fig_grid.colorbar(im_grid, ax=ax_grid)

# Right panel: the number of samples that fell into each bin.
im_bins = ax_bins.imshow(bins, extent=extent, cmap=palette, origin='lower',
                         vmin=0, vmax=bins.max(), aspect='auto', interpolation='bilinear')
ax_bins.set_xlabel('X values')
ax_bins.set_ylabel('Y values')
ax_bins.set_title('X, Y vs The No. of Pts Per Bin')
fig_grid.colorbar(im_bins, ax=ax_bins)


ModuleNotFoundError: No module named 'matplotlib'