In [18]:
import os, requests, base64, imageio
from PIL import Image
from io import BytesIO
import seaborn as sn
import pandas as pd
import numpy as np
from scipy import misc
import re
import glob
import matplotlib.pyplot as plt

In [19]:
# funtions
def topo_change(image1, image2):
    
    num_idx = re.compile(r'\d+')
    name = re.split(r'_',image1)
    
    if len(name) > 5:
        city = name[1]+ ' ' + name[2]
    else:
        city = name[1]
    
    idx1 = num_idx.findall(image1)
    year1 = int(idx1[1])
    
    idx2 = num_idx.findall(image2)
    year2 = int(idx2[2])

    #im1 = imageio.imread(image1)
    #print(im1.shape)
    
    avg_rgb_1 = imageio.imread(image1).mean((0,1))
    avg_rgb_2 = imageio.imread(image2).mean((0,1))
    
    per_diff = abs(avg_rgb_1-avg_rgb_2) / avg_rgb_1
    delta_year = abs(year2 - year1)
    
    return city, (per_diff/delta_year)*100

# TESTING #    
PATH = os.path.join(os.getcwd(),'images')

image1 = os.path.join(PATH, '_Albany_2017_03_01.png')
image2 = os.path.join(PATH, '_Albany_2020_05_01.png')

city, rate_of_change = topo_change(image1, image2)

print("FUNCTION TESTING:")
print("The rate of change between year 2017 and 2020 for Albany, New York\n R:%.5f percent, G:%.5f percent, B:%.5f percent"
                                            %(rate_of_change[0], rate_of_change[1], rate_of_change[2]))

FUNCTION TESTING:
The rate of change between year 2017 and 2020 for Albany, New York
 R:0.00663 percent, G:0.00684 percent, B:0.01108 percent


In [21]:
# Data Set Cleaning

all_cities = pd.read_csv('us_cities_v2.csv')
all_cities.head()

capital_mask = all_cities['capital'] == True
just_capitals = all_cities[capital_mask]

just_capitals = just_capitals.sort_values(by=['city'])
drop_list = ['state', 'military', 'incorporated'] 

just_capitals
# we might add state back into our final data set if we cannot remember state id's
# removed 'military' because all were 'False'
# removed 'incorporated' becuase all were 'True'

just_capitals=just_capitals.drop(columns=drop_list)

just_capitals=just_capitals.reset_index(drop=True)

empty = []
path = []
cities = []
data = []

# loop over png images in folder ~/images
i = 0
for image in sorted(glob.glob(os.path.join(PATH,'*.png'))):
    
    name = re.split(r'_',image)
    
    if len(name) > 5:
        city = name[1]+ ' ' + name[2]
    else:
        city = name[1]
    
    path.append(image)
    empty.append(city)

    if i > 0:
        
        if empty[i] == empty[i-1]:
            
            image1 = os.path.join(path[i])
            image2 = os.path.join(path[i-1])
            city, rate_of_change = topo_change(image1, image2)
            data.append([city, rate_of_change[0], rate_of_change[1], rate_of_change[2]])
  
    i += 1

df = pd.DataFrame(data, columns = ['Cities', 'dR_year', 'dG_year', 'dB_year'])

just_capitals['dR_year'] = df['dR_year'] 
just_capitals['dG_year'] = df['dG_year'] 
just_capitals['dB_year'] = df['dB_year']

just_capitals = just_capitals.drop(columns='capital')
just_capitals.to_csv('just_capitals.csv')

just_capitals


Unnamed: 0,city,state_id,county,lat,long,population,density,timezone,ranking,zips,dR_year,dG_year,dB_year
0,Albany,NY,Albany,42.6664,-73.7987,586383,1740,America/New_York,2,38,0.005838,0.005998,0.009038
1,Annapolis,MD,Anne Arundel,38.9706,-76.5047,39223,2100,America/New_York,2,4,0.017253,0.013068,0.004154
2,Atlanta,GA,Fulton,33.7627,-84.4224,5449398,1441,America/New_York,1,56,0.006408,0.007584,0.013648
3,Augusta,ME,Kennebec,44.3341,-69.7319,22116,130,America/New_York,3,5,0.019463,0.017133,0.011631
4,Austin,TX,Travis,30.3004,-97.7522,1687311,1181,America/Chicago,1,71,6.9e-05,0.000237,0.000742
5,Baton Rouge,LA,East Baton Rouge,30.4418,-91.131,570308,983,America/Chicago,2,33,0.001811,0.001358,0.000775
6,Bismarck,ND,Burleigh,46.8144,-100.7694,98345,827,America/Chicago,2,5,0.00692,0.008423,0.00895
7,Boise,ID,Ada,43.6007,-116.2312,389280,1056,America/Boise,2,22,0.00474,0.005271,0.004664
8,Boston,MA,Suffolk,42.3188,-71.0846,4688346,5532,America/New_York,1,53,0.000903,0.000979,0.00358
9,Carson City,NV,Carson City,39.1512,-119.7474,58756,149,America/Los_Angeles,2,9,0.000182,0.000132,0.000443
