# Notebook for manipulating the provided data

## Dataframe stuff (copied from yolo.ipynb)

In [43]:
import numpy as np
import pandas as pd
from IPython.display import Image, display
import os

#expands table for printing/debug purposes
pd.set_option('display.width', 160)

file = open("NASA_Datasets - Sheet1.csv")

#discards first few lines of no data
for i in range(4):
    file.readline()

chicagoData = []
indianapolisData = []

#read every line of file
while not file.closed:
    dataRow = file.readline()
    if(dataRow == ''):
        file.close()
    else:
        dataRowArray = dataRow.split(',')
        dataRowArray = [string.strip() for string in dataRowArray]
        chicagoData.append(dataRowArray[0 : 6])
        indianapolisData.append(dataRowArray[7 : 13])

#cleaning up indianapolis data
while(indianapolisData[-1][0] == ''):
    #cleaning up empty rows from end caused by larger chicago data
    indianapolisData.pop(-1)

for row in indianapolisData:
    row[5] = row[5].replace('\n', '')

    
#paths to folders of image data
satelliteImageFilePathHeader = "satellite_images/"
streetviewImageFilePathHeader = "streetview_images/"

#set up data frames
chicagoDataFrame = pd.DataFrame(chicagoData)
chicagoDataFrame = chicagoDataFrame.rename(columns={0 : "ID", 1 : "Address", 2 : "SatelliteImageName", 3 : "StreetviewImageName", 4 : "Status", 5 : "Pool"})

indianapolisDataFrame = pd.DataFrame(indianapolisData)
indianapolisDataFrame = indianapolisDataFrame.rename(columns={0 : "ID", 1 : "Address", 2 : "SatelliteImageName", 3 : "StreetviewImageName", 4 : "Status", 5 : "Pool"})

#displaying all the stuff
print(chicagoDataFrame)
print(indianapolisDataFrame)
        


      ID                    Address SatelliteImageName StreetviewImageName     Status   Pool
0      1     13143 S Carondolet Ave              s1001               v1001  Abandoned  FALSE
1      2          7222 S Euclid Ave              s1002               v1002  Abandoned  FALSE
2      3          7236 S Euclid Ave              s1003               v1003      Owned  FALSE
3      4     1344 S Springfield Ave              s1004               v1004  Abandoned  FALSE
4      5             3853 W 14th St              s1005               v1005      Owned  FALSE
..   ...                        ...                ...                 ...        ...    ...
495  496  6531 S Eberhart Ave 60637             s10496              v10496      Owned  FALSE
496  497     4342 W Wilcox St 60624             s10497              v10497      Owned  FALSE
497  498     5331 S Honore St 60609             s10498              v10498  Abandoned  FALSE
498  499  3641 W Douglas Blvd 60623             s10499              v1

In [44]:
#fixing street view image formatting
def fixStreetImage(dataFrame):
    for i in range(dataFrame.shape[0]):
        streetviewImageName = dataFrame.at[i, 'StreetviewImageName']
        if(len(streetviewImageName) < 6):
            streetviewImageName = streetviewImageName.replace('v10','v100')
            streetviewImageName = streetviewImageName.replace('v20','v200')
            dataFrame.at[i, 'StreetviewImageName'] = streetviewImageName

    return dataFrame

chicagoDataFrame = fixStreetImage(chicagoDataFrame)
indianapolisDataFrame = fixStreetImage(indianapolisDataFrame)

#displaying all the stuff
print(chicagoDataFrame)
print(indianapolisDataFrame)


      ID                    Address SatelliteImageName StreetviewImageName     Status   Pool
0      1     13143 S Carondolet Ave              s1001              v10001  Abandoned  FALSE
1      2          7222 S Euclid Ave              s1002              v10002  Abandoned  FALSE
2      3          7236 S Euclid Ave              s1003              v10003      Owned  FALSE
3      4     1344 S Springfield Ave              s1004              v10004  Abandoned  FALSE
4      5             3853 W 14th St              s1005              v10005      Owned  FALSE
..   ...                        ...                ...                 ...        ...    ...
495  496  6531 S Eberhart Ave 60637             s10496              v10496      Owned  FALSE
496  497     4342 W Wilcox St 60624             s10497              v10497      Owned  FALSE
497  498     5331 S Honore St 60609             s10498              v10498  Abandoned  FALSE
498  499  3641 W Douglas Blvd 60623             s10499              v1

In [45]:
#concatenates the data
combinedDataFrame = pd.concat([chicagoDataFrame, indianapolisDataFrame])
print(combinedDataFrame)

      ID                     Address SatelliteImageName StreetviewImageName     Status   Pool
0      1      13143 S Carondolet Ave              s1001              v10001  Abandoned  FALSE
1      2           7222 S Euclid Ave              s1002              v10002  Abandoned  FALSE
2      3           7236 S Euclid Ave              s1003              v10003      Owned  FALSE
3      4      1344 S Springfield Ave              s1004              v10004  Abandoned  FALSE
4      5              3853 W 14th St              s1005              v10005      Owned  FALSE
..   ...                         ...                ...                 ...        ...    ...
286  287        4224 S Post Rd 46239             s20287              v20287      Owned  FALSE
287  288  4366 N ARLINGTON AVE 46226             s20288              v20288  Abandoned  FALSE
288  289        1010 W 27TH ST 46208             s20289              v20289  Abandoned  FALSE
289  290         933 W 27th St 46208             s20290     

## Image Manipulation

In [4]:
import cv2 as cv
from pathlib import Path
import shutil
import os

In [15]:
# mirrors all images in provided input_folder and creates an output_folder to write them to
# horiz and vert tell what axis to mirror along. setting both to true rotates image 180
# and setting both to false just clones the image exactly
# default is just horizontal mirroring 
def mirrorImageFiles(input_folder, output_folder, horiz=True, vert=False):
    Path(output_folder).mkdir(parents=True, exist_ok=True)
    
    imageFiles = os.listdir(input_folder)
    for imageFile in imageFiles:
        if(not os.path.isfile(f"{input_folder}/{imageFile}")):
            continue
        image = cv.imread(f"{input_folder}/{imageFile}")
        if(horiz):
            image = image[:, ::-1]
        if(vert):
            image = image[::-1]
        cv.imwrite(f"{output_folder}/{imageFile}", image)

#mirrorImageFiles("satellite_images", "satellite_images_mirrored")
#mirrorImageFiles("satellite_images", "satellite_images_mirrored_vert", horiz = False, vert = True)
#mirrorImageFiles("satellite_images", "satellite_images_flipped", horiz = True, vert = True)
mirrorImageFiles("streetview_images", "streetview_images_mirrored") #we can only really do horizontal mirroring on the streetview images since vertical mirroring would make the house upside-down

In [36]:
# stitching street view and satellite images together

# takes two images (cv2 arrays) as image, outputs the concatenated images side by side
def stitchImages(image1, image2):
    image1dims = image1.shape
    image2dims = image2.shape
    
    #pad bottoms of image so numpy can concatenate
    if(image1dims[0] < image2dims[0]):
        image1 = np.pad(image1, ((0, image2dims[0] - image1dims[0]), (0, 0), (0, 0)))
    
    if(image2dims[0] < image1dims[0]):
        image2 = np.pad(image2, ((0, image1dims[0] - image2dims[0]), (0, 0), (0, 0)))

    stitchedImage = np.concatenate((image1, image2), axis = 1)

    return stitchedImage

outputFolder = "streetsatellitestiched_semi_flipped"
Path(outputFolder).mkdir(parents=True, exist_ok=True)

for index,row in combinedDataFrame.iterrows():
    streetviewImageName = row['StreetviewImageName']
    satelliteImageName = row['SatelliteImageName']

    streetviewImage = cv.imread(f"streetview_images/{streetviewImageName}.png")
    satelliteImage = cv.imread(f"satellite_images_flipped/{satelliteImageName}.png")

    stitchedImage = stitchImages(streetviewImage, satelliteImage)

    cv.imwrite(f"{outputFolder}/{streetviewImageName}_{satelliteImageName}.png", stitchedImage)
    
    


[ WARN:0@77300.385] global loadsave.cpp:268 findDecoder imread_('streetview_images/v20261.png'): can't open/read file: check file path/integrity


AttributeError: 'NoneType' object has no attribute 'shape'

In [54]:
# overlaying street and satellite images
#TODO implement overlaying of images
def overlayImages(image1, image2):

    image1 = image1/255
    image2 = image2/255
    
    image1dims = image1.shape
    image2dims = image2.shape

    #pad bottoms of image so numpy can concatenate
    if(image1dims[0] < image2dims[0]):
        image1 = np.pad(image1, ((0, image2dims[0] - image1dims[0]), (0, 0), (0, 0)))
    
    if(image2dims[0] < image1dims[0]):
        image2 = np.pad(image2, ((0, image1dims[0] - image2dims[0]), (0, 0), (0, 0)))

        
    #pad right side of images so numpy can concatenate
    if(image1dims[1] < image2dims[1]):
        image1 = np.pad(image1, ((0, 0), (0, image2dims[1] - image1dims[1]), (0, 0)))
    
    if(image2dims[1] < image1dims[1]):
        image2 = np.pad(image2, ((0, 0), (0, image1dims[1] - image2dims[1]), (0, 0)))

    overlayImage = np.add(image1, image2) / 2

    overlayImage *= 255
    overlayImage = np.floor(overlayImage)

    return overlayImage

outputFolder = "streetsatelliteoverlayed"
Path(outputFolder).mkdir(parents=True, exist_ok=True)

for index,row in combinedDataFrame.iterrows():
    streetviewImageName = row['StreetviewImageName']
    satelliteImageName = row['SatelliteImageName']

    streetviewImage = cv.imread(f"streetview_images/{streetviewImageName}.png")
    satelliteImage = cv.imread(f"satellite_images/{satelliteImageName}.png")

    overlayImage = overlayImages(streetviewImage, satelliteImage)

    cv.imwrite(f"{outputFolder}/{streetviewImageName}_{satelliteImageName}.png", overlayImage)

[ WARN:0@253398.656] global loadsave.cpp:268 findDecoder imread_('streetview_images/v20261.png'): can't open/read file: check file path/integrity


TypeError: unsupported operand type(s) for /: 'NoneType' and 'int'

## Miscellaneous data analysis

In [39]:
poolCount = 0
noPoolCount = 0
totalCount = 0

for index,row in combinedDataFrame.iterrows():
    hasPool = row['Pool'] == 'TRUE'

    if(hasPool):
        poolCount += 1
    else:
        noPoolCount += 1

    totalCount += 1

print(f"Out of {totalCount} houses, {poolCount} had pools, while {noPoolCount} did not.")
print(f"{round(poolCount/totalCount, 3) * 100}% of houses had pools")


Out of 791 houses, 32 had pools, while 759 did not.
4.0% of houses had pools


In [40]:
abandonedCount = 0
ownedCount = 0
totalCount = 0

for index,row in combinedDataFrame.iterrows():
    isAbandoned = row['Status'] == 'Abandoned'

    if(isAbandoned):
        abandonedCount += 1
    else:
        ownedCount += 1

    totalCount += 1

print(f"Out of {totalCount} houses, {abandonedCount} were abandoned, while {ownedCount} were owned.")
print(f"{round(abandonedCount/totalCount, 3) * 100}% of houses are abandoned")

Out of 791 houses, 404 were abandoned, while 387 were owned.
51.1% of houses are abandoned
