In [10]:
from ultralytics import YOLO

#loads the "yolov8m.pt" checkpoint into the global `model`
#NOTE(review): the detection-training cell near the end of the notebook assigns `model` again,
#so what `model` refers to depends on which cells have been run in this kernel
model = YOLO("yolov8m.pt")

# Creating the data frames 
### (run everything here before anything that works with the data frames)

In [2]:
import numpy as np
import pandas as pd
from IPython.display import Image, display
import os

#expands table for printing/debug purposes
pd.set_option('display.width', 160)

#number of lines at the top of the csv that hold no data
HEADER_LINE_COUNT = 4

chicagoData = []
indianapolisData = []

#`with` closes the file even if a row fails to parse part-way through
with open("NASA_Datasets - Sheet1.csv") as file:
    #discards first few lines of no data
    for i in range(HEADER_LINE_COUNT):
        file.readline()

    #read every remaining line of file
    #each line holds both cities side by side: fields 0-5 are chicago, fields 7-12 are indianapolis
    for dataRow in file:
        dataRowArray = [string.strip() for string in dataRow.split(',')]
        chicagoData.append(dataRowArray[0 : 6])
        indianapolisData.append(dataRowArray[7 : 13])

#cleaning up indianapolis data
#cleaning up empty rows from end caused by larger chicago data
#(the extra checks stop on an empty list or an empty row instead of raising IndexError)
while(indianapolisData and (not indianapolisData[-1] or indianapolisData[-1][0] == '')):
    indianapolisData.pop(-1)

#no separate newline clean-up is needed: strip() above already removed the line ending from the last field


#paths to folders of image data
satelliteImageFilePathHeader = "satellite_images/"
streetviewImageFilePathHeader = "streetview_images/"

#set up data frames
#both cities use the same six columns, so the position -> name mapping is written once
columnNames = {0 : "ID", 1 : "Address", 2 : "SatelliteImageName", 3 : "StreetviewImageName", 4 : "Status", 5 : "Pool"}

chicagoDataFrame = pd.DataFrame(chicagoData).rename(columns=columnNames)
indianapolisDataFrame = pd.DataFrame(indianapolisData).rename(columns=columnNames)

#displaying all the stuff
print(chicagoDataFrame)
print(indianapolisDataFrame)
        


      ID                    Address SatelliteImageName StreetviewImageName     Status   Pool
0      1     13143 S Carondolet Ave              s1001               v1001  Abandoned  FALSE
1      2          7222 S Euclid Ave              s1002               v1002  Abandoned  FALSE
2      3          7236 S Euclid Ave              s1003               v1003      Owned  FALSE
3      4     1344 S Springfield Ave              s1004               v1004  Abandoned  FALSE
4      5             3853 W 14th St              s1005               v1005      Owned  FALSE
..   ...                        ...                ...                 ...        ...    ...
495  496  6531 S Eberhart Ave 60637             s10496              v10496      Owned  FALSE
496  497     4342 W Wilcox St 60624             s10497              v10497      Owned  FALSE
497  498     5331 S Honore St 60609             s10498              v10498  Abandoned  FALSE
498  499  3641 W Douglas Blvd 60623             s10499              v1

In [3]:
#fixing street view image formatting
def fixStreetImage(dataFrame):
    """
    pads short street view image names so they line up with the longer ones

    Every 'StreetviewImageName' shorter than 6 characters has 'v10' replaced by
    'v100' and then 'v20' replaced by 'v200'. Longer names are left alone.
    Rows are looked up by the labels 0 .. number of rows - 1.

    Parameters
    ----------
    dataFrame : data frame with a 'StreetviewImageName' column of strings

    Returns
    -------
    the same data frame object that was passed in (it is changed in place)
    """
    column = 'StreetviewImageName'

    for rowLabel in range(len(dataFrame)):
        imageName = dataFrame.at[rowLabel, column]
        if len(imageName) >= 6:
            continue
        dataFrame.at[rowLabel, column] = imageName.replace('v10', 'v100').replace('v20', 'v200')

    return dataFrame

#fixStreetImage edits the frame it is given and returns that same frame,
#so these assignments keep the existing objects rather than creating copies
chicagoDataFrame = fixStreetImage(chicagoDataFrame)
indianapolisDataFrame = fixStreetImage(indianapolisDataFrame)

#displaying all the stuff
print(chicagoDataFrame)
print(indianapolisDataFrame)


      ID                    Address SatelliteImageName StreetviewImageName     Status   Pool
0      1     13143 S Carondolet Ave              s1001              v10001  Abandoned  FALSE
1      2          7222 S Euclid Ave              s1002              v10002  Abandoned  FALSE
2      3          7236 S Euclid Ave              s1003              v10003      Owned  FALSE
3      4     1344 S Springfield Ave              s1004              v10004  Abandoned  FALSE
4      5             3853 W 14th St              s1005              v10005      Owned  FALSE
..   ...                        ...                ...                 ...        ...    ...
495  496  6531 S Eberhart Ave 60637             s10496              v10496      Owned  FALSE
496  497     4342 W Wilcox St 60624             s10497              v10497      Owned  FALSE
497  498     5331 S Honore St 60609             s10498              v10498  Abandoned  FALSE
498  499  3641 W Douglas Blvd 60623             s10499              v1

In [4]:
#concatenates the data (chicago rows first, then indianapolis rows)
#each frame keeps its own row labels, so the same label appears once per city in the combined frame
#(the printed tail shows indianapolis labels 286-289); the dataset cell below iterates rows instead of looking them up by label
combinedDataFrame = pd.concat([chicagoDataFrame, indianapolisDataFrame])
print(combinedDataFrame)

      ID                     Address SatelliteImageName StreetviewImageName     Status   Pool
0      1      13143 S Carondolet Ave              s1001              v10001  Abandoned  FALSE
1      2           7222 S Euclid Ave              s1002              v10002  Abandoned  FALSE
2      3           7236 S Euclid Ave              s1003              v10003      Owned  FALSE
3      4      1344 S Springfield Ave              s1004              v10004  Abandoned  FALSE
4      5              3853 W 14th St              s1005              v10005      Owned  FALSE
..   ...                         ...                ...                 ...        ...    ...
286  287        4224 S Post Rd 46239             s20287              v20287      Owned  FALSE
287  288  4366 N ARLINGTON AVE 46226             s20288              v20288  Abandoned  FALSE
288  289        1010 W 27TH ST 46208             s20289              v20289  Abandoned  FALSE
289  290         933 W 27th St 46208             s20290     

# Creating classification data sets
### (creates the folders of training/testing/validation data)

In [7]:

import random, os.path

def createNewTrainingFileDirectory(dataFolder, datagroupFolder, classifyFolder):
    """
    builds the folder a classified image is stored in

    Parameters
    ----------
    dataFolder : name of the data set the image belongs to
    datagroupFolder : train/val/test
    classifyFolder : name of the class the image belongs to

    Returns
    -------
    string : "classifications/<dataFolder>/<datagroupFolder>/<classifyFolder>" (no trailing slash)

    """
    return "/".join(["classifications", dataFolder, datagroupFolder, classifyFolder])

def createNewTrainingFilePath(name, dataFolder, datagroupFolder, classifyFolder):
    """
    builds the full path a copied image gets inside a data set

    Parameters
    ----------
    name : name of the image file, without extension
    dataFolder : name of the data set the image belongs to
    datagroupFolder : train/val/test
    classifyFolder : name of the class the image belongs to

    Returns
    -------
    string : the folder from createNewTrainingFileDirectory followed by "/<name>_<classifyFolder>.png"

    """
    directory = createNewTrainingFileDirectory(dataFolder, datagroupFolder, classifyFolder)
    fileName = name + "_" + classifyFolder + ".png"

    return directory + "/" + fileName
    
#helper function that generates what set of data something should be in
def generateRandomDataGroup():
    """
    picks the split for one image from a single random.randint(1, 10) draw

    Returns
    -------
    string : "val" when the draw is 1, "test" when it is 2, "train" for every other draw
    """
    draw = random.randint(1, 10)
    return {1 : "val", 2 : "test"}.get(draw, "train")


#paths to folders of image data
#(assigns the same two values as the data frame cell at the top of the notebook)
satelliteImageFilePathHeader = "satellite_images/"
streetviewImageFilePathHeader = "streetview_images/"

#printed so the cell shows visible confirmation that it ran
print("helper functions created")

helper functions created


In [None]:
#abandoned vs owned streetview dataset creation
import random, os.path
import shutil
from pathlib import Path

random.seed(42) #consistent results when generating training data

#turning the streetview image data into classification training data
for index, row in combinedDataFrame.iterrows():
    streetviewImageName = row['StreetviewImageName']
    path = streetviewImageFilePathHeader + streetviewImageName + ".png"

    #the lower-cased status doubles as the class folder name
    abandoned = row['Status'].strip().lower()

    #drawn for every row, including rows whose image is missing, so each row uses exactly one seeded draw
    datagroupFolder = generateRandomDataGroup()

    newPath = createNewTrainingFilePath(streetviewImageName, "streetview_abandonedness", datagroupFolder, abandoned)
    newDirectory = createNewTrainingFileDirectory("streetview_abandonedness", datagroupFolder, abandoned)

    #rows without an image on disk are skipped
    if not os.path.isfile(path):
        continue

    Path(newDirectory).mkdir(parents=True, exist_ok=True)
    shutil.copyfile(path, newPath)


In [None]:
#chicago vs indianapolis streetview dataset creation
import random, os.path
import shutil

random.seed(13)#consistent results when generating training data

#turning the streetview image data into classification training data
#chicago is processed before indianapolis so the seeded draws are used in the same order as before
for cityName, cityDataFrame in (('chicago', chicagoDataFrame), ('indianapolis', indianapolisDataFrame)):
    for i in range(cityDataFrame.shape[0]):
        streetviewImageName = cityDataFrame.at[i, 'StreetviewImageName']
        path = streetviewImageFilePathHeader + streetviewImageName + ".png"

        #drawn before the file check so every row uses exactly one draw
        dataGroup = generateRandomDataGroup()

        newPath = createNewTrainingFilePath(streetviewImageName, "chi_vs_ind", dataGroup, cityName)

        #rows without an image on disk are skipped for both cities instead of stopping the run
        if(os.path.isfile(path)):
            #copyfile does not create missing folders, so the class folder is made first
            os.makedirs(os.path.dirname(newPath), exist_ok=True)
            shutil.copyfile(path, newPath)


In [None]:
#street view vs satellite view dataset creation
import random
from pathlib import Path
import shutil

#helper function for creating the data set paths
#(it has its own name so that running this cell does not replace createNewTrainingFilePath,
# the four-argument helper that the abandoned/owned and city data set cells call)
def createSatelliteVsStreetFilePath(name, dataFolder, satellite, validating):
    """
    builds the path a copied image gets inside the satellite vs street data set

    Parameters
    ----------
    name : name of the image file, without extension
    dataFolder : name of the data set folder under "classifications/"
    satellite : True for a satellite image, False for a street view image
    validating : True to place the image in "val", False to place it in "train"

    Returns
    -------
    string : path the copied file should have

    """
    path = "classifications/"
    path += dataFolder + "/"

    if(not validating):
        path += "train/"
    else:
        path += "val/"

    if(satellite):
        path += "satellite/" + name + "_sat"
    else:
        path += "streetview/" + name + "_str"

    path += ".png"

    return path


random.seed(7) #consistent results when generating training data, like the other data set cells

#turning the streetview and satellite image data into classification training data
for i in range(chicagoDataFrame.shape[0]):
    #satellite image first, then street view image, one random draw each
    imageSources = (
        (satelliteImageFilePathHeader, chicagoDataFrame.at[i, 'SatelliteImageName'], True),
        (streetviewImageFilePathHeader, chicagoDataFrame.at[i, 'StreetviewImageName'], False),
    )

    for folderHeader, imageName, isSatellite in imageSources:
        path = folderHeader + imageName + ".png"

        #roughly one image in eight goes to validation
        validating = random.randint(1, 8) == 1
        newPath = createSatelliteVsStreetFilePath(imageName, "satellite_vs_street", isSatellite, validating)

        #rows without an image on disk are skipped instead of stopping the run
        if(Path(path).is_file()):
            #copyfile does not create missing folders, so the class folder is made first
            Path(newPath).parent.mkdir(parents=True, exist_ok=True)
            shutil.copyfile(path, newPath)

In [None]:
#pool vs no pool dataset creation
import random
from pathlib import Path
import shutil

#helper function for creating the data set paths
#(it has its own name so that running this cell does not replace createNewTrainingFilePath,
# the four-argument helper that the abandoned/owned and city data set cells call)
def createPoolFilePath(name, dataFolder, pool, validating):
    """
    builds the path a copied image gets inside the pool vs no pool data set

    Parameters
    ----------
    name : name of the image file, without extension
    dataFolder : name of the data set folder under "classifications/"
    pool : True when the image is labelled as having a pool
    validating : True to place the image in "val", False to place it in "train"

    Returns
    -------
    string : path the copied file should have

    """
    path = "classifications/"
    path += dataFolder + "/"

    if(not validating):
        path += "train/"
    else:
        path += "val/"

    if(pool):
        path += "pool/" + name + "_pool"
    else:
        path += "no_pool/" + name + "_poolless"

    path += ".png"

    return path


random.seed(21) #consistent results when generating training data, like the other data set cells

#turning the satellite image data into classification training data
for i in range(chicagoDataFrame.shape[0]):
    satelliteviewImageName = chicagoDataFrame.at[i, 'SatelliteImageName']
    path = satelliteImageFilePathHeader + satelliteviewImageName + ".png"

    #the Pool column holds the text 'TRUE' or 'FALSE'
    hasPool = chicagoDataFrame.at[i, 'Pool'] == 'TRUE'
    #roughly one image in eight goes to validation
    validating = random.randint(1, 8) == 1
    newPath = createPoolFilePath(satelliteviewImageName, "satellite_poolhaving", hasPool, validating)

    #rows without an image on disk are skipped instead of stopping the run
    if(Path(path).is_file()):
        #copyfile does not create missing folders, so the class folder is made first
        Path(newPath).parent.mkdir(parents=True, exist_ok=True)
        shutil.copyfile(path, newPath)
    

# Abandonedness Classification Model

In [None]:
#training abandonnessModel

#checkpoint file the classifier is created from
modelPath = "yolov8m-cls.pt"
#a new model is built from modelPath every time this cell runs, replacing whatever abandonnessModel referred to before
abandonnessModel = YOLO(modelPath)
#NOTE(review): device = [0,1] presumably asks for two GPUs - confirm both exist on the machine running this
abandonnessModel.train(data="classifications/streetview_abandonedness", epochs = 200, device = [0,1], model = modelPath)

In [11]:
#getting pretrained model
#loads weights from a file, replacing whatever abandonnessModel referred to before
#NOTE(review): presumably a saved copy of the best weights from the training cell - the notebook does not show where this file comes from
modelPath = "abandonnessmodelbest.pt"
abandonnessModel = YOLO(modelPath)

In [12]:
#predicting from test data
#each class folder is predicted separately so the true label of every result is known from the folder it came from
#NOTE(review): the printed predictions list ".ipynb_checkpoints" as a third class, which suggests a checkpoint
#folder was sitting next to the class folders when the model was trained - worth checking
testFolderPath = "classifications/streetview_abandonedness/test/abandoned"
abandonedTestResults = abandonnessModel.predict(testFolderPath)
testFolderPath = "classifications/streetview_abandonedness/test/owned"
ownedTestResults = abandonnessModel.predict(testFolderPath)



image 1/47 c:\Users\sirun\Documents\grad school stuff\Research Assistant Work\satelliteview\classifications\streetview_abandonedness\test\abandoned\v10001_abandoned.png: 224x224 abandoned 0.91, owned 0.09, .ipynb_checkpoints 0.00, 58.5ms
image 2/47 c:\Users\sirun\Documents\grad school stuff\Research Assistant Work\satelliteview\classifications\streetview_abandonedness\test\abandoned\v10007_abandoned.png: 224x224 abandoned 0.72, owned 0.28, .ipynb_checkpoints 0.00, 50.2ms
image 3/47 c:\Users\sirun\Documents\grad school stuff\Research Assistant Work\satelliteview\classifications\streetview_abandonedness\test\abandoned\v10009_abandoned.png: 224x224 abandoned 1.00, owned 0.00, .ipynb_checkpoints 0.00, 53.0ms
image 4/47 c:\Users\sirun\Documents\grad school stuff\Research Assistant Work\satelliteview\classifications\streetview_abandonedness\test\abandoned\v10036_abandoned.png: 224x224 abandoned 0.94, owned 0.05, .ipynb_checkpoints 0.00, 49.6ms
image 5/47 c:\Users\sirun\Documents\grad school

In [13]:
#analyzing test data
def countTopPredictions(results, expectedLabel):
    """
    counts how many results have expectedLabel as their top-1 class

    Parameters
    ----------
    results : prediction results for images whose true class is expectedLabel
    expectedLabel : class name the top-1 prediction is compared against

    Returns
    -------
    tuple : (number of results that match, number of results that do not match)
    """
    hits = sum(1 for result in results if result.names[result.probs.top1] == expectedLabel)
    return hits, len(results) - hits

def safeRatio(numerator, denominator):
    """returns numerator / denominator, or 0.0 when denominator is 0 so an undefined metric does not stop the cell"""
    return numerator / denominator if denominator else 0.0

#an abandoned image predicted as anything else counts as a false "owned", and the other way round
correctAbandons, falseOwneds = countTopPredictions(abandonedTestResults, 'abandoned')
correctOwneds, falseAbandons = countTopPredictions(ownedTestResults, 'owned')
totalTests = len(abandonedTestResults) + len(ownedTestResults)

#calculating accuracy (how accurate the model is at getting the correct results)
correctTests = correctAbandons + correctOwneds
accuracy = safeRatio(correctTests, totalTests)

#calculating precision (how good the model is at avoiding false positives)
abandonnessPrecision = safeRatio(correctAbandons, correctAbandons + falseAbandons)
ownedPrecision = safeRatio(correctOwneds, correctOwneds + falseOwneds)

#calculating recall (how good the model is at avoiding false negatives)
abandonnessRecall = safeRatio(correctAbandons, correctAbandons + falseOwneds)
ownedRecall = safeRatio(correctOwneds, correctOwneds + falseAbandons)

#calculating f1 score
abandonnessF1 = safeRatio(2 * (abandonnessRecall * abandonnessPrecision), abandonnessRecall + abandonnessPrecision)
ownedF1 = safeRatio(2 * (ownedRecall * ownedPrecision), ownedRecall + ownedPrecision)

print("Accuracy: ", correctTests, "/", totalTests, " = ", round(accuracy, 3))
print()
print("Abandonness precision: ", round(abandonnessPrecision, 3))
print("Owned precision: ", round(ownedPrecision, 3))
print("Abandonness recall: ", round(abandonnessRecall, 3))
print("Owned recall: ", round(ownedRecall, 3))
print()
print("Abandonness f1 score: ", round(abandonnessF1, 3))
print("Owned f1 score: ", round(ownedF1, 3))

Accuracy:  59 / 81  =  0.728

Abandonness precision:  0.755
Owned precision:  0.688
Abandonness recall:  0.787
Owned recall:  0.647

Abandonness f1 score:  0.771
Owned f1 score:  0.667


# Satellite vs Street view classification model

In [None]:

#builds the classifier from the "yolov8m-cls.pt" checkpoint and trains it on the satellite vs street data set folder
satvsstrModel = YOLO("yolov8m-cls.pt")
satvsstrModel.train(data="classifications/satellite_vs_street", epochs = 20)

In [None]:
#needs satvsstrModel from the training cell above to exist in this kernel
#NOTE(review): val() is called without arguments, so it presumably validates on the data set the model was trained with - confirm
satvsstrModel.val()

In [None]:

#.loc[:100] slices by row label and includes label 100, so each list holds the images of rows 0 through 100
#the data set for this model was built from chicago rows only, so these indianapolis images were not part of it
streetViewPaths = [streetviewImageFilePathHeader + path + ".png" for path in indianapolisDataFrame.loc[:100, 'StreetviewImageName']]
satelliteViewPaths = [satelliteImageFilePathHeader + path + ".png" for path in indianapolisDataFrame.loc[:100, 'SatelliteImageName']]

#street view images come first in the combined list, satellite images second
results = satvsstrModel.predict(streetViewPaths + satelliteViewPaths)

# Pool vs no pool classification model
(there are probably not enough "pool" examples in the data for this one)

In [None]:
#the model is created here so the cell also runs on a fresh kernel
#(no other cell shown in this notebook assigns poolModel)
poolModel = YOLO("yolov8m-cls.pt")
poolModel.train(data="classifications/satellite_poolhaving", epochs = 50)

In [None]:

#old helper function I used when testing results
def predictFromData(model, dataFrame, start, end, comparisonColumn, streetview = True):
    """
    predicts one image per row and prints the guess next to the recorded value

    Parameters
    ----------
    model : model whose predict method is called with one image path at a time
    dataFrame : data frame holding the image names, the 'ID' column and comparisonColumn
    start : first row label to predict
    end : row label to stop before (not predicted)
    comparisonColumn : column whose value is printed as the actual data
    streetview : True to use the street view image of each row, False to use the satellite image
    """
    nameColumn = 'StreetviewImageName' if streetview else 'SatelliteImageName'

    for rowLabel in range(start, end):
        folderHeader = streetviewImageFilePathHeader if streetview else satelliteImageFilePathHeader
        imagePath = folderHeader + dataFrame.loc[rowLabel, nameColumn] + ".png"

        #predict is given a single path, so only the first result is used
        topResult = model.predict(imagePath)[0]

        print("For id: ", dataFrame.at[rowLabel, 'ID'], " model guessed ", topResult.names[topResult.probs.top1], " with confidence ", round(topResult.probs.top1conf.item(), 3), ". Actual data: ", dataFrame.at[rowLabel, comparisonColumn])

#reading prediction data to manually check model accuracy
#rows 10 through 19 of the indianapolis data, using the satellite image of each row (last argument False)
predictFromData(poolModel, indianapolisDataFrame, 10, 20, 'Pool', False)

# City classification model

In [None]:
#the model is created here so the cell also runs on a fresh kernel
#(the only other assignment of cityModel shown in this notebook is in the cell after this one)
cityModel = YOLO("yolov8m-cls.pt")
cityModel.train(data="classifications/chi_vs_ind", epochs = 100, device=[0, 1])

In [None]:
#the triple-quoted block below is a plain string, so none of the code inside it runs
'''
cityModel = YOLO('runs/classify/train62/weights/best.pt')

paths =  [streetviewImageFilePathHeader + path + ".png" for path in chicagoDataFrame.loc[60:80, 'StreetviewImageName']]
paths += [streetviewImageFilePathHeader + path + ".png" for path in indianapolisDataFrame.loc[60:80, 'StreetviewImageName']]

results = cityModel.predict(paths)

for i in range(len(paths)):
    result = results[i]
    print("for image ", paths[i], " model predicted", result.names[result.probs.top1], " with confidence ", round(result.probs.top1conf.item(), 3))
'''

#loads weights from one specific training run folder
#NOTE(review): the run number is hard-coded - confirm train16 is the run that is meant
cityModel = YOLO('runs/classify/train16/weights/best.pt')

#chicago test images are predicted first; += then adds the indianapolis results after them
results = cityModel.predict('classifications/chi_vs_ind/test/chicago')
results += cityModel.predict('classifications/chi_vs_ind/test/indianapolis')


# Untrained YOLO v8 detection

In [None]:
detectionModel = YOLO("yolov8m.pt")

#folder the annotated images are written to; created up front so saving cannot fail on a missing folder
detectionOutputFolder = "object_detection_results"
os.makedirs(detectionOutputFolder, exist_ok=True)

#.loc[:100] slices by row label and includes label 100
paths = [streetviewImageFilePathHeader + path + ".png" for path in indianapolisDataFrame.loc[:100, 'StreetviewImageName']]
#predict with the model created in this cell rather than whatever the global `model` currently refers to
results = detectionModel.predict(paths)
for result in results:
    if len(result.boxes) > 0:
        print("---")
        print("Found in ", result.path)
        for box in result.boxes:
            print("---")
            cords = box.xyxy[0].tolist()
            cords = [round(x, 1) for x in cords] # rounds each coordinate
            class_id = result.names[int(box.cls[0].item())] # gets the name of identified object from dictionary of all objects YOLO can identify
            conf = round(box.conf[0].item(), 2) # rounds to 2 digits

            print("Object type:", class_id)
            print("Coordinates:", cords)
            print("Probability:", conf)

        #result.path is the path of the source image (an absolute path in the printed predictions earlier),
        #so only its file name is used when building the destination
        result.save(os.path.join(detectionOutputFolder, os.path.basename(result.path)))
            

# Detection Model 
(old approach: the detection data set was too small and too tedious to create)

In [None]:
#training detection model
#NOTE: this assigns the global `model` again, replacing the one created in the first cell of the notebook
model = YOLO("yolov8m.pt")
#the data set is described by a yaml file here, unlike the classification cells which pass a folder
model.train(data=r"./detection_datasets/data.yaml", epochs = 200)

In [None]:
#NOTE: this reuses the name abandonnessModel, which the classification section also assigns;
#after this cell the name refers to weights from a "runs/detect" folder instead
abandonnessModel = YOLO("runs/detect/train15/weights/best.pt")

#single image check; the last line shows the first (only) result as the cell output
results = abandonnessModel.predict("streetview_images/v10070.png")
results[0]

In [None]:
#.loc[100:130] slices by row label and includes both ends, so this is rows 100 through 130
#the printing cell below relies on the first row being label 100
paths = [streetviewImageFilePathHeader + path + ".png" for path in chicagoDataFrame.loc[100:130, 'StreetviewImageName']]
results = abandonnessModel.predict(paths)

In [None]:

#row label of the first image that was sent to predict (the predict cell slices labels 100 to 130),
#so the result at position i belongs to row firstRowLabel + i
firstRowLabel = 100

for rowLabel, result in enumerate(results, start=firstRowLabel):
    for box in result.boxes:
        print("---")
        class_id = result.names[box.cls[0].item()] # gets the name of identified object from dictionary of all objects YOLO can identify
        #the number printed after "Said ID" is the row label, not the value of the 'ID' column
        print("Said ID: ",rowLabel, " was ", class_id, ". Actual status: ", chicagoDataFrame.at[rowLabel, 'Status'])
        

In [None]:
#extra training model
#continues from whatever abandonnessModel currently refers to in this kernel
#NOTE(review): the yaml path here ("datasets/data.yaml") differs from the one in the detection training cell - confirm which is intended
abandonnessModel.train(data="datasets/data.yaml", epochs = 5)