# Import

In [1]:
import os
import warnings
warnings.filterwarnings("ignore")
os.environ['USE_PYGEOS'] = '0'
import geopandas as gpd
from shapely.geometry import Point
import pandas as pd

In [2]:
# Define output folder path
output_folder_path = os.path.join('..', '..', 'data','02_intermediate_output')

# Define data folder path
data_folder_path = os.path.join('..','..', 'data','01_input_data')

In [3]:
#Read Perceived_crime-safety_GSV csv file
gsv_path = os.path.join(data_folder_path, 'Perceived_crime-safety_GSV.csv')
gsv_df = pd.read_csv(gsv_path)

#View 
gsv_df.head()

Unnamed: 0,gdriveid,safety,name,filename,img_path
0,1Mr2Z7-PwovxmO3Ui9edPIJiPcMD9IFUG,4.171727,-22.76741576_-43.11201696_288_2018-03.jpg,rio_Ilha_de_Paqueta,rio_Ilha_de_Paqueta/-22.76741576_-43.11201696_...
1,1Msu85ZbpnDshTbz0Nk7Wr9Gx29_jL9kB,4.310455,-22.76741576_-43.11201696_216_2018-03.jpg,rio_Ilha_de_Paqueta,rio_Ilha_de_Paqueta/-22.76741576_-43.11201696_...
2,1MwIBACdW-H83wsyeGPmUD69xmiy5JnBX,4.46162,-22.76741576_-43.11201696_144_2018-03.jpg,rio_Ilha_de_Paqueta,rio_Ilha_de_Paqueta/-22.76741576_-43.11201696_...
3,1N32hFQV45smS404XQ7rrrM-cq5WUm7-m,4.424641,-22.76741576_-43.11201696_72_2018-03.jpg,rio_Ilha_de_Paqueta,rio_Ilha_de_Paqueta/-22.76741576_-43.11201696_...
4,1N4YRfYN7hduxyods1SCizg-7p2_lvi-m,4.778797,-22.76741576_-43.11201696_0_2018-03.jpg,rio_Ilha_de_Paqueta,rio_Ilha_de_Paqueta/-22.76741576_-43.11201696_...


### Extract latitude, longitude, direction, and date from "name" colunm

In [4]:

pattern = r'([-+]?\d*\.\d+)_([-+]?\d*\.\d+)_(\d+)_(\d{4}-\d{2})\.jpg'
extracted_data = gsv_df['name'].str.extract(pattern, expand=True)

extracted_data.columns = ['latitude', 'longitude', 'direction', 'date']
gsv_df = pd.concat([gsv_df, extracted_data], axis=1)


# ss_df.dropna()
gsv_df.head()

Unnamed: 0,gdriveid,safety,name,filename,img_path,latitude,longitude,direction,date
0,1Mr2Z7-PwovxmO3Ui9edPIJiPcMD9IFUG,4.171727,-22.76741576_-43.11201696_288_2018-03.jpg,rio_Ilha_de_Paqueta,rio_Ilha_de_Paqueta/-22.76741576_-43.11201696_...,-22.76741576,-43.11201696,288,2018-03
1,1Msu85ZbpnDshTbz0Nk7Wr9Gx29_jL9kB,4.310455,-22.76741576_-43.11201696_216_2018-03.jpg,rio_Ilha_de_Paqueta,rio_Ilha_de_Paqueta/-22.76741576_-43.11201696_...,-22.76741576,-43.11201696,216,2018-03
2,1MwIBACdW-H83wsyeGPmUD69xmiy5JnBX,4.46162,-22.76741576_-43.11201696_144_2018-03.jpg,rio_Ilha_de_Paqueta,rio_Ilha_de_Paqueta/-22.76741576_-43.11201696_...,-22.76741576,-43.11201696,144,2018-03
3,1N32hFQV45smS404XQ7rrrM-cq5WUm7-m,4.424641,-22.76741576_-43.11201696_72_2018-03.jpg,rio_Ilha_de_Paqueta,rio_Ilha_de_Paqueta/-22.76741576_-43.11201696_...,-22.76741576,-43.11201696,72,2018-03
4,1N4YRfYN7hduxyods1SCizg-7p2_lvi-m,4.778797,-22.76741576_-43.11201696_0_2018-03.jpg,rio_Ilha_de_Paqueta,rio_Ilha_de_Paqueta/-22.76741576_-43.11201696_...,-22.76741576,-43.11201696,0,2018-03


### Calculate the mean Perceived crime-safety of Google street view  for each latitude and longitude

In [5]:
mean_GSV_score = gsv_df.groupby(['latitude', 'longitude'])['safety'].mean().reset_index()
mean_GSV_score.head()

Unnamed: 0,latitude,longitude,safety
0,-22.0,-43.0,3.32267
1,-22.74980048,-43.10608246,3.830749
2,-22.7500211,-43.10731249,4.656913
3,-22.75013537,-43.10648129,4.806924
4,-22.75036273,-43.10624367,5.019889


In [6]:
# make it a GDF for further calc
geometry = [Point(xy) for xy in zip(mean_GSV_score['longitude'], mean_GSV_score['latitude'])]
gsv_point_df = gpd.GeoDataFrame(mean_GSV_score, geometry=geometry, crs=4326)

# Reproject to EPSG 31983
gsv_point_df = gsv_point_df.to_crs(epsg=31983)
gsv_point_df.head()

Unnamed: 0,latitude,longitude,safety,geometry
0,-22.0,-43.0,3.32267,POINT (706472.059 7565822.726)
1,-22.74980048,-43.10608246,3.830749,POINT (694472.434 7482933.568)
2,-22.7500211,-43.10731249,4.656913,POINT (694345.787 7482910.752)
3,-22.75013537,-43.10648129,4.806924,POINT (694430.997 7482897.006)
4,-22.75036273,-43.10624367,5.019889,POINT (694455.080 7482871.517)


In [7]:
# Save the aggregated GeoDataFrame to a GeoPackage file
output_filepath = os.path.join(output_folder_path, 'Perceived_crime-safety_point.gpkg')
gsv_point_df.to_file(output_filepath, driver='GPKG')