![](2024-08-14-18-07-11.png)

# Image Database Creation
## Author: Diversa
## Last Update: 27/8/2024
## Project: Feminist Urban Sense
## Contact: hello@diversa.studio
---

### 1.- Environment Preparation

In [None]:
import os
import pandas as pd

### 2.- Normalization and Pivot

In [1]:


# Set the directory path where your folders are located
directory_path = "your_path"

# Rows of [country, numero_img, angle], one per parsed image file
data = []

# PNG files whose names do not follow streetview_<numero_img>_<angle>.png.
# They are skipped (and reported below) instead of aborting the whole scan.
skipped_files = []

# Iterate through the folders in the directory
for country_folder in os.listdir(directory_path):
    # Construct the full path of the country folder
    country_path = os.path.join(directory_path, country_folder)

    # Only directories are country folders; skip any loose files
    if not os.path.isdir(country_path):
        continue

    # The country name is the folder name without the "_crudo" marker;
    # it is the same for every image in the folder, so compute it once
    country = country_folder.replace("_crudo", "")

    # Iterate through the images in the country folder
    for image_file in os.listdir(country_path):
        # Only process PNG files
        if not image_file.endswith(".png"):
            continue

        # Extract the image number and angle from the filename
        parts = image_file.replace("streetview_", "").replace(".png", "").split("_")
        try:
            numero_img = int(parts[0])
            angle = int(parts[1])
        except (IndexError, ValueError):
            # Missing or non-numeric fields: not a street-view frame name
            skipped_files.append(os.path.join(country_path, image_file))
            continue

        # Append the data to the list
        data.append([country, numero_img, angle])

# Make skipped files visible so that unexpected names do not go unnoticed
if skipped_files:
    print(f"Skipped {len(skipped_files)} PNG file(s) with unexpected names: {skipped_files}")

# Create a DataFrame from the data
df = pd.DataFrame(data, columns=["country", "numero_img", "angle"])

# Sort the DataFrame by 'country', 'numero_img', and 'angle'
# (os.listdir returns entries in arbitrary order)
df = df.sort_values(by=["country", "numero_img", "angle"]).reset_index(drop=True)

# Save the DataFrame to a CSV file
df.to_csv("df.csv", index=False)


In [2]:
# Preview the first rows (at most 20) of the sorted image table
df.head(20)

Unnamed: 0,country,numero_img,angle
0,argentina,0,0
1,argentina,0,90
2,argentina,0,180
3,argentina,0,270
4,argentina,1,0
5,argentina,1,90
6,argentina,1,180
7,argentina,1,270
8,argentina,2,0
9,argentina,2,90


### 3.- Unique Values Isolation

In [3]:
# Filter for angle 0
df_angle_0 = df[df['angle'] == 0]

# Get all unique numero_img values for each country
unique_img_numbers = df.groupby('country')['numero_img'].apply(set)

# Image numbers that have an angle-0 frame, per country. Built once here so the
# loop below does a set lookup instead of re-filtering the DataFrame for every
# single image number.
angle_0_by_country = df_angle_0.groupby('country')['numero_img'].apply(set).to_dict()

# Initialize an empty list to store the final data
final_data = []

# Iterate through each country and its unique image numbers
for country, img_numbers in unique_img_numbers.items():
    numbers_with_angle_0 = angle_0_by_country.get(country, set())

    # Walk the full min..max range so that gaps in the numbering also get a row.
    # download is 1 only when an angle-0 frame exists for that number; numbers
    # that are missing entirely, or present without an angle-0 frame, get 0.
    for numero_img in range(min(img_numbers), max(img_numbers) + 1):
        download = 1 if numero_img in numbers_with_angle_0 else 0
        final_data.append([country, numero_img, download])

# Create a DataFrame for the final table
download_df = pd.DataFrame(final_data, columns=["country", "numero_img", "download"])

# Sort the new DataFrame
download_df = download_df.sort_values(by=['country', 'numero_img']).reset_index(drop=True)

# Save the DataFrame to a CSV file
download_df.to_csv("download_df.csv", index=False)

In [4]:
# Preview the first rows (at most 20) of the per-image download table
download_df.head(20)

Unnamed: 0,country,numero_img,download
0,argentina,0,1
1,argentina,1,1
2,argentina,2,1
3,argentina,3,1
4,argentina,4,1
5,argentina,5,1
6,argentina,6,1
7,argentina,7,1
8,argentina,8,1
9,argentina,9,1


### 4.- DataFrame Merging

In [9]:

# Load both tables. download_df.csv is read with the default comma separator;
# only nodos_expandidos.csv is pipe-separated.
df = pd.read_csv("/content/download_df.csv")
details_df = pd.read_csv("/content/nodos_expandidos.csv", sep='|')

# The concatenation below pairs rows on their index (0..n-1 for both freshly
# loaded tables), i.e. row i of one table with row i of the other; there is no
# key-based join. If the tables differ in length, the surplus rows are padded
# with NaN, so report that instead of letting it pass silently.
if len(df) != len(details_df):
    print(
        f"Warning: row counts differ (download_df: {len(df)}, "
        f"nodos_expandidos: {len(details_df)}); unmatched rows will contain NaN."
    )

# Concatenate the DataFrames side-by-side
combined_df = pd.concat([df, details_df], axis=1)

# Print the first few rows to check the result
print(combined_df.head())

# Save the combined DataFrame to a CSV file (default comma separator)
combined_df.to_csv("combined_df.csv", index=False)

print("Combined DataFrame has been created and saved as 'combined_df.csv'.")


     country  numero_img  download  index         id    country country_code  \
0  argentina           0         1      1  ARGS20031  Argentina          ARG   
1  argentina           1         1      1  ARGS20031  Argentina          ARG   
2  argentina           2         1      1  ARGS20031  Argentina          ARG   
3  argentina           3         1      1  ARGS20031  Argentina          ARG   
4  argentina           4         1      1  ARGS20031  Argentina          ARG   

      city   node_type   latitude  longitude  
0  Rosario   principal -32.961405 -60.684841  
1  Rosario  secundaria -32.959729 -60.687142  
2  Rosario  secundaria -32.958462 -60.681169  
3  Rosario  secundaria -32.960805 -60.681762  
4  Rosario  secundaria -32.960559 -60.683139  
Combined DataFrame has been created and saved as 'combined_df.csv'.


In [11]:


# Load the combined table from disk; no `sep` is passed, so the file is parsed
# with the default comma separator.
# NOTE(review): presumably this is the combined_df.csv written by the merge
# step, which requires the working directory to be /content — confirm.
combined_df = pd.read_csv("/content/combined_df.csv")

# Build the 'indice' identifier of one row: <country_code>_<city>_<INITIAL>_<numero_img>
def create_indice(row):
    """Return the composite 'indice' string for a single row.

    Args:
        row: Mapping-like object (for example a DataFrame row) providing the
            keys 'node_type', 'country_code', 'city' and 'numero_img'.

    Returns:
        The string ``<country_code>_<city>_<INITIAL>_<numero_img>``, where
        ``INITIAL`` is the upper-cased first character of ``node_type``.
    """
    node_type = row['node_type']
    initial = node_type[0].upper()
    return "{}_{}_{}_{}".format(
        row['country_code'],
        row['city'],
        initial,
        row['numero_img'],
    )

# Column layout of the exported table
final_columns = [
    'numero_img',
    'index',
    'id',
    'country',
    'country_code',
    'city',
    'node_type',
    'latitude',
    'longitude',
    'indice',
    'download',
]

# Compute the 'indice' value row by row and attach it as a new column
combined_df['indice'] = combined_df.apply(create_indice, axis=1)

# Keep only the wanted columns in the wanted order, then order the rows by
# 'country' and 'numero_img' and renumber the index from zero
result_df = (
    combined_df[final_columns]
    .sort_values(by=['country', 'numero_img'])
    .reset_index(drop=True)
)

# Write the final table to disk without the index column
result_df.to_csv("result_final_df.csv", index=False)

print("Final DataFrame with 'indice' column has been created and saved as 'result_final_df.csv'.")


Final DataFrame with 'indice' column has been created and saved as 'result_final_df.csv'.
