### Libraries

In [None]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import shape, Point, Polygon
import ast 
import folium
from shapely.wkt import loads 

### Cleaning based on the shp file

In [None]:

# Load the shapefile into a GeoDataFrame (replace with your actual file path)
shapefile_path = "geoBoundaries-SAU-ADM2-all/geoBoundaries-SAU-ADM2.shp"
gdf = gpd.read_file(shapefile_path)

# Print the available columns and the first rows to identify the city-name column
print(gdf.columns)
print(gdf.head())

# Keep only the boundary rows whose "shapeName" is one of the target cities.
# isin() is an exact match, so these strings must equal the shapefile's values.
cities = ["Dammam", "Al Khubar Governorate", "Dhahran"]
filtered_gdf = gdf[gdf["shapeName"].isin(cities)]  # Adjust "shapeName" if the city-name column differs

# Load the population data that will be filtered against the city boundaries
population_data_path = "../Collected Data/saudi_population.csv"
df_pop = pd.read_csv(population_data_path)


# Function to check if a point is inside any of the city boundaries
def is_in_city(lat, lon, city_shapes):
    """Report whether the (lat, lon) location lies inside any city boundary.

    Args:
        lat: Latitude of the location (becomes the point's y coordinate).
        lon: Longitude of the location (becomes the point's x coordinate).
        city_shapes: Iterable of geometries that expose ``contains(point)``.

    Returns:
        True if at least one geometry contains the point, otherwise False.
    """
    # Point takes (x, y), so longitude is passed first
    location = Point(lon, lat)
    for boundary in city_shapes:
        if boundary.contains(location):
            return True
    return False

# Extract the geometry column of the selected cities; it is passed to
# is_in_city, which iterates over it one boundary at a time
city_shapes = filtered_gdf.geometry

# Convert the geometry_region column to Polygon objects
def parse_geometry(geometry_str):
    """Turn a stringified coordinate list into a shapely Polygon.

    Args:
        geometry_str: String holding a Python literal whose first element
            is the sequence of coordinates to use as the polygon's ring.

    Returns:
        A Polygon built from the first element of the parsed literal.
        Any further elements of the literal are ignored.
    """
    # literal_eval only accepts Python literals, so no code in the string is run
    rings = ast.literal_eval(geometry_str)
    outer_ring = rings[0]
    return Polygon(outer_ring)

# Parse every geometry_region string into a shapely Polygon
df_pop['geometry'] = df_pop['geometry_region'].apply(parse_geometry)

# Use each polygon's centroid as the representative point of its region
df_pop['centroid'] = df_pop['geometry'].apply(lambda polygon: polygon.centroid)


# Row-wise check: does this row's centroid fall inside any selected city?
def _centroid_in_selected_cities(row):
    center = row["centroid"]
    return is_in_city(center.y, center.x, city_shapes)


df_pop["inside_city"] = df_pop.apply(_centroid_in_selected_cities, axis=1)

# Keep only the rows whose centroid is inside one of the city boundaries
filtered_pop_data = df_pop.loc[df_pop["inside_city"]]

# Write the filtered rows to disk (without the index column) and report
filtered_pop_data.to_csv("final_filtered_population_data.csv", index=False)
print("Filtered population data saved!")




### GeoJSON approach

In [None]:
import geopandas as gpd
import pandas as pd
import ast
from shapely.geometry import Point, Polygon

# Load the GeoJSON file into a GeoDataFrame (replace with your actual file path)
geojson_path = "export (3).geojson"
gdf = gpd.read_file(geojson_path)

# Print the available columns and the first rows to identify the city-name column
print(gdf.columns)
print(gdf.head())

# Keep only the features whose "name:en" property is one of the listed governorates.
# NOTE(review): only Dammam and Khobar are listed here, although the shapefile
# cell also selects Dhahran — confirm the omission is intended.
cities = ["Dammam Governorate", "Khobar Governorate"]
filtered_gdf = gdf[gdf["name:en"].isin(cities)]  # Adjust the field to match your GeoJSON structure

# Load the population data that will be filtered against the city boundaries
population_data_path = "../Collected Data/saudi_population.csv"
df_pop = pd.read_csv(population_data_path)

# Function to check if a point is inside any of the city boundaries
def is_in_city(lat, lon, city_shapes):
    """Tell whether a latitude/longitude pair falls within any given boundary.

    Args:
        lat: Latitude of the location, used as the point's y coordinate.
        lon: Longitude of the location, used as the point's x coordinate.
        city_shapes: Iterable of geometries providing ``contains(point)``.

    Returns:
        True as soon as one geometry contains the point; False if none do.
    """
    candidate = Point(lon, lat)  # Point expects (x, y), i.e. longitude first
    for region in city_shapes:
        if region.contains(candidate):
            return True
    return False

# Extract the geometry column of the selected governorates; it is passed to
# is_in_city, which iterates over it one boundary at a time
city_shapes = filtered_gdf.geometry

# Convert the geometry_region column to Polygon objects
def parse_geometry(geometry_str):
    """Build a shapely Polygon from the text form of a coordinate list.

    Args:
        geometry_str: String containing a Python literal; its first element
            supplies the coordinates of the polygon's ring.

    Returns:
        A Polygon made from the first element of the parsed literal; the
        remaining elements, if any, are not used.
    """
    parsed = ast.literal_eval(geometry_str)  # evaluates literals only, never code
    shell_coords = parsed[0]
    return Polygon(shell_coords)


# Parse every geometry_region string into a shapely Polygon
df_pop['geometry'] = df_pop['geometry_region'].apply(parse_geometry)


# Representative point of a region: the centroid of its polygon
def _centroid_of(polygon):
    return polygon.centroid


df_pop['centroid'] = df_pop['geometry'].apply(_centroid_of)


# Row-wise check: is this row's centroid inside any selected governorate?
def _row_is_inside(row):
    center = row["centroid"]
    return is_in_city(center.y, center.x, city_shapes)


df_pop["inside_city"] = df_pop.apply(_row_is_inside, axis=1)

# Keep only the rows whose centroid lies within the selected boundaries
filtered_pop_data = df_pop.loc[df_pop["inside_city"]]

# Write the filtered rows to disk (without the index column) and report
filtered_pop_data.to_csv("geojson_filtered_population_data.csv", index=False)
print("Filtered population data saved!")


In [None]:
import geopandas as gpd
import pandas as pd
import ast
from shapely.geometry import Point, Polygon

# Load the city boundaries GeoJSON into a GeoDataFrame (replace with your actual path)
geojson_path = "Cities.geojson"
gdf = gpd.read_file(geojson_path)

# Print the columns to find the one that holds the city names
print(gdf.columns)  
print(gdf.head())   # Display the first few rows to understand the structure


# Load the population rows written earlier to geojson_filtered_population_data.csv
population_data_path = "geojson_filtered_population_data.csv"
df_pop = pd.read_csv(population_data_path)

# Function to find which city boundary (if any) contains a point
def is_in_city(lat, lon, city_shapes, city_names):
    """Return the name of the first city whose boundary contains the point.

    Args:
        lat: Latitude of the location (becomes the point's y coordinate).
        lon: Longitude of the location (becomes the point's x coordinate).
        city_shapes: Iterable of geometries that expose ``contains(point)``.
        city_names: Iterable of names in the same order as ``city_shapes``.

    Returns:
        The name paired with the first geometry that contains the point,
        or None when no geometry contains it.
    """
    point = Point(lon, lat)  # Point takes (x, y), so longitude is passed first
    # Pair shapes and names by position with zip instead of city_names[i]:
    # on a pandas Series an integer key is a label lookup, so a positional
    # counter fails or picks the wrong name once the index is not 0..n-1.
    for boundary, name in zip(city_shapes, city_names):
        if boundary.contains(point):
            return name
    return None

# Extract city polygons and city names from the GeoJSON data.
# Both columns come from the same unfiltered gdf, so they share its row order.
city_shapes = gdf.geometry
city_names = gdf['name:en']  # Adjust the column name based on your GeoJSON structure

# Convert the geometry_region column to Polygon objects
def parse_geometry(geometry_str):
    """Create a shapely Polygon from a coordinate list stored as text.

    Args:
        geometry_str: String with a Python literal; the literal's first
            element is taken as the polygon's ring of coordinates.

    Returns:
        A Polygon constructed from the first element of the parsed literal.
        Other elements of the literal are ignored.
    """
    # literal_eval restricts the input to Python literals
    coordinate_rings = ast.literal_eval(geometry_str)
    first_ring = coordinate_rings[0]
    return Polygon(first_ring)

# Parse every geometry_region string into a shapely Polygon
df_pop['geometry'] = df_pop['geometry_region'].apply(parse_geometry)

# Use each polygon's centroid as the representative point of its region
df_pop['centroid'] = df_pop['geometry'].apply(lambda polygon: polygon.centroid)


# Row-wise lookup: name of the city whose boundary contains this row's centroid
def _city_for_row(row):
    center = row["centroid"]
    return is_in_city(center.y, center.x, city_shapes, city_names)


# Rows whose centroid is in no boundary get None in the new column
df_pop["city"] = df_pop.apply(_city_for_row, axis=1)

# Write all rows, now including the city column, to disk and report
df_pop.to_csv("final_filtered_population_data_with_city.csv", index=False)
print("Filtered population data with city added! Saved as 'final_filtered_population_data_with_city.csv'.")


### Map Visualization

In [None]:
# Step 1: Load the filtered dataset
# NOTE(review): the "city" column is added when writing
# 'final_filtered_population_data_with_city.csv'; the file loaded here is
# written before that step, so popups may fall back to 'Unknown City' —
# confirm which file is intended.
df = pd.read_csv('geojson_filtered_population_data.csv')

# Step 2: Create a base map centered on the mean of all region centroids.
# Each centroid is stored as a WKT string; parse it once and reuse the
# resulting points for both the latitude (y) and longitude (x) averages.
centroid_points = df["centroid"].apply(loads)
map_center = [
    centroid_points.apply(lambda p: p.y).mean(),
    centroid_points.apply(lambda p: p.x).mean(),
]
mymap = folium.Map(location=map_center, zoom_start=12)

# Step 3: Function to extract and plot polygons
def plot_polygon_on_map(geometry_wkt, centroid_wkt, city, population_count):
    """Draw one region's outline and a centroid marker on the global ``mymap``.

    Args:
        geometry_wkt: WKT string of the region polygon.
        centroid_wkt: WKT string of the region's centroid point.
        city: Label shown on the first line of the marker popup.
        population_count: Value shown after "Population:" in the popup.

    Any exception raised while parsing or drawing is caught and printed,
    so a failing row does not stop the caller.
    """
    try:
        # Turn the WKT strings back into geometry objects
        region = loads(geometry_wkt)
        center = loads(centroid_wkt)

        # Swap each (x, y) vertex of the exterior ring to [y, x] for folium
        outline = []
        for vertex in region.exterior.coords:
            outline.append([vertex[1], vertex[0]])

        # Filled blue outline of the region
        region_layer = folium.Polygon(
            locations=outline,
            color='blue',
            weight=2,
            fill=True,
            fill_color='blue',
            fill_opacity=0.4
        )
        region_layer.add_to(mymap)

        # Red marker at the centroid, with the city and population in its popup
        centroid_marker = folium.Marker(
            location=[center.y, center.x],
            popup=f"{city}<br>Population: {population_count}",
            icon=folium.Icon(color='red', icon='info-sign')
        )
        centroid_marker.add_to(mymap)

    except Exception as e:
        print(f"Error parsing geometry: {e}")

# Step 4: Iterate through each row and plot the polygons & markers.
# row.get supplies 'Unknown City' when the dataset has no "city" column.
for _, row in df.iterrows():
    plot_polygon_on_map(row['geometry'], row['centroid'], row.get('city', 'Unknown City'), row['population_count'])

# Step 5: Save the map as an HTML file
map_output_path = 'geojson_cities_map.html'
mymap.save(map_output_path)

# Build the message from the saved path so it always names the file that was
# actually written (it previously named 'final_cities_map.html').
print(f"✅ Map saved as '{map_output_path}'. Open it in your browser to view.")
