In [2]:
import os
from glob import glob

import geopandas as gpd
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import osmnx as ox
import pandas as pd
import shapely.speedups
from descartes import PolygonPatch
from shapely import wkt
from shapely.geometry import Point, Polygon, MultiPolygon, LineString
from tqdm.notebook import tqdm

In [3]:
import geopandas as gpd
from shapely.geometry import Point
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

def load_city_data_by_name(city_name, target_crs="epsg:3005"):
    """Geocode a city with OSMnx and return its boundary cut into grid pieces.

    The boundary returned by ``ox.geocode_to_gdf`` is projected with
    ``ox.project_gdf``, split with ``ox.utils_geo._quadrat_cut_geometry``
    (``quadrat_width=500``; presumably in the units of the projected CRS --
    TODO confirm) and the pieces are projected back with ``to_latlong=True``.

    Parameters
    ----------
    city_name : str
        Place query handed to ``ox.geocode_to_gdf``.
    target_crs : str, optional
        Accepted for interface compatibility; not used by the current
        implementation.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per piece of the cut boundary, in a ``geometry`` column.
    """
    boundary_gdf = ox.project_gdf(ox.geocode_to_gdf(city_name))
    boundary = boundary_gdf['geometry'].iloc[0]

    # NOTE: _quadrat_cut_geometry is a private OSMnx helper.
    pieces = ox.utils_geo._quadrat_cut_geometry(boundary, quadrat_width=500)

    grid = gpd.GeoDataFrame(geometry=list(pieces.geoms))
    grid.crs = boundary_gdf.crs
    return ox.project_gdf(grid, to_latlong=True)


def add_contains_place_column(city_data, places_data, contains_col_name):
    """Flag every grid cell that contains at least one place.

    NOTE: a second function with this exact name is defined further down in
    the same cell; once the cell has run, that later definition is the one
    bound to the name.

    Parameters
    ----------
    city_data : DataFrame-like
        Grid cells with a ``geometry`` column; modified in place.
    places_data : mapping / DataFrame-like
        Its ``geometry`` entries are each passed to ``Point(...)``.
    contains_col_name : str
        Name of the boolean column written to ``city_data``.

    Returns
    -------
    The same ``city_data`` object, with ``contains_col_name`` set to ``True``
    for cells containing a place and ``False`` otherwise.
    """
    candidate_points = [Point(geom) for geom in places_data['geometry']]
    city_data[contains_col_name] = False

    for cell_index, cell in city_data.iterrows():
        polygon = cell['geometry']
        # any() stops at the first hit, like the explicit break it replaces.
        if any(polygon.contains(pt) for pt in candidate_points):
            city_data.at[cell_index, contains_col_name] = True

    return city_data


def load_poi_data(path, target_crs="epsg:3005", source_crs="epsg:3005"):
    """Read a POI CSV with a WKT ``geometry`` column into a GeoDataFrame.

    Parameters
    ----------
    path : str
        CSV file to read; it must have a ``geometry`` column holding WKT.
    target_crs : str, optional
        CRS the result is reprojected to with ``to_crs``.
    source_crs : str, optional
        CRS the WKT coordinates are declared to be in. The default keeps the
        value that used to be hard-coded, so existing calls behave as before.
        NOTE(review): the previous inline comment claimed EPSG:4326 while the
        code declared EPSG:3005 -- confirm which one the CSVs really use, and
        keep it consistent with the grid geometries used for containment.

    Returns
    -------
    geopandas.GeoDataFrame
        All non-geometry CSV columns, the parsed ``geometry`` column and a
        ``geometry_center`` column holding each geometry's centroid.
    """
    df = pd.read_csv(path)
    poi_data = gpd.GeoDataFrame(
        df.drop(columns=["geometry"]),
        geometry=gpd.GeoSeries.from_wkt(df["geometry"]),
        crs=source_crs,  # declares the CRS of the raw coordinates; no reprojection here
    )
    poi_data = poi_data.to_crs(target_crs)  # no-op when source and target match
    poi_data['geometry_center'] = poi_data['geometry'].centroid
    return poi_data

def add_contains_place_column(city_data, poi_data, contains_col_name):
    """Count, for every grid cell, how many POI centroids fall inside it.

    Parameters
    ----------
    city_data : DataFrame-like
        Grid cells with a ``geometry`` column whose values offer
        ``contains(point)``; modified in place.
    poi_data : mapping / GeoDataFrame-like
        ``poi_data['geometry'].centroid`` supplies the points to test.
    contains_col_name : str
        Name of the count column written to ``city_data``.

    Returns
    -------
    The same ``city_data`` object with the count column filled in.
    """
    centroids = poi_data['geometry'].centroid
    city_data[contains_col_name] = 0  # every cell starts with no hits

    for cell_index, cell in city_data.iterrows():
        polygon = cell['geometry']
        hits = sum(1 for pt in centroids if polygon.contains(pt))
        city_data.at[cell_index, contains_col_name] = hits

    return city_data

In [None]:
path_to_downlaoded_data = "C:/Users/Ramesh Babu/BuzzOnEarth/datasets/raw/collected_data"

# One sub-directory per city. os.path.basename copes with whichever separator
# glob returns on the current OS (the previous split on "\\" only worked on
# Windows).
cities = [
    os.path.basename(city_dir)
    for city_dir in glob(f"{path_to_downlaoded_data}/germany_data/*")
]
print(cities)

for city in tqdm(cities):
    print(city)
    # Only Berlin is processed for now; every other city is skipped.
    if city != "Berlin":
        continue

    city_name = city
    city_data = load_city_data_by_name(city)
    poi_data_list = list(glob(f"{path_to_downlaoded_data}/germany_data/{city_name}/*.csv"))

    for poi_data_path in tqdm(poi_data_list):
        poi_data = load_poi_data(poi_data_path)
        # Column name = text after the last '-' in the file name without its
        # extension. Splitting the raw path on '/' left the directory name
        # attached when glob returned a backslash before the file name.
        file_stem = os.path.splitext(os.path.basename(poi_data_path))[0]
        containment_col_name = file_stem.split('-')[-1]
        city_data = add_contains_place_column(city_data, poi_data, containment_col_name)

    # No leading "/" here: the base path is already absolute, and "/C:/..." is
    # not the directory the later cells read from.
    city_data.to_csv(f"{path_to_downlaoded_data}/final_data_combo/{city_name}_data.csv")
    

In [None]:
# The paths must be f-strings: without the prefix the literal text
# "{path_to_downlaoded_data}" is globbed, nothing matches and pd.concat fails.
city_data_path = glob(f"{path_to_downlaoded_data}/final_data_combo/*.csv")

city_frames = []

for city_path in tqdm(city_data_path):
    file_name = os.path.basename(city_path)
    # The combined outputs live in the same folder; skip them so a re-run does
    # not feed its own result back in as a city called "all".
    if file_name.startswith("all_city_data"):
        continue
    city_name = file_name.split('_')[0]
    city_data = pd.read_csv(city_path)
    city_data['city'] = city_name
    city_frames.append(city_data)

if not city_frames:
    raise FileNotFoundError(
        f"no per-city CSV files found in {path_to_downlaoded_data}/final_data_combo"
    )

all_city_data = pd.concat(city_frames)
all_city_data.to_csv(f"{path_to_downlaoded_data}/final_data_combo/all_city_data.csv")

all_city_data.head()

In [None]:
# NOTE: this cell repeats the aggregation done by the preceding cell; keeping
# only one of the two would be enough.
# The paths must be f-strings: without the prefix the literal text
# "{path_to_downlaoded_data}" is globbed, nothing matches and pd.concat fails.
city_data_path = glob(f"{path_to_downlaoded_data}/final_data_combo/*.csv")

city_frames = []

for city_path in tqdm(city_data_path):
    file_name = os.path.basename(city_path)
    # Skip the combined outputs stored in the same folder so a re-run does not
    # ingest its own result as a city called "all".
    if file_name.startswith("all_city_data"):
        continue
    city_name = file_name.split('_')[0]
    city_data = pd.read_csv(city_path)
    city_data['city'] = city_name
    city_frames.append(city_data)

if not city_frames:
    raise FileNotFoundError(
        f"no per-city CSV files found in {path_to_downlaoded_data}/final_data_combo"
    )

all_city_data = pd.concat(city_frames)
all_city_data.to_csv(f"{path_to_downlaoded_data}/final_data_combo/all_city_data.csv")

all_city_data.head()

In [3]:
def add_population_column(city_data, poi_data, contains_col_name):
    """Copy a population value onto every grid cell that contains a POI centroid.

    For each cell the centroids of ``poi_data['geometry']`` are scanned in
    order; the ``population`` value of the first centroid inside the cell is
    stored. Cells with no centroid inside keep ``None``.

    Parameters
    ----------
    city_data : DataFrame-like
        Grid cells with a ``geometry`` column whose values offer
        ``contains(point)``; modified in place.
    poi_data : GeoDataFrame-like
        Must provide ``['geometry'].centroid`` (a Series) and a ``population``
        column reachable through ``.at``.
    contains_col_name : str
        Name of the column written to ``city_data``.

    Returns
    -------
    The same ``city_data`` object with the new column filled in.
    """
    places_points = poi_data['geometry'].centroid
    city_data[contains_col_name] = None  # cells without a match stay None

    for city_index, city_row in city_data.iterrows():
        grid_polygon = city_row['geometry']
        value = None

        # Series.iteritems() was removed in pandas 2.0; items() yields the
        # same (index, value) pairs on old and new versions alike.
        for poi_index, place_point in places_points.items():
            if grid_polygon.contains(place_point):
                value = poi_data.at[poi_index, 'population']
                break  # first match wins

        city_data.at[city_index, contains_col_name] = value

    return city_data


In [None]:
# The paths must be f-strings: without the prefix the literal text
# "{path_to_downlaoded_data}" is globbed and no file is ever found.
city_data_path = glob(f"{path_to_downlaoded_data}/final_data_combo/*.csv")

city_frames = []

for city_path in tqdm(city_data_path):
    file_name = os.path.basename(city_path)
    # Skip every combined output (all_city_data.csv as well as
    # all_city_data_with_pop.csv); otherwise "all" is treated as a city and
    # its population file lookup fails.
    if file_name.startswith("all_city_data"):
        continue
    city_name = file_name.split('_')[0]
    city_data = pd.read_csv(city_path)
    city_data['city'] = city_name
    city_data['geometry'] = city_data['geometry'].apply(wkt.loads)
    # NOTE(review): load_city_data_by_name returns geometries projected with
    # to_latlong=True, so the "epsg:3005" label looks wrong. It is kept because
    # the containment test below works on raw coordinates -- confirm before
    # reprojecting anything.
    city_data = gpd.GeoDataFrame(city_data, geometry='geometry', crs="epsg:3005")

    # Load the population points for this city and attach them to the grid.
    population_data_path = f"{path_to_downlaoded_data}/population_data/{city_name}, Germany_pop.csv"
    population_data = load_poi_data(population_data_path)
    city_data = add_population_column(city_data, population_data, 'population')
    city_frames.append(city_data)

if not city_frames:
    raise FileNotFoundError(
        f"no per-city CSV files found in {path_to_downlaoded_data}/final_data_combo"
    )

all_city_data = pd.concat(city_frames)
all_city_data.to_csv(f"{path_to_downlaoded_data}/final_data_combo/all_city_data_with_pop.csv")

all_city_data.head()

In [None]:
# Column-wise totals. The geometry and city columns are not summable
# quantities (on pandas >= 2 a plain .sum() no longer drops such columns
# silently), so they are left out. The remaining columns are coerced to
# numbers because `population` is created as an object column holding None
# for cells without a match.
pd.DataFrame(all_city_data).drop(columns=["geometry", "city"], errors="ignore").apply(
    pd.to_numeric, errors="coerce"
).sum()