# Filter rooftop data to pilot districts
Import the s2 files that overlap the pilot districts and keep only the rooftops that fall within those districts.

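**Purpose:** Build a single dataset of the rooftops located in the five pilot districts (Katni, Sirmaur, Gandhinagar, Latur, Indore) and save it as a Parquet file.  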


**Contents:**  
1. Import packages and set paths
2. Import SHRUG district boundary data and filter for pilot districts
3. For each s2 file, filter for rooftops in districts

## 1. Import packages and set paths

In [None]:
from pathlib import Path

import geopandas as gpd
import pandas as pd
from tqdm import tqdm

from rooftop_tools.utils_rooftop import (
    get_matched_rooftop_centroids_from_s2_file,
    get_overlapping_s2_cell_ids,
)

In [None]:
# Input locations, expressed relative to the current working directory.
FH_SAMPLING_FOLDER = Path("..", "data", "fortify_data")
# Shapefile holding the SHRUG district polygons.
SHRUG_district_path = FH_SAMPLING_FOLDER.joinpath(
    "Shape files/shrug-pc11dist-poly-shp/district.shp"
)

## 2. Import SHRUG district boundary data and filter for pilot districts

In [None]:
# Import the SHRUG district boundaries and keep only the pilot districts.
PILOT_DISTRICT_NAMES = ["Katni", "Sirmaur", "Gandhinagar", "Latur", "Indore"]

SHRUG_districts = gpd.read_file(SHRUG_district_path)
pilot_districts = SHRUG_districts[
    SHRUG_districts["d_name"].isin(PILOT_DISTRICT_NAMES)
]

# `isin` silently drops any name that has no exact match in "d_name", so a
# spelling mistake would otherwise only be caught by eyeballing the output.
# Fail loudly instead of carrying on with fewer districts than intended.
missing_district_names = sorted(
    set(PILOT_DISTRICT_NAMES) - set(pilot_districts["d_name"])
)
if missing_district_names:
    raise ValueError(
        f"Pilot districts not found in SHRUG 'd_name': {missing_district_names}"
    )

# Display the filtered rows for a visual check (e.g. that no name matched
# more than one district).
pilot_districts

## 3. For each s2 file, filter for rooftops in districts

In [None]:
# Folder passed to the helper as the location of the s2 rooftop files.
rooftop_data_dir = FH_SAMPLING_FOLDER / "Rooftop Data"

# Cell ids returned by the helper for the pilot district boundaries.
s2_cell_ids = get_overlapping_s2_cell_ids(pilot_districts)

# One result per s2 cell, collected in the same order as `s2_cell_ids`;
# tqdm shows progress while the cells are processed.
matched_rooftop_centroids_gdf_list = [
    get_matched_rooftop_centroids_from_s2_file(
        s2_file_dir=rooftop_data_dir,
        s2_cell_id=cell_id,
        boundaries_gdf=pilot_districts,
    )
    for cell_id in tqdm(s2_cell_ids)
]

In [None]:
# Stack the per-cell results into one table; ignore_index=True gives the
# combined table a fresh 0..n-1 index instead of repeating per-cell indices.
matched_rooftop_centroids_gdf = pd.concat(
    matched_rooftop_centroids_gdf_list, ignore_index=True
)

# Save the matched rooftops data. The output folder is created first because
# to_parquet does not create missing parent directories and would otherwise
# fail after the (slow) extraction step has already completed.
cleaned_rooftop_dir = FH_SAMPLING_FOLDER / "Cleaned rooftop data"
cleaned_rooftop_dir.mkdir(parents=True, exist_ok=True)
matched_rooftop_centroids_gdf.to_parquet(
    cleaned_rooftop_dir / "Rooftops in 5 pilot dists.parquet"
)