In [7]:
import pandas as pd

# Count how many rows of the input workbook fall under each station/pincode
# pair and write the counts, in the route order defined below, to a new
# Excel file.

# Input workbook and output report locations.
file_path = r"D:\data\TYBA1.xlsx"  # Replace with your file path
output_file = r"D:\data\station_pincode_counts_filtered.xlsx"

# Station-pincode mapping in the required flow. The position of each entry
# in this list is the row order of the final report.
station_pincode_mapping = [
    {"STATION": "Kurla East", "PINCODE": 400024},
    {"STATION": "Kurla West", "PINCODE": 400072},
    {"STATION": "Kurla West", "PINCODE": 400070},
    {"STATION": "Sion East", "PINCODE": 400022},
    {"STATION": "Matunga", "PINCODE": 400019},
    {"STATION": "Dadar East", "PINCODE": 400014},
    {"STATION": "Dadar West", "PINCODE": 400028},
    {"STATION": "Parel", "PINCODE": 400012},
    {"STATION": "Lower Parel Currey Road", "PINCODE": 400013},
    {"STATION": "Byculla", "PINCODE": 400008},
    {"STATION": "Byculla", "PINCODE": 400011},
    {"STATION": "Byculla", "PINCODE": 400027},
    {"STATION": "Byculla", "PINCODE": 400033},
    {"STATION": "Mumbai Sandhurst Road", "PINCODE": 400009},
    {"STATION": "Masjid", "PINCODE": 400003},
    {"STATION": "CSMT", "PINCODE": 400001},
    {"STATION": "Vidyavihar", "PINCODE": 400077},
    {"STATION": "Vidyavihar", "PINCODE": 400086},
    {"STATION": "Ghatkopar East", "PINCODE": 400075},
    {"STATION": "Vikhroli East", "PINCODE": 400083},
    {"STATION": "Vikhroli West", "PINCODE": 400079},
    {"STATION": "Kanjurmarg East, Bhandup East", "PINCODE": 400042},
    {"STATION": "Nahur West, Mulund West", "PINCODE": 400080},
    {"STATION": "Mulund Colony, Mulund West", "PINCODE": 400082},
    {"STATION": "Mulund East", "PINCODE": 400081},
    {"STATION": "Thane", "PINCODE": 400601},
    {"STATION": "Thane", "PINCODE": 400602},
    {"STATION": "Thane", "PINCODE": 400603},
    {"STATION": "Thane", "PINCODE": 400604},
    {"STATION": "Thane, Kalwa", "PINCODE": 400605},
    {"STATION": "Thane", "PINCODE": 400606},
    {"STATION": "Thane", "PINCODE": 400607},
    {"STATION": "Thane Balkum", "PINCODE": 400608},
    {"STATION": "Thane", "PINCODE": 400610},
    {"STATION": "Thane", "PINCODE": 400612},
    {"STATION": "Thane", "PINCODE": 400615},
]

# Pincodes to include. Derived from the mapping (same values, same order as
# the former hand-written list) so the filter and the station lookup cannot
# drift apart when the mapping is edited.
desired_pincodes = [entry["PINCODE"] for entry in station_pincode_mapping]

# The mapping as a DataFrame, used as the right-hand side of the merge.
mapping_df = pd.DataFrame(station_pincode_mapping)

# Pincode -> position in the mapping; used to sort the final report.
desired_order = {
    entry["PINCODE"]: index
    for index, entry in enumerate(station_pincode_mapping)
}


def to_numeric_pincodes(pincodes: pd.Series) -> pd.Series:
    """Return *pincodes* as a numeric Series.

    A Series that is already ``int64`` is returned unchanged. Anything else
    goes through ``pd.to_numeric`` with ``errors='coerce'``, so values that
    cannot be parsed become NaN. When NaN is present the result stays a
    float Series (the integer downcast only applies when every value is a
    valid integer).
    """
    if pincodes.dtype != 'int64':
        return pd.to_numeric(pincodes, errors='coerce', downcast='integer')
    return pincodes


def filter_to_pincodes(frame: pd.DataFrame, pincodes) -> pd.DataFrame:
    """Return the rows of *frame* whose ``PINCODE`` is in *pincodes*.

    The result is an independent copy whose ``PINCODE`` column is ``int64``.
    The cast is safe when *pincodes* holds integers: NaN never matches
    ``isin``, and every surviving value equals one of the requested
    pincodes. It keeps float keys (left over from NaN coercion) out of the
    merge and out of the written report.
    """
    kept = frame[frame["PINCODE"].isin(pincodes)].copy()
    kept["PINCODE"] = kept["PINCODE"].astype("int64")
    return kept


def count_rows_per_station(merged: pd.DataFrame, order: dict) -> pd.DataFrame:
    """Count rows per (STATION, PINCODE) and sort them by *order*.

    *merged* must have ``STATION`` and ``PINCODE`` columns; *order* maps a
    pincode to its sort position. Returns a DataFrame with the columns
    ``STATION``, ``PINCODE`` and ``COUNT``. Pairs with no rows in *merged*
    do not appear in the result.
    """
    counts = merged.groupby(["STATION", "PINCODE"]).size().reset_index(name="COUNT")
    # Temporary helper column: sort by mapping position, then discard it.
    counts["ORDER"] = counts["PINCODE"].map(order)
    return counts.sort_values(by="ORDER").drop(columns=["ORDER"])


# The file-touching steps only run when this is executed as a script or a
# notebook cell (where __name__ is "__main__"); importing the module just
# provides the helpers and constants above. Assignments stay at module scope
# (no main() wrapper) so later cells can still use df, filtered_df,
# merged_df and result_df.
if __name__ == "__main__":
    # Load the Excel file
    df = pd.read_excel(file_path)

    # Ensure the PINCODE column is numeric
    df['PINCODE'] = to_numeric_pincodes(df['PINCODE'])

    # Filter the data to include only the specified pincodes
    filtered_df = filter_to_pincodes(df, desired_pincodes)

    # Attach the station name to every remaining row
    merged_df = pd.merge(filtered_df, mapping_df, on="PINCODE", how="left")

    # Count per station/pincode and enforce the desired order
    result_df = count_rows_per_station(merged_df, desired_order)

    # Save the resulting DataFrame to an Excel file
    result_df.to_excel(output_file, index=False, engine="openpyxl")

    print(f"Filtered data saved to: {output_file}")


Filtered data saved to: D:\data\station_pincode_counts_filtered.xlsx
