In [None]:
import http.client

# Fetch the CPCB air-quality RSS feed and print the raw XML.
# The timeout keeps the cell from hanging on an unresponsive server, and the
# try/finally releases the socket even when the request itself fails.
conn = http.client.HTTPSConnection("airquality.cpcb.gov.in", timeout=30)
try:
    conn.request("GET", "/caaqms/rss_feed")
    res = conn.getresponse()
    data = res.read()
finally:
    conn.close()

print(data.decode("utf-8"))

<?xml version='1.0' encoding='UTF-8'?>
<AqIndex>
  <Country id="India">
    <State id="Andaman and Nicobar">
      <City id="Sri Vijaya Puram">
        <Station id="Police Line, Sri Vijaya Puram - ANPCC" lastupdate="31-03-2025 13:00:00" latitude="11.654054" longitude="92.734055">
          <Pollutant_Index id="PM2.5" Min="8" Max="50" Avg="27" Hourly_sub_index="14"/>
          <Pollutant_Index id="PM10" Min="46" Max="79" Avg="63" Hourly_sub_index="78"/>
          <Pollutant_Index id="NO2" Min="20" Max="32" Avg="26" Hourly_sub_index="24"/>
          <Pollutant_Index id="NH3" Min="1" Max="3" Avg="2" Hourly_sub_index="2"/>
          <Pollutant_Index id="SO2" Min="1" Max="17" Avg="9" Hourly_sub_index="15"/>
          <Pollutant_Index id="CO" Min="26" Max="68" Avg="34" Hourly_sub_index="31"/>
          <Pollutant_Index id="OZONE" Min="2" Max="11" Avg="4" Hourly_sub_index="5"/>
          <Air_Quality_Index Value="63" Predominant_Parameter="PM10"/>
        </Station>
      </City>
    </State>

In [None]:
from datetime import datetime
import pytz

# Resolve the Indian Standard Time zone through pytz
ist = pytz.timezone('Asia/Kolkata')

# Timezone-aware "now", expressed in IST
current_time_ist = datetime.now(ist)

# Render to second precision, then report it
formatted_now = current_time_ist.strftime('%Y-%m-%d %H:%M:%S')
print("Current Date and Time in IST:", formatted_now)


Current Date and Time in IST: 2025-03-30 11:37:39


In [None]:
from datetime import datetime
from zoneinfo import ZoneInfo

# One layout for every printed timestamp: date, time, zone abbreviation
TIMESTAMP_LAYOUT = '%Y-%m-%d %H:%M:%S %Z'

# Timezone-aware "now" in India; every line printed below is this same instant
ist_time = datetime.now(ZoneInfo("Asia/Kolkata"))

# Display label -> IANA zone key, expanded into ZoneInfo objects in the same order
zone_keys = {
    "India (IST)": "Asia/Kolkata",
    "UTC": "UTC",
    "Dubai": "Asia/Dubai",
    "London": "Europe/London",
    "New York": "America/New_York",
    "Tokyo": "Asia/Tokyo",
}
target_zones = {label: ZoneInfo(key) for label, key in zone_keys.items()}

# Report the reference time first, then the instant as seen from each zone
print(f"Current Local Time (IST): {ist_time.strftime(TIMESTAMP_LAYOUT)}")
for label, zone in target_zones.items():
    local_time = ist_time.astimezone(zone)
    print(f"{label}: {local_time.strftime(TIMESTAMP_LAYOUT)}")

Current Local Time (IST): 2025-03-30 11:37:39 IST
India (IST): 2025-03-30 11:37:39 IST
UTC: 2025-03-30 06:07:39 UTC
Dubai: 2025-03-30 10:07:39 +04
London: 2025-03-30 07:07:39 BST
New York: 2025-03-30 02:07:39 EDT
Tokyo: 2025-03-30 15:07:39 JST


In [None]:
!pip install dlt[duckdb]

Collecting dlt[duckdb]
  Downloading dlt-1.9.0-py3-none-any.whl.metadata (11 kB)
Collecting giturlparse>=0.10.0 (from dlt[duckdb])
  Downloading giturlparse-0.12.0-py2.py3-none-any.whl.metadata (4.5 kB)
Collecting hexbytes>=0.2.2 (from dlt[duckdb])
  Downloading hexbytes-1.3.0-py3-none-any.whl.metadata (3.3 kB)
Collecting jsonpath-ng>=1.5.3 (from dlt[duckdb])
  Downloading jsonpath_ng-1.7.0-py3-none-any.whl.metadata (18 kB)
Collecting makefun>=1.15.0 (from dlt[duckdb])
  Downloading makefun-1.15.6-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting pathvalidate>=2.5.2 (from dlt[duckdb])
  Downloading pathvalidate-3.2.3-py3-none-any.whl.metadata (12 kB)
Collecting pendulum>=2.1.2 (from dlt[duckdb])
  Downloading pendulum-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB)
Collecting rich-argparse<2.0.0,>=1.6.0 (from dlt[duckdb])
  Downloading rich_argparse-1.7.0-py3-none-any.whl.metadata (14 kB)
Collecting semver>=3.0.0 (from dlt[duckdb])
  Downloading semve

In [None]:
import dlt
import requests
from datetime import datetime
from zoneinfo import ZoneInfo

import os
import warnings

# Read the OpenAQ API key from the environment instead of committing it to the
# notebook, where every copy of the file would carry the credential.
# NOTE(review): the key that used to be written here should be treated as
# exposed and rotated.
API_KEY = os.environ.get("OPENAQ_API_KEY", "")
if not API_KEY:
    warnings.warn(
        "OPENAQ_API_KEY is not set; requests to the OpenAQ API are expected to fail authentication."
    )

@dlt.resource(
    table_name="location_info",
    write_disposition="merge",
    primary_key="id",
)
def get_location_info():
    """Fetch the OpenAQ v3 metadata record for location 3409411.

    Yields:
        dict: The decoded JSON body of the response, yielded whole as one item.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    response = requests.get(
        "https://api.openaq.org/v3/locations/3409411",
        headers={"X-API-Key": API_KEY},
        # Without a timeout, requests waits indefinitely on a stalled connection.
        timeout=30,
    )
    response.raise_for_status()
    yield response.json()

@dlt.resource(
    table_name="latest_measurements",
    write_disposition="merge",
    primary_key="sensorsId",
)
def get_latest_info():
    """Fetch the response of the OpenAQ v3 ``/latest`` endpoint for location 3409411.

    Yields:
        dict: The decoded JSON body of the response, yielded whole as one item.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    response = requests.get(
        "https://api.openaq.org/v3/locations/3409411/latest",
        headers={"X-API-Key": API_KEY},
        # Without a timeout, requests waits indefinitely on a stalled connection.
        timeout=30,
    )
    response.raise_for_status()
    yield response.json()

@dlt.transformer
def joined_measurements(latest_readings):
    """Attach location and sensor metadata to each latest reading.

    Args:
        latest_readings: One response dictionary from the latest-measurements
            resource; its "results" list holds the individual readings.

    Yields:
        dict: One row per reading with the keys location_id, location_name,
            timestamp, sensor_id, sensor_name, value and unit. The timestamp is
            the current IST wall-clock time at processing, not a value taken
            from the reading.
    """
    # The location resource yields a single response; take that one item.
    location_pages = list(get_location_info())
    location_payload = location_pages[0]
    stations = location_payload["results"]

    # Identify the location from the first result, or fall back to placeholders.
    if not stations:
        location_id = location_name = "unknown"
    else:
        first_station = stations[0]
        location_id = first_station["id"]
        location_name = first_station["name"]

    # sensor id -> upper-cased parameter name and its unit
    sensor_map = {}
    for station in stations:
        for sensor in station["sensors"]:
            sensor_id = sensor["id"]
            parameter = sensor["parameter"]
            sensor_map[sensor_id] = {
                "name": parameter["name"].upper(),
                "unit": parameter["units"],
            }

    # One timestamp shared by every row produced from this response.
    now_ist = datetime.now(ZoneInfo("Asia/Kolkata"))
    current_ist_timestamp = now_ist.strftime('%Y-%m-%d %H:%M:%S')

    # Readings whose sensor id is missing from the metadata get placeholder labels.
    unknown_sensor = {"name": "UNKNOWN", "unit": "unknown"}
    for reading in latest_readings["results"]:
        reading_sensor_id = reading["sensorsId"]
        sensor_info = sensor_map.get(reading_sensor_id, unknown_sensor)
        yield {
            "location_id": location_id,
            "location_name": location_name,
            "timestamp": current_ist_timestamp,
            "sensor_id": reading_sensor_id,
            "sensor_name": sensor_info["name"],
            "value": reading["value"],
            "unit": sensor_info["unit"],
        }

# DuckDB-backed pipeline; loaded rows go to the "openaq_current_l3" dataset.
pipeline = dlt.pipeline(pipeline_name="openaq_data", destination="duckdb", dataset_name="openaq_current_l3")

# Pipe the latest-measurements resource through the join transformer, then load it.
enriched_readings = get_latest_info() | joined_measurements
load_info = pipeline.run(enriched_readings)

print(load_info)

Pipeline openaq_data load step completed in 0.07 seconds
1 load package(s) were loaded to destination duckdb and into dataset openaq_current_l3
The duckdb destination used duckdb:////content/openaq_data.duckdb location to store data
Load package 1743415213.3952978 is LOADED and contains no failed jobs


In [None]:
#chatgpt
import duckdb
import pandas as pd

# Connect to the DuckDB database file
conn = duckdb.connect(database='/content/openaq_data.duckdb', read_only=False)

# The pipeline's tables live in a schema named after the dataset, so a plain
# SHOW TABLES (current schema only) comes back empty. SHOW ALL TABLES lists the
# tables of every schema.
tables = conn.execute("SHOW ALL TABLES").fetchdf()
print("Available tables:")
print(tables)

# Query the joined_measurements table stored in the 'openaq_current_l3' dataset
query = """
SELECT location_id, location_name, strftime(timestamp, '%Y-%m-%d %H:%M:%S') AS timestamp, sensor_id, sensor_name, value, unit
FROM openaq_current_l3.joined_measurements
ORDER BY timestamp DESC
LIMIT 100
"""

# Execute the query and fetch the results as a Pandas DataFrame
df = conn.execute(query).fetchdf()

# Display the results
print("Sample results:")
print(df)

Available tables:
Empty DataFrame
Columns: [name]
Index: []
Sample results:
   location_id                  location_name            timestamp  sensor_id  \
0      3409411  Christianpatty, Nagaon - PCBA  2025-03-31 15:30:13   12237790   
1      3409411  Christianpatty, Nagaon - PCBA  2025-03-31 15:30:13   12237788   
2      3409411  Christianpatty, Nagaon - PCBA  2025-03-31 15:30:13   12237792   
3      3409411  Christianpatty, Nagaon - PCBA  2025-03-31 15:30:13   12237796   
4      3409411  Christianpatty, Nagaon - PCBA  2025-03-31 15:30:13   12237794   
5      3409411  Christianpatty, Nagaon - PCBA  2025-03-31 15:30:13   12237789   
6      3409411  Christianpatty, Nagaon - PCBA  2025-03-31 15:30:13   12237795   
7      3409411  Christianpatty, Nagaon - PCBA  2025-03-31 15:30:13   12237793   
8      3409411  Christianpatty, Nagaon - PCBA  2025-03-31 15:30:13   12237791   

        sensor_name  value   unit  
0               NO2   8.00    ppb  
1                CO   0.88    ppb  
2    

In [None]:
# Show the query result as the cell's output.
df

Unnamed: 0,location_id,location_name,timestamp,sensor_id,sensor_name,value,unit
0,3409411,"Christianpatty, Nagaon - PCBA",2025-03-30 11:53:37,12237792,PM10,112.0,µg/m³
1,3409411,"Christianpatty, Nagaon - PCBA",2025-03-30 11:53:37,12237788,CO,0.94,ppb
2,3409411,"Christianpatty, Nagaon - PCBA",2025-03-30 11:53:37,12237791,O3,28.4,µg/m³
3,3409411,"Christianpatty, Nagaon - PCBA",2025-03-30 11:53:37,12237796,TEMPERATURE,27.8,c
4,3409411,"Christianpatty, Nagaon - PCBA",2025-03-30 11:53:37,12237790,NO2,5.6,ppb
5,3409411,"Christianpatty, Nagaon - PCBA",2025-03-30 11:53:37,12237793,PM25,62.0,µg/m³
6,3409411,"Christianpatty, Nagaon - PCBA",2025-03-30 11:53:37,12237795,SO2,9.0,ppb
7,3409411,"Christianpatty, Nagaon - PCBA",2025-03-30 11:53:37,12237789,NO,1.1,ppb
8,3409411,"Christianpatty, Nagaon - PCBA",2025-03-30 11:53:37,12237794,RELATIVEHUMIDITY,72.0,%
9,3409411,"Christianpatty, Nagaon - PCBA",2025-03-30 11:37:52,12237796,TEMPERATURE,27.8,c


In [None]:
import dlt
import requests
from datetime import datetime
from zoneinfo import ZoneInfo

import os
import warnings

# Read the OpenAQ API key from the environment instead of committing it to the
# notebook, where every copy of the file would carry the credential.
# NOTE(review): the key that used to be written here should be treated as
# exposed and rotated.
API_KEY = os.environ.get("OPENAQ_API_KEY", "")
if not API_KEY:
    warnings.warn(
        "OPENAQ_API_KEY is not set; requests to the OpenAQ API are expected to fail authentication."
    )

# No write_disposition is passed here (the "merge" setting was disabled), so
# dlt's default applies.
@dlt.resource(
    table_name="location_info",
    primary_key="id",
)
def get_location_info():
    """Fetch the OpenAQ v3 metadata record for location 363601.

    Yields:
        dict: The decoded JSON body of the response, yielded whole as one item.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    response = requests.get(
        "https://api.openaq.org/v3/locations/363601",
        headers={"X-API-Key": API_KEY},
        # Without a timeout, requests waits indefinitely on a stalled connection.
        timeout=30,
    )
    response.raise_for_status()
    yield response.json()

# No write_disposition is passed here (the "merge" setting was disabled), so
# dlt's default applies.
@dlt.resource(
    table_name="latest_measurements",
    primary_key="sensorsId",
)
def get_latest_info():
    """Fetch the response of the OpenAQ v3 ``/latest`` endpoint for location 363601.

    Yields:
        dict: The decoded JSON body of the response, yielded whole as one item.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    response = requests.get(
        "https://api.openaq.org/v3/locations/363601/latest",
        headers={"X-API-Key": API_KEY},
        # Without a timeout, requests waits indefinitely on a stalled connection.
        timeout=30,
    )
    response.raise_for_status()
    yield response.json()

@dlt.transformer
def joined_measurements(latest_readings):
    """Join each latest reading with its location and sensor metadata.

    Args:
        latest_readings: One response dictionary from the latest-measurements
            resource; the readings are read from its "results" list.

    Yields:
        dict: One row per reading with the keys location_id, location_name,
            timestamp, sensor_id, sensor_name, value and unit. The timestamp is
            the current IST wall-clock time at processing, not a value taken
            from the reading.
    """
    # Only one response is produced by the location resource; use it.
    metadata_pages = list(get_location_info())
    metadata = metadata_pages[0]
    location_records = metadata["results"]

    # Location identity comes from the first record; placeholders if there is none.
    if not location_records:
        location_id = location_name = "unknown"
    else:
        primary_record = location_records[0]
        location_id = primary_record["id"]
        location_name = primary_record["name"]

    # Lookup: sensor id -> upper-cased parameter name plus unit
    sensor_map = {}
    for record in location_records:
        for sensor in record["sensors"]:
            sensor_key = sensor["id"]
            parameter = sensor["parameter"]
            sensor_map[sensor_key] = {
                "name": parameter["name"].upper(),
                "unit": parameter["units"],
            }

    # A single IST timestamp is stamped on every row from this response.
    ist_now = datetime.now(ZoneInfo("Asia/Kolkata"))
    current_ist_timestamp = ist_now.strftime('%Y-%m-%d %H:%M:%S')

    # Sensors absent from the metadata are labelled with placeholders.
    placeholder_sensor = {"name": "UNKNOWN", "unit": "unknown"}
    for reading in latest_readings["results"]:
        reading_sensor_id = reading["sensorsId"]
        sensor_info = sensor_map.get(reading_sensor_id, placeholder_sensor)
        yield {
            "location_id": location_id,
            "location_name": location_name,
            "timestamp": current_ist_timestamp,
            "sensor_id": reading_sensor_id,
            "sensor_name": sensor_info["name"],
            "value": reading["value"],
            "unit": sensor_info["unit"],
        }

# DuckDB-backed pipeline; loaded rows go to the "openaq_sivasagar" dataset.
pipeline = dlt.pipeline(pipeline_name="openaq_data", destination="duckdb", dataset_name="openaq_sivasagar")

# Pipe the latest-measurements resource through the join transformer, then load it.
enriched_readings = get_latest_info() | joined_measurements
load_info = pipeline.run(enriched_readings)

print(load_info)

Pipeline openaq_data load step completed in 1.07 seconds
1 load package(s) were loaded to destination duckdb and into dataset openaq_sivasagar
The duckdb destination used duckdb:////content/openaq_data.duckdb location to store data
Load package 1743319940.421937 is LOADED and contains no failed jobs


In [None]:
#chatgpt
import duckdb
import pandas as pd

# Connect to the DuckDB database file
conn = duckdb.connect(database='/content/openaq_data.duckdb', read_only=False)

# The pipeline's tables live in a schema named after the dataset, so a plain
# SHOW TABLES (current schema only) comes back empty. SHOW ALL TABLES lists the
# tables of every schema.
tables = conn.execute("SHOW ALL TABLES").fetchdf()
print("Available tables:")
print(tables)

# Query the joined_measurements table stored in the 'openaq_sivasagar' dataset
query = """
SELECT location_id, location_name, strftime(timestamp, '%Y-%m-%d %H:%M:%S') AS timestamp, sensor_id, sensor_name, value, unit
FROM openaq_sivasagar.joined_measurements
ORDER BY timestamp DESC
LIMIT 100
"""

# Execute the query and fetch the results as a Pandas DataFrame
df = conn.execute(query).fetchdf()

# Display the results
print("Sample results:")
print(df)

Available tables:
Empty DataFrame
Columns: [name]
Index: []
Sample results:
    location_id                    location_name            timestamp  \
0        363601  Girls College, Sivasagar - PCBA  2025-03-30 13:02:20   
1        363601  Girls College, Sivasagar - PCBA  2025-03-30 13:02:20   
2        363601  Girls College, Sivasagar - PCBA  2025-03-30 13:02:20   
3        363601  Girls College, Sivasagar - PCBA  2025-03-30 13:02:20   
4        363601  Girls College, Sivasagar - PCBA  2025-03-30 13:02:20   
5        363601  Girls College, Sivasagar - PCBA  2025-03-30 13:02:20   
6        363601  Girls College, Sivasagar - PCBA  2025-03-30 13:02:20   
7        363601  Girls College, Sivasagar - PCBA  2025-03-30 13:02:20   
8        363601  Girls College, Sivasagar - PCBA  2025-03-30 13:02:20   
9        363601  Girls College, Sivasagar - PCBA  2025-03-30 13:02:20   
10       363601  Girls College, Sivasagar - PCBA  2025-03-30 13:02:20   
11       363601  Girls College, Sivasagar - PCBA

location_list =[363601, 3409411, 3409391, 10903, 3409390, 42240, 3409360, 3409375]

In [None]:
#final
import dlt
import requests
import time
from datetime import datetime
from zoneinfo import ZoneInfo

import os
import warnings

# Read the OpenAQ API key from the environment instead of committing it to the
# notebook, where every copy of the file would carry the credential.
# NOTE(review): the key that used to be written here should be treated as
# exposed and rotated.
API_KEY = os.environ.get("OPENAQ_API_KEY", "")
if not API_KEY:
    warnings.warn(
        "OPENAQ_API_KEY is not set; requests to the OpenAQ API are expected to fail authentication."
    )
location_list = [3409411, 3409391, 3409390, 3409360, 3409375, 3409376] # openaq location ids having sensor sets in and around my city

@dlt.resource(
    table_name="location_info",
    write_disposition="merge",
    primary_key="id",
)
def get_location_info():
    """Fetch OpenAQ v3 metadata for every id in ``location_list``.

    Yields:
        dict: One decoded JSON response per location id, in list order.

    Raises:
        requests.HTTPError: If the API answers any request with an error status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    headers = {"X-API-Key": API_KEY}
    for loc_id in location_list:
        response = requests.get(
            f"https://api.openaq.org/v3/locations/{loc_id}",
            headers=headers,
            # Without a timeout, requests waits indefinitely on a stalled connection.
            timeout=30,
        )
        response.raise_for_status()
        yield response.json()
        # Pause between requests to avoid hitting rate limits.
        time.sleep(1)

@dlt.resource(
    table_name="latest_measurements",
    write_disposition="append",
    primary_key="sensorsId",
)
def get_latest_info():
    """Fetch the OpenAQ v3 ``/latest`` response for every id in ``location_list``.

    Yields:
        dict: One decoded JSON response per location id, in list order.

    Raises:
        requests.HTTPError: If the API answers any request with an error status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    headers = {"X-API-Key": API_KEY}
    for loc_id in location_list:
        response = requests.get(
            f"https://api.openaq.org/v3/locations/{loc_id}/latest",
            headers=headers,
            # Without a timeout, requests waits indefinitely on a stalled connection.
            timeout=30,
        )
        response.raise_for_status()
        yield response.json()
        # Pause between requests to avoid hitting rate limits.
        time.sleep(1)

from functools import lru_cache


@lru_cache(maxsize=1)
def _load_sensor_map():
    """Build the (location id, sensor id) -> metadata lookup, fetching it only once.

    The transformer below is invoked once per incoming page. Caching the lookup
    means the location endpoint is queried a single time instead of once per
    page. Re-running the cell, or calling ``_load_sensor_map.cache_clear()``,
    forces a fresh download.
    """
    sensor_map = {}
    for page in get_location_info():
        for location in page.get("results", []):
            loc_id = location.get("id")
            loc_name = location.get("name", "unknown")
            for sensor in location.get("sensors", []):
                sensor_map[(loc_id, sensor.get("id"))] = {
                    "name": sensor["parameter"]["name"].upper(),  # Uppercase sensor name.
                    "unit": sensor["parameter"]["units"],
                    "location_name": loc_name,
                }
    return sensor_map


@dlt.transformer
def joined_measurements(latest_readings):
    """Join one page of latest readings with location and sensor metadata.

    Args:
        latest_readings: One response dictionary from the latest-measurements
            resource; the readings are read from its "results" list.

    Yields:
        dict: One row per reading with the keys location_id, location_name,
            timestamp, sensor_id, sensor_name, value and unit. The timestamp is
            the current IST wall-clock time at processing, not a value taken
            from the reading.
    """
    sensor_map = _load_sensor_map()
    # Labels used when a (location, sensor) pair is missing from the metadata.
    unknown_sensor = {"name": "UNKNOWN", "unit": "unknown", "location_name": "unknown"}

    # Formatted once per page; from here on the value is a plain string and is
    # emitted as-is (a str has no .strftime method).
    current_ist_timestamp = datetime.now(ZoneInfo("Asia/Kolkata")).strftime('%Y-%m-%d %H:%M:%S')

    for reading in latest_readings.get("results", []):
        location_id = reading.get("locationsId")
        sensor_id = reading.get("sensorsId")
        sensor_info = sensor_map.get((location_id, sensor_id), unknown_sensor)
        yield {
            "location_id": location_id,
            "location_name": sensor_info.get("location_name"),
            "timestamp": current_ist_timestamp,
            "sensor_id": sensor_id,
            "sensor_name": sensor_info.get("name"),
            "value": reading.get("value"),
            "unit": sensor_info.get("unit")
        }

# DuckDB-backed pipeline; loaded rows go to the "openaq_hourly_readings" dataset.
pipeline = dlt.pipeline(pipeline_name="openaq_data", destination="duckdb", dataset_name="openaq_hourly_readings")

# The resource is piped into the transformer uncalled (no parentheses), then loaded.
hourly_readings = get_latest_info | joined_measurements
load_info = pipeline.run(hourly_readings)

print(load_info)

Pipeline openaq_data load step completed in 0.05 seconds
1 load package(s) were loaded to destination duckdb and into dataset openaq_hourly_readings
The duckdb destination used duckdb:////content/openaq_data.duckdb location to store data
Load package 1743362639.8415236 is LOADED and contains no failed jobs


In [None]:
#chatgpt
import duckdb
import pandas as pd

# Connect to the DuckDB database file
conn = duckdb.connect(database='/content/openaq_data.duckdb', read_only=False)

# The pipeline's tables live in a schema named after the dataset, so a plain
# SHOW TABLES (current schema only) comes back empty. SHOW ALL TABLES lists the
# tables of every schema.
tables = conn.execute("SHOW ALL TABLES").fetchdf()
print("Available tables:")
print(tables)

# Query the joined_measurements table stored in the 'openaq_hourly_readings' dataset
query = """
SELECT location_id, location_name, strftime(timestamp, '%Y-%m-%d %H:%M:%S') AS timestamp, sensor_id, sensor_name, value, unit
FROM openaq_hourly_readings.joined_measurements
ORDER BY timestamp DESC
LIMIT 100
"""

# Execute the query and fetch the results as a Pandas DataFrame
df = conn.execute(query).fetchdf()

# Display the results
print("Sample results:")
print(df)

Available tables:
Empty DataFrame
Columns: [name]
Index: []
Sample results:
    location_id                             location_name  \
0       3409376  Central Academy for SFS, Byrnihat - PCBA   
1       3409376  Central Academy for SFS, Byrnihat - PCBA   
2       3409376  Central Academy for SFS, Byrnihat - PCBA   
3       3409376  Central Academy for SFS, Byrnihat - PCBA   
4       3409376  Central Academy for SFS, Byrnihat - PCBA   
..          ...                                       ...   
95      3409391                   Tarapur, Silchar - PCBA   
96      3409391                   Tarapur, Silchar - PCBA   
97      3409391                   Tarapur, Silchar - PCBA   
98      3409391                   Tarapur, Silchar - PCBA   
99      3409411             Christianpatty, Nagaon - PCBA   

              timestamp  sensor_id  sensor_name   value   unit  
0   2025-03-31 00:54:57   12237491          NO2   12.60    ppb  
1   2025-03-31 00:54:57   12237497  TEMPERATURE   20.00      

In [None]:
# (rows, columns) of the query result
df.shape

(100, 7)

In [None]:
# Preview the first rows of the query result
df.head()

Unnamed: 0,location_id,location_name,timestamp,sensor_id,sensor_name,value,unit
0,3409376,"Central Academy for SFS, Byrnihat - PCBA",2025-03-31 00:54:57,12237491,NO2,12.6,ppb
1,3409376,"Central Academy for SFS, Byrnihat - PCBA",2025-03-31 00:54:57,12237497,TEMPERATURE,20.0,c
2,3409376,"Central Academy for SFS, Byrnihat - PCBA",2025-03-31 00:54:57,12237489,CO,1.13,ppb
3,3409376,"Central Academy for SFS, Byrnihat - PCBA",2025-03-31 00:54:57,12237492,O3,25.0,µg/m³
4,3409376,"Central Academy for SFS, Byrnihat - PCBA",2025-03-31 00:54:57,12237494,PM25,141.0,µg/m³


In [None]:
# Number of distinct locations present in the result
df["location_name"].nunique()

6

In [None]:
# Distinct location names, in order of first appearance
df["location_name"].unique()

array(['Central Academy for SFS, Byrnihat - PCBA',
       'Bata Chowk, Nalbari - PCBA', 'IITG, Guwahati - PCBA',
       'LGBI Airport, Guwahati - PCBA', 'Tarapur, Silchar - PCBA',
       'Christianpatty, Nagaon - PCBA'], dtype=object)

In [None]:
# Row count per location
df["location_name"].value_counts()

Unnamed: 0_level_0,count
location_name,Unnamed: 1_level_1
"Central Academy for SFS, Byrnihat - PCBA",18
"Bata Chowk, Nalbari - PCBA",18
"IITG, Guwahati - PCBA",18
"LGBI Airport, Guwahati - PCBA",18
"Tarapur, Silchar - PCBA",18
"Christianpatty, Nagaon - PCBA",10


In [None]:
# Rows belonging to the Nagaon station only
df.loc[df["location_name"] == 'Christianpatty, Nagaon - PCBA']

Unnamed: 0,location_id,location_name,timestamp,sensor_id,sensor_name,value,unit
45,3409411,"Christianpatty, Nagaon - PCBA",2025-03-31 00:49:57,12237790,NO2,7.8,ppb
46,3409411,"Christianpatty, Nagaon - PCBA",2025-03-31 00:49:57,12237792,PM10,131.0,µg/m³
47,3409411,"Christianpatty, Nagaon - PCBA",2025-03-31 00:49:57,12237794,RELATIVEHUMIDITY,80.0,%
48,3409411,"Christianpatty, Nagaon - PCBA",2025-03-31 00:49:57,12237789,NO,2.1,ppb
49,3409411,"Christianpatty, Nagaon - PCBA",2025-03-31 00:49:57,12237795,SO2,13.1,ppb
50,3409411,"Christianpatty, Nagaon - PCBA",2025-03-31 00:49:57,12237796,TEMPERATURE,25.1,c
51,3409411,"Christianpatty, Nagaon - PCBA",2025-03-31 00:49:57,12237791,O3,31.7,µg/m³
52,3409411,"Christianpatty, Nagaon - PCBA",2025-03-31 00:49:57,12237788,CO,1.09,ppb
53,3409411,"Christianpatty, Nagaon - PCBA",2025-03-31 00:49:57,12237793,PM25,73.0,µg/m³
99,3409411,"Christianpatty, Nagaon - PCBA",2025-03-31 00:47:36,12237789,NO,2.1,ppb


In [None]:
# Distinct sensor ids reported by each location, one row per location
sensor_ids_by_location = df.groupby('location_name')['sensor_id']
result = sensor_ids_by_location.unique().reset_index()
print(result)

                              location_name  \
0                Bata Chowk, Nalbari - PCBA   
1  Central Academy for SFS, Byrnihat - PCBA   
2             Christianpatty, Nagaon - PCBA   
3                     IITG, Guwahati - PCBA   
4             LGBI Airport, Guwahati - PCBA   
5                   Tarapur, Silchar - PCBA   

                                           sensor_id  
0  [12237482, 12237488, 12237480, 12237484, 12237...  
1  [12237491, 12237489, 12237490, 12237497, 12237...  
2  [12237790, 12237792, 12237794, 12237789, 12237...  
3  [12237370, 12237364, 12237367, 12237369, 12237...  
4  [12237615, 12237614, 12237621, 12237616, 12237...  
5  [12237629, 12237624, 12237623, 12237628, 12237...  
