<a href="https://colab.research.google.com/github/Ashi743/my_projects-/blob/main/air_quality_assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import requests
import json
import pandas as pd  # For converting data into a DataFrame (optional)


# Example API URL
url = "https://api.energyandcleanair.org/stations?country=GB,US,TR,PH,IN,TH&format=geojson"  # Replace with your API endpoint

# Make the GET request
response = requests.get(url)

# Check for successful request
if response.status_code == 200:
    print("API request successful!")
    data = response.json()  # Parse the JSON response
else:
    print(f"Failed to fetch data. Status code: {response.status_code}")


API request successful!


In [15]:
features = data["features"]

extracted_data = []
for feature in features:
    geometry = feature["geometry"]
    properties = feature["properties"]
    extracted_data.append({
        "longitude": geometry["coordinates"][0],
        "latitude": geometry["coordinates"][1],
        "name": properties["name"],
        "pollutants": properties["pollutants"],
        "last_updated": properties["last_updated"],
        "city_name": properties["city_name"],
        "timezone": properties["timezone"]
    })

# Convert to DataFrame
df = pd.DataFrame(extracted_data)
df

Unnamed: 0,longitude,latitude,name,pollutants,last_updated,city_name,timezone
0,-157.969131,21.392834,Pearl City,"[pm10, pm25]",2022-04-06T00:00:00+00:00,"Pearl City, Manana",Pacific/Honolulu
1,-119.789000,39.506000,Plumb-Kit,[pm10],2018-01-04T00:00:00+00:00,Reno,America/Los_Angeles
2,-90.705200,38.490200,Pacific,[o3],2024-08-24T00:00:00+00:00,Pacific,America/Chicago
3,-118.823033,45.652231,Pendleton - McKay Cr,[pm25],2024-08-24T00:00:00+00:00,Pendleton,America/Los_Angeles
4,-87.270800,30.368100,Pensacola NAS,"[o3, pm25]",2024-08-24T00:00:00+00:00,Warrington,America/Chicago
...,...,...,...,...,...,...,...
6099,-1.918235,52.481346,station_gb1097a,"[no2, o3]",2024-08-18T00:00:00+00:00,Birmingham,Europe/London
6100,-0.037418,51.449674,station_gb1098a,"[pm10, pm25]",2024-08-17T00:00:00+00:00,London,Europe/London
6101,-1.986008,52.508337,station_gb1099a,[no2],2024-08-17T00:00:00+00:00,West Bromwich,Europe/London
6102,-2.453492,53.115941,station_gb1100a,[no2],2024-08-17T00:00:00+00:00,Crewe,Europe/London


In [16]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6104 entries, 0 to 6103
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   longitude     6104 non-null   float64
 1   latitude      6104 non-null   float64
 2   name          6104 non-null   object 
 3   pollutants    6050 non-null   object 
 4   last_updated  6104 non-null   object 
 5   city_name     6104 non-null   object 
 6   timezone      5720 non-null   object 
dtypes: float64(2), object(5)
memory usage: 333.9+ KB


In [17]:
import geopandas as gpd

# Upload the GeoJSON file
from google.colab import files
uploaded = files.upload()



Saving countries.geojson to countries.geojson


In [21]:
# Read the GeoJSON file using Geopandas
file_name = list(uploaded.keys())[0]  # Get the uploaded file name
countries_gdf = gpd.read_file(file_name)
countries_gdf.head()


Unnamed: 0,ADMIN,ISO_A3,ISO_A2,geometry
0,Aruba,ABW,AW,"MULTIPOLYGON (((-69.99694 12.57758, -69.93639 ..."
1,Afghanistan,AFG,AF,"MULTIPOLYGON (((71.0498 38.40866, 71.05714 38...."
2,Angola,AGO,AO,"MULTIPOLYGON (((11.73752 -16.69258, 11.73851 -..."
3,Anguilla,AIA,AI,"MULTIPOLYGON (((-63.03767 18.21296, -63.09952 ..."
4,Albania,ALB,AL,"MULTIPOLYGON (((19.74777 42.5789, 19.74601 42...."


In [76]:
stations_gdf = gpd.GeoDataFrame.from_features(data['features'])
stations_gdf.head(11)

Unnamed: 0,geometry,last_scraped_data,type,gpw,level,infos,gadm1_id,names,last_updated,city_id,attribution,gadm2_id,country_id,source,first_updated,pollutants,name,id,timezone,show_in_dashboard,city_name
0,POINT (-157.96913 21.39283),,,,station,"{'sources': '', 'isMobile': False, 'isAnalysis...",usa.12_1,[Pearl City],2022-04-06T00:00:00+00:00,"pearl city, manana_usa.12_1_us",,usa.12.2_1,US,openaq,,"[pm10, pm25]",Pearl City,1026,Pacific/Honolulu,False,"Pearl City, Manana"
1,POINT (-119.789 39.506),,,,station,"{'sources': '', 'isMobile': False, 'isAnalysis...",usa.29_1,[Plumb-Kit],2018-01-04T00:00:00+00:00,reno_usa.29_1_us,,usa.29.16_1,US,openaq,,[pm10],Plumb-Kit,1046,America/Los_Angeles,False,Reno
2,POINT (-90.7052 38.4902),,,,station,"{'sources': '', 'isMobile': False, 'isAnalysis...",usa.26_1,[Pacific],2024-08-24T00:00:00+00:00,pacific_usa.26_1_us,,usa.26.95_1,US,openaq,,[o3],Pacific,1002,America/Chicago,False,Pacific
3,POINT (-118.82303 45.65223),,,,station,"{'sources': '', 'isMobile': False, 'isAnalysis...",usa.38_1,[Pendleton - McKay Cr],2024-08-24T00:00:00+00:00,pendleton_usa.38_1_us,,usa.38.30_1,US,openaq,,[pm25],Pendleton - McKay Cr,1029,America/Los_Angeles,False,Pendleton
4,POINT (-87.2708 30.3681),,,,station,"{'sources': '', 'isMobile': False, 'isAnalysis...",usa.10_1,[Pensacola NAS],2024-08-24T00:00:00+00:00,warrington_usa.10_1_us,,usa.10.16_1,US,openaq,,"[o3, pm25]",Pensacola NAS,1030,America/Chicago,False,Warrington
5,POINT (-94.68395 39.05888),,,,station,"{'sources': '', 'isMobile': False, 'isAnalysis...",usa.17_1,[1235984],2024-08-24T00:00:00+00:00,merriam_usa.17_1_us,,usa.17.105_1,US,openaq,,"[pm10, pm25]",1235984,1235984,America/Chicago,False,Merriam
6,POINT (-109.8919 34.8225),,,,station,"{'sources': '', 'isMobile': False, 'isAnalysis...",usa.3_1,[Petrified Forest NP],2022-05-21T00:00:00+00:00,holbrook_usa.3_1_us,,usa.3.10_1,US,openaq,,[o3],Petrified Forest NP,1034,America/Phoenix,False,Holbrook
7,POINT (-118.1167 36.8028),,,,station,"{'sources': '', 'isMobile': False, 'isAnalysis...",usa.5_1,[Fort Independence Me],2020-01-13T00:00:00+00:00,lone pine_usa.5_1_us,,usa.5.14_1,US,openaq,,[pm10],Fort Independence Me,1370,America/Los_Angeles,False,Lone Pine
8,POINT (-117.58981 34.42505),,,,station,"{'sources': '', 'isMobile': False, 'isAnalysis...",usa.5_1,[Phelan - Beekley Roa],2022-05-21T00:00:00+00:00,phelan_usa.5_1_us,,usa.5.36_1,US,openaq,,[o3],Phelan - Beekley Roa,1035,America/Los_Angeles,False,Phelan
9,POINT (38.39856 40.91445),,background,,station,"{'city_code': '28.0', 'TownId': 'd92d9a8d-91f6...",tur.34_1,,2024-06-25T00:00:00+00:00,giresun_tur.34_1_tr,,tur.34.12_1,TR,csb,,"[co, no2, o3, pm10, pm25, so2]",Giresun - Gemilercekeği,0dfb1fb4-230e-417d-92cb-e354d8495ef9,,False,Giresun


In [75]:
# Initialize an empty list to store rows containing 'pm10'
pm10_rows = []

# Iterate through the GeoDataFrame
for index, row in stations_gdf.iterrows():
    # Check if 'pollutants' key exists and its value is iterable and not None
    if row["pollutants"] and "pm10" in row["pollutants"]:
        pm10_rows.append(row)

# Create a new GeoDataFrame from the filtered rows
gdf_pm10 = gpd.GeoDataFrame(pm10_rows, geometry="geometry")

# Display the new GeoDataFrame
gdf_pm10.head()


Unnamed: 0,geometry,last_scraped_data,type,gpw,level,infos,gadm1_id,names,last_updated,city_id,attribution,gadm2_id,country_id,source,first_updated,pollutants,name,id,timezone,show_in_dashboard,city_name
0,POINT (-157.96913 21.39283),,,,station,"{'sources': '', 'isMobile': False, 'isAnalysis...",usa.12_1,[Pearl City],2022-04-06T00:00:00+00:00,"pearl city, manana_usa.12_1_us",,usa.12.2_1,US,openaq,,"[pm10, pm25]",Pearl City,1026,Pacific/Honolulu,False,"Pearl City, Manana"
1,POINT (-119.789 39.506),,,,station,"{'sources': '', 'isMobile': False, 'isAnalysis...",usa.29_1,[Plumb-Kit],2018-01-04T00:00:00+00:00,reno_usa.29_1_us,,usa.29.16_1,US,openaq,,[pm10],Plumb-Kit,1046,America/Los_Angeles,False,Reno
5,POINT (-94.68395 39.05888),,,,station,"{'sources': '', 'isMobile': False, 'isAnalysis...",usa.17_1,[1235984],2024-08-24T00:00:00+00:00,merriam_usa.17_1_us,,usa.17.105_1,US,openaq,,"[pm10, pm25]",1235984,1235984,America/Chicago,False,Merriam
7,POINT (-118.1167 36.8028),,,,station,"{'sources': '', 'isMobile': False, 'isAnalysis...",usa.5_1,[Fort Independence Me],2020-01-13T00:00:00+00:00,lone pine_usa.5_1_us,,usa.5.14_1,US,openaq,,[pm10],Fort Independence Me,1370,America/Los_Angeles,False,Lone Pine
9,POINT (38.39856 40.91445),,background,,station,"{'city_code': '28.0', 'TownId': 'd92d9a8d-91f6...",tur.34_1,,2024-06-25T00:00:00+00:00,giresun_tur.34_1_tr,,tur.34.12_1,TR,csb,,"[co, no2, o3, pm10, pm25, so2]",Giresun - Gemilercekeği,0dfb1fb4-230e-417d-92cb-e354d8495ef9,,False,Giresun


In [77]:
gdf_pm10["longitude"] = gdf_pm10["geometry"].x
gdf_pm10["latitude"] = gdf_pm10["geometry"].y



In [79]:
gdf_pm10

Unnamed: 0,geometry,last_scraped_data,type,gpw,level,infos,gadm1_id,names,last_updated,city_id,attribution,gadm2_id,country_id,source,first_updated,pollutants,name,id,timezone,show_in_dashboard,city_name,longitude,latitude
0,POINT (-157.96913 21.39283),,,,station,"{'sources': '', 'isMobile': False, 'isAnalysis...",usa.12_1,[Pearl City],2022-04-06T00:00:00+00:00,"pearl city, manana_usa.12_1_us",,usa.12.2_1,US,openaq,,"[pm10, pm25]",Pearl City,1026,Pacific/Honolulu,False,"Pearl City, Manana",-157.969131,21.392834
1,POINT (-119.789 39.506),,,,station,"{'sources': '', 'isMobile': False, 'isAnalysis...",usa.29_1,[Plumb-Kit],2018-01-04T00:00:00+00:00,reno_usa.29_1_us,,usa.29.16_1,US,openaq,,[pm10],Plumb-Kit,1046,America/Los_Angeles,False,Reno,-119.789000,39.506000
5,POINT (-94.68395 39.05888),,,,station,"{'sources': '', 'isMobile': False, 'isAnalysis...",usa.17_1,[1235984],2024-08-24T00:00:00+00:00,merriam_usa.17_1_us,,usa.17.105_1,US,openaq,,"[pm10, pm25]",1235984,1235984,America/Chicago,False,Merriam,-94.683951,39.058881
7,POINT (-118.1167 36.8028),,,,station,"{'sources': '', 'isMobile': False, 'isAnalysis...",usa.5_1,[Fort Independence Me],2020-01-13T00:00:00+00:00,lone pine_usa.5_1_us,,usa.5.14_1,US,openaq,,[pm10],Fort Independence Me,1370,America/Los_Angeles,False,Lone Pine,-118.116700,36.802800
9,POINT (38.39856 40.91445),,background,,station,"{'city_code': '28.0', 'TownId': 'd92d9a8d-91f6...",tur.34_1,,2024-06-25T00:00:00+00:00,giresun_tur.34_1_tr,,tur.34.12_1,TR,csb,,"[co, no2, o3, pm10, pm25, so2]",Giresun - Gemilercekeği,0dfb1fb4-230e-417d-92cb-e354d8495ef9,,False,Giresun,38.398564,40.914445
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6084,POINT (-4.73442 55.94408),,traffic,,station,,gbr.3_1,[station_gb1062a],2024-08-17T00:00:00+00:00,greenock_gbr.3_1_gb,,gbr.3.18_1,GB,eea,,"[no2, pm10, pm25]",station_gb1062a,station_gb1062a,Europe/London,False,Greenock,-4.734421,55.944079
6093,POINT (-2.58398 51.45797),,traffic,,station,,gbr.1_1,[station_gb1072a],2024-08-17T00:00:00+00:00,bristol_gbr.1_1_gb,,gbr.1.12_1,GB,eea,,"[no2, pm10]",station_gb1072a,station_gb1072a,Europe/London,False,Bristol,-2.583975,51.457968
6095,POINT (-1.09556 50.79834),,traffic,,station,,gbr.1_1,[station_gb1093a],2024-08-17T00:00:00+00:00,portsmouth_gbr.1_1_gb,,gbr.1.73_1,GB,eea,,"[no2, pm10]",station_gb1093a,station_gb1093a,Europe/London,False,Portsmouth,-1.095558,50.798339
6097,POINT (-3.15231 51.49096),,traffic,,station,,gbr.4_1,[station_gb1095a],2024-08-17T00:00:00+00:00,cardiff_gbr.4_1_gb,,gbr.4.5_1,GB,eea,,"[no2, pm10]",station_gb1095a,station_gb1095a,Europe/London,False,Cardiff,-3.152305,51.490960


In [78]:
gdf_pm10["country_id"].unique()

array(['US', 'TR', 'TH', 'PH', 'IN', 'GB'], dtype=object)

In [22]:
selected_countries = ["United States", "United Kingdom", "Turkey", "Thailand", "Philippines", "India"]
filtered_countries = countries_gdf[countries_gdf["ADMIN"].isin(selected_countries)]

In [61]:
filtered_countries.head()

Unnamed: 0,ADMIN,ISO_A3,ISO_A2,geometry,Area_km2
81,United Kingdom,GBR,GB,"MULTIPOLYGON (((-6.28751 49.91401, -6.29727 49...",243782.5
104,India,IND,IN,"MULTIPOLYGON (((93.85532 7.21418, 93.865 7.200...",3151478.0
179,Philippines,PHL,PH,"MULTIPOLYGON (((119.84978 4.79686, 119.83399 4...",293237.5
223,Thailand,THA,TH,"MULTIPOLYGON (((99.24781 6.57486, 99.26026 6.5...",514453.5
230,Turkey,TUR,TR,"MULTIPOLYGON (((26.04005 39.84504, 26.04623 39...",780080.2


In [86]:
import folium
from folium.plugins import HeatMap

# Step 1: Calculate the number of stations per country
stations_per_country = gdf_pm10.groupby("country_id").size().reset_index(name="station_count")

# Step 2: Merge station data with country GeoDataFrame
filtered_countries["centroid"] = filtered_countries.geometry.centroid  # Calculate centroids
country_data = filtered_countries.merge(stations_per_country, left_on="ISO_A3", right_on="country_id", how="left")

# Step 3: Prepare heatmap data (latitude and longitude of stations)
heatmap_data = gdf_pm10[["geometry"]].apply(lambda row: [row.geometry.y, row.geometry.x], axis=1).tolist()

# Step 4: Create a base map
m = folium.Map(location=[0, 0], zoom_start=2)

# Step 5: Add the HeatMap layer
HeatMap(data=heatmap_data, radius=10, blur=15, max_zoom=1).add_to(m)

# Step 6: Add country names and station counts
for _, row in country_data.iterrows():
    if not pd.isna(row["station_count"]):  # Only include countries with stations
        folium.Marker(
            location=[row["centroid"].y, row["centroid"].x],
            popup=f"{row['ADMIN']}: {int(row['station_count'])} stations",
            icon=folium.Icon(color="blue", icon="info-sign"),
        ).add_to(m)
m


  filtered_countries["centroid"] = filtered_countries.geometry.centroid  # Calculate centroids
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [51]:

stations_in_selected_countries = gdf_pm10[gdf_pm10["country_id"].isin(["GB", "US", "TR", "PH", "IN", "TH"])]

In [53]:
stations_per_country = gpd.sjoin(
    filtered_countries, stations_in_selected_countries, how="left", predicate="contains"
).groupby("ADMIN").size().reset_index(name="PM10 Stations")

Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:4326
Right CRS: None

  stations_per_country = gpd.sjoin(


In [54]:
filtered_countries["Area_km2"] = filtered_countries.to_crs({"proj": "cea"}).area / 1e6  # Convert to square kilometers
result = pd.merge(
    filtered_countries[["ADMIN", "Area_km2"]],
    stations_per_country,
    left_on="ADMIN",
    right_on="ADMIN",
    how="left",
)

result["Density_per_1000_km2"] = result["PM10 Stations"] / result["Area_km2"] * 1000

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [55]:
final_table = result.sort_values(by="Density_per_1000_km2", ascending=False)[
    ["ADMIN", "PM10 Stations", "Area_km2", "Density_per_1000_km2"]
].rename(columns={
    "ADMIN": "Country Name",
    "PM10 Stations": "Number of PM10 Stations",
    "Area_km2": "Area (in square kilometers)",
    "Density_per_1000_km2": "Density of PM10 Stations per 1,000 sq. km",
})

In [56]:
final_table

Unnamed: 0,Country Name,Number of PM10 Stations,Area (in square kilometers),"Density of PM10 Stations per 1,000 sq. km"
0,United Kingdom,163,243782.5,0.668629
4,Turkey,324,780080.2,0.415342
3,Thailand,161,514453.5,0.312953
1,India,534,3151478.0,0.169444
2,Philippines,19,293237.5,0.064794
