Maps city location to latitude and longitude values.

Needs to be edited for use with specific stations.

Imports

In [2]:
import pandas as pd
from geopy.geocoders import Nominatim
import time

Load dataset

In [3]:
# Read the daily city-level table and convert its 'Date' column to datetime
# values in a single chained expression.
df = (
    pd.read_csv('city_day.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)
# Preview the first rows as a quick sanity check of the load.
print(df.head())

        City       Date  PM2.5  PM10     NO    NO2    NOx  NH3     CO    SO2  \
0  Ahmedabad 2015-01-01    NaN   NaN   0.92  18.22  17.15  NaN   0.92  27.64   
1  Ahmedabad 2015-01-02    NaN   NaN   0.97  15.69  16.46  NaN   0.97  24.55   
2  Ahmedabad 2015-01-03    NaN   NaN  17.40  19.30  29.70  NaN  17.40  29.07   
3  Ahmedabad 2015-01-04    NaN   NaN   1.70  18.48  17.97  NaN   1.70  18.59   
4  Ahmedabad 2015-01-05    NaN   NaN  22.10  21.42  37.76  NaN  22.10  39.33   

       O3  Benzene  Toluene  Xylene  AQI AQI_Bucket  
0  133.36     0.00     0.02    0.00  NaN        NaN  
1   34.06     3.68     5.50    3.77  NaN        NaN  
2   30.70     6.80    16.40    2.25  NaN        NaN  
3   36.08     4.43    10.14    1.00  NaN        NaN  
4   39.31     7.01    18.89    2.78  NaN        NaN  


Use the Nominatim geocoder to look up the latitude and longitude of each city (written with assistance from ChatGPT).

In [4]:
# Initialize the shared Nominatim geocoder that get_coords() queries.
# NOTE(review): "geoapi" is a very generic user_agent; Nominatim's usage policy
# asks for a value that identifies the application — consider a project-specific
# string here.
geolocator = Nominatim(user_agent="geoapi")

def get_coords(city):
    """Look up the latitude and longitude of a place in India.

    The query sent to the module-level ``geolocator`` is ``city + ", India"``.

    Parameters
    ----------
    city : str
        Place name to search for (e.g. ``"Ahmedabad"`` or
        ``"Amaravati, Andhra Pradesh"``).

    Returns
    -------
    tuple
        ``(latitude, longitude)`` of the match, or ``(None, None)`` when the
        lookup raises an error or finds no match. In the failure case a
        "Could not geocode city" message is printed.
    """
    try:
        # Building the query stays inside the try so a non-string ``city``
        # (e.g. NaN from a missing value) is reported like any other failure.
        location = geolocator.geocode(city + ", India")
    except Exception:
        # Best-effort lookup: one failing request must not abort a long batch.
        # Catching Exception (rather than a bare ``except``) still lets
        # KeyboardInterrupt / SystemExit through, so the run can be stopped.
        location = None

    # ``geocode`` yields None when nothing matches; handle that explicitly
    # instead of relying on an AttributeError from ``None.latitude``.
    if location is None:
        print(f"Could not geocode city: {city}")
        return (None, None)

    return (location.latitude, location.longitude)

# Step 1: Get unique cities. Rows with a missing city name are skipped here:
# they cannot be geocoded and would only cost a failed lookup plus a sleep.
unique_cities = df["City"].dropna().unique()

# Step 2: Geocode each unique city (with delay to avoid rate-limit issues)
city_coords = {}
for city in unique_cities:
    latlon = get_coords(city)
    city_coords[city] = latlon
    time.sleep(1)  # respect API rate limit

# Step 3: Map coordinates back to DataFrame.
# One vectorized lookup per coordinate replaces the row-wise
# ``.apply(pd.Series)``, which built a Series object for every row just to
# split a 2-tuple. Cities that failed to geocode (or are missing) become NaN;
# the explicit float cast keeps the dtype numeric even if every lookup failed.
df["lat"] = (
    df["City"]
    .map({name: coords[0] for name, coords in city_coords.items()})
    .astype("float64")
)
df["lon"] = (
    df["City"]
    .map({name: coords[1] for name, coords in city_coords.items()})
    .astype("float64")
)

print(df.head())

        City       Date  PM2.5  PM10     NO    NO2    NOx  NH3     CO    SO2  \
0  Ahmedabad 2015-01-01    NaN   NaN   0.92  18.22  17.15  NaN   0.92  27.64   
1  Ahmedabad 2015-01-02    NaN   NaN   0.97  15.69  16.46  NaN   0.97  24.55   
2  Ahmedabad 2015-01-03    NaN   NaN  17.40  19.30  29.70  NaN  17.40  29.07   
3  Ahmedabad 2015-01-04    NaN   NaN   1.70  18.48  17.97  NaN   1.70  18.59   
4  Ahmedabad 2015-01-05    NaN   NaN  22.10  21.42  37.76  NaN  22.10  39.33   

       O3  Benzene  Toluene  Xylene  AQI AQI_Bucket        lat        lon  
0  133.36     0.00     0.02    0.00  NaN        NaN  23.021537  72.580057  
1   34.06     3.68     5.50    3.77  NaN        NaN  23.021537  72.580057  
2   30.70     6.80    16.40    2.25  NaN        NaN  23.021537  72.580057  
3   36.08     4.43    10.14    1.00  NaN        NaN  23.021537  72.580057  
4   39.31     7.01    18.89    2.78  NaN        NaN  23.021537  72.580057  


Export the geocoded data to CSV

In [5]:
# Write the city-level table (including the lat/lon columns added above) to a
# new CSV file; index=False leaves the row index out of the file.
df.to_csv('city_day_with_coords.csv', index=False)

Geocode the stations dataset (by city and state):

In [9]:
# Load stations data
df_stations = pd.read_csv('stations.csv')

# Step 1: Build a "City, State" lookup key for every station. The key is kept
# as a standalone Series, so no helper column has to be added to the frame and
# dropped again afterwards.
station_keys = df_stations['City'] + ', ' + df_stations['State']

# Step 2: Geocode each distinct city-state once. Several stations can share a
# city, so looping over every row would repeat identical requests (and their
# one-second sleeps). Keys that are missing (NaN city or state) are skipped.
city_coords = {}
for city_state in station_keys.dropna().unique():
    latlon = get_coords(city_state)  # appends ", India" and queries Nominatim
    city_coords[city_state] = latlon
    time.sleep(1)  # Avoid rate-limit issues

# Step 3: Map coordinates back to the DataFrame with one vectorized lookup per
# coordinate. Stations whose key failed to geocode (or is missing) get NaN; the
# explicit float cast keeps the dtype numeric even if every lookup failed.
df_stations['lat'] = (
    station_keys
    .map({key: coords[0] for key, coords in city_coords.items()})
    .astype('float64')
)
df_stations['lon'] = (
    station_keys
    .map({key: coords[1] for key, coords in city_coords.items()})
    .astype('float64')
)

print(df_stations.head())

# Persist the stations table with its new lat/lon columns (no index column).
df_stations.to_csv('stations_with_coords.csv', index=False)

Could not geocode city: Bhiwandi, Rajasthan
Could not geocode city: Muzzaffarnagar, Uttar Pradesh
  StationId                                     StationName  \
0     AP001                  Secretariat, Amaravati - APPCB   
1     AP002  Anand Kala Kshetram, Rajamahendravaram - APPCB   
2     AP003                      Tirumala, Tirupati - APPCB   
3     AP004                 PWD Grounds, Vijayawada - APPCB   
4     AP005          GVM Corporation, Visakhapatnam - APPCB   

                City           State  Status        lat        lon  
0          Amaravati  Andhra Pradesh  Active  16.494222  80.510586  
1  Rajamahendravaram  Andhra Pradesh     NaN  17.005045  81.780473  
2           Tirupati  Andhra Pradesh     NaN  13.631637  79.423171  
3         Vijayawada  Andhra Pradesh     NaN  16.511531  80.616047  
4      Visakhapatnam  Andhra Pradesh  Active  17.693553  83.292130  
