In [None]:
# Mount Google Drive
from google.colab import drive

# Data manipulation
import pandas as pd

# Geocoding
from geopy.geocoders import Nominatim

# Pause requests to avoid rate limits
import time

# Interactive maps
import folium

# Random number generation
import random

# Plotting library
import matplotlib.pyplot as plt

# Numerical computations
import numpy as np

In [None]:
# Mount Google Drive at /content/drive so the notebook can read and write Drive files.
# force_remount=True is passed so the call also goes through when a mount already exists.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# Load the CSV from the mounted Drive into `df` and print its first rows as a quick sanity check.
df = pd.read_csv('/content/drive/MyDrive/dataset_solo_sostanze.csv')
print(df.head())

             From Date file_name  CO (mg/m3)  NO (ug/m3)  NO2 (ug/m3)  \
0  2016-07-01 10:00:00     AP001        0.48       17.67         39.2   
1  2016-07-01 11:00:00     AP001        0.49       20.50         41.9   
2  2016-07-01 12:00:00     AP001        0.47       15.40         43.6   
3  2016-07-01 13:00:00     AP001        0.47       15.40         43.6   
4  2016-07-01 14:00:00     AP001        0.47       15.40         43.6   

   Ozone (ug/m3)  PM10 (ug/m3)  PM2.5 (ug/m3)  SO2 (ug/m3)  \
0           14.5          39.0          10.67         6.60   
1           15.0          39.0           2.00         6.38   
2           10.5          50.0          20.50         6.38   
3           10.5          50.0          20.50         6.38   
4           10.5          50.0          20.50         6.38   

               To Date           state      city  start_month_num  start_year  
0  2016-07-01 11:00:00  Andhra Pradesh  Tirupati                7        2016  
1  2016-07-01 12:00:00  Andh

In [None]:
# Extract unique cities: the distinct values found in the 'city' column of df.
unique_cities = df['city'].unique()
print(unique_cities)

['Tirupati' 'Amaravati' 'Nagaon' 'Rajamahendravaram' 'Guwahati' 'Patna'
 'Kunjemura' 'Kishanganj' 'Chhal' 'Chandigarh' 'Delhi' 'Vatva'
 'Gandhinagar' 'Bahadurgarh' 'Kurukshetra' 'Ballabgarh' 'Jind' 'Kaithal'
 'Karnal' 'Palwal' 'Mandikhera' 'Gurugram' 'Sirsa' 'Bengaluru'
 'Chikkaballapur' 'Pithampur' 'Mandideep' 'Bhopal' 'Katni' 'Gwalior'
 'Jabalpur' 'Sagar' 'Indore' 'Nayagarh' 'Keonjhar' 'Ludhiana' 'Puducherry'
 'Rairangpur' 'Amritsar' 'Kota' 'Ajmer' 'Bhiwadi' 'Pratapgarh' 'Barmer'
 'Jaipur' 'Udaipur' 'Bikaner' 'Banswara' 'Jhunjhunu' 'Dausa' 'Jodhpur'
 'Sikar' 'Hyderabad' 'Palkalaiperur' 'Agartala' 'Ghaziabad' 'Prayagraj'
 'Varanasi' 'Agra' 'Gorakhpur' 'Lucknow' 'Kanpur' 'Moradabad' 'Vrindavan'
 'Kolkata' 'Siliguri' 'Howrah']


In [None]:
# Count records per city: dict mapping each city name to the number of rows it has in df.
city_acquisitions = df.groupby('city').size().to_dict()

# Print city names and acquisition counts
print(city_acquisitions)

{'Agartala': 20958, 'Agra': 15799, 'Ajmer': 47625, 'Amaravati': 46880, 'Amritsar': 53328, 'Bahadurgarh': 37140, 'Ballabgarh': 36806, 'Banswara': 752, 'Barmer': 751, 'Bengaluru': 41575, 'Bhiwadi': 47798, 'Bhopal': 36565, 'Bikaner': 1351, 'Chandigarh': 42192, 'Chhal': 1855, 'Chikkaballapur': 41842, 'Dausa': 752, 'Delhi': 879184, 'Gandhinagar': 36463, 'Ghaziabad': 48312, 'Gorakhpur': 15346, 'Gurugram': 26913, 'Guwahati': 38270, 'Gwalior': 34495, 'Howrah': 79726, 'Hyderabad': 121505, 'Indore': 30998, 'Jabalpur': 30998, 'Jaipur': 47999, 'Jhunjhunu': 584, 'Jind': 36999, 'Jodhpur': 1631, 'Kaithal': 36200, 'Kanpur': 28909, 'Karnal': 37062, 'Katni': 30998, 'Keonjhar': 3037, 'Kishanganj': 11863, 'Kolkata': 128619, 'Kota': 48856, 'Kunjemura': 1760, 'Kurukshetra': 37062, 'Lucknow': 31078, 'Ludhiana': 51864, 'Mandideep': 45948, 'Mandikhera': 37017, 'Moradabad': 48452, 'Nagaon': 744, 'Nayagarh': 3036, 'Palkalaiperur': 367, 'Palwal': 36480, 'Patna': 28639, 'Pithampur': 46950, 'Pratapgarh': 608, 'Pray

In [None]:
# Extract unique cities (NaN city values are skipped: they cannot be turned into a query string)
unique_cities = df['city'].dropna().unique()

# Count acquisitions per city
city_acquisitions = df.groupby('city').size().to_dict()

# First non-null state the dataset records for each city. It is added to the
# geocoding query because a bare "<city>, India" query is ambiguous for some
# names: in the recorded run "Chhal, India" resolved to a point in Himachal
# Pradesh, which is not necessarily the station the dataset refers to.
city_states = df.groupby('city')['state'].first().to_dict()

# Initialize geolocator and results dictionary.
# geopy's default request timeout is 1 second, which the public Nominatim
# server frequently exceeds; a longer timeout avoids spurious timeouts.
geolocator = Nominatim(user_agent="my_geocoding_application", timeout=10)
unique_cities_with_acquisition = {}
cities_not_geocoded = []

# Geocode cities and store coordinates with acquisition count
for city in unique_cities:
    state = city_states.get(city)

    # Most specific query first; fall back to the city-only query so a city
    # is not lost just because the state-qualified form returns nothing.
    queries = []
    if isinstance(state, str) and state.strip():
        queries.append(f"{city}, {state}, India")
    queries.append(city + ", India")

    location = None
    for query in queries:
        location = geolocator.geocode(query)
        time.sleep(1)  # Pause after every request to avoid rate limits
        if location:
            break

    if location:
        unique_cities_with_acquisition[city] = {
            'coordinates': (location.latitude, location.longitude),
            'acquisitions': city_acquisitions.get(city, 0)
        }
    else:
        # Keep track of failures instead of dropping the city silently.
        cities_not_geocoded.append(city)

# Print the results
print(unique_cities_with_acquisition)
if cities_not_geocoded:
    print(f"No geocoding result for: {cities_not_geocoded}")



{'Tirupati': {'coordinates': (13.6316368, 79.4231711), 'acquisitions': 59150}, 'Amaravati': {'coordinates': (16.5096679, 80.5184535), 'acquisitions': 46880}, 'Nagaon': {'coordinates': (26.3313646, 92.75247628339588), 'acquisitions': 744}, 'Rajamahendravaram': {'coordinates': (17.0050454, 81.7804732), 'acquisitions': 48802}, 'Guwahati': {'coordinates': (26.1805978, 91.753943), 'acquisitions': 38270}, 'Patna': {'coordinates': (25.6093239, 85.1235252), 'acquisitions': 28639}, 'Kishanganj': {'coordinates': (26.29841435, 87.94667148301295), 'acquisitions': 11863}, 'Chhal': {'coordinates': (31.1164614, 77.5286293), 'acquisitions': 1855}, 'Chandigarh': {'coordinates': (30.7334421, 76.7797143), 'acquisitions': 42192}, 'Delhi': {'coordinates': (28.6273928, 77.1716954), 'acquisitions': 879184}, 'Vatva': {'coordinates': (22.9589978, 72.6297981), 'acquisitions': 36391}, 'Gandhinagar': {'coordinates': (23.2232877, 72.6492267), 'acquisitions': 36463}, 'Bahadurgarh': {'coordinates': (28.6609648000000

In [None]:
# City -> (latitude, longitude).
# Built from `unique_cities_with_acquisition`, which already holds the geocoding
# result for every city that could be resolved. Querying Nominatim again for the
# same names would only repeat one rate-limited request per city and yield the
# same coordinates. `geolocator` is not re-created here; the instance created
# together with `unique_cities_with_acquisition` stays in scope.
coordinates = {
    city: info['coordinates']
    for city, info in unique_cities_with_acquisition.items()
}

# Print city coordinates
print(coordinates)



{'Tirupati': (13.6316368, 79.4231711), 'Amaravati': (16.5096679, 80.5184535), 'Nagaon': (26.3313646, 92.75247628339588), 'Rajamahendravaram': (17.0050454, 81.7804732), 'Guwahati': (26.1805978, 91.753943), 'Patna': (25.6093239, 85.1235252), 'Kishanganj': (26.29841435, 87.94667148301295), 'Chhal': (31.1164614, 77.5286293), 'Chandigarh': (30.7334421, 76.7797143), 'Delhi': (28.6273928, 77.1716954), 'Vatva': (22.9589978, 72.6297981), 'Gandhinagar': (23.2232877, 72.6492267), 'Bahadurgarh': (28.660964800000002, 76.83467647083376), 'Kurukshetra': (29.9693747, 76.8482787), 'Ballabgarh': (28.2940728, 77.35922381594979), 'Jind': (29.3140427, 76.3179488), 'Kaithal': (29.7996588, 76.3989903), 'Karnal': (29.7255766, 76.91069238886621), 'Palwal': (28.0599426, 77.32234220926105), 'Mandikhera': (27.897075, 76.9922014), 'Gurugram': (28.4646148, 77.0299194), 'Sirsa': (29.606304100000003, 74.9036567857643), 'Bengaluru': (12.98815675, 77.62260003796), 'Chikkaballapur': (13.099376424266564, 77.3887289987692

In [None]:
# Get state from coordinates
def get_state_from_coordinates(lat, lon):
    """Reverse-geocode a point and return the 'state' entry of its address.

    Uses the module-level `geolocator` and requests English-language results.

    Args:
        lat: Latitude of the point.
        lon: Longitude of the point.

    Returns:
        The value stored under address -> state in the raw geocoder response,
        or 'Unknown' when the lookup yields nothing or the response carries
        no 'state' entry.
    """
    place = geolocator.reverse((lat, lon), language='en')
    if not place:
        return 'Unknown'

    address = place.raw.get('address', {})
    return address.get('state', 'Unknown')

# Add state info to the dictionary.
# The dataset already carries a 'state' column, so that value is used directly:
# it needs no network request and, unlike a reverse lookup of the geocoded
# point, it cannot inherit an error from a city name that was geocoded to the
# wrong place. Reverse geocoding is kept only as a fallback for cities whose
# state is missing in the dataset.
dataset_states = df.groupby('city')['state'].first().to_dict()

unique_cities_with_acquisition_and_state = {}

for city, data in unique_cities_with_acquisition.items():
    lat, lon = data['coordinates']
    count = data['acquisitions']

    # Retrieve state
    state = dataset_states.get(city)
    if not (isinstance(state, str) and state.strip()):
        state = get_state_from_coordinates(lat, lon)
        time.sleep(1)  # Pause to avoid rate limits (only after a network request)

    # Store updated data
    unique_cities_with_acquisition_and_state[city] = {
        'coordinates': (lat, lon),
        'acquisitions': count,
        'state': state
    }

# Print results
print(unique_cities_with_acquisition_and_state)



{'Tirupati': {'coordinates': (13.6316368, 79.4231711), 'acquisitions': 59150, 'state': 'Andhra Pradesh'}, 'Amaravati': {'coordinates': (16.5096679, 80.5184535), 'acquisitions': 46880, 'state': 'Andhra Pradesh'}, 'Nagaon': {'coordinates': (26.3313646, 92.75247628339588), 'acquisitions': 744, 'state': 'Assam'}, 'Rajamahendravaram': {'coordinates': (17.0050454, 81.7804732), 'acquisitions': 48802, 'state': 'Andhra Pradesh'}, 'Guwahati': {'coordinates': (26.1805978, 91.753943), 'acquisitions': 38270, 'state': 'Assam'}, 'Patna': {'coordinates': (25.6093239, 85.1235252), 'acquisitions': 28639, 'state': 'Bihar'}, 'Kishanganj': {'coordinates': (26.29841435, 87.94667148301295), 'acquisitions': 11863, 'state': 'Bihar'}, 'Chhal': {'coordinates': (31.1164614, 77.5286293), 'acquisitions': 1855, 'state': 'Himachal Pradesh'}, 'Chandigarh': {'coordinates': (30.7334421, 76.7797143), 'acquisitions': 42192, 'state': 'Chandigarh'}, 'Delhi': {'coordinates': (28.6273928, 77.1716954), 'acquisitions': 879184, 

In [None]:
# Create India map: a folium base map centred on latitude 20.5937, longitude 78.9629
# with initial zoom level 5 (presumably chosen so the whole country is in view).
india_map = folium.Map(location=[20.5937, 78.9629], zoom_start=5)

# Function to scale radius based on acquisitions
def get_proportional_radius(acquisitions, min_acq, max_acq, min_r=5, max_r=15):
    """Linearly map an acquisition count onto a marker radius.

    Args:
        acquisitions: Count to convert.
        min_acq: Count that maps to `min_r`.
        max_acq: Count that maps to `max_r`.
        min_r: Radius returned for `min_acq`.
        max_r: Radius returned for `max_acq`.

    Returns:
        The interpolated radius. When `min_acq` equals `max_acq` there is no
        range to interpolate over, so the midpoint of `min_r` and `max_r` is
        returned.
    """
    span = max_acq - min_acq
    if span == 0:
        # Every value is identical (e.g. a single city): avoid dividing by zero.
        return (min_r + max_r) / 2
    return min_r + ((acquisitions - min_acq) / span) * (max_r - min_r)

# Extract unique states. Sorted so that the state -> colour assignment is the
# same on every run (the iteration order of a set of strings is not stable
# between interpreter sessions).
states = sorted({data['state'] for data in unique_cities_with_acquisition_and_state.values()})

# Generate distinct colors for states.
# - plt.get_cmap replaces the deprecated plt.cm.get_cmap.
# - The qualitative 'Accent' map only contains 8 colours, so with more than
#   8 states several states ended up sharing a colour. A continuous map
#   resampled to one entry per state gives every state its own colour.
# - The resampled map is indexed with the integer position, which selects
#   entry i exactly (no float rounding involved).
color_palette = plt.get_cmap('gist_rainbow', max(len(states), 1))
state_colors = {state: color_palette(i) for i, state in enumerate(states)}

# Get min/max acquisitions for scaling (default=0 keeps an empty input from raising)
acquisition_counts = [
    city_data['acquisitions']
    for city_data in unique_cities_with_acquisition_and_state.values()
]
min_acq = min(acquisition_counts, default=0)
max_acq = max(acquisition_counts, default=0)

# Add city markers to the map
for city, data in unique_cities_with_acquisition_and_state.items():
    lat, lon = data['coordinates']
    acquisitions = data['acquisitions']
    state = data['state']

    # Scale radius and assign color
    radius = get_proportional_radius(acquisitions, min_acq, max_acq)
    color = state_colors[state]

    # RGBA floats in [0, 1] -> '#rrggbb' (alpha is handled by fill_opacity)
    red, green, blue = (int(channel * 255) for channel in color[:3])

    # Add city marker. The stroke opacity option is `opacity`;
    # `color_opacity` is not a path option and had no effect on the marker.
    folium.CircleMarker(
        location=[lat, lon],
        radius=radius,
        color='black',
        opacity=0.6,
        fill=True,
        fill_color=f'#{red:02x}{green:02x}{blue:02x}',
        fill_opacity=0.7
    ).add_to(india_map)

# Display the map
india_map

  color_palette = plt.cm.get_cmap('Accent', len(states))


In [None]:
# Save the map as an HTML file in Google Drive.
# Written to the same 'MyDrive' folder the dataset is read from, so the notebook
# uses a single spelling of the Drive root rather than mixing it with 'My Drive'.
india_map.save('/content/drive/MyDrive/india_map.html')