In [None]:
import pandas as pd
import re
import numpy as np
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import colors

In [None]:
# Read the 2017 AWQI table from the Colab content directory and show its
# first five rows as plain text.
data = pd.read_csv(filepath_or_buffer='/content/AWQI_India_2017.csv')
print(data.head(n=5))

  LATITUDE LONGITUDE     PH    TH     CA      MG  CHLORIDE  SULPHATE  NITRATE  \
0  13.9728   77.7314  100.0  90.0  100.0  100.00    100.00     100.0    100.0   
1  14.6717   77.4583  100.0   0.0   83.2   14.17     94.53     100.0    100.0   
2  14.4117   77.7200  100.0  22.5   54.4   80.11     96.40     100.0      0.0   
3  15.1167   77.6431  100.0   0.0   41.6   59.24     82.27     100.0    100.0   
4  15.1681   77.3764  100.0   0.0   48.0   52.31     90.80      72.0      0.0   

   FLUORIDE   AWQI  
0     100.0  93.32  
1      14.0  21.70  
2     100.0  37.20  
3     100.0  27.96  
4     100.0  19.39  


In [None]:
def dms_to_decimal(dms_str, default_direction=None):
    """Convert a degrees-minutes-seconds string to signed decimal degrees.

    The expected shape is ``D°M'S"`` optionally followed by a hemisphere
    letter, e.g. ``13°58'22.1"N`` or ``77° 43' 53" e``. Whitespace between
    the components is tolerated and the hemisphere letter is matched
    case-insensitively. Only the start of the string has to match; any text
    after the recognised part is ignored.

    Parameters
    ----------
    dms_str : str
        Coordinate text to parse. Surrounding whitespace is stripped first.
    default_direction : str, optional
        Hemisphere letter to assume when the text does not contain one.
        Only 'S' and 'W' change the result (they negate it).

    Returns
    -------
    float
        ``degrees + minutes / 60 + seconds / 3600``, negated for 'S' or 'W'.

    Raises
    ------
    ValueError
        If ``dms_str`` is not a string, does not start with a DMS pattern,
        or has a seconds field that is not a valid number.
    """
    # Non-string input (None, NaN, plain numbers) has no .strip(); report it
    # with the same ValueError as any other unparseable value so callers only
    # need a single except clause.
    if not isinstance(dms_str, str):
        raise ValueError(f"Invalid DMS format: {dms_str}")

    # Degrees, minutes, seconds, optional closing quote, optional hemisphere.
    dms_regex = re.match(
        r"(\d+)\s*°\s*(\d+)\s*'\s*([\d.]+)\s*\"?\s*([NSEW]?)",
        dms_str.strip(),
        flags=re.IGNORECASE,
    )
    if dms_regex is None:
        raise ValueError(f"Invalid DMS format: {dms_str}")

    degrees = int(dms_regex.group(1))
    minutes = int(dms_regex.group(2))
    try:
        # [\d.]+ also admits text such as "1.2.3"; surface that as the same
        # "Invalid DMS format" error instead of float()'s own message.
        seconds = float(dms_regex.group(3))
    except ValueError:
        raise ValueError(f"Invalid DMS format: {dms_str}") from None

    # Prefer the hemisphere written in the text (normalised to upper case);
    # fall back to the caller-supplied default when none was captured.
    captured = dms_regex.group(4).upper()
    direction = captured if captured else default_direction

    decimal = degrees + minutes / 60 + seconds / 3600

    # Southern and western hemispheres are negative in decimal degrees.
    if direction in ('S', 'W'):
        decimal *= -1

    return decimal


In [None]:
def convert_coordinate(coord_str, default_direction):
    """Return a coordinate as decimal degrees, or None if it cannot be parsed.

    The value is first passed to ``float()``; anything that converts (numbers,
    numeric strings, NaN) is returned as that float. Otherwise, string values
    are handed to ``dms_to_decimal`` as degrees-minutes-seconds text.

    Parameters
    ----------
    coord_str : object
        Raw coordinate value, typically a float or a string.
    default_direction : str
        Hemisphere letter forwarded to ``dms_to_decimal`` for DMS strings
        that do not carry their own.

    Returns
    -------
    float or None
        The decimal value, or None when the input is neither float-convertible
        nor a parseable DMS string.
    """
    try:
        # Already decimal (e.g. 13.9728 or "13.9728").
        return float(coord_str)
    except (TypeError, ValueError):
        # TypeError covers None and other non-numeric objects, which would
        # otherwise abort a whole Series.apply() call.
        pass

    # Only text can be DMS; anything else is unparseable.
    if not isinstance(coord_str, str):
        return None

    try:
        return dms_to_decimal(coord_str, default_direction)
    except ValueError:
        return None

In [None]:
# Derive decimal-degree columns from the raw coordinate columns. Values that
# carry no hemisphere letter are treated as North (latitude) / East (longitude).
data['Latitude_Decimal'] = data['LATITUDE'].apply(convert_coordinate, args=('N',))
data['Longitude_Decimal'] = data['LONGITUDE'].apply(convert_coordinate, args=('E',))

In [None]:
# Discard every row in which either converted coordinate is missing.
data = data.dropna(
    axis=0,
    how='any',
    subset=['Latitude_Decimal', 'Longitude_Decimal'],
)

In [None]:
# Show the converted latitude column followed by the longitude column.
print(data['Latitude_Decimal'], data['Longitude_Decimal'], sep='\n')

0       13.9728
1       14.6717
2       14.4117
3       15.1167
4       15.1681
         ...   
6937    25.7714
6938    25.7878
6939    25.6811
6940    25.7078
6941    25.6214
Name: Latitude_Decimal, Length: 6942, dtype: float64
0       77.7314
1       77.4583
2       77.7200
3       77.6431
4       77.3764
         ...   
6937    88.1875
6938    88.1894
6939    88.1133
6940    88.1172
6941    88.1083
Name: Longitude_Decimal, Length: 6942, dtype: float64


In [None]:
# Renumber rows 0..n-1 after the row filtering above; the old index is
# discarded rather than kept as a column.
data = data.reset_index(drop=True, inplace=False)

In [None]:
# Read the national outline and the state outlines, reprojecting both to
# EPSG:4326 so the two layers share one CRS.
country_boundary_path = '/content/IndiaBoundary.shp'
state_boundary_path = '/content/India-States.shp'

country_boundary = gpd.read_file(country_boundary_path).to_crs(epsg=4326)
state_boundary = gpd.read_file(state_boundary_path).to_crs(epsg=4326)

# Observed range of the index; used below as the explicit colour limits
# (the same limits matplotlib would pick by autoscaling). Marker size is
# fixed, not proportional to the index.
min_index = data['AWQI'].min()
max_index = data['AWQI'].max()

fig, ax = plt.subplots(figsize=(10, 12))

# Base layers: country outline first, state outlines on top, both unfilled.
country_boundary.plot(ax=ax, facecolor="none", edgecolor="black", linewidth=2)
state_boundary.plot(ax=ax, facecolor="none", edgecolor="grey", linewidth=1)

# One marker per row, positioned by the decimal coordinates and coloured by AWQI.
scatter = ax.scatter(
    x=data['Longitude_Decimal'],
    y=data['Latitude_Decimal'],
    c=data['AWQI'],
    s=10,
    cmap='RdYlGn',
    vmin=min_index,
    vmax=max_index,
    alpha=0.6,
    edgecolor="k",
)

# Colour scale legend for the scatter layer.
cbar = fig.colorbar(scatter, ax=ax, orientation="vertical")
cbar.set_label("Water Quality Index")

ax.set(
    xlabel="Longitude",
    ylabel="Latitude",
    title="Water Quality Index Across India with State Boundaries",
)

plt.show()


In [None]:
# Boundary-only map: the country outline with the state outlines drawn over
# it, both unfilled, without any data points.
fig, ax = plt.subplots(figsize=(10, 12))
country_boundary.plot(ax=ax, facecolor="none", edgecolor="black", linewidth=2)
state_boundary.plot(ax=ax, facecolor="none", edgecolor="grey", linewidth=1)
plt.show()
