In [8]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
#!pip install rasterio
import rasterio
import xarray as xr

In [19]:
pd.set_option('display.max_columns', None)
pd.set_option("display.max_rows", None)

In [31]:
# Open the GeoTIFF
with rasterio.open("Maharashtra_O3_July2024-Dec2024.tif") as src:
    data = src.read(1)  # first band
    coords = {
        "y": src.xy(0, 0)[1] - src.res[1] * np.arange(src.height),  # latitude
        "x": src.xy(0, 0)[0] + src.res[0] * np.arange(src.width)   # longitude
    }
    o3_da = xr.DataArray(data, dims=("y","x"), coords=coords)

print(o3_da)

<xarray.DataArray (y: 103, x: 132)> Size: 109kB
array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]])
Coordinates:
  * y        (y) float64 824B 22.04 21.98 21.91 21.85 ... 15.75 15.69 15.63
  * x        (x) float64 1kB 72.66 72.72 72.79 72.85 ... 80.71 80.77 80.83 80.9


In [23]:
import rasterio
import xarray as xr
import numpy as np

def raster_to_xarray(file):
    with rasterio.open(file) as src:
        data = src.read(1)  # first band
        # create 1D coordinates using the affine transform
        transform = src.transform
        height, width = src.height, src.width

        # x coordinates: left + pixel width * column index
        x = transform.c + np.arange(width) * transform.a
        # y coordinates: top + pixel height * row index (note: transform.e is usually negative)
        y = transform.f + np.arange(height) * transform.e

        da = xr.DataArray(data, dims=("y","x"), coords={"y": y, "x": x})
    return da

uv_aerosol_da = raster_to_xarray("Maharashtra_UV_Aerosol_July2024-Dec2024.tif")
o3_da = raster_to_xarray("Maharashtra_O3_July2024-Dec2024.tif")

print(uv_aerosol_da)
print(o3_da)


<xarray.DataArray (y: 103, x: 132)> Size: 109kB
array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]])
Coordinates:
  * y        (y) float64 824B 22.07 22.01 21.95 21.88 ... 15.78 15.72 15.66
  * x        (x) float64 1kB 72.63 72.69 72.75 72.82 ... 80.68 80.74 80.8 80.87
<xarray.DataArray (y: 103, x: 132)> Size: 109kB
array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]])
Coordinates:
  * y        (y) float64 824B 22.07 22.01 21.95 21.88 ... 15.78 15.72 15.66
  * x        (x) float64 1kB 72.63 72.69 72.75 72.82 ... 80.68 80.74 80.8 80.87


In [30]:
import numpy as np
import xarray as xr
import pandas as pd

# o3_da = 7 km raster
# uv_aerosol_da = 500 m raster (already fine resolution)

# Compute replication factor (roughly 7km / 500m)
rep_factor_y = int(round(uv_aerosol_da.sizes["y"] / o3_da.sizes["y"]))
rep_factor_x = int(round(uv_aerosol_da.sizes["x"] / o3_da.sizes["x"]))

# Replicate O3 values to match 500 m grid
o3_rep = np.repeat(np.repeat(o3_da.values, rep_factor_y, axis=0), rep_factor_x, axis=1)

# Crop or pad if necessary to match UV shape exactly
o3_rep = o3_rep[:uv_aerosol_da.sizes["y"], :uv_aerosol_da.sizes["x"]]

# Make DataArray aligned with UV/Aerosol coordinates
o3_500m_da = xr.DataArray(o3_rep, dims=("y", "x"), coords={"y": uv_aerosol_da.y, "x": uv_aerosol_da.x})

# Combine both into a single DataFrame
xx, yy = np.meshgrid(uv_aerosol_da.x.values, uv_aerosol_da.y.values)
df = pd.DataFrame({
    "x": xx.ravel(),
    "y": yy.ravel(),
    "UV_Aerosol": uv_aerosol_da.values.ravel(),
    "O3_500m": o3_500m_da.values.ravel()
})

df.head(100)


Unnamed: 0,x,y,UV_Aerosol,O3_500m
0,72.628791,22.071607,,
1,72.691673,22.071607,,
2,72.754555,22.071607,,
3,72.817437,22.071607,,
4,72.880319,22.071607,,
5,72.943201,22.071607,,
6,73.006083,22.071607,,
7,73.068965,22.071607,,
8,73.131847,22.071607,,
9,73.194729,22.071607,,


In [32]:
df = pd.DataFrame({
    "longitude": np.repeat(o3_da.x.values, o3_da.y.size),
    "latitude": np.tile(o3_da.y.values, o3_da.x.size),
    "O3": o3_da.values.flatten()
})
#df = df.dropna().reset_index(drop=True)
df.head(100)

Unnamed: 0,longitude,latitude,O3
0,72.660232,22.040165,
1,72.660232,21.977283,
2,72.660232,21.914401,
3,72.660232,21.851519,
4,72.660232,21.788637,
5,72.660232,21.725755,
6,72.660232,21.662873,
7,72.660232,21.599991,
8,72.660232,21.537109,
9,72.660232,21.474227,


In [34]:
import matplotlib.pyplot as plt
import numpy as np

# Drop NaNs first
df_clean = df.dropna(subset=["O3"])

# Reshape O3 values back into a 2D grid
o3_grid = df_clean["O3"].values.reshape(o3_da.y.size, o3_da.x.size)

# Plot
plt.figure(figsize=(10,7))
plt.pcolormesh(o3_da.x.values, o3_da.y.values, o3_grid, shading='auto', cmap='viridis')
plt.colorbar(label='O3')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.title('Spatial Distribution of O3')
plt.show()


ValueError: cannot reshape array of size 7069 into shape (103,132)

In [18]:
df.shape

(7069, 3)

In [39]:
#!pip install rasterio

import rasterio
import pandas as pd
import numpy as np
import os

# Path to the TIFF file
tiff_path = r"D:\satellite Downscaling Mini Project\Satellite-Downscaling-TE-Project\Keigan O3\Maharashtra_O3_July2024-Dec2024.tif"

# Output CSV path
csv_path = r"D:\Satellite Downscaling Mini Project\Satellite-Downscaling-TE-Project\Keigan O3\Maharashtra_O3_July2024-Dec2024.csv"

with rasterio.open(tiff_path) as src:
    data = src.read()  # shape = (bands, rows, cols)

    # Get pixel coordinates
    rows, cols = np.meshgrid(
        np.arange(data.shape[1]),
        np.arange(data.shape[2]),
        indexing='ij'
    )

    # Convert pixel indices to lat/lon
    xs, ys = rasterio.transform.xy(src.transform, rows.flatten(), cols.flatten())
    df = pd.DataFrame({"x": xs, "y": ys})

    # Add each band as a column
    for i in range(data.shape[0]):
        df[f"band_{i+1}"] = data[i].flatten()

# Save DataFrame to CSV
df.to_csv(csv_path, index=False)
print(f"Saved CSV: {csv_path}")
print(df.head())

RasterioIOError: 'D:\satellite Downscaling Mini Project\Satellite-Downscaling-TE-Project\Keigan O3\Maharashtra_O3_July2024-Dec2024.tif' not recognized as being in a supported file format.