In [1]:
import numpy as np
import pandas as pd
import os
import rasterio as rio
from utils import *


In [2]:
# tiff_path=r"C:\Users\rishi\ml_projects\UHI\Landsat_0\Kanpur\LST\April.tif"
# with rasterio.open(tiff_path) as src:
#     band1=src.read(1)
#     print(band1.shape)

In [None]:
def get_sample_points(city_name, shape, N=10000):
    """Draw N pseudo-random (row, col) pixel indices inside a 2-D raster.

    The generator is seeded from the city name so that every raster of the
    same city (and the same shape) is sampled at the same pixel positions,
    run after run.

    Parameters
    ----------
    city_name : str
        Name used to derive the seed.
    shape : sequence of int
        Raster shape; only ``shape[0]`` (rows) and ``shape[1]`` (cols) are used.
    N : int, default 10000
        Number of points to draw (with replacement, so duplicates can occur).

    Returns
    -------
    list of tuple
        ``N`` pairs ``(row, col)`` with ``0 <= row < shape[0]`` and
        ``0 <= col < shape[1]``.

    Raises
    ------
    ValueError
        Propagated from NumPy when ``shape[0]`` or ``shape[1]`` is not positive.
    """
    import zlib  # local import keeps this cell runnable on its own

    # The built-in hash() of a str is salted per interpreter process
    # (PYTHONHASHSEED), so it yielded a different seed after every kernel
    # restart. CRC32 depends only on the bytes and is already in [0, 2**32).
    seed = zlib.crc32(str(city_name).encode("utf-8"))
    rng = np.random.default_rng(seed=seed)
    rows = rng.integers(0, shape[0], size=N)
    cols = rng.integers(0, shape[1], size=N)
    return list(zip(rows, cols))


In [None]:
def get_features_pixels(tiff_path,tiff_path2,city):
    """Sample LST, NDVI, NDBI and albedo at random pixels of a raster pair.

    Parameters
    ----------
    tiff_path : str
        Raster whose band 1 is the LST band and band 3 the cloud-mask band.
    tiff_path2 : str
        Raster whose bands 1, 3, 4, 5 and 6 are read as blue, red, NIR,
        SWIR1 and SWIR2 respectively.
    city : str
        Forwarded to ``get_sample_points`` so each city gets its own fixed
        set of sample positions.

    Returns
    -------
    pandas.DataFrame
        One row per sampled point with columns ``LST`` (degrees Celsius),
        ``NDVI``, ``NDBI`` and ``Albedo``. Points where the masked LST is NaN
        have NaN in all four columns.

    Raises
    ------
    FileNotFoundError
        If either path does not exist.

    Notes
    -----
    Files are opened through ``rio``, the alias this notebook imports rasterio
    under. The bare name ``rasterio`` is only defined if ``from utils import *``
    happens to leak it.
    """
    if not (os.path.exists(tiff_path) and os.path.exists(tiff_path2)):
        raise FileNotFoundError("One or both TIFF paths do not exist.")

    # Read everything needed up front so both datasets are closed promptly.
    with rio.open(tiff_path) as src1, rio.open(tiff_path2) as src2:
        band10 = src1.read(1).astype(float)  # LST band
        cloud = src1.read(3).astype(int)     # Cloud mask band
        blue_band = src2.read(1)
        red_band = src2.read(3)
        nir_band = src2.read(4)
        swir1_band = src2.read(5)
        swir2_band = src2.read(6)

    mask = cloud_mask(cloud)

    # Digital numbers -> Kelvin -> Celsius.
    # NOTE(review): scale/offset look like the Landsat Collection 2 Level-2
    # surface-temperature factors - confirm against the export settings.
    lst_c = band10 * 0.00341802 + 149 - 273.15
    lst_c[mask == 1] = np.nan  # pixels the mask flags with 1 are discarded

    points = np.array(get_sample_points(city, lst_c.shape), dtype=np.intp).reshape(-1, 2)
    rows, cols = points[:, 0], points[:, 1]

    lst = lst_c[rows, cols]
    valid = ~np.isnan(lst)
    # Index the reflectance bands only where LST is usable, exactly as the
    # per-point loop did, so masked points never touch the second raster.
    r, c = rows[valid], cols[valid]

    # NOTE(review): presumably the Collection 2 surface-reflectance
    # scale/offset - confirm.
    blue, red, nir, swir1, swir2 = (
        band[r, c] * 0.0000275 - 0.2
        for band in (blue_band, red_band, nir_band, swir1_band, swir2_band)
    )

    ndvi = (nir - red) / (nir + red)
    ndbi = (swir1 - nir) / (swir1 + nir)
    nume = 0.356* blue + 0.130*red +0.373*nir +.085*swir1 +0.072*swir2 -0.0018
    albedo = nume / 1.016

    def _spread(values):
        # Place values computed for valid points back at their sample
        # positions; every other position stays NaN.
        out = np.full(rows.shape, np.nan)
        out[valid] = values
        return out

    return pd.DataFrame({
        "LST": lst,
        "NDVI": _spread(ndvi),
        "NDBI": _spread(ndbi),
        "Albedo": _spread(albedo),
    })


In [5]:
# Build one record per (year, month, city); each record carries the per-pixel
# feature table sampled from that city-month's rasters, or NaN when the
# rasters could not be loaded.
cities=["Delhi","Hyderabad", "Mumbai", "Bangalore","Kolkata","Chennai","Pune",\
        "Kanpur","Surat","Ahmedabad"]
# month number -> (file-name stem, day count); only the name is used below.
months = {
        1: ("January", 31),
        2: ("February", 28),
        3: ("March", 31),
        4: ("April", 30),
        5: ("May", 31),
        6: ("June", 30),
        7: ("July", 31),
        8: ("August", 31),
        9: ("September", 30),
        10: ("October", 31),
        11: ("November", 30),
        12: ("December", 31)
    }
# Root of the Landsat_<n>/<city>/{LST,NDVI}/<Month>.tif tree.
BASE_DIR = r"/home/f20222001/test-venv/UHI"
data=[]
load_errors=[]  # (year, month, city, error) for failures other than missing files
month_count=0
for year in range(2000,2025):
    for month in months:
        month_count+=1  # running month index across all years (1..300)
        month_name = months[month][0]
        for city in cities:
            row = {"Year": year, "Month": month, "Aggregate_Month":month_count, "City": city}
            city_dir = f"{BASE_DIR}/Landsat_{year-2000}/{city}"
            lst_t = f"{city_dir}/LST/{month_name}.tif"
            ndvi_t = f"{city_dir}/NDVI/{month_name}.tif"
            try:
                row["Pointwise_Data"]=get_features_pixels(lst_t,ndvi_t,city)
            except FileNotFoundError:
                # Expected: not every city-month has imagery on disk.
                row["Pointwise_Data"]=np.nan
            except Exception as e:
                # Stay best-effort, but keep a trace: a blanket silent except
                # would also hide programming errors (e.g. a NameError) and
                # quietly produce an all-NaN table.
                row["Pointwise_Data"]=np.nan
                load_errors.append((year, month, city, repr(e)))
            data.append(row)
    print(year)
df=pd.DataFrame(data)
if load_errors:
    print(f"{len(load_errors)} city-month(s) failed with unexpected errors; first: {load_errors[0]}")

2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024


In [6]:
# Keep only the city-months whose rasters were loaded, then renumber the rows
# from 0 so positional and label lookups agree.
df = df.dropna(subset=["Pointwise_Data"]).reset_index(drop=True)
df

Unnamed: 0,Year,Month,Aggregate_Month,City,Pointwise_Data
0,2000,1,1,Hyderabad,LST NDVI NDBI Albedo...
1,2000,1,1,Chennai,LST NDVI NDBI Albedo...
2,2000,1,1,Surat,LST NDVI NDBI Albedo...
3,2000,1,1,Ahmedabad,LST NDVI NDBI Albedo...
4,2000,2,2,Delhi,LST NDVI NDBI Albed...
...,...,...,...,...,...
1798,2024,12,300,Chennai,LST NDVI NDBI Albedo...
1799,2024,12,300,Pune,LST NDVI NDBI Albedo...
1800,2024,12,300,Kanpur,LST NDVI NDBI Albedo...
1801,2024,12,300,Surat,LST NDVI NDBI Albedo...


In [None]:
# Persist the nested table (a DataFrame per row) to the working directory.
pickle_out = "pixelated_db_10_fk.pkl"
df.to_pickle(pickle_out)

In [9]:
# Show the per-pixel feature table stored in the row labelled 0.
df.loc[0, "Pointwise_Data"]

Unnamed: 0,LST,NDVI,NDBI,Albedo
0,33.212223,0.314720,0.187720,0.138333
1,32.784970,0.203027,0.116432,0.188628
2,37.847058,0.222606,0.161355,0.181382
3,31.311804,0.294090,0.015158,0.168077
4,32.313284,0.270867,0.169398,0.200220
...,...,...,...,...
39995,32.952453,0.291216,0.129683,0.172204
39996,35.939803,0.111702,0.092735,0.145024
39997,36.585809,0.175635,0.132946,0.205878
39998,36.445670,0.290567,0.086115,0.141747


In [10]:
# Reload a pickled table and preview the feature frame in its row labelled 0.
# NOTE(review): the recorded run of this cell failed with FileNotFoundError.
# The name differs from the "pixelated_db_10_fk.pkl" written earlier in this
# notebook - confirm which file is intended.
# Only unpickle files you trust: loading a pickle can execute arbitrary code.
saved_db_path = r"/home/f20222001/test-venv/UHI/New Notebooks/pixelated_db_10k.pkl"
newdf = pd.read_pickle(saved_db_path)
newdf['Pointwise_Data'][0]

FileNotFoundError: [Errno 2] No such file or directory: '/home/f20222001/test-venv/UHI/New Notebooks/pixelated_db_10k.pkl'