In [None]:
import os
import rasterio
import numpy as np
import json
import matplotlib.pyplot as plt
import pandas as pd
import fiona.transform
from rasterio.plot import show
from rasterio.warp import calculate_default_transform, reproject, Resampling
from PIL import Image
from shapely import geometry
from pyproj import Transformer

In [None]:
# Load the state-boundary export and turn one ring of its first record into a
# shapely polygon. NOTE(review): the first record is assumed to be California
# (the polygon is stored as `CA`) -- confirm against the input file.
state_boundary_file_path = r"data/input/us-state-boundaries.json"

with open(state_boundary_file_path) as boundary_file:
    state_records = json.load(boundary_file)

# coordinates[0][0] selects a single coordinate ring from the record's geometry.
boundary_ring = state_records[0]['st_asgeojson']['geometry']['coordinates'][0][0]
CA = geometry.Polygon(boundary_ring)
CA

In [None]:
# Raster location of every variable, relative to the variable's tile directory
# (data/input/<VARIABLE>/<tile>/...).
data_dict = dict(
    FUEL="LF2019_FBFM13_200_CONUS/LC19_F13_200.tif",
    ELEVATION="LF2020_Elev_220_CONUS/LC20_Elev_220.tif",
    SLOPE="LF2020_SlpD_220_CONUS/LC20_SlpD_220.tif",
)

# Lon/lat bounding box of each of the four tiles, keyed by tile number (1-4).
# The tiles meet at lon -119.5 and lat 37.5.
_tile_bounds = [
    # (top, bottom, left, right)
    (37.5, 32, -119.5, -114),
    (37.5, 32, -125, -119.5),
    (43, 37.5, -125, -119.5),
    (43, 37.5, -119.5, -114),
]
coord_dict = {
    tile: dict(zip(("top", "bottom", "left", "right"), bounds))
    for tile, bounds in enumerate(_tile_bounds, start=1)
}

In [None]:
# Reproject every tile of every variable to lon/lat and write the result next to
# the source raster as latlon_resampled.tif.
dst_crs = '+proj=latlon'

for variable, file in data_dict.items():
    for j in range(1, 5):
        path = f"data/input/{variable}/{j}/{file}"
        new_path = f"data/input/{variable}/{j}/latlon_resampled.tif"

        # The source is opened exactly once, inside a context manager, so the
        # file handle is released as soon as the tile has been written.
        with rasterio.open(path) as src:
            # Grid (affine transform + size) of the raster in the target CRS.
            transform, width, height = calculate_default_transform(
                src.crs, dst_crs, src.width, src.height, *src.bounds)

            # Output keeps the source metadata except for CRS and grid.
            kwargs = src.meta.copy()
            kwargs.update({
                'crs': dst_crs,
                'transform': transform,
                'width': width,
                'height': height})

            with rasterio.open(new_path, 'w', **kwargs) as dst:
                # Bands are 1-indexed in rasterio.
                for i in range(1, src.count + 1):
                    reproject(
                        source=rasterio.band(src, i),
                        destination=rasterio.band(dst, i),
                        src_transform=src.transform,
                        src_crs=src.crs,
                        dst_transform=transform,
                        dst_crs=dst_crs,
                        resampling=Resampling.nearest)

In [None]:
# Resample every variable onto a regular 0.01-degree lon/lat grid, keeping only
# grid points inside the CA polygon, and write one coord_df.csv per variable.

m = 33  # window parameter in raster pixels; the window spans m // 2 pixels each way


def _grid_axis(start, stop):
    """Return the 0.01-step grid coordinates from start to stop, rounded to 3 decimals.

    The small 0.001 overshoot makes `stop` itself part of the axis.
    """
    return [round(value, 3) for value in np.arange(start, stop + 0.001, 0.01)]


def _window_value(data_array, row, col, variable, half_window):
    """Aggregate the pixel window around (row, col).

    FUEL yields the most frequent value in the window; every other variable
    yields the mean of the window values that lie within [-150, 5000].
    Raises ValueError for FUEL when the window is empty (np.argmax on an
    empty array).
    """
    window = data_array[row - half_window:row + half_window,
                        col - half_window:col + half_window]
    if variable == 'FUEL':
        values, counts = np.unique(window, return_counts=True)
        return values[np.argmax(counts)]
    flat = window.flatten()
    return flat[~((flat > 5000) | (flat < -150))].mean()


def _sample_points(dataset, data_array, lons, lats, variable, df_dict, skip_failures):
    """Append one (lon, lat, val) record to df_dict for every grid point inside CA.

    With skip_failures=True a point whose window cannot be aggregated is
    skipped; otherwise the error propagates.
    """
    for lon in lons:
        for lat in lats:
            if not CA.contains(geometry.Point(lon, lat)):
                continue
            row, col = dataset.index(lon, lat)
            try:
                value = _window_value(data_array, row, col, variable, m // 2)
            except (ValueError, IndexError):
                # Narrowed from a bare `except:` so that KeyboardInterrupt and
                # genuine programming errors are no longer silently swallowed.
                if skip_failures:
                    continue
                raise
            df_dict["lon"].append(lon)
            df_dict["lat"].append(lat)
            df_dict["val"].append(value)


for variable in data_dict:
    df_dict = {"lon": [], "lat": [], "val": []}

    for k in range(1, 5):
        new_path = f"data/input/{variable}/{k}/latlon_resampled.tif"

        left_lon, right_lon = coord_dict[k]["left"], coord_dict[k]["right"]
        bottom_lat, top_lat = coord_dict[k]["bottom"], coord_dict[k]["top"]

        # Pull each tile back from the shared seams (lon -119.5, lat 37.5): grid
        # points near the seams are not taken from the two adjacent tiles but
        # from separate filler rasters that contain only the border stripes.
        if left_lon == -119.5:
            left_lon = -119.24
        elif right_lon == -119.5:
            right_lon = -119.76

        if bottom_lat == 37.5:
            bottom_lat = 37.76
        elif top_lat == 37.5:
            # Previously `top_lat == 37.24` (a comparison with no effect), so the
            # southern tiles were never trimmed and overlapped the filler stripe.
            top_lat = 37.24

        with rasterio.open(new_path) as dataset:
            data_array = dataset.read(1)
            _sample_points(dataset, data_array,
                           _grid_axis(left_lon, right_lon),
                           _grid_axis(bottom_lat, top_lat),
                           variable, df_dict, skip_failures=False)
        del data_array

    # Filler raster read from filler_horizontal: lon -119.75..-119.25, lat 32..43.
    horizontal_path = f"data/input/{variable}/filler_horizontal/latlon_resampled.tif"

    with rasterio.open(horizontal_path) as dataset:
        data_array = dataset.read(1)
        _sample_points(dataset, data_array,
                       _grid_axis(-119.75, -119.25),
                       _grid_axis(32, 43),
                       variable, df_dict, skip_failures=True)

    # Filler raster read from filler_vertical: lon -125..-114, lat 37.25..37.75.
    vertical_path = f"data/input/{variable}/filler_vertical/latlon_resampled.tif"

    with rasterio.open(vertical_path) as dataset:
        data_array = dataset.read(1)
        _sample_points(dataset, data_array,
                       _grid_axis(-125, -114),
                       _grid_axis(37.25, 37.75),
                       variable, df_dict, skip_failures=True)

    df = pd.DataFrame(df_dict)

    # The two filler stripes cross each other, so grid points in their
    # intersection are sampled twice. Keep the later record of each (lon, lat)
    # pair, which is the one the previous per-point scan kept as well.
    df = df.drop_duplicates(subset=["lon", "lat"], keep="last")

    df.to_csv(f"data/input/{variable}/coord_df.csv")

In [None]:
# Quick visual check of the resampled SLOPE grid: one dot per grid point,
# coloured by value on the "terrain" colormap.
df = pd.read_csv("data/input/SLOPE/coord_df.csv", index_col=[0])

df_min = df['val'].min()
df_max = df['val'].max()
print(df_min, df_max)

# Min-max scale the values to [0, 1] before mapping them to colours.
cmap = plt.get_cmap("terrain")
scaled_values = (df['val'] - df_min) / (df_max - df_min)
color_array = cmap(scaled_values)

fig, ax = plt.subplots(figsize=(14, 14))
ax.axis('off')
ax.scatter(df['lon'].values, df['lat'].values, s=0.1, c=color_array)
plt.show()

In [None]:
# Combine the per-variable grids into one table: lon/lat from the first
# variable plus one value column per variable.
# NOTE(review): value columns are aligned on the CSV row index, which assumes
# every coord_df.csv lists the same grid points in the same order -- confirm.
out_df = None

for variable in data_dict:
    df = pd.read_csv(f"data/input/{variable}/coord_df.csv", index_col=[0])
    if out_df is None:
        # .copy() makes out_df an independent frame; assigning new columns to a
        # bare column selection of `df` triggers pandas' SettingWithCopyWarning.
        out_df = df[["lon", "lat"]].copy()
    out_df[variable] = df["val"]
out_df

In [None]:
# Show where the FUEL value is -9999, drawn together with the CA outline.
fuel_is_missing = out_df["FUEL"] == -9999
missing_vals_df = out_df[fuel_is_missing]

fig, ax = plt.subplots(figsize=(8, 8))
ax.axis('off')
ax.scatter(missing_vals_df["lon"], missing_vals_df["lat"])

outline_x, outline_y = CA.exterior.xy
ax.plot(outline_x, outline_y)

In [None]:
# Discard grid points whose FUEL value is -9999 (presumably the raster's
# no-data marker -- TODO confirm). .copy() detaches the result from the
# unfiltered frame so the column assignments in later cells act on an
# independent table instead of raising SettingWithCopyWarning.
out_df = out_df.loc[out_df["FUEL"] != -9999].copy()

In [None]:
# One-hot encode the FUEL code: one 0/1 column per entry of fuel_dict, then
# drop the original FUEL column.
fuel_dict = {i: f"FBFM{i}" for i in range(1, 14)}
fuel_dict.update({91: "Urban", 92: "Snow/Ice", 93: "Agriculture", 98: "Water", 99: "Barren"})

fuel_codes = out_df["FUEL"]

# A code without a fuel_dict entry is an error (as it was with the row-wise
# dictionary lookup), rather than silently producing an all-zero row.
unknown_codes = set(fuel_codes.unique()) - set(fuel_dict)
if unknown_codes:
    raise KeyError(f"FUEL codes without a fuel_dict entry: {sorted(unknown_codes)}")

# Vectorized comparison per code instead of iterating over every row in Python.
new_cols_dict = {col: (fuel_codes == code).astype("int64") for code, col in fuel_dict.items()}

out_df = out_df.drop(columns=["FUEL"]).assign(**new_cols_dict)
out_df

# Distance-based variables (distance from urban areas and from roads)

In [None]:
import sklearn
from sklearn.neighbors import NearestNeighbors
from math import radians

### Distance from urban area

In [None]:
# Distance (km) from every non-urban grid point to its nearest urban grid
# point; urban points themselves keep a distance of 0.
non_urban_mask = out_df["Urban"] == 0
urban_mask = out_df["Urban"] == 1

# The haversine metric expects [latitude, longitude] in radians. Converting
# plain arrays avoids assigning into filtered slices of out_df.
non_urban_coords = np.radians(out_df.loc[non_urban_mask, ["lat", "lon"]].to_numpy())
urban_coords = np.radians(out_df.loc[urban_mask, ["lat", "lon"]].to_numpy())

# Float initial value: the column receives float distances below, and writing
# floats into an integer column forces a dtype upcast (deprecated in pandas).
out_df["DISTANCE_FROM_URBAN_AREA"] = 0.0

nbrs = NearestNeighbors(n_neighbors=1, algorithm="auto", metric='haversine').fit(urban_coords)

distances, indices = nbrs.kneighbors(non_urban_coords)

# Haversine distances are on the unit sphere; 6371 km is the Earth radius used here.
distances = distances * 6371

out_df.loc[non_urban_mask, "DISTANCE_FROM_URBAN_AREA"] = distances.flatten()

### Distance from roads

In [None]:
# Collect lon/lat coordinates of road pixels from all four tiles and both
# filler rasters, deduplicate them and store them as roads_data.csv.
roads_dict = {"lon": [], "lat": []}

for k in [1, 2, 3, 4, "filler_horizontal", "filler_vertical"]:
    path = f"data/input/ROADS/{k}/LF2020_Roads_220_CONUS/LC20_Roads_220.tif"

    with rasterio.open(path) as src:
        data = src.read(1)
        # Pixels equal to 20 are the ones kept as road pixels.
        road_indices = np.argwhere((data == 20))

        # Keep every 10th road pixel to limit the number of points.
        rows = road_indices[::10, 0]
        cols = road_indices[::10, 1]

        # Pixel indices -> coordinates in the raster CRS -> lon/lat.
        xs, ys = rasterio.transform.xy(src.transform, rows, cols)
        xs, ys = fiona.transform.transform(src.crs, '+proj=latlon', xs, ys)

        roads_dict["lon"] += xs
        roads_dict["lat"] += ys

roads_df = pd.DataFrame(roads_dict)

# Deduplicate on the numeric coordinate pair. The previous helper key
# str(lon) + str(lat) could map different pairs to the same string and needed
# an extra string per point.
roads_df = roads_df.drop_duplicates(subset=["lon", "lat"])

roads_df.to_csv("data/input/ROADS/roads_data.csv")

In [None]:
# Plot the extracted road points on top of the CA outline.
# Read the file written by the extraction cell. The former hard-coded
# D:\... location only existed on one machine, and its backslashes formed
# invalid string escape sequences.
df = pd.read_csv(os.path.join("data", "input", "ROADS", "roads_data.csv"), index_col=[0])

fig, ax = plt.subplots(figsize=(10, 10))
ax.axis("off")
ax.scatter(df["lon"], df["lat"], s=0.1)
ax.plot(*CA.exterior.xy, c="black", linewidth=3)

In [None]:
# Distance (km) from every grid point to its nearest road point.
# The haversine metric expects [latitude, longitude] in radians, so both the
# fitted road points and the query points are passed in (lat, lon) order.
# `df` is left in degrees: converting it in place made the cell produce
# different results when it was run a second time.
road_coords = np.radians(df[["lat", "lon"]].to_numpy())

nbrs = NearestNeighbors(n_neighbors=1, algorithm="auto", metric='haversine').fit(road_coords)

to_map = np.radians(out_df[["lat", "lon"]].to_numpy())

distances, indices = nbrs.kneighbors(to_map)

# Haversine distances are on the unit sphere; 6371 km is the Earth radius used here.
distances = distances * 6371

# kneighbors returns an (n, 1) array; flatten it to one value per row.
out_df["DISTANCE_FROM_ROADS"] = distances.ravel()

### Static variables dataset

In [None]:
# Renumber the rows from 0 (earlier filtering left gaps in the index) and
# write the finished static-variable table to disk.
static_variables_path = "data/datasets/static_variables.csv"

out_df = out_df.reset_index(drop=True)
out_df.to_csv(static_variables_path)

out_df