In [31]:
import rasterio
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import catboost as cb
from sklearn.metrics import classification_report
import numpy as np

def calculate_additional_features(red, green, blue, nir):
    """Compute per-pixel spectral indices from four raster bands.

    Non-floating inputs are promoted to ``float64`` before any arithmetic.
    When bands arrive as unsigned integers, an expression such as
    ``nir - red`` wraps around instead of going negative and ``nir + red``
    can overflow, which silently corrupts every index derived from them.
    Floating-point inputs are used as they are.

    Parameters
    ----------
    red, green, blue, nir : array-like
        Band values; the formulas combine them element-wise.

    Returns
    -------
    tuple
        ``(ndvi, evi, savi, ndwi, sr, gndvi, entropy_red, entropy_nir)``.
        The first six entries are floating-point arrays. The two entropy
        entries are the placeholder ``0`` because the texture filter is
        currently disabled.
    """
    def as_float(band):
        # Leave floating-point data untouched; promote everything else so
        # that differences and sums cannot wrap or overflow.
        band = np.asarray(band)
        if np.issubdtype(band.dtype, np.floating):
            return band
        return band.astype(np.float64)

    red, green, blue, nir = (as_float(band) for band in (red, green, blue, nir))

    # Normalized Difference Vegetation Index (NDVI); the small constant keeps
    # the denominator away from zero where both bands are 0.
    ndvi = (nir - red) / (1e-3 + nir + red)

    # Enhanced Vegetation Index (EVI)
    evi = 2.5 * (nir - red) / (1e-3 + nir + 6 * red - 7.5 * blue + 1)

    # Soil Adjusted Vegetation Index (SAVI) with soil factor L = 0.5
    savi = ((nir - red) / (nir + red + 0.5)) * 1.5

    # Normalized Difference Water Index (NDWI)
    ndwi = (green - nir) / (1e-3 + green + nir)

    # Simple Ratio (SR)
    sr = nir / (1e-3 + red)

    # Green Normalized Difference Vegetation Index (GNDVI)
    gndvi = (nir - green) / (1e-3 + nir + green)

    # Texture features (example: local entropy). The helper is kept for the
    # disabled filter calls below; nothing invokes it at the moment.
    def entropy(values):
        """Entropy (in bits) of a 20-bin histogram of ``values``."""
        values = values.flatten()
        h, _ = np.histogram(values, bins=20)
        h = h / h.sum()
        # Empty bins get +1 inside the log so they contribute log2(1) = 0
        # rather than -inf.
        return -np.sum(h * np.log2(h + (h == 0)))

    # Placeholders: the sliding-window entropy filter is switched off.
    entropy_red = 0  # generic_filter(red, entropy, size=5)
    entropy_nir = 0  # generic_filter(nir, entropy, size=5)

    return ndvi, evi, savi, ndwi, sr, gndvi, entropy_red, entropy_nir

# Main raster-loading step: read the bands and assemble the per-pixel feature table.
def load_and_preprocess_tiff(file_path, r_band, g_band, b_band, ik_band, mask_band):
    """Read bands from a raster file and build a per-pixel feature table.

    Parameters
    ----------
    file_path
        Path handed to ``rasterio.open``.
    r_band, g_band, b_band, ik_band, mask_band
        Band indexes passed to the dataset's ``read`` for the red, green,
        blue, near-infrared and mask bands respectively.

    Returns
    -------
    pandas.DataFrame
        One row per pixel (each band flattened) with the columns ``red``,
        ``green``, ``blue``, ``nir``, ``ndvi``, ``evi``, ``savi``, ``ndwi``,
        ``sr`` and ``gndvi``, in that order.
    """
    band_indexes = {'red': r_band, 'green': g_band, 'blue': b_band, 'nir': ik_band}

    with rasterio.open(file_path) as dataset:
        bands = {name: dataset.read(index) for name, index in band_indexes.items()}
        # The mask band is still read, but its values are not part of the table.
        dataset.read(mask_band)

    # The last two outputs are the entropy placeholders; they are not exported.
    (ndvi, evi, savi, ndwi, sr, gndvi,
     _entropy_red, _entropy_nir) = calculate_additional_features(
        bands['red'], bands['green'], bands['blue'], bands['nir']
    )

    # Raw bands first, then the derived indices.
    columns = dict(bands)
    columns.update(ndvi=ndvi, evi=evi, savi=savi, ndwi=ndwi, sr=sr, gndvi=gndvi)

    return pd.DataFrame({name: values.flatten() for name, values in columns.items()})

def process_csv(csv_data):
    """Average the trailing rows of an already-loaded table.

    The first column is skipped and at most the final ten rows are used.

    Parameters
    ----------
    csv_data : pandas.DataFrame
        Table whose columns after the first hold the values to average.

    Returns
    -------
    pandas.Series
        Column-wise mean of the selected rows, indexed by column name.
        A column with no values to average yields NaN.
    """
    # Rows: last ten (or fewer, if the table is shorter); columns: all but the first.
    recent_rows = csv_data.iloc[-10:, 1:]
    return recent_rows.mean()



In [27]:
# Create an empty CatBoost classifier, then populate it from the file "model".
# The path is relative, so it resolves against the current working directory.
loaded_model = cb.CatBoostClassifier()
# No explicit format argument is given, so the library default applies.
# As the cell's last expression, the call's return value is what gets echoed below.
loaded_model.load_model("model")

<catboost.core.CatBoostClassifier at 0x7f9d329ca850>

In [55]:
# Inputs for this run: the CSV measurements and the raster to classify.
csv_data = pd.read_csv('tests/test.csv')
tiff_file = 'tests/test.tiff'

# Step 1: per-pixel features (raw bands plus spectral indices), one row per pixel.
pixel_data = load_and_preprocess_tiff(tiff_file, r_band=1, g_band=2, b_band=3, ik_band=4, mask_band=5)

# Only the raster dimensions are needed here (to fold predictions back into
# an image later), so take them from the dataset metadata rather than
# reading a whole band just to inspect its shape.
with rasterio.open(tiff_file) as src:
    original_shape = src.shape

# The pixel table must line up with the raster, otherwise the later reshape fails.
assert len(pixel_data) == original_shape[0] * original_shape[1], "pixel table does not match raster size"

csv_mean_data = process_csv(csv_data)

# Step 2: combine pixel-level and panorama-level data by repeating the
# CSV means on every pixel row.
csv_data_expanded = pd.DataFrame(
    np.tile(csv_mean_data.values, (len(pixel_data), 1)),
    columns=csv_mean_data.index,
)
df = pd.concat([pixel_data, csv_data_expanded], axis=1)

# Get the feature names from the model
feature_names = loaded_model.feature_names_

# Features the model expects but the inputs do not provide. They are filled
# with NaN by the reindex below; report them instead of doing so silently.
missing_columns = [col for col in feature_names if col not in df.columns]
if missing_columns:
    print("Model features absent from the inputs (filled with NaN):", missing_columns)

# Match the model's column order. reindex also creates any missing column as
# NaN and drops columns the model does not use, so no separate fill loop is needed.
df = df.reindex(columns=feature_names)

In [60]:
# Take column 1 of the predicted probabilities (the positive class) and fold
# the flat per-pixel vector back into the raster's shape.
cb_pred_proba = np.reshape(loaded_model.predict_proba(df)[:, 1], original_shape)

In [72]:
def pixel_to_coord(row, col, transform):
    """Map a raster (row, col) position to coordinates through ``transform``.

    Thin wrapper around ``rasterio.transform.xy`` that takes the pixel
    position first and the transform last.

    Returns
    -------
    tuple
        The ``(x, y)`` pair produced by ``rasterio.transform.xy``.
    """
    x_coord, y_coord = rasterio.transform.xy(transform, row, col)
    return x_coord, y_coord

# Pixels whose predicted probability is strictly above this value are exported.
threshold = 0.5

with rasterio.open(tiff_file) as src:
    transform = src.transform

# Find every above-threshold pixel in one vectorized pass instead of visiting
# each pixel in a Python double loop. np.nonzero yields indices in row-major
# order, which is the order the nested row/col loops would have produced.
hit_rows, hit_cols = np.nonzero(cb_pred_proba > threshold)

geojson_points = []
if hit_rows.size:
    # One batched transform call for all hits rather than one call per pixel.
    hit_xs, hit_ys = pixel_to_coord(hit_rows, hit_cols, transform)
    hit_probabilities = cb_pred_proba[hit_rows, hit_cols]

    geojson_points = [
        {
            "type": "Feature",
            "geometry": {
                "type": "Point",
                # Plain floats keep the structure JSON-serializable.
                "coordinates": [float(x), float(y)]
            },
            "properties": {
                "probability": float(probability)
            }
        }
        for x, y, probability in zip(hit_xs, hit_ys, hit_probabilities)
    ]


NameError: name 'threshold' is not defined