<a href="https://colab.research.google.com/github/Ahnaf-045/TugasPemrogramanKomputer/blob/main/Tugas_Minggu_12_Kelompok_10_C.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install rasterio geopandas gdown pandas

Collecting rasterio
  Downloading rasterio-1.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Collecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)
Collecting cligj>=0.5 (from rasterio)
  Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB)
Collecting click-plugins (from rasterio)
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl.metadata (6.4 kB)
Downloading rasterio-1.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (22.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.2/22.2 MB[0m [31m81.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Installing collected packages: cligj, click-plugins, affine, rasterio
Successfully installed affine-2.4.0 click-plugins-1.1.1 cligj-0.7.2 rasterio-1.4.3


In [None]:
import pandas as pd
import geopandas as gpd
import rasterio
from rasterio.sample import sample_gen
import numpy as np
from datetime import datetime
import os

def download_and_process_data():
    """
    Process geospatial data from GeoTIFF, GPKG, and CSV files.

    Note: Download the files from Google Drive first:
    1. GeoTIFF: https://drive.google.com/file/d/1-Zdq_ZXj4WoX5ubQnoHRjSlHfHrx07x0/view?usp=sharing
    2. GPKG: https://drive.google.com/file/d/1AQcHlqzmpQIukLyPIkUnb04-pbjoET_y/view?usp=sharing
    3. CSV: https://drive.google.com/file/d/1v8_2PwMxl1QGdcbC_RPgUGtYSZsmJDri/view?usp=sharing
    """

    # File paths (update these to match your downloaded files)
    geotiff_path = "your_geotiff_file.tif"  # Replace with actual filename
    gpkg_path = "your_survey_data.gpkg"     # Replace with actual filename
    csv_path = "perioda.csv"                # Replace with actual filename

    print("Starting geospatial data processing...")

    # Step 1: Load survey data from GPKG
    print("Loading survey data from GPKG...")
    try:
        survey_data = gpd.read_file(gpkg_path)
        print(f"Survey data loaded: {len(survey_data)} records")
        print("Columns:", survey_data.columns.tolist())
    except Exception as e:
        print(f"Error loading GPKG: {e}")
        return None

    # Step 2: Load period lookup table
    print("Loading period lookup table...")
    try:
        period_lookup = pd.read_csv(csv_path)
        print(f"Period lookup loaded: {len(period_lookup)} records")
        print("Columns:", period_lookup.columns.tolist())
    except Exception as e:
        print(f"Error loading CSV: {e}")
        return None

    # Step 3: Filter and prepare survey data
    print("Filtering survey data...")

    # Extract coordinates (assuming geometry column exists)
    if 'geometry' in survey_data.columns:
        survey_data['lon'] = survey_data.geometry.x
        survey_data['lat'] = survey_data.geometry.y

    # Create filtered table with required columns
    # Adjust column names based on your actual data structure
    required_cols = ['fid', 'lon', 'lat', 'date', 'fase']

    # Check which columns exist and map them
    available_cols = survey_data.columns.tolist()
    print(f"Available columns: {available_cols}")

    # Create mapping for common column name variations
    column_mapping = {}
    for col in required_cols:
        if col in available_cols:
            column_mapping[col] = col
        elif col.upper() in available_cols:
            column_mapping[col] = col.upper()
        elif col.lower() in available_cols:
            column_mapping[col] = col.lower()
        elif col == 'fid' and 'FID' in available_cols:
            column_mapping[col] = 'FID'
        elif col == 'fid' and 'id' in available_cols:
            column_mapping[col] = 'id'
        elif col == 'date' and 'Date' in available_cols:
            column_mapping[col] = 'Date'
        elif col == 'fase' and 'phase' in available_cols:
            column_mapping[col] = 'phase'

    # Filter data with existing columns
    filtered_data = survey_data.copy()

    # Rename columns to standard names
    for new_name, old_name in column_mapping.items():
        if old_name in filtered_data.columns:
            filtered_data = filtered_data.rename(columns={old_name: new_name})

    # Select only required columns that exist
    existing_cols = [col for col in required_cols if col in filtered_data.columns]
    filtered_survey = filtered_data[existing_cols + ['geometry'] if 'geometry' in filtered_data.columns else existing_cols].copy()

    print(f"Filtered survey data: {len(filtered_survey)} records")
    print("Table 1 - Filtered Survey Data:")
    print(filtered_survey.head())

    # Step 4: Add period information
    print("Adding period information...")

    # Merge with period lookup table
    # Adjust merge column based on your data structure
    if 'date' in filtered_survey.columns and len(period_lookup.columns) >= 2:
        # Assuming period lookup has date and period columns
        period_col_names = period_lookup.columns.tolist()

        # Convert date columns to datetime for proper matching
        if 'date' in filtered_survey.columns:
            try:
                filtered_survey['date'] = pd.to_datetime(filtered_survey['date'])
            except:
                pass

        # Merge based on the first column of period lookup (assuming it's the key)
        merge_key = period_col_names[0]
        period_key = period_col_names[1] if len(period_col_names) > 1 else 'periode'

        try:
            # Try different merge strategies
            if merge_key in filtered_survey.columns:
                survey_with_period = filtered_survey.merge(
                    period_lookup,
                    left_on=merge_key,
                    right_on=period_col_names[0],
                    how='left'
                )
            else:
                # Create a simple period assignment based on date ranges or other logic
                survey_with_period = filtered_survey.copy()
                survey_with_period['periode'] = 1  # Default period

        except Exception as e:
            print(f"Merge error: {e}")
            survey_with_period = filtered_survey.copy()
            survey_with_period['periode'] = 1  # Default period
    else:
        survey_with_period = filtered_survey.copy()
        survey_with_period['periode'] = 1  # Default period

    print("Table 2 - Survey Data with Period:")
    print(survey_with_period.head())

    # Step 5: Extract raster values
    print("Extracting raster values...")

    try:
        with rasterio.open(geotiff_path) as src:
            print(f"Raster info: {src.count} bands, {src.width}x{src.height} pixels")

            # Prepare coordinates for sampling
            coords = [(lon, lat) for lon, lat in zip(survey_with_period['lon'], survey_with_period['lat'])]

            # Extract values for each point and each period
            final_results = []

            for idx, row in survey_with_period.iterrows():
                try:
                    coord = (row['lon'], row['lat'])
                    periode = int(row.get('periode', 1))

                    # Ensure period is within valid band range
                    if periode > src.count:
                        periode = 1

                    # Sample the raster at the point location for the specific band
                    sampled_values = list(sample_gen(src, [coord], indexes=[periode]))
                    p0_value = sampled_values[0][0] if sampled_values and len(sampled_values[0]) > 0 else np.nan

                    # Create result row
                    result_row = {
                        'fid': row.get('fid', idx),
                        'lon': row['lon'],
                        'lat': row['lat'],
                        'date': row.get('date', ''),
                        'fase': row.get('fase', ''),
                        'periode': periode,
                        'p0': p0_value
                    }

                    final_results.append(result_row)

                except Exception as e:
                    print(f"Error processing point {idx}: {e}")
                    continue

            # Create final DataFrame
            final_df = pd.DataFrame(final_results)

            print("Table 3 - Final Results with Raster Values:")
            print(final_df.head())
            print(f"\nFinal dataset: {len(final_df)} records")

            # Save results
            output_file = "processed_survey_data.csv"
            final_df.to_csv(output_file, index=False)
            print(f"Results saved to: {output_file}")

            return final_df

    except Exception as e:
        print(f"Error processing raster: {e}")
        return survey_with_period

def main():
    """
    Main function to run the data processing pipeline.
    """
    print("=== Geospatial Data Processing Pipeline ===")
    print()
    print("Before running this script, please:")
    print("1. Download the GeoTIFF file from Google Drive")
    print("2. Download the GPKG survey data file from Google Drive")
    print("3. Download the perioda.csv file from Google Drive")
    print("4. Update the file paths in the script")
    print()

    # Check if files exist
    files_to_check = ["your_geotiff_file.tif", "your_survey_data.gpkg", "perioda.csv"]
    missing_files = [f for f in files_to_check if not os.path.exists(f)]

    if missing_files:
        print("Missing files:")
        for f in missing_files:
            print(f"  - {f}")
        print("\nPlease download and update file paths before running.")
        return

    # Process the data
    result = download_and_process_data()

    if result is not None:
        print("\n=== Processing Complete ===")
        print("Three tables have been generated:")
        print("1. Filtered survey data (fid, lon, lat, date, fase)")
        print("2. Survey data with period (fid, lon, lat, date, fase, periode)")
        print("3. Final data with raster values (fid, lon, lat, date, fase, periode, p0)")
    else:
        print("Processing failed. Please check file paths and data format.")

if __name__ == "__main__":
    main()


# Alternative function for manual file processing
def process_with_custom_paths(geotiff_path, gpkg_path, csv_path):
    """
    Process data with custom file paths.

    Args:
        geotiff_path (str): Path to GeoTIFF file
        gpkg_path (str): Path to GPKG file
        csv_path (str): Path to CSV file
    """
    # Update the file paths in the global scope
    globals()['geotiff_path'] = geotiff_path
    globals()['gpkg_path'] = gpkg_path
    globals()['csv_path'] = csv_path

    return download_and_process_data()

=== Geospatial Data Processing Pipeline ===

Before running this script, please:
1. Download the GeoTIFF file from Google Drive
2. Download the GPKG survey data file from Google Drive
3. Download the perioda.csv file from Google Drive
4. Update the file paths in the script

Missing files:
  - your_geotiff_file.tif
  - your_survey_data.gpkg
  - perioda.csv

Please download and update file paths before running.


In [None]:
try:
    import gdown
    import pandas as pd
    import geopandas as gpd
    import rasterio
    from rasterio.sample import sample_gen
    import numpy as np
    from datetime import datetime
    import os
except ImportError as e:
    print(f"Error: {e}")
    print("Please install missing packages using:")
    print("!pip install rasterio geopandas gdown pandas numpy")
    raise

# Rest of your code...
def download_and_process_data():
    """
    Download and process geospatial data from Google Drive links.
    """
    # Google Drive URLs and output files
    geotiff_url = "https://drive.google.com/uc?id=1-Zdq_ZXj4WoX5ubQnoHRjSlHfHrx07x0"
    gpkg_url = "https://drive.google.com/uc?id=1AQcHlqzmpQIukLyPIkUnb04-pbjoET_y"
    csv_url = "https://drive.google.com/uc?id=1v8_2PwMxl1QGdcbC_RPgUGtYSZsmJDri"

    geotiff_path = "downloaded_geotiff.tif"
    gpkg_path = "downloaded_survey.gpkg"
    csv_path = "perioda.csv"

    print("Starting download and processing...")

    # Download files using gdown
    try:
        print("Downloading GeoTIFF...")
        gdown.download(geotiff_url, geotiff_path, quiet=False)

        print("Downloading GPKG...")
        gdown.download(gpkg_url, gpkg_path, quiet=False)

        print("Downloading CSV...")
        gdown.download(csv_url, csv_path, quiet=False)

        print("All files downloaded successfully!")
    except Exception as e:
        print(f"Download error: {e}")
        return None

    # Step 1: Load and prepare survey data
    print("\nLoading survey data from GPKG...")
    try:
        survey_data = gpd.read_file(gpkg_path).to_crs(epsg=4326)
        survey_data["lon"] = survey_data.geometry.x
        survey_data["lat"] = survey_data.geometry.y
        survey_data["fid"] = survey_data.index.astype(int)

        # Handle column name variations
        if "tanggal" in survey_data.columns:
            survey_data = survey_data.rename(columns={"tanggal": "date"})
        if "Phase" in survey_data.columns:
            survey_data = survey_data.rename(columns={"Phase": "fase"})

        # Select required columns
        required_cols = ['fid', 'lon', 'lat', 'date', 'fase']
        missing_cols = [c for c in required_cols if c not in survey_data.columns]

        if missing_cols:
            print(f"Warning: Missing columns: {missing_cols}. Adding with default values.")
            for col in missing_cols:
                if col == 'date':
                    survey_data[col] = datetime.now().strftime('%Y-%m-%d')
                elif col == 'fase':
                    survey_data[col] = 'unknown'

        output1 = survey_data[required_cols].copy()
        print("\n✅ TABEL 1 - Filtered survey data")
        print(output1.head())
    except Exception as e:
        print(f"Error loading GPKG: {e}")
        return None

    # Step 2: Load and merge period data
    print("\nLoading period lookup table...")
    try:
        period_lookup = pd.read_csv(csv_path, sep=';')
        period_lookup = period_lookup.rename(columns={"Periode": "fase"})
        period_lookup['fase'] = period_lookup['fase'].astype(str).str.strip()
        output1['fase'] = output1['fase'].astype(str).str.strip()

        output2 = pd.merge(output1, period_lookup, on="fase", how="left")

        # Convert fase to integer for band selection
        try:
            output2["periode"] = output2["fase"].astype(int)
        except ValueError:
            print("Warning: Could not convert 'fase' to integer. Using default period 1.")
            output2["periode"] = 1

        print("\n✅ TABEL 2 - Survey data with period")
        print(output2.head())
    except Exception as e:
        print(f"Error loading/merging CSV: {e}")
        return None

    # Step 3: Extract raster values
    print("\nExtracting raster values...")
    try:
        with rasterio.open(geotiff_path) as src:
            print(f"Raster info: {src.count} bands, {src.width}x{src.height} pixels")

            def get_pixel_value(lon, lat, band):
                try:
                    if not 1 <= band <= src.count:
                        return None
                    for val in sample_gen(src, [(lon, lat)], indexes=[band]):
                        return val[0]
                except Exception as e:
                    print(f"Error at ({lon}, {lat}) band {band}: {e}")
                    return None

            output2["p0"] = output2.apply(
                lambda row: get_pixel_value(row["lon"], row["lat"], row["periode"]),
                axis=1
            )

            output3 = output2[['fid', 'lon', 'lat', 'date', 'fase', 'periode', 'p0']]
            print("\n✅ TABEL 3 - Final results with raster values")
            print(output3.head())

            # Save results
            output_file = "processed_survey_data.csv"
            output3.to_csv(output_file, index=False)
            print(f"\nResults saved to: {output_file}")

            # Clean up
            for file in [geotiff_path, gpkg_path, csv_path]:
                try:
                    if os.path.exists(file):
                        os.remove(file)
                except:
                    pass

            return output3

    except Exception as e:
        print(f"Error processing raster: {e}")
        return None

def main():
    print("=== Geospatial Data Processing Pipeline ===")
    result = download_and_process_data()

    if result is not None:
        print("\n=== Processing Complete ===")
        print("Tiga tabel telah berhasil dibuat:")
        print("1. TABEL 1: Data survey terfilter")
        print("2. TABEL 2: Data survey dengan periode")
        print("3. TABEL 3: Data final dengan nilai raster")
    else:
        print("Processing failed. Please check the error messages.")

if __name__ == "__main__":
    main()

=== Geospatial Data Processing Pipeline ===
Starting download and processing...
Downloading GeoTIFF...


Downloading...
From (original): https://drive.google.com/uc?id=1-Zdq_ZXj4WoX5ubQnoHRjSlHfHrx07x0
From (redirected): https://drive.google.com/uc?id=1-Zdq_ZXj4WoX5ubQnoHRjSlHfHrx07x0&confirm=t&uuid=c03743cb-dcc8-4625-b948-75ce51b19698
To: /content/downloaded_geotiff.tif
100%|██████████| 1.92G/1.92G [00:11<00:00, 166MB/s]


Downloading GPKG...


Downloading...
From: https://drive.google.com/uc?id=1AQcHlqzmpQIukLyPIkUnb04-pbjoET_y
To: /content/downloaded_survey.gpkg
100%|██████████| 1.62M/1.62M [00:00<00:00, 117MB/s]


Downloading CSV...


Downloading...
From: https://drive.google.com/uc?id=1v8_2PwMxl1QGdcbC_RPgUGtYSZsmJDri
To: /content/perioda.csv
100%|██████████| 827/827 [00:00<00:00, 1.42MB/s]


All files downloaded successfully!

Loading survey data from GPKG...

✅ TABEL 1 - Filtered survey data
   fid         lon       lat       date  fase
0    0  107.463959 -6.260439  3/18/2024     1
1    1  107.464081 -6.260284  3/18/2024     1
2    2  107.463608 -6.260762  3/18/2024     1
3    3  107.461742 -6.261511  3/18/2024     1
4    4  107.463209 -6.260423  3/18/2024     1

Loading period lookup table...

✅ TABEL 2 - Survey data with period
   fid         lon       lat       date fase  Start Date    End Date  periode
0    0  107.463959 -6.260439  3/18/2024    1  2024-01-01  2024-01-13        1
1    1  107.464081 -6.260284  3/18/2024    1  2024-01-01  2024-01-13        1
2    2  107.463608 -6.260762  3/18/2024    1  2024-01-01  2024-01-13        1
3    3  107.461742 -6.261511  3/18/2024    1  2024-01-01  2024-01-13        1
4    4  107.463209 -6.260423  3/18/2024    1  2024-01-01  2024-01-13        1

Extracting raster values...
Raster info: 31 bands, 8079x3834 pixels

✅ TABEL 3 - Fi

In [3]:
!pip install rasterio geopandas gdown pandas



In [4]:
try:
    import gdown
    import pandas as pd
    import geopandas as gpd
    import rasterio
    from rasterio.sample import sample_gen
    import numpy as np
    from datetime import datetime
    import os
except ImportError as e:
    print(f"Error: {e}")
    print("Please install missing packages using:")
    print("!pip install rasterio geopandas gdown pandas numpy")
    raise

def download_and_process_data():
    """
    Download and process geospatial data from Google Drive links.
    Creates a table with p0 (current period) and p1-p6 (previous periods).
    """
    # Google Drive URLs and output files
    geotiff_url = "https://drive.google.com/uc?id=1-Zdq_ZXj4WoX5ubQnoHRjSlHfHrx07x0"
    gpkg_url = "https://drive.google.com/uc?id=1AQcHlqzmpQIukLyPIkUnb04-pbjoET_y"
    csv_url = "https://drive.google.com/uc?id=1v8_2PwMxl1QGdcbC_RPgUGtYSZsmJDri"

    geotiff_path = "downloaded_geotiff.tif"
    gpkg_path = "downloaded_survey.gpkg"
    csv_path = "perioda.csv"

    print("Starting download and processing...")

    # Download files using gdown
    try:
        print("Downloading GeoTIFF...")
        gdown.download(geotiff_url, geotiff_path, quiet=False)

        print("Downloading GPKG...")
        gdown.download(gpkg_url, gpkg_path, quiet=False)

        print("Downloading CSV...")
        gdown.download(csv_url, csv_path, quiet=False)

        print("All files downloaded successfully!")
    except Exception as e:
        print(f"Download error: {e}")
        return None

    # Step 1: Load and prepare survey data
    print("\nLoading survey data from GPKG...")
    try:
        survey_data = gpd.read_file(gpkg_path).to_crs(epsg=4326)
        survey_data["lon"] = survey_data.geometry.x
        survey_data["lat"] = survey_data.geometry.y
        survey_data["fid"] = survey_data.index.astype(int)

        # Handle column name variations
        if "tanggal" in survey_data.columns:
            survey_data = survey_data.rename(columns={"tanggal": "date"})
        if "Phase" in survey_data.columns:
            survey_data = survey_data.rename(columns={"Phase": "fase"})

        # Select required columns
        required_cols = ['fid', 'lon', 'lat', 'date', 'fase']
        missing_cols = [c for c in required_cols if c not in survey_data.columns]

        if missing_cols:
            print(f"Warning: Missing columns: {missing_cols}. Adding with default values.")
            for col in missing_cols:
                if col == 'date':
                    survey_data[col] = datetime.now().strftime('%Y-%m-%d')
                elif col == 'fase':
                    survey_data[col] = 'unknown'

        output1 = survey_data[required_cols].copy()
        print("\n✅ TABEL 1 - Filtered survey data")
        print(output1.head())
    except Exception as e:
        print(f"Error loading GPKG: {e}")
        return None

    # Step 2: Load and merge period data
    print("\nLoading period lookup table...")
    try:
        period_lookup = pd.read_csv(csv_path, sep=';')
        period_lookup = period_lookup.rename(columns={"Periode": "fase"})
        period_lookup['fase'] = period_lookup['fase'].astype(str).str.strip()
        output1['fase'] = output1['fase'].astype(str).str.strip()

        output2 = pd.merge(output1, period_lookup, on="fase", how="left")

        # Convert fase to integer for band selection
        try:
            output2["periode"] = output2["fase"].astype(int)
        except ValueError:
            print("Warning: Could not convert 'fase' to integer. Using default period 1.")
            output2["periode"] = 1

        print("\n✅ TABEL 2 - Survey data with period")
        print(output2.head())
    except Exception as e:
        print(f"Error loading/merging CSV: {e}")
        return None

    # Step 3: Extract raster values for current and previous periods
    print("\nExtracting raster values for all periods...")
    try:
        with rasterio.open(geotiff_path) as src:
            print(f"Raster info: {src.count} bands, {src.width}x{src.height} pixels")

            def get_pixel_value(lon, lat, band):
                """Extract pixel value from specific band at given coordinates"""
                try:
                    if not 1 <= band <= src.count:
                        return None
                    for val in sample_gen(src, [(lon, lat)], indexes=[band]):
                        return val[0]
                except Exception as e:
                    print(f"Error at ({lon}, {lat}) band {band}: {e}")
                    return None

            # Extract values for current period (p0)
            print("Extracting current period values (p0)...")
            output2["p0"] = output2.apply(
                lambda row: get_pixel_value(row["lon"], row["lat"], row["periode"]),
                axis=1
            )

            # Extract values for previous periods (p1 to p6)
            print("Extracting previous period values (p1-p6)...")
            for i in range(1, 7):  # p1 to p6
                col_name = f"p{i}"
                print(f"  Processing {col_name}...")

                output2[col_name] = output2.apply(
                    lambda row: get_pixel_value(
                        row["lon"],
                        row["lat"],
                        max(1, row["periode"] - i)  # Ensure band >= 1
                    ) if row["periode"] - i >= 1 else None,
                    axis=1
                )

            # Create final output with all required columns
            final_cols = ['fid', 'lon', 'lat', 'date', 'fase', 'periode', 'p0', 'p1', 'p2', 'p3', 'p4', 'p5', 'p6']
            output3 = output2[final_cols].copy()

            print("\n✅ TABEL 3 - Final results with all period values")
            print(output3.head())
            print(f"\nSummary of extracted values:")
            for col in ['p0', 'p1', 'p2', 'p3', 'p4', 'p5', 'p6']:
                non_null_count = output3[col].notna().sum()
                total_count = len(output3)
                print(f"  {col}: {non_null_count}/{total_count} valid values")

            # Save results
            output_file = "processed_survey_data_with_periods.csv"
            output3.to_csv(output_file, index=False)
            print(f"\nResults saved to: {output_file}")

            # Show example of period relationships
            print("\n📊 Example of period relationships:")
            if len(output3) > 0:
                sample_row = output3.iloc[0]
                current_period = sample_row['periode']
                print(f"Current periode: {current_period}")
                print(f"p0 (periode {current_period}): {sample_row['p0']}")
                for i in range(1, 7):
                    prev_period = max(1, current_period - i)
                    print(f"p{i} (periode {prev_period}): {sample_row[f'p{i}']}")

            # Clean up downloaded files
            for file in [geotiff_path, gpkg_path, csv_path]:
                try:
                    if os.path.exists(file):
                        os.remove(file)
                        print(f"Cleaned up: {file}")
                except:
                    pass

            return output3

    except Exception as e:
        print(f"Error processing raster: {e}")
        return None

def validate_results(df):
    """Validate the processing results"""
    if df is None or len(df) == 0:
        print("❌ No data to validate")
        return False

    print("\n🔍 Data Validation:")
    print(f"Total rows: {len(df)}")
    print(f"Total columns: {len(df.columns)}")

    # Check for required columns
    required_cols = ['lon', 'lat', 'date', 'fase', 'periode', 'p0', 'p1', 'p2', 'p3', 'p4', 'p5', 'p6']
    missing_cols = [col for col in required_cols if col not in df.columns]

    if missing_cols:
        print(f"❌ Missing columns: {missing_cols}")
        return False
    else:
        print("✅ All required columns present")

    # Check data completeness
    print("\nData completeness:")
    for col in ['p0', 'p1', 'p2', 'p3', 'p4', 'p5', 'p6']:
        completeness = (df[col].notna().sum() / len(df)) * 100
        print(f"  {col}: {completeness:.1f}%")

    return True

def main():
    print("=== Enhanced Geospatial Data Processing Pipeline ===")
    print("This pipeline extracts backscatter values for current and previous periods")
    print("p0 = current period, p1-p6 = 1-6 periods before\n")

    result = download_and_process_data()

    if result is not None:
        print("\n=== Processing Complete ===")
        print("Tiga tabel telah berhasil dibuat:")
        print("1. TABEL 1: Data survey terfilter")
        print("2. TABEL 2: Data survey dengan periode")
        print("3. TABEL 3: Data final dengan nilai raster untuk semua periode")

        # Validate results
        validate_results(result)

        print(f"\n📈 Final dataset shape: {result.shape}")
        print("Columns:", list(result.columns))

    else:
        print("❌ Processing failed. Please check the error messages.")

if __name__ == "__main__":
    main()

=== Enhanced Geospatial Data Processing Pipeline ===
This pipeline extracts backscatter values for current and previous periods
p0 = current period, p1-p6 = 1-6 periods before

Starting download and processing...
Downloading GeoTIFF...


Downloading...
From (original): https://drive.google.com/uc?id=1-Zdq_ZXj4WoX5ubQnoHRjSlHfHrx07x0
From (redirected): https://drive.google.com/uc?id=1-Zdq_ZXj4WoX5ubQnoHRjSlHfHrx07x0&confirm=t&uuid=4a6bbf6b-59a3-4072-9d86-2540b4f5b1f3
To: /content/downloaded_geotiff.tif
100%|██████████| 1.92G/1.92G [00:18<00:00, 105MB/s]


Downloading GPKG...


Downloading...
From: https://drive.google.com/uc?id=1AQcHlqzmpQIukLyPIkUnb04-pbjoET_y
To: /content/downloaded_survey.gpkg
100%|██████████| 1.62M/1.62M [00:00<00:00, 152MB/s]


Downloading CSV...


Downloading...
From: https://drive.google.com/uc?id=1v8_2PwMxl1QGdcbC_RPgUGtYSZsmJDri
To: /content/perioda.csv
100%|██████████| 827/827 [00:00<00:00, 1.77MB/s]


All files downloaded successfully!

Loading survey data from GPKG...

✅ TABEL 1 - Filtered survey data
   fid         lon       lat       date  fase
0    0  107.463959 -6.260439  3/18/2024     1
1    1  107.464081 -6.260284  3/18/2024     1
2    2  107.463608 -6.260762  3/18/2024     1
3    3  107.461742 -6.261511  3/18/2024     1
4    4  107.463209 -6.260423  3/18/2024     1

Loading period lookup table...

✅ TABEL 2 - Survey data with period
   fid         lon       lat       date fase  Start Date    End Date  periode
0    0  107.463959 -6.260439  3/18/2024    1  2024-01-01  2024-01-13        1
1    1  107.464081 -6.260284  3/18/2024    1  2024-01-01  2024-01-13        1
2    2  107.463608 -6.260762  3/18/2024    1  2024-01-01  2024-01-13        1
3    3  107.461742 -6.261511  3/18/2024    1  2024-01-01  2024-01-13        1
4    4  107.463209 -6.260423  3/18/2024    1  2024-01-01  2024-01-13        1

Extracting raster values for all periods...
Raster info: 31 bands, 8079x3834 pixels