# Description
____

This script renumbers the SLC column codes so that they have no gaps, then sorts the columns in ascending order.

In [None]:
import pandas as pd

### Inputs

In [None]:
# Load the SLC table; the first CSV column is used as the row index
slc = pd.read_csv(
    '../geospacial/slc/stmary_landsat_slc.csv',
    index_col=0,
)

### Clean up SLCs

In [None]:
# Keep only the columns that hold at least one non-zero value
has_nonzero = slc.ne(0).any(axis=0)
slc = slc.loc[:, has_nonzero]

In [None]:
# Check that every row of the SLC table sums to 1 within a small tolerance.
# The previous messages (and variable name) said "zero" although the test
# below compares each row sum against 1.
row_sums = slc.sum(axis=1)
all_rows_sum_to_one = ((row_sums - 1).abs() < 0.00000000001).all()

# Old, misleading name kept so anything that still reads it keeps working
all_rows_sum_to_zero = all_rows_sum_to_one

if all_rows_sum_to_one:
    print("All SLC rows sum to one.")
else:
    print("Not all SLC rows sum to one.")

### Remap SLC names to remove missing classes

In [None]:
# Remember the current column names of 'slc' so they can be mapped to new ones
original_headers = slc.columns.to_list()
new_columns = list()

In [None]:
# Step 1: Separate 4-digit numbers into two 2-digit numbers
# Each header is left-padded with zeros to four characters before slicing, so
# a code that lost its leading zero (e.g. 101 instead of 0101) is split as
# (1, 1) rather than (10, 1). Headers that already have four characters are
# split exactly as before.
padded_codes = [str(col).zfill(4) for col in slc.columns]
separated_numbers = [(int(code[:2]), int(code[2:])) for code in padded_codes]

In [None]:
# Step 2: Count the distinct values among the first two digits and among the last two digits
distinct_first = {pair[0] for pair in separated_numbers}
distinct_second = {pair[1] for pair in separated_numbers}
unique_first = len(distinct_first)
unique_second = len(distinct_second)

In [None]:
# Step 3: Give every distinct 2-digit number its 1-based rank in ascending order
sorted_first = sorted({pair[0] for pair in separated_numbers})
sorted_second = sorted({pair[1] for pair in separated_numbers})
mapped_first = dict(zip(sorted_first, range(1, len(sorted_first) + 1)))
mapped_second = dict(zip(sorted_second, range(1, len(sorted_second) + 1)))

In [None]:
# Step 4: Translate each (first, second) pair into its pair of mapped ranks
new_columns = [
    (mapped_first[pair[0]], mapped_second[pair[1]])
    for pair in separated_numbers
]

In [None]:
# Step 5: Build the zero-padded 4-character names and rename the columns of 'slc' in place
new_headers = [f"{first:02d}{second:02d}" for first, second in new_columns]
header_mapping = dict(zip(original_headers, new_headers))
slc.rename(columns=header_mapping, inplace=True)

In [None]:
# Step 6: Verify that every column name in 'slc' is four characters long after renaming
if any(len(column_name) != 4 for column_name in slc.columns):
    raise ValueError("Some column names do not have four digits after renaming.")

In [None]:
# Write mapped_first to 'adjusted_landuse.txt', one "original: new" pair per line
with open('../geospacial/adjusted_landuse.txt', 'w') as landuse_file:
    landuse_file.writelines(
        f"{key}: {value}\n" for key, value in mapped_first.items()
    )

In [None]:
# Write mapped_second to 'adjusted_soiltype.txt', one "original: new" pair per line
with open('../geospacial/adjusted_soiltype.txt', 'w') as soiltype_file:
    soiltype_file.writelines(
        f"{key}: {value}\n" for key, value in mapped_second.items()
    )

### Re-order columns in ascending order

In [None]:
# Order the columns by their first two digits, then by their last two digits
def _slc_sort_key(name):
    """Return the (first two digits, last two digits) of a column name as ints."""
    return int(name[:2]), int(name[2:])


ordered_columns = sorted(slc.columns, key=_slc_sort_key)
slc_sorted = slc.reindex(columns=ordered_columns)

In [None]:
# Write the column-sorted table to disk as a CSV file
slc_sorted.to_csv(path_or_buf='../geospacial/slc/sorted_final_slc.csv')