In [None]:
import pandas as pd

In [None]:
# Load the St. Mary SLC table; index_col=0 makes the first CSV column the row index.
stmary = pd.read_csv('../geospacial/slc/stmary_landsat_slc.csv', index_col=0)

In [None]:
# Load the Milk SLC table; index_col=0 makes the first CSV column the row index.
milk= pd.read_csv('../geospacial/slc/milk_landsat_slc.csv', index_col=0)

### Clean up SLCs

In [None]:
# Number of St. Mary columns before the all-zero columns are dropped
stmary.shape[1]

In [None]:
# Number of Milk columns before the all-zero columns are dropped
milk.shape[1]

In [None]:
# Keep only the columns that hold at least one non-zero entry
stmary = stmary.loc[:, stmary.ne(0).any(axis=0)]
milk = milk.loc[:, milk.ne(0).any(axis=0)]

In [None]:
# Number of St. Mary columns left after the all-zero columns were dropped
stmary.shape[1]

In [None]:
# Number of Milk columns left after the all-zero columns were dropped
milk.shape[1]

In [None]:
# Check that every St. Mary row sums to 1 within a floating-point tolerance.
# NOTE: the flag keeps its historical name for compatibility, although the
# target value being tested is 1, not 0.
all_rows_sum_to_zero = ((stmary.sum(axis=1) - 1).abs() < 1e-11).all()

if all_rows_sum_to_zero:
    print("All St. Mary rows sum to one.")
else:
    print("Not all St. Mary rows sum to one.")

In [None]:
# Check that every Milk row sums to 1 within a floating-point tolerance.
# The sum is taken over `milk` here (the earlier version re-tested `stmary`).
# NOTE: the flag keeps its historical name for compatibility, although the
# target value being tested is 1, not 0.
all_rows_sum_to_zero = ((milk.sum(axis=1) - 1).abs() < 1e-11).all()

if all_rows_sum_to_zero:
    print("All Milk rows sum to one.")
else:
    print("Not all Milk rows sum to one.")

### Format St. Mary

In [None]:
# Display the St. Mary frame as the cell output, before its columns are renamed.
stmary

In [None]:
# Re-code the St. Mary column names by shifting both halves of each code.
# Every name is read as a 4-digit code: first two digits + last two digits.
LANDSAT_CLASS_OFFSET = 19  # original note: "add 19 for 19 landsat classes"
# NOTE(review): the original note describes "13 soil type classes + 2 to avoid
# 0 and double 13", but the code has always added 14 — confirm the intended offset.
SOIL_TYPE_OFFSET = 14

new_columns = []
for column_name in stmary.columns:
    # Left-pad to four characters so short codes (e.g. "101" or "7") still
    # split into two 2-digit halves; the Milk re-coding cell pads the same way.
    padded = str(column_name).zfill(4)
    first_part = int(padded[:2]) + LANDSAT_CLASS_OFFSET
    second_part = int(padded[2:]) + SOIL_TYPE_OFFSET
    # Zero-pad each half so the combined name never drops below four digits.
    new_columns.append(f"{first_part:02d}{second_part:02d}")

In [None]:
# Rename the columns
# Positional assignment: new_columns was built by iterating stmary.columns in order,
# so it must hold exactly one name per existing column.
stmary.columns = new_columns

In [None]:
# Display the St. Mary frame again to inspect the renamed columns.
stmary

### Reformat Milk

In [None]:
# Display the Milk frame as the cell output, before its columns are renamed.
milk

In [None]:
# Build the re-coded Milk column names: the first two digits stay as they are,
# the last two digits are incremented by 1 (to avoid 0 for soil type).
new_column_names = []
for raw_code in milk.columns:
    # Left-pad to four characters so the code splits into two 2-digit halves
    padded_code = raw_code.zfill(4)
    leading_half, trailing_half = padded_code[:2], padded_code[2:]

    # Shift the trailing half by one and restore its two-digit width
    shifted_trailing = str(int(trailing_half) + 1).zfill(2)

    new_column_names.append(leading_half + shifted_trailing)

In [None]:
# Rename the columns
# Positional assignment: new_column_names was built by iterating milk.columns in order,
# so it must hold exactly one name per existing column.
milk.columns = new_column_names

In [None]:
# Display the Milk frame again to inspect the renamed columns.
milk

### Merge the SLCs together

In [None]:
# Stack the Milk rows on top of the St. Mary rows (row-wise is the concat default);
# columns are aligned by name, so a column present in only one frame yields NaN in the other.
slc = pd.concat([milk, stmary])

In [None]:
# Replace the NaN entries of the concatenated frame 'slc' with 0
slc = slc.fillna(0)

In [None]:
# Display the merged frame after its missing values were filled with 0.
slc

In [None]:
# Check that every row of the merged frame sums to 1 within a floating-point tolerance.
# NOTE: the flag keeps its historical name for compatibility, although the
# target value being tested is 1, not 0.
all_rows_sum_to_zero = ((slc.sum(axis=1) - 1).abs() < 1e-11).all()

if all_rows_sum_to_zero:
    print("All rows sum to one.")
else:
    print("Not all rows sum to one.")

### Remap SLC names to remove missing classes

In [None]:
# Snapshot the current column labels of 'slc' before they are remapped
original_headers = list(slc.columns)
new_columns = []  # placeholder; reassigned in Step 4

In [None]:
# Step 1: Split every column label into (first two digits, remaining digits) as integers
separated_numbers = []
for header in slc.columns:
    code = str(header)
    separated_numbers.append((int(code[:2]), int(code[2:])))

In [None]:
# Step 2: Count the distinct values seen in the first half and in the second half of the codes
unique_first = len({pair[0] for pair in separated_numbers})
unique_second = len({pair[1] for pair in separated_numbers})

In [None]:
# Step 3: Give each distinct value its 1-based rank in ascending order
distinct_first = sorted({pair[0] for pair in separated_numbers})
distinct_second = sorted({pair[1] for pair in separated_numbers})
mapped_first = {value: rank for rank, value in enumerate(distinct_first, start=1)}
mapped_second = {value: rank for rank, value in enumerate(distinct_second, start=1)}

In [None]:
# Step 4: Translate every (first, second) pair through the two rank lookups
new_columns = []
for first_value, second_value in separated_numbers:
    new_columns.append((mapped_first[first_value], mapped_second[second_value]))

In [None]:
# Step 5: Format the remapped pairs as zero-padded labels and apply them to 'slc'
new_headers = [f"{first:02d}{second:02d}" for first, second in new_columns]
header_map = dict(zip(original_headers, new_headers))
slc = slc.rename(columns=header_map)

In [None]:
# Step 6: Fail loudly if any renamed column label is not exactly four characters long
if any(len(label) != 4 for label in slc.columns):
    raise ValueError("Some column names do not have four digits after renaming.")

In [None]:
# Write mapped_first to 'adjusted_landuse.txt', one "original: remapped" pair per line
with open('../geospacial/adjusted_landuse.txt', 'w') as out:
    out.writelines(f"{original}: {remapped}\n" for original, remapped in mapped_first.items())

In [None]:
# Write mapped_second to 'adjusted_soiltype.txt', one "original: remapped" pair per line
with open('../geospacial/adjusted_soiltype.txt', 'w') as out:
    out.writelines(f"{original}: {remapped}\n" for original, remapped in mapped_second.items())

In [None]:
# Display the merged frame with its remapped column names.
slc

### Re-order columns in ascending order

In [None]:
# Order the columns numerically by their first two digits, then by the remaining digits
ordered_columns = sorted(slc.columns, key=lambda code: (int(code[:2]), int(code[2:])))
slc_sorted = slc.reindex(columns=ordered_columns)

In [None]:
# Display the column-sorted frame before it is written to disk.
slc_sorted

In [None]:
# Save the DataFrame as a CSV file
# The row index is written as well, because to_csv is called without index=False.
slc_sorted.to_csv('../geospacial/slc/sorted_final_slc.csv')