In [34]:
import numpy as np
import os
import scipy.io as sio
from sklearn.preprocessing import OneHotEncoder

# Directory that holds the .mat files to load.
directory = r"C:\Users\rrsuj\Downloads\capstone\data"

# Per-sample accumulators, filled in lockstep by the loading loop below so
# that index i of every list refers to the same file.
roof_types = []
degrees = []
height_width_ratios = []
Cp_real = []

# os.listdir returns names in arbitrary order; sort them so the order of the
# loaded samples is reproducible between runs and machines.
files = sorted(os.listdir(directory))

# Load every .mat file whose name encodes a known roof type, a height-width
# ratio and an angle, e.g. "Cp_ts_RCH18_deg000.mat".
#
# Splitting such a name on '_' gives ['Cp', 'ts', 'RCH18', 'deg000.mat'], so
# the ratio and angle tokens are located relative to the roof-type token
# instead of at fixed positions (fixed indices 1 and 2 land on 'ts' and
# 'RCH18', which made every file fail the numeric check).
# NOTE(review): the data directory also contains 'RAH' files, which are
# skipped as unrecognized; add 'RAH' to this tuple if they should be loaded.
known_roof_types = ('RCH', 'ROH')

for file in files:
    if not file.endswith('.mat'):
        continue
    print(f"Loading file: {file}")

    # Best-effort batch load: a failure on one file is reported and the
    # remaining files are still processed.
    try:
        # First known roof-type code that occurs anywhere in the name.
        roof_type = next(
            (code for code in known_roof_types if code in file), None
        )
        if roof_type is None:
            print(f"Skipping file {file} due to unrecognized roof type.")
            continue

        parts = file.split('_')
        # Position of the token that starts with the roof type ('RCH18');
        # the angle token ('deg000.mat') must directly follow it.
        roof_idx = next(
            (i for i, part in enumerate(parts) if part.startswith(roof_type)),
            None,
        )
        if roof_idx is None or roof_idx + 1 >= len(parts):
            print(f"Skipping file {file} due to unexpected filename format.")
            continue

        # Two digits after the roof-type code: '06', '12', '18'.
        ratio_start = len(roof_type)
        height_width_ratio_str = parts[roof_idx][ratio_start:ratio_start + 2]
        # Three digits after the 3-character 'deg' prefix: '000', '023', ...
        degree_str = parts[roof_idx + 1][3:6]

        if not (height_width_ratio_str.isdigit() and degree_str.isdigit()):
            print(f"Skipping file {file} due to invalid numeric values in the filename.")
            continue

        height_width_ratio = int(height_width_ratio_str)
        degree = int(degree_str)

        # Load the data from the .mat file.
        data = sio.loadmat(os.path.join(directory, file))

        if 'Cp' not in data:
            print(f"Skipping file {file} due to missing 'Cp' data.")
            continue

        Cp = data['Cp']
        if Cp.size == 0:
            print(f"Skipping file {file} due to empty 'Cp' data.")
            continue

        # Append to all four lists together so their indices stay aligned.
        Cp_real.append(Cp)
        roof_types.append(roof_type)
        degrees.append(degree)
        height_width_ratios.append(height_width_ratio)

        # Debugging: print the extracted roof type and other details.
        print(f"Extracted - Roof type: {roof_type}, Height-Width Ratio: {height_width_ratio}, Degree: {degree}")

    except Exception as e:
        print(f"Skipping file {file} due to error: {e}")

# Turn the per-file accumulators into numpy arrays.
roof_types = np.array(roof_types)
degrees = np.array(degrees)
height_width_ratios = np.array(height_width_ratios)
Cp_real = np.array(Cp_real)

# Show which roof types were collected.
print(f"Roof types: {roof_types}")

# Encoding and feature assembly only make sense when at least one file was
# loaded; otherwise report the problem and leave X undefined.
has_samples = roof_types.size > 0
if not has_samples:
    print("No roof types found. Please check the input data.")
    print("No data to process due to empty roof_types.")
else:
    # One-hot encode the roof-type labels (the encoder expects a 2-D column).
    encoder = OneHotEncoder(sparse_output=False)
    roof_types_encoded = encoder.fit_transform(roof_types[:, np.newaxis])

    # Feature matrix columns: angle, one-hot roof type, height-width ratio.
    feature_columns = [
        degrees[:, np.newaxis],
        roof_types_encoded,
        height_width_ratios[:, np.newaxis],
    ]
    X = np.hstack(feature_columns)
    print(f"Loaded data: {X.shape}, Cp_real: {Cp_real.shape}")


Loading file: Cp_ts_RAH06_deg000.mat
Skipping file Cp_ts_RAH06_deg000.mat due to unrecognized roof type.
Loading file: Cp_ts_RAH06_deg023.mat
Skipping file Cp_ts_RAH06_deg023.mat due to unrecognized roof type.
Loading file: Cp_ts_RAH06_deg045.mat
Skipping file Cp_ts_RAH06_deg045.mat due to unrecognized roof type.
Loading file: Cp_ts_RAH06_deg068.mat
Skipping file Cp_ts_RAH06_deg068.mat due to unrecognized roof type.
Loading file: Cp_ts_RAH06_deg090.mat
Skipping file Cp_ts_RAH06_deg090.mat due to unrecognized roof type.
Loading file: Cp_ts_RAH12_deg000.mat
Skipping file Cp_ts_RAH12_deg000.mat due to unrecognized roof type.
Loading file: Cp_ts_RAH12_deg023.mat
Skipping file Cp_ts_RAH12_deg023.mat due to unrecognized roof type.
Loading file: Cp_ts_RAH12_deg045.mat
Skipping file Cp_ts_RAH12_deg045.mat due to unrecognized roof type.
Loading file: Cp_ts_RAH12_deg068.mat
Skipping file Cp_ts_RAH12_deg068.mat due to unrecognized roof type.
Loading file: Cp_ts_RAH12_deg090.mat
Skipping file Cp_t