Similar to MultiScaleSimFull, but also quantifying the $R^2$ of the MLP model on perturbed data. See MultiScaleSimFull for a full description of the file

In [5]:
import os
# Set your working directory
wd = "./Sim"
os.chdir(wd)
import sys
import random
import pandas as pd
from itertools import product
import copy
import logging 
import time
import random
from PIL import Image, ImageDraw, ImageFilter, ImageOps, ImageEnhance
from collections import defaultdict
from Utils import process_images
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import torch
import torch.nn as nn
from transformers import CLIPModel, CLIPProcessor, AutoImageProcessor, SwinModel, set_seed
from torchvision import transforms
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from sklearn.decomposition import PCA
from scipy import stats

# Global Variables
MODEL_TYPE = 'CLIP' # 'CLIP', 'SWIN', 
CLIP=True
SWIN=!CLIP

assert CLIP != SWIN
REDOWNLOADDATA = False
Separate = False
Full=True
target_name = 'NOTobsY' # 'obsY', or 'tauPred'
image_sizes = [32, 256]
monte_is = [0, 1, 2, 3, 4, 5]
NUM_IMAGE_SIZES = len(image_sizes)

# Path parameters
optimize_image_reps = "clip-rsicd"
data_type = "image"
perturbation_type = "DHS"
applications = ["peru"]
perturbation_magnitude = 0
image_type = "original"
# For privacy preserving visualisation
if False:
    perturbation_type = "deterministic"
    applications = ["peru"]
    perturbation_magnitude = 10
    image_type = "cluster"
    
if sys.argv[1] == '-f':
    iter_num = -1
    print("Warning: Running in Jupyter Notebook. Using default value -1.")
elif len(sys.argv) > 1:
    args = sys.argv[1:]  # Get command line arguments
    iter_num = int(args[0])  # Assuming args is defined earlier in the code
    monte_carlo_i = int(args[1])
    if len(sys.argv) > 3:
        image_size1 = args[2]
        image_size2 = args[3]
else:
    iter_num = -1
    print("Warning: No iteration number provided. Using default value -1.")

# Directories
os.makedirs('./Figures', exist_ok=True)
os.makedirs('./TrainingInfo', exist_ok=True)
os.makedirs('./log', exist_ok=True)
os.makedirs('./FineTuneIterResults', exist_ok=True)
save_ImageTensor_dir = './DataInfo/Images'
saveObsWDir = './DataInfo/ObsW'
saveGeoDir = './DataInfo/geo'
saveObsYDir = './DataInfo/ObsY'
saveTrainIndicesDir = './DataInfo/TrainIndices'
saveResDir = './FineTuneIterResults/FineTuneRes'
saveRepDir = './FineTuneIterResults/FineTuneReps'

saveDirs = [saveObsWDir, saveGeoDir, saveObsYDir, save_ImageTensor_dir, saveTrainIndicesDir, saveResDir, saveResDir]
for dir in saveDirs:
    os.makedirs(dir, exist_ok=True)
    os.makedirs(dir, exist_ok=True)
logging.basicConfig(filename=f'./log/trainingIter_{iter_num}.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
    
def set_all_seeds(seed_value=42):
    # 1. Python's built-in random library
    random.seed(seed_value)
    
    # 2. NumPy's random seed
    np.random.seed(seed_value)
    
    torch.manual_seed(seed_value)
    
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed_value)
set_all_seeds(1)

# Conditional loading of models
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if CLIP:
    # Load CLIP model and processor
    backbone_model = CLIPModel.from_pretrained("flax-community/clip-rsicd-v2").to(device)
    backbone_processor = CLIPProcessor.from_pretrained("flax-community/clip-rsicd-v2")
    print("Running CLIP")

elif SWIN:
    # Load Swin model and processor
    backbone_processor = AutoImageProcessor.from_pretrained("microsoft/swin-tiny-patch4-window7-224")
    backbone_model = SwinModel.from_pretrained("microsoft/swin-tiny-patch4-window7-224").to(device)
    print("Running SWIN")

else:
    # Raise an error if neither model is selected
    raise ValueError("You need to specify a valid backbone model: CLIP or SWIN.")
#from transformers import ViTMAEModel, AutoImageProcessor
#backbone_processor = AutoImageProcessor.from_pretrained("facebook/vit-mae-base")
#backbone_model = ViTMAEModel.from_pretrained("facebook/vit-mae-base")

Running CLIP


# Get Data

In [1]:


# Initialize lists and dictionaries
targets = []
application_len = {}
application_image_ids = {}
ObsW = {}
ObsY = {}
geo = {}
results_list = []
pil_images_list = []
sampled_images_list = []
perturbation_magnitude = 0
monte_is = [1, 0, 2, 3, 4, 5]
for application in applications:
    for image_size in image_sizes:
        for monte_carlo_i in monte_is:
            if image_size == 256 and monte_carlo_i != 1: 
                break
            if monte_carlo_i == 1:
                perturbation_magnitude = 0
            else:
                perturbation_magnitude = 7
                
            # Define image directory with a single image size
            image_size = image_size
            # Get image directory path
            image_dir = f'./Data/{application}/monte_carlo_{monte_carlo_i}/satellite_images/landsat/{perturbation_type}/{image_size}/30/{image_type}_{application}/magnitude_{perturbation_magnitude}/'
            my_data_path = "./Data/Joint/pooled_hh.dta"
            geo_path = f"./Data/{application}/master/gps_locations/{application}_cluster_center_coordinates_per_person_fully_subsetted.csv"
            
            # Where the results live
            results_path = f"./SavedResults/{application}/monte_carlo_1/{optimize_image_reps}/landsat/DHS/{image_size}/30/asset_hh_index_diff/rs_nDepthIS1_analysisTypeISheterogeneity_imageModelClassISVisionTransformer_optimizeImageRepIS{optimize_image_reps}_dataTypeISimage_applicationIS{application}_perturbationMagnitudeIS0_monte_carlo_iIS1_original_10.csv"

            # Load and preprocess data
            geo_df = pd.read_csv(geo_path)
            my_data = pd.read_stata(my_data_path)  
            new_columns = pd.DataFrame({
                'obsY': my_data['asset_hh_index_end'] - my_data['asset_hh_index_bsl'],
                'treatment_indicator': my_data['treatment'].map({'Control': 0, 'Treatment': 1}),
            })
            my_data = pd.concat([my_data, new_columns], axis=1)
            my_data = my_data[my_data['id'].astype(str).isin(geo_df['FPrimary'].astype(str))]
            my_data = my_data[my_data['obsY'].notna()].reset_index(drop=True)

            if len(my_data) % 2 != 0: 
                my_data = my_data.iloc[:-1]

            total_size = len(my_data)
            application_len[application] = total_size

            # Assign all indices for processing without train-test split
            all_indices = list(my_data.index)

            # Load outcome targets
            results_df = pd.read_csv(results_path)
            new_targets = results_df['tau_est'].values
            targets.extend(list(new_targets[all_indices]))

            id_with_outcome = my_data['id'].astype(str).values
            assert (len(id_with_outcome) % 2) == 0, "ID count is not even."

            # Function to extract ID from filename
            def extract_id(filename):
                return filename.split("-")[0]

            # Get sorted list of image paths based on IDs
            subsetted_image_paths = [file for file in os.listdir(image_dir) if extract_id(file) in id_with_outcome]

            # Create a mapping from ID to filename
            id_to_filename = {extract_id(file): file for file in subsetted_image_paths}

            # Ensure all IDs have corresponding images
            missing_ids = set(id_with_outcome) - set(id_to_filename.keys())

            if missing_ids:
                raise ValueError(f"Missing images in image_dir for IDs: {missing_ids}")

            # Process images in the order of id_with_outcome
            pil_images_app = process_images(image_dir, [id_to_filename[id] for id in id_with_outcome])
            
            if monte_carlo_i == 1:
                application_image_ids[(application, image_size)] = list(my_data['id'].astype(str))

                # Get auxiliary info for regression
                ObsW[(application, image_size)] = my_data['treatment_indicator']
                ObsY[(application, image_size)] = my_data['obsY']
                geo[(application, image_size)] = my_data['geo']
                pil_images_list.append(pil_images_app)
                results_list.append(results_df)
            else:
                sampled_images_list.append(pil_images_app)
                print(f"Saved sampled images of size {image_size} from {monte_carlo_i}th iteration")

NameError: name 'applications' is not defined

# Define Image Augmentation Functions

In [7]:
def add_black_blob(image, blob_size=2):
    image = image.copy()
    draw = ImageDraw.Draw(image)
    width, height = image.size
    center = (width // 2, height // 2)
    top_left = (center[0] - blob_size, center[1] - blob_size)
    bottom_right = (center[0] + blob_size, center[1] + blob_size)
    draw.rectangle([top_left, bottom_right], fill='black')
    return image

def add_edge_fade(image, fade_color=(0, 0, 0), fade_size=30):
    # Convert the image to grayscale to create a radial gradient mask
    width, height = image.size
    x = np.linspace(-1, 1, width)
    y = np.linspace(-1, 1, height)
    xx, yy = np.meshgrid(x, y)
    distance = np.sqrt(xx**2 + yy**2)

    # Create a radial fade mask (distance scaled to fade_size)
    mask = np.clip(1 - distance * fade_size, 0, 1)
    mask = (mask * 255).astype(np.uint8)

    # Convert the grayscale mask to an image
    mask_image = Image.fromarray(mask)

    # Create a solid image with the fade color
    fade_image = Image.new("RGB", (width, height), fade_color)

    # Composite the original image and the fade image using the mask
    faded_image = Image.composite(image, fade_image, mask_image)

    return faded_image

# Function to adjust the contrast of an image
def adjust_contrast(image, contrast_factor=1.2):
    enhancer = ImageEnhance.Contrast(image)
    return enhancer.enhance(contrast_factor)

# Define a simple MLP model (as in your original code)
class MLP(nn.Module):
    def __init__(self, input_size):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Linear(128, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.model(x)
    
# Helper function to calculate confidence intervals
def mean_confidence_interval(data, confidence=0.95):
    n = len(data)
    mean = np.mean(data)
    sem = stats.sem(data)  # Standard Error of the Mean
    margin = sem * stats.t.ppf((1 + confidence) / 2., n-1)
    return mean, margin

# Implement Loop

In [8]:
import random
import numpy as np
import pandas as pd
from itertools import product
from collections import defaultdict
import torch
import torch.nn as nn
import torch.optim as optim 
from sklearn.model_selection import KFold
import copy
import matplotlib.pyplot as plt

def mean_confidence_interval(data, confidence=0.95):
    """Calculate mean and confidence interval"""
    if not data:  # Check if data list is empty
        return float('-inf'), 0
    
    data = np.array(data)
    mean = np.mean(data)
    if len(data) < 2:  # Need at least 2 samples for std calculation
        return mean, 0
        
    stderr = np.std(data, ddof=1) / np.sqrt(len(data))
    interval = stderr * 1.96  # For 95% confidence interval
    return mean, interval

# Define variables and their possible values
variables = ['ROTATION', 'BLOB', 'CONTRAST', 'EDGE']
values = [0, 1]

# Generate all possible combinations
combinations = list(product(values, repeat=len(variables)))

# Create a list to store results
results = []

# Create readable index names
index_names = []

for combo in combinations:
    # Reset data collection for each combination
    normal_r2s = [[], []]  # One list for each image size
    sampled_r2s = []
    cr_r2s = []
    
    # Create a dictionary mapping variables to their values
    params = dict(zip(variables, combo))
    ROTATION = params['ROTATION']
    BLOB = params['BLOB']
    CONTRAST = params['CONTRAST']
    EDGE = params['EDGE']
    
    # Create readable index name
    index_name = '_'.join([f"{var[:3]}{val}" for var, val in params.items()])
    
    for seed in [10, 20]:
        random.seed(seed)

        # Total number of images
        n_images = len(pil_images_list[0])

        blob_percentage = 0.5
        rotate_percentage = 0.5

        num_blob = int(n_images * blob_percentage)
        num_rotate = int(n_images * rotate_percentage)

        # Generate unique random indices
        rotation_indices = random.sample(range(n_images), num_rotate)
        blob_indices = random.sample(range(n_images), num_blob)
        edge_indices = random.sample(range(n_images), num_blob)
        contrast_indices = random.sample(range(n_images), num_blob)

        # Generate synthetic outcomes
        synthetic_outcomes = np.zeros(len(pil_images_list[0]))

        if ROTATION:
            synthetic_outcomes[rotation_indices] += np.random.normal(loc=-100.0, scale=10.0, size=len(rotation_indices))
        if BLOB:
            synthetic_outcomes[blob_indices] += np.random.normal(loc=100.0, scale=10.0, size=len(blob_indices))
        if EDGE:
            synthetic_outcomes[edge_indices] += np.random.normal(loc=100.0, scale=0.0, size=len(edge_indices))
        if CONTRAST:
            synthetic_outcomes[contrast_indices] += np.random.normal(loc=100.0, scale=10.0, size=len(contrast_indices))

        augmented_images_list = copy.deepcopy(pil_images_list)

        # Apply image augmentations
        for i in range(NUM_IMAGE_SIZES):
            if BLOB:
                for idx in blob_indices:
                    augmented_images_list[i][idx] = add_black_blob(augmented_images_list[i][idx], blob_size=1)

            if EDGE and i == 1:
                for idx in edge_indices:
                    augmented_images_list[i][idx] = add_edge_fade(augmented_images_list[i][idx].copy(), 
                                                                fade_color=(0, 0, 0), fade_size=0.5)

            if ROTATION:
                for idx in rotation_indices:
                    augmented_images_list[i][idx] = augmented_images_list[i][idx].rotate(90, expand=True)

            if CONTRAST:
                for idx in contrast_indices:
                    augmented_images_list[i][idx] = adjust_contrast(augmented_images_list[i][idx], contrast_factor=1.8)

        # Get embeddings for all images
        all_image_embeddings = defaultdict(list)
        for i, image_size in enumerate(image_sizes):
            pil_images_app = augmented_images_list[i]
            batch_size = 64

            for j in range(0, len(pil_images_app), batch_size):
                batch_images = pil_images_app[j:j+batch_size]
                image_preprocessed = backbone_processor(images=batch_images, return_tensors="pt").to(device)

                with torch.no_grad():
                    if CLIP:
                        image_embeddings = backbone_model.get_image_features(**image_preprocessed)
                    elif SWIN:
                        outputs = model(**inputs)
                        last_hidden_states = outputs.last_hidden_state.detach()
                        image_embeddings = torch.mean(last_hidden_states, dim=1)

                all_image_embeddings[image_size].append(image_embeddings)

            all_image_embeddings[image_size] = torch.cat(all_image_embeddings[image_size], dim=0)
            
        # Get embeddings for all images
        sampled_images_embeddings = defaultdict(list)
        for monte_i in monte_is:
            if monte_i == 1:
                continue
                
            pil_images_app = sampled_images_list[i]
            batch_size = 64

            for j in range(0, len(pil_images_app), batch_size):
                batch_images = pil_images_app[j:j+batch_size]
                image_preprocessed = backbone_processor(images=batch_images, return_tensors="pt").to(device)

                with torch.no_grad():
                    if CLIP:
                        image_embeddings = backbone_model.get_image_features(**image_preprocessed)
                    elif SWIN:
                        outputs = model(**inputs)
                        last_hidden_states = outputs.last_hidden_state.detach()
                        image_embeddings = torch.mean(last_hidden_states, dim=1)

                sampled_images_embeddings[monte_i].append(image_embeddings)

            sampled_images_embeddings[monte_i] = torch.cat(sampled_images_embeddings[monte_i], dim=0)


        # Initialize KFold
        kf = KFold(n_splits=5, shuffle=True, random_state=42)

        # Single resolution training
        mse_list = defaultdict(list)
        r2_list = defaultdict(list)

        for i, image_size in enumerate(image_sizes):
            X = all_image_embeddings[image_size]
            Y = synthetic_outcomes

            if torch.is_tensor(X):
                X = X.cpu().numpy()
            if torch.is_tensor(Y):
                Y = Y.cpu().numpy()

            for fold, (train_index, test_index) in enumerate(kf.split(X)):
                X_train, X_test = X[train_index], X[test_index]
                Y_train, Y_test = Y[train_index], Y[test_index]

                # Convert to tensors
                X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
                Y_train_tensor = torch.tensor(Y_train, dtype=torch.float32).unsqueeze(1)
                X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
                Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32).unsqueeze(1)

                # Train model
                input_size = X_train.shape[1]
                model = MLP(input_size)
                optimizer = optim.Adam(model.parameters(), lr=0.001)
                loss_fn = nn.MSELoss()

                # Training loop
                for epoch in range(100):
                    model.train()
                    Y_pred_train = model(X_train_tensor)
                    loss = loss_fn(Y_pred_train, Y_train_tensor)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                # Evaluation
                model.eval()
                with torch.no_grad():
                    Y_pred_test = model(X_test_tensor).numpy()

                # Calculate metrics
                sse = np.sum((Y_test - Y_pred_test.squeeze(1))**2)
                mse = np.mean((Y_test - Y_pred_test.squeeze(1))**2)
                tss = np.sum((Y_test - Y_test.mean())**2)
                r2 = 1 - (sse / tss)

                mse_list[image_size].append(mse)
                r2_list[image_size].append(r2)

            # Store average R² for this image size
            if r2_list[image_size]:
                normal_r2s[i].append(np.mean(r2_list[image_size]))

                
                
        # Initialize KFold for Sampling Training
        kf = KFold(n_splits=5, shuffle=True, random_state=42)

        mse_sample = []
        r2_sample = []

        embeddings_list = []

        # Extract and collect embeddings for each `monte_i`
        
        for j in range(len(monte_is)):
            monte_i = j
            if monte_i == 1: 
                continue
            X = sampled_images_embeddings[monte_i]
            if torch.is_tensor(X):
                X = X.cpu().numpy()
            embeddings_list.append(X)
            embeddings_matrix = np.column_stack(embeddings_list)
            X = all_image_embeddings[256].cpu().numpy() if torch.is_tensor(all_image_embeddings[256]) else all_image_embeddings[256]
            X2 = all_image_embeddings[32].cpu().numpy() if torch.is_tensor(all_image_embeddings[32]) else all_image_embeddings[32]
            embeddings_list = [embeddings_matrix, X]
            X = np.column_stack(embeddings_list)

            Y = synthetic_outcomes

            if torch.is_tensor(X):
                X = X.cpu().numpy()
            if torch.is_tensor(Y):
                Y = Y.cpu().numpy()

            for fold, (train_index, test_index) in enumerate(kf.split(X)):
                X_train, X_test = X[train_index], X[test_index]
                Y_train, Y_test = Y[train_index], Y[test_index]

                # Convert to tensors
                X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
                Y_train_tensor = torch.tensor(Y_train, dtype=torch.float32).unsqueeze(1)
                X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
                Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32).unsqueeze(1)

                # Train model
                input_size = X_train.shape[1]
                model = MLP(input_size)
                optimizer = optim.Adam(model.parameters(), lr=0.001)
                loss_fn = nn.MSELoss()

                # Training loop
                for epoch in range(100):
                    model.train()
                    Y_pred_train = model(X_train_tensor)
                    loss = loss_fn(Y_pred_train, Y_train_tensor)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                # Evaluation
                model.eval()
                with torch.no_grad():
                    Y_pred_test = model(X_test_tensor).numpy()

                # Calculate metrics
                sse = np.sum((Y_test - Y_pred_test.squeeze(1))**2)
                mse = np.mean((Y_test - Y_pred_test.squeeze(1))**2)
                tss = np.sum((Y_test - Y_test.mean())**2)
                r2 = 1 - (sse / tss)

                mse_sample.append(mse)
                r2_sample.append(r2)

            # Store average R² for this image size
            if r2_sample:
                sampled_r2s.append(np.mean(r2_sample))


        # Cross-resolution training
        X_list = [all_image_embeddings[image_size] for image_size in image_sizes]
        X = torch.cat(X_list, dim=1)
        
        if torch.is_tensor(X):
            X = X.cpu().numpy()

        mse_cross = []
        r2_cross = []

        for fold, (train_index, test_index) in enumerate(kf.split(X)):
            X_train, X_test = X[train_index], X[test_index]
            Y_train, Y_test = Y[train_index], Y[test_index]

            # Convert to tensors
            X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
            Y_train_tensor = torch.tensor(Y_train, dtype=torch.float32).unsqueeze(1)
            X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
            Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32).unsqueeze(1)

            # Train model
            input_size = X_train.shape[1]
            model = MLP(input_size)
            optimizer = optim.Adam(model.parameters(), lr=0.001)
            loss_fn = nn.MSELoss()

            # Training loop
            for epoch in range(100):
                model.train()
                Y_pred_train = model(X_train_tensor)
                loss = loss_fn(Y_pred_train, Y_train_tensor)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # Evaluation
            model.eval()
            with torch.no_grad():
                Y_pred_test = model(X_test_tensor).numpy()

            # Calculate metrics
            sse = np.sum((Y_test - Y_pred_test.squeeze(1))**2)
            mse = np.mean((Y_test - Y_pred_test.squeeze(1))**2)
            tss = np.sum((Y_test - Y_test.mean())**2)
            r2 = 1 - (sse / tss)

            mse_cross.append(mse)
            r2_cross.append(r2)

        if r2_cross:
            cr_r2s.append(np.mean(r2_cross))

    # Calculate final statistics
    sample_mean, sample_margin = mean_confidence_interval(sampled_r2s)
    cr_mean, cr_margin = mean_confidence_interval(cr_r2s)
    normal_32_mean, normal_32_margin = mean_confidence_interval(normal_r2s[0])
    normal_256_mean, normal_256_margin = mean_confidence_interval(normal_r2s[1])

    # Calculate max mean and difference ratio
    valid_means = [m for m in [normal_32_mean, normal_256_mean] if m != float('-inf')]
    max_mean = max(valid_means) if valid_means else float('-inf')
    
    if max_mean != float('-inf') and max_mean != 0:
        max_diff_ratio = (cr_mean - max_mean) / max_mean
        max_diff_sample_ratio = (sample_mean - max_mean) / max_mean
        sample_cr_ratio = (sample_mean - cr_mean)/cr_mean
    else:
        max_diff_ratio = float('-inf')
        max_diff_sample_ratio = float('-inf')
        sample_cr_ratio = float('-inf')

    # Store results
    experiment_results = {
        'cr_mean': cr_mean,
        'normal_32_mean': normal_32_mean,
        'normal_256_mean': normal_256_mean,
        'sample_mean': sample_mean,
        'cr_margin': cr_margin,
        'normal_32_margin': normal_32_margin,
        'normal_256_margin': normal_256_margin,
        'sample_margin': sample_margin,
        'max_diff_ratio': max_diff_ratio, 
        'max_diff_sample_ratio': max_diff_sample_ratio,
        'sample_cr_ratio': sample_cr_ratio
    }
    
    print(experiment_results)
    
    result_row = {**params, **experiment_results}
    results.append(result_row)
    index_names.append(index_name)

# Create DataFrame
df = pd.DataFrame(results, index=index_names)

# Reorder columns
param_cols = variables
result_cols = ['cr_mean', 'normal_32_mean', 'normal_256_mean', 'sample_mean', 
               'cr_margin', 'normal_32_margin', 'normal_256_margin', 'sample_margin',
               'max_diff_ratio', 'max_diff_sample_ratio', 'sample_cr_ratio']
df = df[param_cols + result_cols]

# Display results
print("\nDataFrame Preview:")
print(df.head())

# Save results
df.to_csv('experiment_results_sample_32_and_256_3to7.csv')

  r2 = 1 - (sse / tss)
  r2 = 1 - (sse / tss)
  r2 = 1 - (sse / tss)
  x = asanyarray(arr - arrmean)


{'cr_mean': -inf, 'normal_32_mean': -inf, 'normal_256_mean': -inf, 'sample_mean': -inf, 'cr_margin': nan, 'normal_32_margin': nan, 'normal_256_margin': nan, 'sample_margin': nan, 'max_diff_ratio': -inf, 'max_diff_sample_ratio': -inf, 'sample_cr_ratio': -inf}
{'cr_mean': 0.14898893277553354, 'normal_32_mean': -0.006074598258624719, 'normal_256_mean': 0.19478255659699928, 'sample_mean': 0.24337391961397348, 'cr_margin': 0.006600113316649881, 'normal_32_margin': 0.0017347217561652069, 'normal_256_margin': 0.0018802261224330945, 'sample_margin': 0.04598979828085454, 'max_diff_ratio': -0.23510125660898737, 'max_diff_sample_ratio': 0.24946465364200263, 'sample_cr_ratio': 0.633503342027694}
{'cr_mean': 0.3380458924873936, 'normal_32_mean': 0.19024790233107736, 'normal_256_mean': 0.19343875608540195, 'sample_mean': 0.21825046883599103, 'cr_margin': 0.031045338209211952, 'normal_32_margin': 0.012395187183906657, 'normal_256_margin': 0.03442351979322348, 'sample_margin': 0.03035581740052056, 'ma

In [9]:
df.to_csv('experiment_results_sample_32_and_256_3to7.csv')