In [None]:
# @title Memory optimisation
import pandas as pd
import numpy as np

def optimize_memory(df: pd.DataFrame) -> pd.DataFrame:
    """
    Reduce the memory footprint of a DataFrame by downcasting its columns.

    The frame is modified IN PLACE and the same object is returned, so the
    caller's original reference sees the converted columns as well.

    Conversion rules, applied column by column:
      * NumPy integer columns are cast to the smallest of int16 / int32 that
        holds every value (bounds inclusive). This is lossless.
      * NumPy floating columns are cast to float32 when every value lies in
        the float32 range. This keeps roughly 7 significant digits, so it can
        lose precision. float16 is deliberately not used: its ~3 significant
        digits would silently corrupt most real-valued data.
      * Object / string columns are cast to 'category' when fewer than half
        of the rows hold distinct values.
      * A column is never cast to a type that is not strictly smaller than
        the one it already has (e.g. uint8 stays uint8).
      * Everything else (bool, datetime, category, nullable extension dtypes,
        all-NaN or empty numeric columns) is left untouched.

    Parameters:
    df (pd.DataFrame): Input DataFrame. Mutated in place.

    Returns:
    pd.DataFrame: The same DataFrame object, with reduced memory usage.
    """
    n_rows = len(df)

    for col in df.columns:
        series = df[col]
        col_type = series.dtype

        # --- Text-like columns: consider converting to category -------------
        if isinstance(col_type, pd.StringDtype) or pd.api.types.is_object_dtype(col_type):
            if n_rows == 0:
                # Nothing to measure; also avoids dividing by zero below.
                continue
            try:
                n_unique = series.nunique()
            except TypeError:
                # Unhashable cell values (lists, dicts, ...) cannot be categorised.
                continue
            if n_unique / n_rows < 0.5:
                df[col] = series.astype('category')
            continue

        # --- Extension dtypes (category, nullable Int64, ...) ---------------
        # np.issubdtype raises TypeError on these, so skip them explicitly.
        # This also makes the function safe to call on its own output.
        if not isinstance(col_type, np.dtype):
            continue

        # --- Numeric columns: pick candidate targets, smallest first --------
        if np.issubdtype(col_type, np.integer):
            targets, limits_of = (np.int16, np.int32), np.iinfo
        elif np.issubdtype(col_type, np.floating):
            targets, limits_of = (np.float32,), np.finfo
        else:
            continue

        # Only consider targets that actually shrink the column.
        targets = [t for t in targets if np.dtype(t).itemsize < col_type.itemsize]
        if not targets:
            continue

        # Computed once per column; NaN (all-missing or empty column) makes
        # every comparison below False, so such columns are left unchanged.
        lo, hi = series.min(), series.max()

        for target in targets:
            limits = limits_of(target)
            if limits.min <= lo and hi <= limits.max:
                df[col] = series.astype(target)
                break

    return df

# Assuming 'df' is the DataFrame you want to optimize
# df = ... (Load your DataFrame here)

# Display memory usage before optimization.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
print("Memory before optimization:")
df.info(memory_usage='deep')

# Optimize memory
df_optimized = optimize_memory(df)

# Display memory usage after optimization
print("\nMemory after optimization:")
df_optimized.info(memory_usage='deep')

Memory before optimization:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 187 entries, 0 to 186
Data columns (total 37 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Recipientgender       187 non-null    int64  
 1   Stemcellsource        187 non-null    int64  
 2   Donorage              187 non-null    float64
 3   Donorage35            187 non-null    int64  
 4   IIIV                  187 non-null    int64  
 5   Gendermatch           187 non-null    int64  
 6   DonorABO              187 non-null    int64  
 7   RecipientABO          186 non-null    float64
 8   RecipientRh           185 non-null    float64
 9   ABOmatch              186 non-null    float64
 10  CMVstatus             171 non-null    float64
 11  DonorCMV              185 non-null    float64
 12  RecipientCMV          173 non-null    float64
 13  Disease               187 non-null    int64  
 14  Riskgroup             187 non-null    int64  
