In [2]:
pip install rpy2

Collecting rpy2
  Downloading rpy2-3.6.4-py3-none-any.whl.metadata (5.4 kB)
Collecting rpy2-rinterface>=3.6.3 (from rpy2)
  Downloading rpy2_rinterface-3.6.3-cp310-cp310-win_amd64.whl.metadata (1.9 kB)
Collecting rpy2-robjects>=3.6.3 (from rpy2)
  Downloading rpy2_robjects-3.6.3-py3-none-any.whl.metadata (3.3 kB)
Collecting tzlocal (from rpy2-robjects>=3.6.3->rpy2)
  Downloading tzlocal-5.3.1-py3-none-any.whl.metadata (7.6 kB)
Downloading rpy2-3.6.4-py3-none-any.whl (9.9 kB)
Downloading rpy2_rinterface-3.6.3-cp310-cp310-win_amd64.whl (174 kB)
Downloading rpy2_robjects-3.6.3-py3-none-any.whl (125 kB)
Downloading tzlocal-5.3.1-py3-none-any.whl (18 kB)
Installing collected packages: tzlocal, rpy2-rinterface, rpy2-robjects, rpy2

   ---------- ----------------------------- 1/4 [rpy2-rinterface]
   ---------- ----------------------------- 1/4 [rpy2-rinterface]
   ---------- ----------------------------- 1/4 [rpy2-rinterface]
   ---------- ----------------------------- 1/4 [rpy2-rinterface]


In [2]:
import pyreadr
import numpy as np
import pickle
import os

# -----------------------------
# Funciones
# -----------------------------

def load_rdata(path):
    """Read an RData file and return the first object stored in it.

    Args:
        path: Location of the ``.RData`` file; handed unchanged to
            ``pyreadr.read_r``.

    Returns:
        The value stored under the first key of the mapping produced by
        ``pyreadr.read_r``.

    Raises:
        IndexError: If the mapping returned for ``path`` has no keys.
    """
    objects = pyreadr.read_r(path)
    # Only the first object is of interest; any further ones are ignored.
    first_name = list(objects)[0]
    return objects[first_name]

def dataframe_to_dict(df):
    """Turn a DataFrame into a plain ``{column_name: list_of_values}`` dict.

    Args:
        df: Object exposing ``.columns`` and column access via ``df[col]``
            (a pandas DataFrame).

    Returns:
        dict: One entry per column, in column order, whose value is that
        column's contents converted with ``.values.tolist()``.
    """
    return {column: df[column].values.tolist() for column in df.columns}

def reduce_list_generic(sims, n=20, rng=None):
    """Randomly keep at most ``n`` entries of ``sims``, without replacement.

    Args:
        sims: A Python list, or any object exposing ``.iloc`` (e.g. a pandas
            DataFrame), which is first converted to a list of rows with
            ``.values.tolist()``.
        n: Maximum number of entries to keep.
        rng: Optional seed or ``numpy.random.Generator`` used for the draw,
            for reproducible sub-sampling. When ``None`` (the default) the
            legacy global ``np.random`` state is used, so ``np.random.seed``
            still controls the result.

    Returns:
        list: ``sims`` itself (after the optional DataFrame-to-list
        conversion) when it has ``n`` entries or fewer; otherwise a new list
        of ``n`` entries picked at random, in the order they were drawn.
    """
    # DataFrame-like input: work on a list of its rows.
    if hasattr(sims, "iloc"):
        sims = sims.values.tolist()

    total = len(sims)
    # Nothing to drop when the input is already small enough.
    if total <= n:
        return sims

    if rng is None:
        idx = np.random.choice(total, size=n, replace=False)
    else:
        # default_rng accepts an int seed and passes an existing Generator through.
        idx = np.random.default_rng(rng).choice(total, size=n, replace=False)
    return [sims[i] for i in idx]

def reduce_faulty_dict(faulty_dict, n=20):
    """Apply ``reduce_list_generic`` to every value of a dictionary.

    Args:
        faulty_dict: Mapping whose values are lists or DataFrames (anything
            ``reduce_list_generic`` accepts).
        n: Maximum number of entries to keep per key.

    Returns:
        dict: A new dictionary with the same keys, each mapped to the
        result of ``reduce_list_generic(value, n)``.
    """
    return {
        fault_name: reduce_list_generic(fault_sims, n)
        for fault_name, fault_sims in faulty_dict.items()
    }

# -----------------------------
# Configuration
# -----------------------------
# Folder that holds the TEP .RData inputs; the pickles are written back into it.
# Set the TEP_DATA_DIR environment variable to point somewhere else without
# editing the notebook; the default is the original location.
DATA_DIR = os.environ.get(
    "TEP_DATA_DIR",
    r"C:\Users\miren\Documents\GitHub\proyecto_analitica",
)

files = {
    "FaultFree_Training":  os.path.join(DATA_DIR, "TEP_FaultFree_Training.RData"),
    "FaultFree_Testing":   os.path.join(DATA_DIR, "TEP_FaultFree_Testing.RData"),
    "Faulty_Training":     os.path.join(DATA_DIR, "TEP_Faulty_Training.RData"),
    "Faulty_Testing":      os.path.join(DATA_DIR, "TEP_Faulty_Testing.RData"),
}

N = 20     # entries kept per dataset / per fault
SEED = 42  # fixes the random sub-sample so a re-run reproduces the same pickles

# reduce_list_generic draws from numpy's global random state, so seeding it
# here makes the whole reduction deterministic.
np.random.seed(SEED)

# -----------------------------
# Process the files one by one
# -----------------------------
reduced = {}

for name, path in files.items():
    print(f"\n=== Procesando {name} ===")

    data = load_rdata(path)

    # Faulty datasets that arrive as a DataFrame are split into one list per column.
    # NOTE(review): each of those lists is then sub-sampled independently, so
    # values at the same position in different columns no longer come from the
    # same row. Confirm this is intended.
    if "Faulty" in name and hasattr(data, "columns"):
        data = dataframe_to_dict(data)

    # FaultFree data (DataFrame or list) is reduced as a whole;
    # Faulty data (dictionary) is reduced key by key.
    if "FaultFree" in name:
        reduced_data = reduce_list_generic(data, N)
    else:
        reduced_data = reduce_faulty_dict(data, N)

    reduced[name] = reduced_data

    # One pickle per input file.
    out_file = os.path.join(DATA_DIR, f"{name}_reduced.pkl")
    with open(out_file, "wb") as f:
        pickle.dump(reduced_data, f, protocol=pickle.HIGHEST_PROTOCOL)
    print(f"{name} reducido y guardado en {out_file}")

# Plus a single pickle bundling every reduced dataset.
OUTPUT_PICKLE = os.path.join(DATA_DIR, "TEP_reduced.pkl")
with open(OUTPUT_PICKLE, "wb") as f:
    pickle.dump(reduced, f, protocol=pickle.HIGHEST_PROTOCOL)

print(f"\n Todos los datasets reducidos guardados en {OUTPUT_PICKLE}")


=== Procesando FaultFree_Training ===
FaultFree_Training reducido y guardado en C:\Users\miren\Documents\GitHub\proyecto_analitica\FaultFree_Training_reduced.pkl

=== Procesando FaultFree_Testing ===
FaultFree_Testing reducido y guardado en C:\Users\miren\Documents\GitHub\proyecto_analitica\FaultFree_Testing_reduced.pkl

=== Procesando Faulty_Training ===
Faulty_Training reducido y guardado en C:\Users\miren\Documents\GitHub\proyecto_analitica\Faulty_Training_reduced.pkl

=== Procesando Faulty_Testing ===
Faulty_Testing reducido y guardado en C:\Users\miren\Documents\GitHub\proyecto_analitica\Faulty_Testing_reduced.pkl

 Todos los datasets reducidos guardados en C:\Users\miren\Documents\GitHub\proyecto_analitica\TEP_reduced.pkl


In [6]:
import pickle
from rpy2.robjects import r, ListVector, FloatVector
import rpy2.robjects as ro

# -----------------------------
# Función de conversión
# -----------------------------
def pyobj_to_r(obj):
    """Convert a Python list or dict into an rpy2 vector object.

    Conversion rules:
      * a list whose items are all lists becomes a ``ListVector`` keyed
        ``"1"``, ``"2"``, ... with each inner list wrapped in a ``FloatVector``;
      * any other list becomes a ``FloatVector``;
      * a dict becomes a ``ListVector`` with the same keys, where list values
        follow the two rules above and non-list values are passed through
        unchanged.

    Args:
        obj: The list or dict to convert.

    Returns:
        A ``ListVector`` or ``FloatVector``.

    Raises:
        TypeError: If ``obj`` is neither a dict nor a list.
    """
    def _convert_list(values):
        # A list made only of lists maps to a named list with 1-based string keys.
        if all(isinstance(item, list) for item in values):
            return ListVector(
                {str(pos): FloatVector(item) for pos, item in enumerate(values, start=1)}
            )
        return FloatVector(values)

    if isinstance(obj, list):
        return _convert_list(obj)
    if not isinstance(obj, dict):
        raise TypeError(f"Tipo no soportado: {type(obj)}")

    converted = {
        key: _convert_list(value) if isinstance(value, list) else value
        for key, value in obj.items()
    }
    return ListVector(converted)

# -----------------------------
# Load the pickle holding the reduced datasets
# -----------------------------
PICKLE_FILE = r"C:\Users\miren\Documents\GitHub\proyecto_analitica\TEP_reduced.pkl"

# NOTE: unpickling can execute arbitrary code; only load a file produced by
# this project.
with open(PICKLE_FILE, "rb") as f:
    reduced = pickle.load(f)

# -----------------------------
# Save each dataset as an RData file
# -----------------------------
# Each dataset is converted, bound to a variable of the same name in R's
# global environment, and written out with R's save().
# NOTE(review): the file name passed to save() is relative, so the output
# presumably lands in R's current working directory rather than next to
# PICKLE_FILE. Confirm this is intended.
for name in ["FaultFree_Training", "FaultFree_Testing", "Faulty_Training", "Faulty_Testing"]:
    obj = pyobj_to_r(reduced[name])
    ro.globalenv[name] = obj
    r(f'save({name}, file="{name}_reduced.RData")')
    print(f"{name}_reduced.RData guardado.")

print("\n Todos los datasets reducidos guardados en formato RData.")

FaultFree_Training_reduced.RData guardado.
FaultFree_Testing_reduced.RData guardado.
Faulty_Training_reduced.RData guardado.
Faulty_Testing_reduced.RData guardado.

 Todos los datasets reducidos guardados en formato RData.
