In [10]:
import pandas as pd
import numpy as np 
import os
from pathlib import Path

In [11]:
# Project root: every relative path in this notebook is resolved against it.
# Defaults to the original author's checkout; set the PRNN_ROOT environment
# variable to run the notebook from another machine or location.
PROJECT_ROOT = Path(os.environ.get(
    "PRNN_ROOT",
    '/Users/hadrienpadilla/Documents/McGill/Peyrache Lab/pRNN',
))
os.chdir(PROJECT_ROOT)

# Root folder (relative to PROJECT_ROOT) containing the sub-folders named in FOLDERS
datafolder = Path("Data/hadrien_analyzed_nets")

# folder_name  -> desired category label
# NOTE(review): "rand rew" does not follow the "<prefix> repeat" pattern used by
# "gd repeat"; left unchanged because cat_order matches on the exact strings.
FOLDERS = {
    "base_nets":          "base",
    "GD_no_reward":       "gd no reward",
    "GD_reward_mults":    "gd mult",
    "GD_reward_repeats":  "gd repeat",
    "rand_rew_mult":      "rand mult",
    "rand_rew_repeats":   "rand rew",
}

# One combined DataFrame per category folder is accumulated here.
dfs = []

for folder_name, category_label in FOLDERS.items():
    folder = datafolder / folder_name

    # Prefer the canonical <folder>/<folder>.pkl; if it is absent, take every
    # *.pkl in the folder (sorted so the load order is deterministic).
    canonical_pkl = folder / f"{folder_name}.pkl"
    pickle_paths = [canonical_pkl] if canonical_pkl.exists() else sorted(folder.glob("*.pkl"))

    if len(pickle_paths) == 0:
        print(f"[skip] No pickle files found in {folder}")
        continue

    category_parts = []
    for pkl_path in pickle_paths:
        # Best-effort load: a pickle that fails to read or convert is reported
        # and skipped so the remaining files are still processed.
        try:
            df = pd.read_pickle(pkl_path)
            # Coerce non-DataFrame payloads into a DataFrame
            df = df if isinstance(df, pd.DataFrame) else pd.DataFrame(df)
            # Tag every row with its category and the file it came from
            df["category"] = category_label
            df["source_pickle"] = str(pkl_path)
            category_parts.append(df)
        except Exception as err:
            print(f"[warn] Failed to read {pkl_path}: {err}")

    # Only folders that produced at least one readable pickle contribute
    if category_parts:
        dfs.append(pd.concat(category_parts, ignore_index=True))

# Build the mega dataframe.
# When nothing was loaded, fall back to an empty frame that still carries a
# "category" column, so the steps below work instead of raising KeyError.
if dfs:
    megaDF = pd.concat(dfs, ignore_index=True)
else:
    megaDF = pd.DataFrame({"category": pd.Series(dtype="object")})

# Make category a tidy ordered categorical (optional)
cat_order = ["base", "gd mult", "gd repeat", "gd no reward", "rand rew", "rand mult"]

# pd.Categorical maps any value missing from `categories` to NaN without
# complaint, so report such labels before they are lost.
unknown_labels = sorted(set(megaDF["category"].dropna().unique()) - set(cat_order), key=str)
if unknown_labels:
    print(f"[warn] Category labels not in cat_order will become NaN: {unknown_labels}")

megaDF["category"] = pd.Categorical(megaDF["category"], categories=cat_order, ordered=True)

# Optional: put category up front if these columns exist
front_cols = [c for c in ["category", "netname", "bins", "cells", "circles", "complex_peaks", "source_pickle"] if c in megaDF.columns]
other_cols = [c for c in megaDF.columns if c not in front_cols]
megaDF = megaDF[front_cols + other_cols]

# Quick sanity check: rows per category (NaN included) and the first few rows
print(megaDF["category"].value_counts(dropna=False))
print(megaDF.head())


gd repeat       49
rand mult       49
gd mult         44
rand rew        42
base             7
gd no reward     1
Name: category, dtype: int64
  category                                          netname  \
0     base       multRNN_5win_i2_o2-no_reward-s1042_ep5-cpu   
1     base  multRNN_5win_i2_o2-no_reward_1001-s1001_ep5-cpu   
2     base  multRNN_5win_i2_o2-no_reward_2002-s2002_ep5-cpu   
3     base  multRNN_5win_i2_o2-no_reward_3003-s3003_ep5-cpu   
4     base  multRNN_5win_i2_o2-no_reward_4004-s4004_ep5-cpu   

                                                bins  \
0  {'single': {'counts': [12, 19, 18, 26, 34, 36]...   
1  {'single': {'counts': [17, 15, 10, 29, 19, 29]...   
2  {'single': {'counts': [15, 19, 13, 33, 33, 30]...   
3  {'single': {'counts': [15, 17, 12, 34, 41, 32]...   
4  {'single': {'counts': [6, 16, 11, 21, 31, 21],...   

                                               cells  \
0  [{'idx': 1, 'center': [0.65625, 0.593754020718...   
1  [{'idx': 0, 'center': [0.5

In [17]:

# Write the combined DataFrame to disk as a pickle; the path is relative to
# the current working directory.
out_dir = "Data/hadrien_analyzed_nets"
out_pkl = os.path.join(out_dir, "megaDF.pkl")
megaDF.to_pickle(out_pkl)
print(f"Saved: {out_pkl}")

Saved: Data/hadrien_analyzed_nets/megaDF.pkl


In [18]:
# Reload the combined DataFrame from disk.
# The project root defaults to the original author's checkout; set the
# PRNN_ROOT environment variable to run this cell from another location.
PROJECT_ROOT = os.environ.get(
    "PRNN_ROOT",
    '/Users/hadrienpadilla/Documents/McGill/Peyrache Lab/pRNN',
)
os.chdir(PROJECT_ROOT)
# NOTE: unpickling can execute arbitrary code; only load pickles you produced yourself.
df = pd.read_pickle('Data/hadrien_analyzed_nets/megaDF.pkl')
print(df)

     category                                            netname  \
0        base         multRNN_5win_i2_o2-no_reward-s1042_ep5-cpu   
1        base    multRNN_5win_i2_o2-no_reward_1001-s1001_ep5-cpu   
2        base    multRNN_5win_i2_o2-no_reward_2002-s2002_ep5-cpu   
3        base    multRNN_5win_i2_o2-no_reward_3003-s3003_ep5-cpu   
4        base    multRNN_5win_i2_o2-no_reward_4004-s4004_ep5-cpu   
..        ...                                                ...   
187  rand rew  multRNN_5win_i2_o23-repeat_150_ep5_6006-s1042_ep5   
188  rand rew   multRNN_5win_i2_o23-repeat_25_ep5_6006-s1042_ep5   
189  rand rew   multRNN_5win_i2_o23-repeat_50_ep5_6006-s1042_ep5   
190  rand rew    multRNN_5win_i2_o23-repeat_5_ep5_6006-s1042_ep5   
191  rand rew   multRNN_5win_i2_o23-repeat_75_ep5_6006-s1042_ep5   

                                                  bins  \
0    {'single': {'counts': [12, 19, 18, 26, 34, 36]...   
1    {'single': {'counts': [17, 15, 10, 29, 19, 29]...   
2    {'si