# 各脳部位間の投射関係の重み付き有向グラフ化（projection_graphs）

projections, specimensテーブルを元に、脳部位Aから脳部位Bへの投射関係を示すデータ列を作成する。

In [None]:
# Install the Python libraries used by this notebook
!python --version
!pip install psycopg2-binary
!pip install python-dotenv

!pip install pandas
!pip install numpy
!pip install matplotlib
!pip install plotly
!pip install scikit-learn
!pip install sqlalchemy


## 環境変数
supabase接続用URL,APIキーと、openai api接続用のAPIキーを設定します。
自身のopenaiアカウントからapi keyを取得してください。

https://platform.openai.com/account/api-keys

supabaseの情報は管理者にお尋ねください。

下記の例では、.envファイルに変数を書き込み、Jupyter Notebookで読み込む仕様で実装しています。

※.envファイルの作成が困難、.envファイルから値を読み込めない場合、
　os.getenv("◯◯")部分に変数値を直接書き込んでいただいても動作自体には問題ありません。

In [2]:
# 環境変数
import os
from dotenv import load_dotenv
load_dotenv()

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import text
import urllib.parse
from IPython.display import display

# Supabase (PostgreSQL) connection settings, read from the process environment.
db_host = os.getenv("DB_HOST")
db_port = os.getenv("DB_PORT")
db_name = os.getenv("DB_NAME")
db_user = os.getenv("DB_USER")
db_pass = os.getenv("DB_PASS")

# Fail early with a readable message. An unset variable is None, which would
# otherwise surface as an opaque TypeError inside quote_plus() or end up as
# the literal text "None" inside the connection URL.
required_settings = {
    "DB_HOST": db_host,
    "DB_PORT": db_port,
    "DB_NAME": db_name,
    "DB_USER": db_user,
    "DB_PASS": db_pass,
}
missing_settings = [name for name, value in required_settings.items() if not value]
if missing_settings:
    raise RuntimeError(
        "Missing database settings: " + ", ".join(missing_settings)
    )

# Connect to the database.
# Both credentials are percent-encoded so that characters such as "@", ":" or
# "/" cannot break the URL structure.
connection_config = {
    'user': urllib.parse.quote_plus(db_user),
    'password': urllib.parse.quote_plus(db_pass),
    'host': db_host,
    'port': db_port,
    'database': db_name
}
engine = create_engine('postgresql://{user}:{password}@{host}:{port}/{database}'.format(**connection_config))

print('環境変数読み込み完了')

環境変数読み込み完了


# 処理実行

structuresのうち、st-levelがthreshold_level（4）より大きい全ての要素iに対して下記を実行

1. 要素iが注入部位となった実験結果を抽出
2. 投射先部位ごとに "normalized-projection-volume", "projection-density", "projection-energy", "projection-intensity", "projection-volume" の平均値を計算
3. projection_graphsテーブルに保存

In [None]:
# Only structures whose "st-level" is greater than this value are used below.
threshold_level = 4

# Fetch every row of the structures table, ordered by ascending st-level.
sql ="""
SELECT
    id,
    name,
    acronym,
    "st-level",
    "parent-structure-id"
FROM
    structures
ORDER BY
    "st-level" ASC;"""
query = text(sql)
with engine.begin() as conn:
    df_structures = pd.read_sql_query(query, conn)

# Leaf structures: ids that never appear as another row's parent.
is_parent = df_structures['id'].isin(df_structures['parent-structure-id'])
terminal_structure_ids = df_structures.loc[~is_parent, 'id']


def get_children(parent_id):
    """Return the rows of df_structures whose "parent-structure-id" equals parent_id."""
    is_child = df_structures["parent-structure-id"] == parent_id
    return df_structures.loc[is_child]

def build_hierarchy(parent_id, level=0):
    """Recursively describe every descendant of parent_id as a nested dict.

    Each key is a child id; each value holds "level" (the depth at which the
    child was found, starting from the `level` argument), "acronym" and
    "children" (a dict of the same shape for that child's own descendants).

    Side effect: every visited id is appended to the module-level
    structure_ids list, each id after all of its own descendants. The list
    must exist before this function is called.
    """
    tree = {}
    for _, node in get_children(parent_id).iterrows():
        node_id = node["id"]
        # Recurse first so descendants land in structure_ids before the node.
        subtree = build_hierarchy(node_id, level + 1)
        tree[node_id] = {"level": level, "acronym": node['acronym'], "children": subtree}
        structure_ids.append(node_id)
    return tree

# Only structures deeper than threshold_level are treated as injection sites.
df_small_structures = df_structures[df_structures['st-level'] > threshold_level]

# Rows whose index label is below this value are skipped.
# NOTE(review): 486 looks like a resume point left over from an interrupted
# run; set it to 0 to process every structure -- confirm before re-running.
resume_from_index = 486

# Metrics averaged per projected structure, in output column order.
projection_metrics = [
    'normalized-projection-volume',
    'projection-density',
    'projection-energy',
    'projection-intensity',
    'projection-volume',
]

# Maximum number of rows written per to_sql() call.
chunk_size = 2000

# Display floats with a fixed number of decimal places (set once, not per row).
pd.set_option('display.float_format', '{:.10f}'.format)

# Projections of every experiment injected into one of :structure_ids,
# excluding the injection rows themselves and shallow target structures.
# The statement does not depend on the loop variable, so it is built once.
sql =f"""
    SELECT
        p.id,
        p."structure-id" AS "projected-structure-id",
        s.name,
        s.acronym,
        p."normalized-projection-volume",
        p."projection-density",
        p."projection-energy",
        p."projection-intensity",
        p."projection-volume",
        sp."structure-id"
    FROM projections AS p
        INNER JOIN specimens AS sp ON p."experiment-id" = sp."experiment-id"
        INNER JOIN structures AS s ON p."structure-id" = s.id
    WHERE
        sp."structure-id" IN :structure_ids
        AND p."is-injection" = false
        AND s."st-level" > {threshold_level};
    """
query = text(sql)

# Iterate through each candidate injection structure
for index, row in df_small_structures.iterrows():
    if index < resume_from_index:
        continue
    print(f'Index: {index}/{len(df_small_structures)}, Name: {row["name"]}, Acronym: {row["acronym"]}, st-level: {row["st-level"]}')

    # build_hierarchy() appends every descendant id to the module-level
    # structure_ids list, so it is seeded with the structure itself first.
    structure_ids = [row["id"]]
    hierarchy = build_hierarchy(row["id"])

    print(structure_ids)

    # NOTE(review): the IN clause is bound to a Python tuple, which presumably
    # relies on the database driver expanding tuples -- confirm if the driver
    # is ever changed.
    with engine.begin() as conn:
        df_projections = pd.read_sql_query(query, conn, params={"structure_ids": tuple(structure_ids)})

    if df_projections.empty:
        continue

    # Mean of every metric per projected structure, computed in a single pass.
    df = df_projections.groupby('projected-structure-id')[projection_metrics].mean()

    df['injected-structure-id'] = row["id"]
    df['projected-structure-id'] = df.index
    df = df.reset_index(drop=True)

    # Edge identifier of the form "<injected id>-><projected id>".
    df['graph-id'] = df['injected-structure-id'].astype(str) + "->" + df['projected-structure-id'].astype(str)

    display(df)

    # Append to projection_graphs in chunks. Stepping by chunk_size avoids the
    # extra empty write that "len // chunk_size + 1" produced whenever the row
    # count was an exact multiple of chunk_size.
    for start_idx in range(0, len(df), chunk_size):
        df_chunk = df.iloc[start_idx:start_idx + chunk_size]
        try:
            df_chunk.to_sql('projection_graphs', con=engine, if_exists='append', index=False)
        except Exception as e:
            # Best effort: report the failure and keep going with the next chunk.
            print(f"An error occurred while writing the DataFrame to the SQL table: {e}")

