update all

CooperStansbury · Sep 18, 2023 · 9831b77 · 9831b77
1 parent afe5c05
commit 9831b77
Show file tree

Hide file tree

Showing 84 changed files with 77,050 additions and 12,633 deletions.
diff --git a/clustering/.ipynb_checkpoints/cellCounts-checkpoint.ipynb b/clustering/.ipynb_checkpoints/cellCounts-checkpoint.ipynb
diff --git a/clustering/.ipynb_checkpoints/getExpressionFiles-checkpoint.ipynb b/clustering/.ipynb_checkpoints/getExpressionFiles-checkpoint.ipynb
@@ -0,0 +1,168 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "14424b7f-8184-4b13-a1b3-9415edf93f14",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib \n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "from textwrap import wrap\n",
+    "import scipy\n",
+    "from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable\n",
+    "from sklearn import metrics\n",
+    "import itertools\n",
+    "from sklearn.decomposition import PCA\n",
+    "from scipy.sparse import csgraph\n",
+    "import umap\n",
+    "from collections import Counter\n",
+    "import sklearn\n",
+    "from importlib import reload\n",
+    "\n",
+    "import gget\n",
+    "\n",
+    "# locals\n",
+    "import utils as ut\n",
+    "reload(ut)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fa7a653f-f276-4354-bd44-a6b40ded028f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Inputs on the shared lab filesystem: per-key CARD tables plus the combined embedding.\n",
+    "cardDir = \"/nfs/turbo/umms-indikar/shared/projects/spatial_transcriptomics/data/CARDInputs/\"\n",
+    "xyPath = \"/nfs/turbo/umms-indikar/shared/projects/spatial_transcriptomics/data/combinedEmbedding.csv\"\n",
+    "xy = pd.read_csv(xyPath)\n",
+    "cTypes = sorted(xy['cellTypes'].unique())\n",
+    "print(cTypes)\n",
+    "print()\n",
+    "\n",
+    "keys = ['ND', 'HFD8', 'HFD14']\n",
+    "rna, labels = {}, {}\n",
+    "\n",
+    "for key in keys:\n",
+    "    print(f\"{key}...\")\n",
+    "\n",
+    "    # Cluster labels, left-joined to the embedding so every labelled row is kept.\n",
+    "    labelPath = f\"{cardDir}{key}_macrophage_clusters.csv\"\n",
+    "    embedCols = xy[['x', 'y', 'cellId', 'colors']]\n",
+    "    lf = pd.read_csv(labelPath).merge(embedCols, how='left', on='cellId')\n",
+    "    labels[key] = lf\n",
+    "\n",
+    "    # Expression table: transpose, then promote the first row to column names.\n",
+    "    rnaPath = f\"{cardDir}{key}_macrophage_rna.csv\"\n",
+    "    rf = pd.read_csv(rnaPath).T\n",
+    "    header = rf.iloc[0]\n",
+    "    rf = rf.iloc[1:]\n",
+    "    rf.columns = header\n",
+    "    rf.index.name = 'cellId'\n",
+    "\n",
+    "    # NOTE(review): presumably scales each cell to 1e6 total counts -- confirm in utils.normalize.\n",
+    "    rf = ut.normalize(rf, 1e6)\n",
+    "    rna[key] = rf\n",
+    "\n",
+    "print('done')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cc6dc89c-0986-4da7-863b-1ecd6803d360",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read one gene symbol per line; the context manager closes the file, which a bare open() never did.\n",
+    "with open(\"./lindseyGenes.txt\") as geneFile:\n",
+    "    genes = [x.strip().upper() for x in geneFile]\n",
+    "\n",
+    "# Keep only the symbols that are columns of the ND expression table.\n",
+    "genes = [x for x in genes if x in rna['ND'].columns]\n",
+    "len(genes)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1dd04116-88ef-4010-99f1-0bf45d5194ee",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "outdir = \"~/temp/\"\n",
+    "\n",
+    "# to_csv will not create a missing directory, so resolve \"~\" and create it up front.\n",
+    "outRoot = os.path.expanduser(outdir)\n",
+    "os.makedirs(outRoot, exist_ok=True)\n",
+    "\n",
+    "for key in keys:\n",
+    "    rf = rna[key]\n",
+    "    lf = labels[key]\n",
+    "\n",
+    "    # Attach each cell's type to its expression row for the selected genes; a left\n",
+    "    # join keeps every expression row even when no label matches its cellId.\n",
+    "    pdf = pd.merge(rf[genes], lf[['cellId', 'cellType']],\n",
+    "                   how='left',\n",
+    "                   left_index=True,\n",
+    "                   right_on='cellId')\n",
+    "\n",
+    "    # rf/lf are not del'ed here: rna and labels still reference the frames, so\n",
+    "    # unbinding the loop names would release no memory.\n",
+    "    print(f\"{key} {rf.shape=} {lf.shape=} {pdf.shape=}\")\n",
+    "    print(f\"{key} done merge\")\n",
+    "\n",
+    "    fpath = os.path.join(outRoot, f\"{key}_CPM.csv\")\n",
+    "    pdf.to_csv(fpath, index=False)\n",
+    "    print(f'{key} done')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b5d59b6c-28d5-4edf-8cd2-ae28cd09fb00",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Presumably a completion marker for Run All -- confirm it is still wanted.\n",
+    "print(\"done\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cd6931a0-cf3b-4b50-97f2-334c16072b26",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/clustering/.ipynb_checkpoints/globalRNA-checkpoint.ipynb b/clustering/.ipynb_checkpoints/globalRNA-checkpoint.ipynb
@@ -280,20 +280,20 @@
     }
    ],
    "source": [
-    "break"
+    "# break"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 10,
    "id": "9fbc5f62-97d7-48cc-bfbf-36c503cdee31",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "ND 2142.874702616971 1251.429962163543\n"
+      "ND, np.mean(counts)=2142.9, np.std(counts)=1251.4\n"
      ]
     }
    ],
@@ -587,7 +587,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "break"
+    "# break"
    ]
   },
   {
@@ -739,7 +739,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "break"
+    "# break"
    ]
   },
   {