In [None]:
import altair as alt

In [None]:
import pandas as pd

In [None]:
# See https://jcoliver.github.io/learn-r/009-expression-heatmaps.html
expression_csv = 'data/GSE68849-expression.csv'
df = pd.read_csv(expression_csv)

In [None]:
# Display the table as loaded (wide layout; it is reshaped to long format further down).
df

A tidy version of this dataset is one in which the gene names are not column headers but rather values in a `gene` column, with the measurements stored in an `expression` column. In order to tidy this dataset, we need to melt it. The pandas library has a built-in function, `pd.melt`, that allows us to do just that: it “unpivots” a DataFrame from a wide format to a long format. We’ll reuse this function a few times throughout the post.

In [None]:
# Reshape from wide to long: subject and treatment stay as identifier columns,
# every other column is folded into (gene, expression) pairs, and the rows are
# then ordered by subject.
formatted_df = (
    df.melt(
        id_vars=["subject", "treatment"],
        var_name="gene",
        value_name="expression",
    )
    .sort_values(by=["subject"])
)
formatted_df

In [None]:
# Base Altair chart bound to the long-format table; later cells derive marks and encodings from it.
chart = alt.Chart(formatted_df)

In [None]:
# Point marks only: no field is mapped to x, y or colour yet, so this is just the starting point.
chart.mark_point()

In [None]:
# One point per (subject, gene) combination; both fields are declared nominal (:N).
chart.mark_point().encode(x='subject:N', y='gene:N')

In [None]:
# Same subject-by-gene grid, drawn with rectangle marks instead of points.
chart.mark_rect().encode(
    x='subject:N',
    y='gene:N',
)

In [None]:
# Heatmap: colour each subject/gene rectangle by its quantitative expression value.
chart.mark_rect().encode(
    x='subject:N',
    y='gene:N',
    color='expression:Q',
)

In [None]:
# Same heatmap, with the colour scale switched to logarithmic.
chart.mark_rect().encode(
    x='subject:N',
    y='gene:N',
    color=alt.Color('expression:Q', scale=alt.Scale(type='log')),
)

In [None]:
# Log-coloured heatmap plus a tooltip listing subject, gene and expression.
chart.mark_rect().encode(
    x='subject:N',
    y='gene:N',
    color=alt.Color('expression:Q', scale=alt.Scale(type='log')),
    tooltip=['subject', 'gene', 'expression'],
)

In [None]:
# Build the log-coloured heatmap once, then derive one filtered copy per group so
# both panels share exactly the same encodings.
heatmap = chart.mark_rect().encode(
    x='subject:N',
    y='gene:N',
    color=alt.Color('expression:Q', scale=alt.Scale(type='log')),
    tooltip=['subject', 'gene', 'expression'],
)

# Rows whose treatment is "control" ...
control = heatmap.transform_filter('datum.treatment == "control"')
# ... and every remaining row.
flu = heatmap.transform_filter('datum.treatment != "control"')

In [None]:
# Side-by-side layout: the control heatmap on the left, the non-control heatmap on the right.
control | flu

In [None]:
# Log-coloured heatmap with tooltips, using the 'blues' colour scheme.
chart.mark_rect().encode(
    x='subject:N',
    y='gene:N',
    color=alt.Color(
        'expression:Q',
        scale=alt.Scale(type='log', scheme='blues'),
    ),
    tooltip=['subject', 'gene', 'expression'],
)

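Prueba estos esquemas de color (al pasarlos a `alt.Scale(scheme=...)`, escribe el nombre en minúsculas, p. ej. `yelloworangebrown`):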
          "Turbo",
          "Viridis",
          "Magma",
          "Inferno",
          "Plasma",
          "Cividis",
          "DarkBlue",
          "DarkGold",
          "DarkGreen",
          "DarkMulti",
          "DarkRed",
          "LightGreyRed",
          "LightGreyTeal",
          "LightMulti",
          "LightOrange",
          "LightTealBlue",
          "Blues",
          "Browns",
          "Greens",
          "Greys",
          "Oranges",
          "Purples",
          "Reds",
          "TealBlues",
          "Teals",
          "WarmGreys",
          "BlueOrange",
          "BrownBlueGreen",
          "PurpleGreen",
          "PinkYellowGreen",
          "PurpleOrange",
          "RedBlue",
          "RedGrey",
          "RedYellowBlue",
          "RedYellowGreen",
          "BlueGreen",
          "BluePurple",
          "GoldGreen",
          "GoldOrange",
          "GoldRed",
          "GreenBlue",
          "OrangeRed",
          "PurpleBlueGreen",
          "PurpleBlue",
          "PurpleRed",
          "RedPurple",
          "YellowGreenBlue",
          "YellowGreen",
          "YellowOrangeBrown",
          "YellowOrangeRed"

In [None]:
# Log-coloured heatmap with tooltips, drawn with the yellow-orange-brown scheme.
# Vega scheme identifiers are all-lowercase (as with 'blues' in the earlier cell);
# the mixed-case spelling is not a documented name and is rejected by Altair
# versions that validate the scheme against the Vega-Lite schema.
chart.mark_rect().encode(
    alt.X('subject:N'),
    alt.Y('gene:N'),
    alt.Tooltip(['subject', 'gene', 'expression']),
    alt.Color('expression:Q', scale=alt.Scale(type='log', scheme='yelloworangebrown'))
)

In [None]:
import seaborn as sns
from matplotlib import pyplot as plt

In [None]:
# Reload the wide table from disk and use the subject column as the row index.
df = pd.read_csv('data/GSE68849-expression.csv').set_index('subject')


In [None]:
# Recode treatment as 0 for 'control' and 1 for anything else.
# A value that is already 0 is also kept as 0, so re-running this cell leaves the
# column unchanged; comparing against 'control' alone would turn every row into 1
# on a second execution, because the strings have already been replaced.
df['treatment'] = [0 if x in ('control', 0) else 1 for x in df['treatment']]


In [None]:
# Inspect the frame after indexing by subject and recoding treatment.
df

In [None]:
# Default plot: clustermap called with the frame only, so every option keeps its default.
sns.clustermap(df)

In [None]:
# Standardize with standard_scale=1: per the seaborn docs this works column-wise,
# subtracting each column's minimum and dividing by its maximum.
sns.clustermap(df, standard_scale=1)

In [None]:
# Normalize with z_score=1: per the seaborn docs, each column is converted to
# z-scores (subtract the mean, divide by the standard deviation).
sns.clustermap(df, z_score=1)

In [None]:
# Plot with correlation distance as the clustering metric; columns are still
# rescaled through standard_scale=1.
sns.clustermap(df, metric="correlation", standard_scale=1)

In [None]:
# Plot with euclidean distance as the clustering metric, keeping standard_scale=1
# so that only the metric differs from the correlation-distance plot.
sns.clustermap(df, metric="euclidean", standard_scale=1)

In [None]:
# Linkage method used to build the clusters: "single"
# (metric and scaling are the same as in the euclidean plot).
sns.clustermap(df, metric="euclidean", standard_scale=1, method="single")

In [None]:
# Linkage method used to build the clusters: "ward"
# (metric and scaling are the same as in the euclidean plot).
sns.clustermap(df, metric="euclidean", standard_scale=1, method="ward")

In [None]:
# Change color palette: identical clustering arguments, drawn with the "mako" colormap.
sns.clustermap(df, metric="euclidean", standard_scale=1, method="ward", cmap="mako")

In [None]:
# Identical clustering arguments, drawn with the "viridis" colormap.
sns.clustermap(df, metric="euclidean", standard_scale=1, method="ward", cmap="viridis")


In [None]:
# Identical clustering arguments, drawn with the "Blues" colormap.
sns.clustermap(df, metric="euclidean", standard_scale=1, method="ward", cmap="Blues")

In [None]:
import numpy as np
# Keep the returned grid so the column dendrogram can be inspected afterwards.
tree = sns.clustermap(df, metric="euclidean", standard_scale=1, method="ward")
dgram = tree.dendrogram_col.dendrogram
# Pull the 'dcoord' and 'icoord' entries out as NumPy arrays.
D, I = (np.array(dgram[key]) for key in ('dcoord', 'icoord'))

In [None]:
# Show the column dendrogram data taken from the clustermap (indexed by 'dcoord' / 'icoord' above).
dgram

In [None]:
# Show the array built from dgram['dcoord'].
D