In [None]:
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import sigfig

from qPCR_plotting import *
import EmmaPlotColors

# --- Configuration ---------------------------------------------------------
# Line that the fold changes are expressed against. PICK ONE!
normalization_line = 'NF54'
# normalization_line = 'KOP230P'
# normalization_line = 'KOPfs25'

# Gene named in the normalisation part of the column title and plot labels.
normalization_gene = 'Actin1'

# Input JSON for each non-default normalization line; any other value
# (including 'NF54') loads the NF54 file.
qpcr_json_by_line = {
    'KOP230P': 'qpcr_df_05_KOP230P.json',
    'KOPfs25': 'qpcr_df_05_KOPfs25.json',
}
df = pd.read_json(qpcr_json_by_line.get(normalization_line, 'qpcr_df_05_NF54.json'))

output_folder = get_output_folder(experiment='qPCR_all')

# Name of the dataframe column that is tested and plotted in the cells below.
line_normalization_title = f'Expression fold change to {normalization_line} (normalised to {normalization_gene})'

# Working frame: drop the normalization gene and normalization line
# themselves, then drop Pfcyp87.
dfq = (
    df
    .query('gene != @normalization_gene and line != @normalization_line')
    .query('gene != "Pfcyp87"')  # remove this gene from the dataframe
)
print(output_folder)

list(df.columns)

In [None]:
import scipy.stats as stats

# For every (line, gene) pair, run a two-sided one-sample t-test of the fold
# changes against 1, i.e. against "no change".
# NOTE(review): the test is run on the linear fold changes and the p-values
# are not corrected for multiple comparisons -- confirm this is intended.
significance_level = 0.05

sig = []
for (line, gene), dfg in dfq.groupby(['line', 'gene']):
    datapoints = dfg[line_normalization_title]

    t_stat, p_value = stats.ttest_1samp(datapoints, popmean=1)
    # Report the sample standard deviation (ddof=1), the same estimator the
    # t-test is based on. np.std(datapoints) uses ddof=0, which is biased low
    # for small groups and does not match the test statistic.
    sig.append((line, gene, t_stat, p_value, datapoints.mean(), datapoints.std(ddof=1)))

df_sig = pd.DataFrame(sig, columns=['line', 'gene', 't_stat', 'p_value', 'mean', 'std'])
# NaN p-values (e.g. from groups too small to test) compare as not significant.
df_sig['significant'] = df_sig['p_value'] < significance_level

# Show only the significant (line, gene) pairs.
df_sig.query('significant')

In [None]:
# Overview figure: every gene on the x axis, one hue per line, with the
# KOEBA175 line left out.
overview_data = dfq.query('line != "KOEBA175"')
overview_labels = dict(
    title=f"Gene Expression Fold Change to {normalization_line} ({normalization_gene})",
    ylabel=f"Fold Change (normalized to {normalization_gene})",
)

log_barplot(
    df=overview_data,
    x_key='gene',
    y_key=line_normalization_title,
    hue_key='line',
    x_order=EmmaPlotColors.gene_order,
    hue_order=EmmaPlotColors.line_order,
    colors=colors_dark,
    output_folder=output_folder,
    figsize=(10, 5),
    **overview_labels,
)

In [None]:

# plot each line: one smaller figure per line in dfq, with the line name
# prefixed to the title
for line, dfg in dfq.groupby('line'):
    log_barplot(
        df=dfg,
        x_key='gene',
        y_key=line_normalization_title,
        hue_key='line',
        x_order=EmmaPlotColors.gene_order,
        hue_order=EmmaPlotColors.line_order,
        colors=colors_dark,
        title=f"Line {line}, Gene Expression Fold Change to {normalization_line} ({normalization_gene})",
        ylabel=f"Fold Change (normalized to {normalization_gene})",
        output_folder=output_folder,
        figsize=(6,3.5),
        font_scale=0.75,
    )

In [None]:
# plot each repeat: one figure per sample, leaving out the KOEBA175 line
# Keyword arguments that are identical for every sample's figure.
repeat_plot_kwargs = dict(
    x_key='gene',
    y_key=line_normalization_title,
    hue_key='line',
    x_order=EmmaPlotColors.gene_order,
    hue_order=EmmaPlotColors.line_order,
    colors=colors_dark,
    ylabel=f"Fold Change (normalized to {normalization_gene})",
    output_folder=output_folder,
    figsize=(10, 5),
)

for sample, sample_df in dfq.groupby('sample'):
    # The filter is applied per group so every sample still produces a call.
    log_barplot(
        df=sample_df.query('line != "KOEBA175"'),
        title=f"Sample {sample}, Gene Expression Fold Change to {normalization_line} ({normalization_gene})",
        **repeat_plot_kwargs,
    )
