In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import os
import sys
import natsort
import GrowthAssayPlotlib

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

In [None]:
# Load the three melted growth-assay tables and tag every row with the file
# ("Repeat") it came from.
df1 = pd.read_json('GA1_df_melted.json').assign(Repeat='1')
df2 = pd.read_json('GA2_df_melted.json').assign(Repeat='2')
df3 = pd.read_json('GA3_df_melted.json').assign(Repeat='3')

hematocrit_levels = ['2%', '4%', '8%']

# ignore_index=True gives the combined frame a fresh 0..n-1 index. Without it
# every file contributes its own index labels, so the same label can occur once
# per repeat and label-based selection/alignment becomes ambiguous.
df = pd.concat([df1, df2, df3], ignore_index=True)

# Rows with no recorded Hematocrit / Plate value fall back to '4%' / '6-well'.
# NOTE(review): presumably some input files lack these columns — confirm that
# these are the right defaults for them.
df['Hematocrit'] = df['Hematocrit'].fillna('4%')
df['Plate'] = df['Plate'].fillna('6-well')
df

In [None]:
# Box + swarm plot of invasion rate against speed, one figure (and one SVG
# file) per strain, restricted to 6-well plates at 4% hematocrit.
filter_lines = ['NF54', 'Dd2', '3D7']
dfq = df.query('Plate == "6-well" and Hematocrit == "4%" and Strain == @filter_lines')
# To plot a single repeat, append ` and Repeat == "3"` to the query above.

MAX_Y = 25  # upper limit of the y axis, shared by all figures
GRAPH_DIR = 'Graphs all'  # output folder for the SVG files

# savefig does not create missing folders, so make sure the target exists.
os.makedirs(GRAPH_DIR, exist_ok=True)

sns.set_theme(style='darkgrid', font='arial', font_scale=1)
sns.set_style(rc={'axes.facecolor': '#F5F5F9'})

# Write text into the SVG as text rather than outlined paths. The setting is
# read when a figure is saved, so it only needs to be set once.
plt.rcParams['svg.fonttype'] = 'none'

for strain, dfg in dfq.groupby('Strain'):
    print(strain, len(dfg.index))

    fig, ax = plt.subplots(figsize=(3, 4), dpi=300)

    # Arguments shared by the box and swarm layers so both use identical axes
    # and category order.
    shared_kwargs = dict(
        x='Speed',
        y='Invasion rate',
        data=dfg,
        order=GrowthAssayPlotlib.speed_order,
        ax=ax,
    )
    # Pass hue='Repeat' to either call to split the repeats.
    sns.boxplot(**shared_kwargs)
    sns.swarmplot(dodge=True, color='black', size=3, **shared_kwargs)

    ax.set_title(strain)
    ax.set_ylim(0, MAX_Y)
    ax.set_ylabel(GrowthAssayPlotlib.YLABEL)

    fig.savefig(
        os.path.join(GRAPH_DIR, f'Lines over different speeds {strain}.svg'),
        bbox_inches='tight',
    )
    # Figures are deliberately left open (the original kept plt.close()
    # disabled), presumably so they are rendered inline below the cell.

In [None]:

# Run the project's significance_testing helper on the filtered frame, with
# NF54 passed as the wildtype.
# NOTE(review): what is compared and which test is used is defined inside
# GrowthAssayPlotlib and is not visible from this notebook.
GrowthAssayPlotlib.significance_testing(
    df=dfq,
    line_key='Strain',
    wildtype='NF54',
    group_keys=['Strain', 'Speed', 'Repeat'],
)



In [None]:

# Rebuild the 6-well / 4% hematocrit subset for the strains in filter_lines so
# this cell does not depend on whatever dfq was last bound to.
dfq = df.query(
    'Plate == "6-well" and Hematocrit == "4%" and Strain == @filter_lines'
)

def sem(data):
    """Return the standard error of the mean of ``data``.

    Computed as the sample standard deviation (``ddof=1``) divided by the
    square root of the number of observations.

    Missing values (NaN) are dropped first so that the standard deviation and
    the sample size refer to the same observations. Previously, for a pandas
    Series, ``np.std`` skipped NaN while ``np.size`` still counted them, which
    understated the SEM.

    Parameters
    ----------
    data : array-like
        Numeric values (list, ndarray, pandas Series, ...). Multi-dimensional
        input is flattened.

    Returns
    -------
    float
        The standard error of the mean, or NaN when fewer than two
        non-missing values are available.
    """
    values = np.asarray(data, dtype=float).ravel()
    values = values[~np.isnan(values)]
    if values.size < 2:
        # A sample standard deviation needs at least two observations.
        return np.nan
    return np.std(values, ddof=1) / np.sqrt(values.size)

# One comma-separated row per (Speed, Strain, Repeat) group:
# strain, speed, repeat, mean invasion rate, SEM of the invasion rate.
for (speed, strain, repeat), dfg in dfq.groupby(['Speed', 'Strain', 'Repeat']):
    rates = dfg['Invasion rate']
    mean_rate = np.mean(rates)
    rate_sem = sem(rates)
    print(f'{strain}, {speed}, {repeat}, {mean_rate:.2f}, {rate_sem:.2f}')

# Variant pooling all repeats (kept disabled):
# for (Speed, Strain), dfg in dfq.groupby(['Speed','Strain']):
#     data = dfg['Invasion rate']
#     print(f'{Strain}, {Speed} {np.mean(data):.2f}, {sem(data):.2f}')

# Show the requested lines next to the strains present in the full table.
print(filter_lines)
df['Strain'].unique()


In [None]:
# df2_melted_q = df_hem.query('Hematocrit != ["2%", "8%"]').copy()
# df2_melted_q['Line']  = df2_melted_q['Line simple']

# df3_melted_q = df3_melted.query('Plate == "6-well"').copy()

# df2_melted_q['Line'] = df2_melted_q['Line'] + ' GA2'
# df3_melted_q['Line'] = df3_melted_q['Line'] + ' GA3'

# df_merged = pd.concat([df2_melted_q, df3_melted_q], ignore_index=True)

# print(df_merged['Line'].unique())

# line_order_filter = [
#     'Dd2 GA3', 
#     '3D7 GA2', 
#     'NF54 GA2', 
#     'NF54 GA3',
#     'KOEBA175 C6 GA2', 
#     # 'KOEBA175 3D7 GA2', 
#     # 'KOEBA175 C1 GA2',
#     # 'KOEBA175 C2 GA2', 
#     # 'KOEBA175 mix GA2',
# ]

# dfq = df_merged.query('Line == @line_order_filter')

# plt.figure(figsize=(6,4), dpi=300)
# sns.boxplot(x='Speed', order=speed_order, y='Invasion rate', data=dfq, hue='Line', hue_order=line_order_filter, palette='rocket')
# sns.swarmplot(x='Speed', order=speed_order, y='Invasion rate', data=dfq, hue='Line', hue_order=line_order_filter, color='black', dodge=True, size=5, legend=False)
# plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
# plt.title(f'')
# plt.ylim((0,MAX_INVASION_RATE_PLOT))
# plt.savefig(os.path.join(output_folder_ga3, f'Comparison line.{EXTENSION}'), bbox_inches='tight')
# plt.close()

In [None]:
from scipy.stats import linregress

# Scatter of invasion rate against parasitemia for NF54 in 6-well plates, with
# a separate least-squares line per speed.
# NOTE(review): unlike the earlier cells this query does not restrict
# Hematocrit — confirm that pooling all hematocrit levels is intended.
dfq = df.query('Strain == "NF54" and Plate == "6-well"')

# One colour per entry of speed_order. Sizing the palette from speed_order
# (instead of a fixed 4) guarantees every listed speed gets a colour.
# Assumes speed_order is a sized sequence (list/tuple) — TODO confirm.
colors = sns.color_palette('rocket', n_colors=len(GrowthAssayPlotlib.speed_order))
color_map = dict(zip(GrowthAssayPlotlib.speed_order, colors))

plt.figure(figsize=(4, 4), dpi=300)
for speed, dfg in dfq.groupby('Speed'):
    c = color_map[speed]
    parasitemia = dfg['Parasitemia']
    invasion_rate = dfg['Invasion rate']

    if parasitemia.nunique() < 2:
        # A line cannot be fitted through fewer than two distinct x values
        # (linregress rejects such input), so show the points without a fit.
        plt.plot(parasitemia, invasion_rate, '.', color=c, label=f'{speed}, no fit')
        print(speed, 'no fit: fewer than two distinct Parasitemia values')
        continue

    slope, intercept, r_value, p_value, std_err = linregress(parasitemia, invasion_rate)
    plt.plot(
        parasitemia,
        invasion_rate,
        '.',
        color=c,
        label=f'{speed}, m={slope:.1f}, r={r_value:.2f}',
    )

    # NOTE(review): the fitted line is drawn over the fixed x range 0..1,
    # independent of the observed parasitemia range — confirm this matches
    # the units of the Parasitemia column.
    xs = np.array([0, 1])
    ys = slope * xs + intercept
    plt.plot(xs, ys, color=c, alpha=0.4)

    print(speed, slope, intercept, r_value, p_value, std_err)

plt.legend()
plt.xlabel('Parasitemia')
plt.ylabel('Invasion rate');