In [None]:
import pandas as pd

In [None]:
# Expression table to load; the relative path is resolved against the working directory.
expr_graph='c_elegans.PRJNA13758.WS287.expr_graph.csv'


def clean_str(x):
    """Remove every double-quote character from ``x`` and trim surrounding whitespace."""
    return x.replace('"','').strip()


def clean_float(x):
    """Strip quotes/whitespace from ``x`` the same way as ``clean_str``, then parse a float."""
    return float(x.replace('"','').strip())


columns = ["Gene", "Gene_name", "Life_stage", "Library", "Protocol", "FPKM_value"]

# The first line of the file is skipped and the names above are applied instead.
# 'Gene' has no converter, so it is kept exactly as pandas parses it.
expr_graph_df = pd.read_csv(
    expr_graph,
    names=columns,
    header=None,
    skiprows=1,
    low_memory=False,
    converters={
        **{text_col: clean_str
           for text_col in ('Gene_name', 'Life_stage', 'Library', 'Protocol')},
        'FPKM_value': clean_float,
    },
)


In [None]:
# Print the number of rows in expr_graph_df, formatted with thousands separators.
print(f"{len(expr_graph_df):,}")

In [None]:
# Bare last expression: the notebook renders expr_graph_df as this cell's output.
expr_graph_df

In [None]:
# Count how many rows carry each distinct Life_stage value.
expr_graph_df['Life_stage'].value_counts()

In [None]:
# Keep only the rows whose Gene column equals WBGene00015540, then display them.
sams_4 = expr_graph_df[expr_graph_df["Gene"] == "WBGene00015540"]
sams_4

In [None]:
# Stages of interest, in the order they should be listed; get_data_dict uses
# the same order for the bars of the plot.
classical_stages=['EE','LE','L1','L2','L3','L4','YA']

# Restrict the sams_4 rows to those stages, then split them by protocol.
classical_stages_df = sams_4.query("Life_stage in @classical_stages")
median_df = classical_stages_df.query("Protocol == 'Median'")
ribozero_df = classical_stages_df.query("Protocol == 'ribozero'")
polya_df = classical_stages_df.query("Protocol == 'polyA'")

# Sort by position in classical_stages rather than alphabetically: a plain
# string sort places 'LE' after 'L4', which contradicts the order declared above.
# Every remaining Life_stage value is in the list, so .index() cannot fail.
classical_stages_df = classical_stages_df.sort_values(
    by='Life_stage',
    key=lambda stage_col: stage_col.map(classical_stages.index),
)
classical_stages_df

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def get_data_dict(data_df, stages=None):
    """Collect the per-stage FPKM values of ``data_df`` in a plot-ready layout.

    Only rows whose ``Life_stage`` is one of ``stages`` are considered. They
    are split by their ``Protocol`` value:

    * ``'Median'``   -> one value per stage; if several rows match the same
      stage the last one wins, and stages without such a row keep ``0``.
    * ``'ribozero'`` -> individual points, in row order.
    * ``'polyA'``    -> individual points, in row order.

    Rows with any other protocol are ignored.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Must contain the columns ``Life_stage``, ``Protocol`` and
        ``FPKM_value``. Other columns and the column order do not matter.
    stages : sequence of str, optional
        Ordered life stages to keep. Defaults to
        ``['EE', 'LE', 'L1', 'L2', 'L3', 'L4', 'YA']``.

    Returns
    -------
    dict
        ``{'median': (stages, median_fpkm),
        'ribozero': (stage_labels, fpkm_values),
        'polya': (stage_labels, fpkm_values)}`` where every tuple element is
        a plain Python list.

    Raises
    ------
    KeyError
        If one of the three required columns is missing from ``data_df``.
    """
    if stages is None:
        stages = ['EE', 'LE', 'L1', 'L2', 'L3', 'L4', 'YA']
    else:
        stages = list(stages)
    stage_position = {stage: position for position, stage in enumerate(stages)}

    # Columns are read by label. The previous positional lookup on each row
    # (row[<int>] on a string-labelled Series) is deprecated in pandas.
    selected = data_df.loc[data_df['Life_stage'].isin(stages),
                           ['Life_stage', 'Protocol', 'FPKM_value']]

    median_fpkm = [0] * len(stages)
    points_ribozero_x, points_ribozero_y = [], []
    points_polya_x, points_polya_y = [], []

    rows = zip(selected['Life_stage'].tolist(),
               selected['Protocol'].tolist(),
               selected['FPKM_value'].tolist())
    for stage, protocol, fpkm in rows:
        if protocol == 'Median':
            median_fpkm[stage_position[stage]] = fpkm
        elif protocol == 'ribozero':
            points_ribozero_x.append(stage)
            points_ribozero_y.append(fpkm)
        elif protocol == 'polyA':
            points_polya_x.append(stage)
            points_polya_y.append(fpkm)

    return {
        'median': (stages, median_fpkm),
        'ribozero': (points_ribozero_x, points_ribozero_y),
        'polya': (points_polya_x, points_polya_y),
    }
    
def create_plot_classical_stages(plot_data, gene_name, y_max=150):
    """Draw the median bars and the per-protocol points for one gene.

    Parameters
    ----------
    plot_data : dict
        Mapping with the keys ``'median'``, ``'ribozero'`` and ``'polya'``,
        each holding an ``(x_values, y_values)`` pair, i.e. the structure
        returned by ``get_data_dict``.
    gene_name : str
        Name appended to the plot title.
    y_max : float or None, optional
        Upper limit of the y axis (the lower limit is 0). Defaults to 150.
        Pass ``None`` to leave the y axis to matplotlib's automatic scaling,
        which is useful when values exceed the default limit.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # The former `x = classical_stages` / `y = classical_fpkm` locals were
    # unused, and `classical_fpkm` is not defined anywhere, so they only
    # served to raise a NameError; they are gone.
    fig, ax = plt.subplots(figsize=(4, 4))
    if y_max is not None:
        ax.set_ylim([0, y_max])

    # zorder stacks the layers: bars at the bottom, then ribozero, then polyA.
    ax.bar(plot_data['median'][0], plot_data['median'][1],
           color='lightgrey', width=0.5, zorder=2)
    ax.scatter(plot_data['ribozero'][0], plot_data['ribozero'][1],
               marker='D', s=10, color='purple', zorder=3)
    ax.scatter(plot_data['polya'][0], plot_data['polya'][1],
               color='green', s=10, zorder=4)

    # Title, axis labels and horizontal grid lines.
    ax.set_title(f"Classical stages {gene_name}")
    ax.set_ylabel("Expression (FPKM)")
    ax.set_xlabel("Life stages")
    ax.grid(axis='y', linestyle='-', linewidth=0.5)

    plt.show()


In [None]:
# Build the plotting dictionary from the sams_4 rows and print it for inspection.
data = get_data_dict(sams_4)
print(data)

In [None]:
# Plot the dictionary stored in `data`; "sams-4" is only used in the plot title.
create_plot_classical_stages(data, "sams-4")

In [None]:
# NOTE(review): disease_association_df is not defined in any cell visible above;
# presumably it comes from a cell that was removed or run out of order. Confirm
# that this cell still works after Restart Kernel -> Run All.
disease_association_df.columns  # not the cell's last expression, so nothing is displayed here
# Prefix every column label with 'a' (a label x becomes f'a{x}').
# Not idempotent: re-running this cell prefixes the already-renamed labels again.
new_names = {x:f'a{x}' for x in disease_association_df.columns}
disease_association_df = disease_association_df.rename(columns=new_names)
# Display the renamed column labels.
disease_association_df.columns

In [None]:
# Frequency of each distinct value in column 'a1'.
disease_association_df['a1'].value_counts()

In [None]:
# Select the rows whose 'a1' column equals 'gene' and show how many there are.
ttt = disease_association_df.query("a1 == 'gene'")
len(ttt)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Stand-alone matplotlib example: it uses only synthetic data generated below
# and none of the data frames from the earlier cells.

# Fixing random state for reproducibility
np.random.seed(19680801)

# Time axis: 0 up to (not including) 30 in steps of dt.
dt = 0.01
t = np.arange(0, 30, dt)
# The two draws happen in this order; swapping them would change both signals.
nse1 = np.random.randn(len(t))                 # white noise 1
nse2 = np.random.randn(len(t))                 # white noise 2

# Two signals with a coherent part at 10 Hz and a random part
s1 = np.sin(2 * np.pi * 10 * t) + nse1
s2 = np.sin(2 * np.pi * 10 * t) + nse2

# Two stacked axes: the raw signals on top, their coherence below.
fig, axs = plt.subplots(2, 1)
axs[0].plot(t, s1, t, s2)
axs[0].set_xlim(0, 2)  # show only the first 2 time units
axs[0].set_xlabel('Time')
axs[0].set_ylabel('s1 and s2')
axs[0].grid(True)

# 256 and 1/dt are passed positionally (NFFT and Fs in Axes.cohere's signature).
cxy, f = axs[1].cohere(s1, s2, 256, 1. / dt)
axs[1].set_ylabel('Coherence')

fig.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Stand-alone example of drawing a scatter point on top of a categorical bar
# plot; it uses only the small arrays defined here.
# Note: this rebinds the notebook-level names x and y.

# Create some data
x = np.array(['A', 'B', 'C', 'D'])
y = np.array([3, 8, 1, 10])
point_x = np.array(['B'])
point_y = np.array([6])

# Create a bar plot
plt.bar(x, y, zorder=2)

# Show a scatter plot point on top of the bar
# (zorder=3 is higher than the bars' zorder=2, so the point is drawn above them)
plt.scatter(point_x, point_y, color='red', zorder=3)

# Set the limits of the y-axis to make the point visible
#plt.ylim([0, 12])

# Show the plot
plt.show()


In [None]:
# Display the row index of disease_association_df.
disease_association_df.index