# Notebook Data Interpretation

### This notebook summarizes the complete dataframes for the GR and DUSP1 experiments.

```
Author: Eric Ron and Luis U. Aguilera
Contact Info: luis.aguilera@colostate.edu

Copyright (c) 2024 Munsky Group 
Colorado State University 
Licensed under BSD 3-Clause License.
```
----

### Notebook summary 

- The code downloads and reads a complete quantification dataframe from the NAS (munsky-nas). 
- The data consist of 3 different experimental conditions:
  - ```GR_timesweep``` :
  - ```DUSP1_timesweep``` :
  - ```DUSP1_TPL``` :
- The code reads the complete dataframe and returns a pandas.DataFrame with the fields:
  -  Cell_id
  -  Condition 
  -  Replica 
  -  Dex_Conc 
  -  Time_index 
  -  Time_TPL 
  -  Nuc_Area 
  -  Cyto_Area 
  -  Nuc_GR_avg_int 
  -  Cyto_GR_avg_int
  -  Nuc_DUSP1_avg_int 
  -  Cyto_DUSP1_avg_int 
  -  RNA_DUSP1_nuc 
  -  RNA_DUSP1_cyto 
  -  DUSP1_ts_size_0 
  -  DUSP1_ts_size_1 
  -  DUSP1_ts_size_2 
  -  DUSP1_ts_size_3

- All processing is performed using the class ```DataManagement```. When information was not collected for a particular experiment, the dataframe is populated with NaNs.
----

### Loading libraries and Modules

In [None]:
import DataManagement as DM
import pathlib
import pandas as pd
import matplotlib.pyplot as plt


In [None]:
# Directories
# The CSV is looked up relative to the directory the notebook is executed from.
current_dir = pathlib.Path.cwd()
dataframe_path = current_dir / 'output_dataframes' / 'Complete_dataframe_Ron_2024.csv'


In [None]:
# Load the complete dataframe from the CSV at dataframe_path; the file's first
# column becomes the row index.
complete_df = pd.read_csv(filepath_or_buffer=dataframe_path, index_col=0)
# Bare last expression so the notebook renders the frame as the cell output.
complete_df

In [None]:
# Histogram of the 'Nuc_Area' column, with integer summary statistics printed above it.
nuc_area = complete_df['Nuc_Area']
ax = plt.gca()  # the current Axes, i.e. the one plt.hist would draw on
ax.hist(nuc_area, bins=100, color='orangered', edgecolor='w')
# int() truncates each statistic toward zero before it is printed.
nuc_area_max = int(nuc_area.max())
nuc_area_mean = int(nuc_area.mean())
nuc_area_min = int(nuc_area.min())
print("Max value in Nuc_Area: ", nuc_area_max, '\n',
      "Mean value in Nuc_Area: ", nuc_area_mean, '\n',
      "Min value in Nuc_Area: ", nuc_area_min)
# Title and axis labels in a single call.
ax.set(title='Histogram of Nuc_Area', xlabel='Nuc_Area', ylabel='Frequency')
plt.show()

In [None]:
# Histogram of the 'Cyto_Area' column, with integer summary statistics printed above it.
cyto_area = complete_df['Cyto_Area']
ax = plt.gca()  # the current Axes, i.e. the one plt.hist would draw on
ax.hist(cyto_area, bins=100, color='orangered', edgecolor='w')
# int() truncates each statistic toward zero before it is printed.
cyto_area_max = int(cyto_area.max())
cyto_area_mean = int(cyto_area.mean())
cyto_area_min = int(cyto_area.min())
print("Max value in Cyto_Area: ", cyto_area_max, '\n',
      "Mean value in Cyto_Area: ", cyto_area_mean, '\n',
      "Min value in Cyto_Area: ", cyto_area_min)
# Title and axis labels in a single call.
ax.set(title='Histogram of Cyto_Area', xlabel='Cyto_Area', ylabel='Frequency')
plt.show()

In [None]:

# Histogram of the 'Nuc_GR_avg_int' column, with integer summary statistics printed above it.
nuc_gr = complete_df['Nuc_GR_avg_int']
ax = plt.gca()  # the current Axes, i.e. the one plt.hist would draw on
ax.hist(nuc_gr, bins=100, color='orangered', edgecolor='w')
# int() truncates each statistic toward zero before it is printed.
nuc_gr_max = int(nuc_gr.max())
nuc_gr_mean = int(nuc_gr.mean())
nuc_gr_min = int(nuc_gr.min())
print("Max value in Nuc_GR_avg_int: ", nuc_gr_max, '\n',
      "Mean value in Nuc_GR_avg_int: ", nuc_gr_mean, '\n',
      "Min value in Nuc_GR_avg_int: ", nuc_gr_min)
# Title and axis labels in a single call.
ax.set(title='Histogram of Nuc_GR_avg_int', xlabel='Nuc_GR_avg_int', ylabel='Frequency')
plt.show()

In [None]:
# Histogram of the 'Cyto_GR_avg_int' column, with integer summary statistics printed above it.
cyto_gr = complete_df['Cyto_GR_avg_int']
ax = plt.gca()  # the current Axes, i.e. the one plt.hist would draw on
ax.hist(cyto_gr, bins=100, color='orangered', edgecolor='w')
# int() truncates each statistic toward zero before it is printed.
cyto_gr_max = int(cyto_gr.max())
cyto_gr_mean = int(cyto_gr.mean())
cyto_gr_min = int(cyto_gr.min())
print("Max value in Cyto_GR_avg_int: ", cyto_gr_max, '\n',
      "Mean value in Cyto_GR_avg_int: ", cyto_gr_mean, '\n',
      "Min value in Cyto_GR_avg_int: ", cyto_gr_min)
# Title and axis labels in a single call.
ax.set(title='Histogram of Cyto_GR_avg_int', xlabel='Cyto_GR_avg_int', ylabel='Frequency')
plt.show()

In [None]:

# Histogram of the 'Nuc_DUSP1_avg_int' column, with integer summary statistics printed above it.
nuc_dusp1 = complete_df['Nuc_DUSP1_avg_int']
ax = plt.gca()  # the current Axes, i.e. the one plt.hist would draw on
ax.hist(nuc_dusp1, bins=100, color='orangered', edgecolor='w')
# int() truncates each statistic toward zero before it is printed.
# The last value is the column sum divided by 10000; its label states that
# scaling so the printed number is not mistaken for the raw total.
print("Max value in Nuc_DUSP1_avg_int: ", int(nuc_dusp1.max()), '\n',
      "Mean value in Nuc_DUSP1_avg_int: ", int(nuc_dusp1.mean()), '\n',
      "Min value in Nuc_DUSP1_avg_int: ", int(nuc_dusp1.min()), '\n',
      "Total Nuc_DUSP1_avg_int (sum / 10000): ", int(nuc_dusp1.sum() / 10000))
# Title and axis labels in a single call.
ax.set(title='Histogram of Nuc_DUSP1_avg_int', xlabel='Nuc_DUSP1_avg_int', ylabel='Frequency')
plt.show()

In [None]:
# Histogram of the 'Cyto_DUSP1_avg_int' column, with integer summary statistics printed above it.
cyto_dusp1 = complete_df['Cyto_DUSP1_avg_int']
ax = plt.gca()  # the current Axes, i.e. the one plt.hist would draw on
ax.hist(cyto_dusp1, bins=100, color='orangered', edgecolor='w')
# int() truncates each statistic toward zero before it is printed.
# The last value is the column sum divided by 10000; its label states that
# scaling so the printed number is not mistaken for the raw total.
print("Max value in Cyto_DUSP1_avg_int: ", int(cyto_dusp1.max()), '\n',
      "Mean value in Cyto_DUSP1_avg_int: ", int(cyto_dusp1.mean()), '\n',
      "Min value in Cyto_DUSP1_avg_int: ", int(cyto_dusp1.min()), '\n',
      "Total Cyto_DUSP1_avg_int (sum / 10000): ", int(cyto_dusp1.sum() / 10000))
# Title and axis labels in a single call.
ax.set(title='Histogram of Cyto_DUSP1_avg_int', xlabel='Cyto_DUSP1_avg_int', ylabel='Frequency')
plt.show()

In [None]:
# Overlay of the cytoplasmic and nuclear DUSP1 average-intensity histograms.
# Both series are binned over the same window, so they share identical bin
# edges and their bar heights can be compared directly. Without a common
# `range`, each call would derive its own 80 bins from its own data extent.
intensity_window = (650, 1800)  # also used as the visible x-range
cyto_dusp1 = complete_df['Cyto_DUSP1_avg_int'].dropna()
nuc_dusp1 = complete_df['Nuc_DUSP1_avg_int'].dropna()

ax = plt.gca()
ax.hist(cyto_dusp1, bins=80, range=intensity_window, color='orangered',
        edgecolor='w', alpha=0.5, label='Cyto_DUSP1_avg_int')
ax.hist(nuc_dusp1, bins=80, range=intensity_window, color='blue',
        edgecolor='w', alpha=0.4, label='Nuc_DUSP1_avg_int')
ax.set_xlim(intensity_window)
# Set title and labels; the x-axis carries both series, so its label is not
# specific to either compartment (the legend distinguishes them).
ax.set_title('Histogram of Nuc_DUSP1_avg_int and Cyto_DUSP1_avg_int')
ax.set_xlabel('DUSP1_avg_int')
ax.set_ylabel('Frequency')
# Add legend
ax.legend()
# Show the plot
plt.show()

In [None]:

# Histogram of the 'RNA_DUSP1_nuc' column, with integer summary statistics printed above it.
rna_nuc = complete_df['RNA_DUSP1_nuc']
ax = plt.gca()  # the current Axes, i.e. the one plt.hist would draw on
ax.hist(rna_nuc, bins=100, color='orangered', edgecolor='w')
# int() truncates each statistic toward zero before it is printed.
rna_nuc_max = int(rna_nuc.max())
rna_nuc_mean = int(rna_nuc.mean())
rna_nuc_min = int(rna_nuc.min())
print("Max value in RNA_DUSP1_nuc: ", rna_nuc_max, '\n',
      "Mean value in RNA_DUSP1_nuc: ", rna_nuc_mean, '\n',
      "Min value in RNA_DUSP1_nuc: ", rna_nuc_min)
# Title and axis labels in a single call.
ax.set(title='Histogram of RNA_DUSP1_nuc', xlabel='RNA_DUSP1_nuc', ylabel='Frequency')
plt.show()

In [None]:
# Histogram of the 'RNA_DUSP1_cyto' column, with integer summary statistics printed above it.
rna_cyto = complete_df['RNA_DUSP1_cyto']
ax = plt.gca()  # the current Axes, i.e. the one plt.hist would draw on
ax.hist(rna_cyto, bins=100, color='orangered', edgecolor='w')
# int() truncates each statistic toward zero before it is printed.
rna_cyto_max = int(rna_cyto.max())
rna_cyto_mean = int(rna_cyto.mean())
rna_cyto_min = int(rna_cyto.min())
print("Max value in RNA_DUSP1_cyto: ", rna_cyto_max, '\n',
      "Mean value in RNA_DUSP1_cyto: ", rna_cyto_mean, '\n',
      "Min value in RNA_DUSP1_cyto: ", rna_cyto_min)
# Title and axis labels in a single call.
ax.set(title='Histogram of RNA_DUSP1_cyto', xlabel='RNA_DUSP1_cyto', ylabel='Frequency')
plt.show()

In [None]:
# Histogram of the 'DUSP1_ts_size_0' column, with integer summary statistics printed above it.
ts_size_0 = complete_df['DUSP1_ts_size_0']
ax = plt.gca()  # the current Axes, i.e. the one plt.hist would draw on
# The 200 bins are computed over the column's full range; only 0-50 is shown below.
ax.hist(ts_size_0, bins=200, color='orangered', edgecolor='w')
# int() truncates each statistic toward zero before it is printed.
ts_size_0_max = int(ts_size_0.max())
ts_size_0_mean = int(ts_size_0.mean())
ts_size_0_min = int(ts_size_0.min())
print("Max value in DUSP1_ts_size_0: ", ts_size_0_max, '\n',
      "Mean value in DUSP1_ts_size_0: ", ts_size_0_mean, '\n',
      "Min value in DUSP1_ts_size_0: ", ts_size_0_min)
# Restrict the visible x-range, then set title and axis labels.
ax.set_xlim([0, 50])
ax.set_title('Histogram of DUSP1_ts_size_0')
ax.set_xlabel('DUSP1_ts_size_0')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Rows whose 'DUSP1_ts_size_0' value exceeds 200 (NaN entries compare False and are excluded).
exceeds_200 = complete_df['DUSP1_ts_size_0'] > 200
selected_rows = complete_df.loc[exceeds_200]
# Bare last expression so the notebook renders the selection as the cell output.
selected_rows

In [None]:
# Histogram of the 'DUSP1_ts_size_1' column, with integer summary statistics printed above it.
ts_size_1 = complete_df['DUSP1_ts_size_1']
ax = plt.gca()  # the current Axes, i.e. the one plt.hist would draw on
ax.hist(ts_size_1, bins=20, color='orangered', edgecolor='w')
# int() truncates each statistic toward zero before it is printed.
ts_size_1_max = int(ts_size_1.max())
ts_size_1_mean = int(ts_size_1.mean())
ts_size_1_min = int(ts_size_1.min())
print("Max value in DUSP1_ts_size_1: ", ts_size_1_max, '\n',
      "Mean value in DUSP1_ts_size_1: ", ts_size_1_mean, '\n',
      "Min value in DUSP1_ts_size_1: ", ts_size_1_min)
# Title and axis labels in a single call.
ax.set(title='Histogram of DUSP1_ts_size_1', xlabel='DUSP1_ts_size_1', ylabel='Frequency')
plt.show()

In [None]:
# Histogram of the 'DUSP1_ts_size_2' column, with integer summary statistics printed above it.
ts_size_2 = complete_df['DUSP1_ts_size_2']
ax = plt.gca()  # the current Axes, i.e. the one plt.hist would draw on
ax.hist(ts_size_2, bins=20, color='orangered', edgecolor='w')
# int() truncates each statistic toward zero before it is printed.
ts_size_2_max = int(ts_size_2.max())
ts_size_2_mean = int(ts_size_2.mean())
ts_size_2_min = int(ts_size_2.min())
print("Max value in DUSP1_ts_size_2: ", ts_size_2_max, '\n',
      "Mean value in DUSP1_ts_size_2: ", ts_size_2_mean, '\n',
      "Min value in DUSP1_ts_size_2: ", ts_size_2_min)
# Title and axis labels in a single call.
ax.set(title='Histogram of DUSP1_ts_size_2', xlabel='DUSP1_ts_size_2', ylabel='Frequency')
plt.show()

In [None]:
# Histogram of the 'DUSP1_ts_size_3' column, with integer summary statistics printed above it.
ts_size_3 = complete_df['DUSP1_ts_size_3']
ax = plt.gca()  # the current Axes, i.e. the one plt.hist would draw on
ax.hist(ts_size_3, bins=20, color='orangered', edgecolor='w')
# int() truncates each statistic toward zero before it is printed.
ts_size_3_max = int(ts_size_3.max())
ts_size_3_mean = int(ts_size_3.mean())
ts_size_3_min = int(ts_size_3.min())
print("Max value in DUSP1_ts_size_3: ", ts_size_3_max, '\n',
      "Mean value in DUSP1_ts_size_3: ", ts_size_3_mean, '\n',
      "Min value in DUSP1_ts_size_3: ", ts_size_3_min)
# Title and axis labels in a single call.
ax.set(title='Histogram of DUSP1_ts_size_3', xlabel='DUSP1_ts_size_3', ylabel='Frequency')
plt.show()