In [6]:
#Differential Expression Analysis
import pandas as pd
import numpy as np

data=pd.read_csv("dea_data.csv")
data.head()

Unnamed: 0,Gene,Control1,Control2,Control3,Control4,Control5,Control6,Control7,Control8,Control9,...,Treatment16,Treatment17,Treatment18,Treatment19,Treatment20,Treatment21,Treatment22,Treatment23,Treatment24,Treatment25
0,Gene1,56,97,19,76,65,25,87,91,79,...,55,59,68,7,55,11,25,77,43,22
1,Gene2,8,93,64,18,13,94,57,6,88,...,91,66,44,89,84,86,57,28,30,93
2,Gene3,64,45,33,19,49,69,93,75,13,...,82,7,5,9,94,18,31,13,83,19
3,Gene4,94,46,81,55,67,56,8,98,27,...,74,76,31,13,66,41,55,48,28,83
4,Gene5,63,36,92,56,66,62,56,16,43,...,77,63,74,84,97,7,24,63,40,23


In [8]:
genes = data['Gene']
control_cols = [col for col in data.columns if "Control" in col]
treatment_cols = [col for col in data.columns if "Treatment" in col]
print("Control col:",control_cols)
print("Treatment Col:",treatment_cols)

Control col: ['Control1', 'Control2', 'Control3', 'Control4', 'Control5', 'Control6', 'Control7', 'Control8', 'Control9', 'Control10', 'Control11', 'Control12', 'Control13', 'Control14', 'Control15', 'Control16', 'Control17', 'Control18', 'Control19', 'Control20', 'Control21', 'Control22', 'Control23', 'Control24', 'Control25']
Treatment Col: ['Treatment1', 'Treatment2', 'Treatment3', 'Treatment4', 'Treatment5', 'Treatment6', 'Treatment7', 'Treatment8', 'Treatment9', 'Treatment10', 'Treatment11', 'Treatment12', 'Treatment13', 'Treatment14', 'Treatment15', 'Treatment16', 'Treatment17', 'Treatment18', 'Treatment19', 'Treatment20', 'Treatment21', 'Treatment22', 'Treatment23', 'Treatment24', 'Treatment25']


In [10]:
control_means = data[control_cols].mean(axis=1)
treatment_means = data[treatment_cols].mean(axis=1)
print("control mean:",control_means)
print("\ntreatment mean:",control_means)

control mean: 0     53.52
1     47.08
2     45.16
3     51.44
4     48.04
      ...  
95    51.56
96    49.32
97    45.40
98    58.08
99    51.72
Length: 100, dtype: float64

treatment mean: 0     53.52
1     47.08
2     45.16
3     51.44
4     48.04
      ...  
95    51.56
96    49.32
97    45.40
98    58.08
99    51.72
Length: 100, dtype: float64


In [11]:
log2fc = np.log2(treatment_means / control_means)
print("log2 fold change for each gene:",log2fc)

log2 fold change for each gene: 0     0.036202
1     0.426635
2     0.085582
3     0.058265
4     0.148274
        ...   
95   -0.187378
96    0.086292
97    0.504368
98   -0.062957
99   -0.107688
Length: 100, dtype: float64


In [13]:
def classify(fc):
    if fc >= 1:
        return "Upregulated"
    elif fc <= -1:
        return "Downregulated"
    else:
        return "Not significant"

status = log2fc.apply(classify)

dea_results = pd.DataFrame({
    "Gene": genes,
    "Control Mean": control_means.round(2),
    "Treatment Mean": treatment_means.round(2),
    "Log2FC": log2fc.round(2),
    "Status": status
})

print("Differential Expression Analysis Results:")
display(dea_results.head(10))  

Differential Expression Analysis Results:


Unnamed: 0,Gene,Control Mean,Treatment Mean,Log2FC,Status
0,Gene1,53.52,54.88,0.04,Not significant
1,Gene2,47.08,63.28,0.43,Not significant
2,Gene3,45.16,47.92,0.09,Not significant
3,Gene4,51.44,53.56,0.06,Not significant
4,Gene5,48.04,53.24,0.15,Not significant
5,Gene6,51.12,45.72,-0.16,Not significant
6,Gene7,48.36,45.0,-0.1,Not significant
7,Gene8,58.0,41.8,-0.47,Not significant
8,Gene9,57.8,50.56,-0.19,Not significant
9,Gene10,48.2,60.92,0.34,Not significant


In [14]:
dea_results.to_csv("dea_results.csv", index=False)
print("Results saved to dea_results.csv")

Results saved to dea_results.csv
