In [2]:
import pandas as pd
import numpy as np
import random as rd 
from sklearn.decomposition import PCA
from sklearn import preprocessing
import matplotlib.pyplot as plt

In [3]:
genes = ['gene' + str(i) for i in range(1, 101)]

In [None]:
genes

In [4]:
wt = ['wt' + str(i) for i in range(1, 6)]
ko = ['ko' + str(i) for i in range(1, 6)]

In [5]:
data = pd.DataFrame(columns=[*wt, *ko], index=genes)

In [6]:
data

Unnamed: 0,wt1,wt2,wt3,wt4,wt5,ko1,ko2,ko3,ko4,ko5
gene1,,,,,,,,,,
gene2,,,,,,,,,,
gene3,,,,,,,,,,
gene4,,,,,,,,,,
gene5,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
gene96,,,,,,,,,,
gene97,,,,,,,,,,
gene98,,,,,,,,,,
gene99,,,,,,,,,,


In [7]:
for gene in data.index:
    data.loc[gene, 'wt1':'wt5'] = np.random.poisson(lam=rd.randrange(10, 1000), size=5)
    data.loc[gene, 'ko1':'ko5'] = np.random.poisson(lam=rd.randrange(10, 1000), size=5)

In [8]:
data

Unnamed: 0,wt1,wt2,wt3,wt4,wt5,ko1,ko2,ko3,ko4,ko5
gene1,291,260,330,307,313,691,703,713,723,746
gene2,479,461,427,392,401,895,842,834,888,879
gene3,884,893,905,912,879,831,807,858,813,840
gene4,570,492,526,527,526,90,113,107,97,94
gene5,175,151,148,166,167,491,538,502,500,521
...,...,...,...,...,...,...,...,...,...,...
gene96,975,928,882,932,944,69,80,70,69,69
gene97,640,657,656,610,624,871,857,920,867,888
gene98,746,674,643,655,703,540,562,546,555,561
gene99,43,55,54,50,57,181,204,190,202,197


In [9]:
data.T

Unnamed: 0,gene1,gene2,gene3,gene4,gene5,gene6,gene7,gene8,gene9,gene10,...,gene91,gene92,gene93,gene94,gene95,gene96,gene97,gene98,gene99,gene100
wt1,291,479,884,570,175,275,627,316,974,433,...,778,219,141,621,67,975,640,746,43,878
wt2,260,461,893,492,151,304,596,354,937,437,...,849,161,142,715,81,928,657,674,55,853
wt3,330,427,905,526,148,256,594,278,1005,426,...,824,162,150,672,65,882,656,643,54,898
wt4,307,392,912,527,166,260,570,331,957,429,...,738,165,117,666,64,932,610,655,50,824
wt5,313,401,879,526,167,281,607,340,960,482,...,771,170,143,680,68,944,624,703,57,893
ko1,691,895,831,90,491,886,543,249,270,374,...,448,746,921,474,247,69,871,540,181,140
ko2,703,842,807,113,538,868,613,235,239,377,...,485,760,929,486,246,80,857,562,204,159
ko3,713,834,858,107,502,861,591,224,265,381,...,455,807,933,472,230,70,920,546,190,144
ko4,723,888,813,97,500,870,539,236,231,400,...,471,773,991,497,204,69,867,555,202,143
ko5,746,879,840,94,521,886,520,248,251,424,...,424,727,951,514,224,69,888,561,197,138


In [10]:
scaled_data = preprocessing.scale(data.T)

In [12]:
scaled_data.shape

(10, 100)

In [13]:
from sklearn.datasets import make_classification
from sklearn.feature_selection import mutual_info_classif
X, y = make_classification(
    n_samples=100, n_features=10, n_informative=2, n_clusters_per_class=1,
    shuffle=False, random_state=42
)



In [15]:
X.shape

(100, 10)

In [18]:
X

array([[-1.17278867,  0.63356167,  0.35137231,  0.18646621,  0.95400176,
         0.65139125, -0.31526924,  0.75896922, -0.77282521, -0.23681861],
       [-1.02646717,  1.00183089,  0.57439745,  0.29736414, -0.48536355,
         0.08187414,  2.31465857, -1.86726519,  0.68626019, -1.61271587],
       [-0.59340317,  1.32272135,  0.77566157,  0.39492144, -0.47193187,
         1.0889506 ,  0.06428002, -1.07774478, -0.71530371,  0.67959775],
       [-1.33745666,  0.58590018,  0.31920247,  0.17167183, -0.73036663,
         0.21645859,  0.04557184, -0.65160035,  2.14394409,  0.63391902],
       [-1.05172286,  1.00459142,  0.57547144,  0.29811551, -2.02514259,
         0.18645431, -0.66178646,  0.85243333, -0.79252074, -0.11473644],
       [-0.36699364,  1.88782031,  1.11793022,  0.56509697,  0.50498728,
         0.86575519, -1.20029641, -0.33450124, -0.47494531, -0.65332923],
       [-1.52165918,  0.53358465,  0.28381293,  0.15542234,  1.76545424,
         0.40498171, -1.26088395,  0.91786195

In [16]:
y.shape

(100,)

In [19]:
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [17]:
mutual_info_classif(X, y)

array([0.58875393, 0.10657571, 0.19641896, 0.0968634 , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ])

In [20]:
from sklearn.datasets import make_regression
from sklearn.feature_selection import mutual_info_regression
X, y = make_regression(
    n_samples=50, n_features=3, n_informative=1, noise=1e-4, random_state=42
)



In [23]:
X.shape

(50, 3)

In [25]:
y.shape

(50,)

In [26]:
mutual_info_regression(X, y)

array([0.11701995, 2.645872  , 0.02869703])