In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Load the two omics tables. Both are expected to carry a 'patient_id' column;
# 'target_column' is expected to end up in the merged table exactly once.
genomics_data = pd.read_csv('genomics_data.csv')
transcriptomics_data = pd.read_csv('transcriptomics_data.csv')

# Merge on the shared identifier (pd.merge defaults to an inner join, so only
# patients present in both tables are kept).
merged_data = pd.merge(genomics_data, transcriptomics_data, on='patient_id')
if merged_data.empty:
    # Fail early with a readable message instead of an opaque scikit-learn error.
    raise ValueError(
        "Merging on 'patient_id' produced no rows; "
        "the two datasets share no patient IDs."
    )

# Separate features (X) and target labels (y).
# 'patient_id' is an identifier, not a measurement: leaving it in X would let
# an arbitrary ID value influence the principal components (or make the scaler
# fail outright when the IDs are strings), so it is dropped with the target.
X = merged_data.drop(columns=['target_column', 'patient_id'])
y = merged_data['target_column']

# Standardize the features so each one contributes on a comparable scale.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Project onto the first two principal components.
pca = PCA(n_components=2)  # Adjust the number of components as required
X_pca = pca.fit_transform(X_scaled)

# Build a DataFrame of the reduced data and re-attach the labels by position
# (row i of X_pca corresponds to row i of merged_data), not by index label.
pca_df = pd.DataFrame(data=X_pca, columns=['PC1', 'PC2'])
pca_df['target_column'] = y.to_numpy()

# Visualize the reduced data (optional)
import matplotlib.pyplot as plt

plt.figure(figsize=(8, 6))
targets = pca_df['target_column'].unique()

# Keep the red/green/blue scheme for up to three classes; with more classes,
# sample a qualitative colormap so no class is silently left off the plot.
colors = ['r', 'g', 'b']
if len(targets) > len(colors):
    colors = plt.cm.tab20(np.linspace(0, 1, len(targets)))

for target, color in zip(targets, colors):
    indices_to_keep = pca_df['target_column'] == target
    # `color=` (rather than `c=`) avoids matplotlib's RGBA-vs-values ambiguity
    # when a class happens to contain 3 or 4 points; `label=` ties each legend
    # entry to the points it describes.
    plt.scatter(pca_df.loc[indices_to_keep, 'PC1'],
                pca_df.loc[indices_to_keep, 'PC2'],
                color=color,
                s=50,
                label=str(target))
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend()
plt.title('PCA of Multi-omics Data')
plt.show()


This code snippet assumes you have two separate datasets for genomics and transcriptomics that share a common identifier column (`patient_id`) on which they can be merged, and that the merged table contains a label column named `target_column`. It standardizes the features, reduces them to two principal components with PCA, and visualizes the result as a scatter plot colored by label. You can extend this example to include more omics data types or use different analysis techniques based on your specific requirements and the nature of your data.

In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import plotly.express as px

# Load the two omics tables. Both are expected to carry a 'patient_id' column;
# 'target_column' is expected to end up in the merged table exactly once.
genomics_data = pd.read_csv('genomics_data.csv')
transcriptomics_data = pd.read_csv('transcriptomics_data.csv')

# Merge on the shared identifier (pd.merge defaults to an inner join, so only
# patients present in both tables are kept).
merged_data = pd.merge(genomics_data, transcriptomics_data, on='patient_id')
if merged_data.empty:
    # Fail early with a readable message instead of an opaque scikit-learn error.
    raise ValueError(
        "Merging on 'patient_id' produced no rows; "
        "the two datasets share no patient IDs."
    )

# Separate features (X) and target labels (y).
# 'patient_id' is an identifier, not a measurement: leaving it in X would let
# an arbitrary ID value influence the principal components (or make the scaler
# fail outright when the IDs are strings), so it is dropped with the target.
X = merged_data.drop(columns=['target_column', 'patient_id'])
y = merged_data['target_column']

# Standardize the features so each one contributes on a comparable scale.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Project onto the first two principal components.
pca = PCA(n_components=2)  # Adjust the number of components as required
X_pca = pca.fit_transform(X_scaled)

# Build a DataFrame of the reduced data and re-attach the labels by position
# (row i of X_pca corresponds to row i of merged_data), not by index label.
pca_df = pd.DataFrame(data=X_pca, columns=['PC1', 'PC2'])
pca_df['target_column'] = y.to_numpy()

# Create the interactive PCA scatter plot, colored by target label.
fig = px.scatter(pca_df, x='PC1', y='PC2', color='target_column',
                 title='Interactive PCA of Multi-omics Data',
                 labels={'PC1': 'Principal Component 1', 'PC2': 'Principal Component 2'})

# Show the plot
fig.show()


In this code, we use Plotly's px.scatter() function to create an interactive scatter plot. We specify the DataFrame (pca_df) containing the PCA-transformed data and provide the names of the columns to plot on the x and y axes (x='PC1' and y='PC2'). We also specify the column to use for color encoding (color='target_column'), which allows us to distinguish different target labels in the plot. Finally, we set the plot title and axis labels using the title and labels arguments, respectively.