In [None]:
# Test cell - run this first
#
# Reports the interpreter in use and checks that every package the notebook
# depends on at this stage can be imported.
import importlib
import sys

print(f"Python version: {sys.version}")
# sys.prefix is the root of the running interpreter's environment (the venv
# directory when a virtual environment is active).
print(f"Virtual environment: {sys.prefix}")

# (module to import, name it is bound to in this namespace).
_required = (
    ("ipykernel", "ipykernel"),
    ("jupyter", "jupyter"),
    ("numpy", "np"),
    ("pandas", "pd"),
)

# Probe each package on its own: a single shared try-block would stop at the
# first failure and hide the status of every package listed after it.
_missing = []
for _module_name, _alias in _required:
    try:
        globals()[_alias] = importlib.import_module(_module_name)
    except ImportError as e:
        print(f"✗ Missing package: {e}")
        _missing.append(_module_name)
    else:
        print(f"✓ {_module_name} is available")

# Only claim success when nothing was missing.
if not _missing:
    print("\n🎉 All systems go! You can now run genetic engineering analysis.")

: 

In [None]:
# Cell 1: Import packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from Bio.Seq import Seq
from Bio import SeqUtils
import plotly.express as px
print("All packages imported successfully!")

# Cell 2: CRISPR PAM Site Analysis
# IUPAC nucleotide codes mapped to the concrete bases they stand for.
# "N" is handled separately as a full wildcard.
_IUPAC_BASES = {
    "A": "A", "C": "C", "G": "G", "T": "T", "U": "U",
    "R": "AG", "Y": "CT", "S": "GC", "W": "AT", "K": "GT", "M": "AC",
    "B": "CGT", "D": "AGT", "H": "ACT", "V": "ACG",
}


def find_pam_sites(sequence, pam_pattern="NGG"):
    """Find all PAM sites in a DNA sequence.

    Scans the given strand left to right and reports every window that
    matches ``pam_pattern``; overlapping matches are all reported. Only the
    strand passed in is searched (no reverse complement).

    Args:
        sequence: DNA sequence to scan. Matching is case-insensitive.
        pam_pattern: PAM motif. ``N`` (or ``.``) matches any character;
            other IUPAC degenerate codes (``R``, ``Y``, ``V``, ...) match the
            bases they represent as well as the code letter itself; any other
            character must match literally.

    Returns:
        A list of ``(index, matched_text)`` tuples, where ``index`` is the
        0-based start of the match and ``matched_text`` is the slice of
        ``sequence`` in its original case. An empty pattern matches nothing.
    """
    # An empty pattern would otherwise "match" at every position.
    if not pam_pattern:
        return []

    # Pre-compute, per pattern position, the set of acceptable bases
    # (None means "anything").
    allowed = []
    for code in pam_pattern.upper():
        if code in ("N", "."):
            allowed.append(None)
        else:
            # Include the code itself so a literal occurrence still matches.
            allowed.append(set(_IUPAC_BASES.get(code, "")) | {code})

    width = len(allowed)
    sites = []
    for start in range(len(sequence) - width + 1):
        window = sequence[start:start + width]
        if all(options is None or base.upper() in options
               for options, base in zip(allowed, window)):
            sites.append((start, window))

    return sites

# Cell 3: Sample genetic data
# Toy sequences keyed by gene symbol. Each one is just a short four-base
# motif repeated, so none of them contains "GG" and the default "NGG" PAM
# search finds no sites in this sample data.
genes = dict(
    BRCA1="ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCG",
    TP53="GCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTA",
    CFTR="CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT",
    APOE="TAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAG",
)

# Cell 4: Analyze each gene
# Build one summary record per gene: length, GC percentage, number of PAM
# sites and the start positions of the first few sites.
#
# Bio.SeqUtils.GC was deprecated in favour of gc_fraction and is absent from
# newer Biopython releases, so prefer gc_fraction when it exists and fall back
# to GC on older installs. gc_fraction returns a 0-1 fraction; scale it to a
# percentage to keep the 'GC_Content' column in the same units as before.
# ambiguous="ignore" divides by the full sequence length, like GC did.
_gc_fraction = getattr(SeqUtils, "gc_fraction", None)

results = []
for gene_name, sequence in genes.items():
    pam_sites = find_pam_sites(sequence)
    if _gc_fraction is not None:
        gc_content = _gc_fraction(sequence, ambiguous="ignore") * 100
    else:
        gc_content = SeqUtils.GC(sequence)

    results.append({
        'Gene': gene_name,
        'Sequence_Length': len(sequence),
        'GC_Content': gc_content,
        'PAM_Sites': len(pam_sites),
        'PAM_Locations': [site[0] for site in pam_sites[:5]]  # First 5 locations
    })

# Cell 5: Create DataFrame
# One row per gene, one column per key of the per-gene records.
df = pd.DataFrame(results)
# Heading and table on separate lines, emitted with a single print call.
print("Genetic Analysis Results:", df, sep="\n")

# Cell 6: Visualization
# 2x2 summary figure: two bar charts on the top row, a pie chart and a
# correlation heatmap on the bottom row.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
(gc_ax, pam_ax), (length_ax, corr_ax) = axes

# Plots 1 and 2 share the same layout and differ only in column, colour and
# labels, so they are drawn from a small spec table.
bar_specs = (
    (gc_ax, 'GC_Content', 'lightblue', 'GC Content by Gene', 'GC Content (%)'),
    (pam_ax, 'PAM_Sites', 'lightcoral', 'PAM Sites by Gene', 'Number of PAM Sites'),
)
for bar_ax, column, colour, title, y_label in bar_specs:
    bar_ax.bar(df['Gene'], df[column], color=colour, alpha=0.7)
    bar_ax.set_title(title)
    bar_ax.set_ylabel(y_label)
    bar_ax.tick_params(axis='x', rotation=45)

# Plot 3: each gene's share of the total sequence length
length_ax.pie(df['Sequence_Length'], labels=df['Gene'], autopct='%1.1f%%')
length_ax.set_title('Sequence Length Distribution')

# Plot 4: pairwise correlation between the numeric columns
numeric_df = df[['GC_Content', 'PAM_Sites', 'Sequence_Length']]
correlations = numeric_df.corr()
sns.heatmap(correlations, annot=True, ax=corr_ax, cmap='coolwarm')
corr_ax.set_title('Gene Parameters Correlation')

plt.tight_layout()
plt.show()

# Cell 7: Advanced Analysis with Plotly
# Interactive scatter: GC content against PAM-site count, one marker per gene,
# with marker size driven by sequence length.
scatter_options = dict(
    x='GC_Content',
    y='PAM_Sites',
    size='Sequence_Length',
    color='Gene',
    hover_name='Gene',
    title='Gene Characteristics: GC Content vs PAM Sites',
    size_max=60,
)
fig = px.scatter(df, **scatter_options)
fig.show()

# Cell 8: DNA Sequence Statistics
def analyze_dna_sequence(sequence):
    """Summarise basic statistics of a DNA sequence.

    Args:
        sequence: DNA sequence as a string.

    Returns:
        A dict with the keys:
            'length': number of bases in the sequence.
            'gc_content': GC percentage (0-100).
            'at_content': ``100 - gc_content`` (so anything that is not
                counted as G/C contributes here).
            'molecular_weight': value of ``SeqUtils.molecular_weight`` for
                the sequence, using that function's defaults.
            'nucleotide_counts': counts of the upper-case letters A, T, G, C.
    """
    seq = Seq(sequence)

    # Bio.SeqUtils.GC was deprecated in favour of gc_fraction and is absent
    # from newer Biopython releases. Prefer gc_fraction when available and
    # scale its 0-1 fraction to a percentage; ambiguous="ignore" divides by
    # the full sequence length, matching what GC did. Fall back to GC on
    # older installs.
    gc_fraction = getattr(SeqUtils, "gc_fraction", None)
    if gc_fraction is not None:
        gc_percent = gc_fraction(sequence, ambiguous="ignore") * 100
    else:
        gc_percent = SeqUtils.GC(sequence)

    return {
        'length': len(seq),
        'gc_content': gc_percent,
        'at_content': 100 - gc_percent,
        'molecular_weight': SeqUtils.molecular_weight(seq),
        # Key order A, T, G, C is kept for stable printed output.
        'nucleotide_counts': {base: sequence.count(base) for base in 'ATGC'},
    }

# Analyze all genes
# Print the per-gene statistics, one indented "field: value" line per entry.
print("\nDetailed DNA Analysis:")
for gene_name, sequence in genes.items():
    analysis = analyze_dna_sequence(sequence)
    print(f"\n{gene_name}:")
    for field in analysis:
        print(f"  {field}: {analysis[field]}")

: 