# Manhattan and QQ Plots
- **Project:** GP2 AFR-AAC meta-GWAS 
- **Version:** Python/3.9
- **Status:** COMPLETE
- **Started:** 22-FEB-2023
- **Last Updated:** 22-FEB-2023
    - **Update Description:**  Notebook started

## Notebook Overview
- Visualize the AAC-only, AFR-only, and joint AFR-AAC meta-GWAS results as Manhattan and QQ plots

### CHANGELOG
- 22-FEB-2023: Notebook started 

---
# Data Overview 

| ANCESTRY |     DATASET     | CASES | CONTROLS |  TOTAL  |           ARRAY           |                NOTES                |
|:--------:|:---------------:|:-----:|:--------:|:-------------------------:|:---------------------------------------------------------------------------------------------------------------:|:-----------------------------------:|
|    AFR   | IPDGC – Nigeria |  304  |    285   |   589   |         NeuroChip         | . | 
|    AFR   |  GP2  |  711  |   1,011  |  1,722  |        NeuroBooster       | . |
|    AAC   |  GP2 |  185  |   1,149  |  1,334  |        NeuroBooster       | . | 
|    AAC   |     23andMe     |  288  |  193,985 | 194,273 | Omni Express & GSA & 550k |        Summary statistics only       |

# Getting Started

## Importing packages

In [4]:
## Import the necessary packages 
import os
import numpy as np
import pandas as pd
import gwaslab as gl
import math
import sys
import subprocess
import statsmodels.api as sm
import scipy
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display

## Print out package versions
## List the packages loaded into this notebook, with their versions, for reproducibility
    # Adapted from Stack Overflow: https://stackoverflow.com/questions/40428931/package-for-listing-version-of-packages-used-in-a-jupyter-notebook

## Import packages 
import pkg_resources
import types
from datetime import date
## Build an upper-case date stamp (the recorded run printed 25-FEB-2023)
# NOTE: from here on `date` is this string, no longer the datetime.date class
today = date.today()
date = format(today, "%d-%b-%Y").upper()

## Define function 
def get_imports():
    """Yield one package-style name for every entry in this module's globals.

    A module yields its root package name, a class yields the root package of
    the module that defines it, and any other global yields its own variable
    name. A name found in the alias table is replaced by its pip project name.
    """
    # Import names that differ from the system/pip project name. There is no
    # systematic way to derive these, so exceptions must be added here by hand.
    pip_aliases = {
        "PIL": "Pillow",
        "sklearn": "scikit-learn"
    }

    for global_name, obj in globals().items():
        if isinstance(obj, types.ModuleType):
            # Keep the root package only, not the imported submodule
            root = obj.__name__.split(".")[0]
        elif isinstance(obj, type):
            root = obj.__module__.split(".")[0]
        else:
            root = global_name

        yield pip_aliases.get(root, root)

## Unique names gathered from this notebook's globals
imports = list(set(get_imports()))

# Versions are found by cross-checking installed distributions against the
# imported names; only distributions whose project name matches are kept (pip excluded)
requirements = [
    (dist.project_name, dist.version)
    for dist in pkg_resources.working_set
    if dist.project_name != "pip" and dist.project_name in imports
]

## Print a dated header, then one name==version line per matched package
print(f"PACKAGE VERSIONS ({date})")
for pkg_name, pkg_version in requirements:
    print("\t{}=={}".format(pkg_name, pkg_version))

PACKAGE VERSIONS (25-FEB-2023)
	gwaslab==3.3.20
	matplotlib==3.5.2
	numpy==1.22.4
	scipy==1.8.1
	pandas==1.4.3
	statsmodels==0.13.2
	seaborn==0.11.2


# Cleaning METAL outputs

In [4]:
## Load the three tab-delimited METAL result tables (AAC only, AFR only, joint AFR-AAC)
aac_meta, afr_meta, combined_meta = (
    pd.read_csv(tbl_path, sep="\t")
    for tbl_path in (
        f"{WORK_DIR}/data/AAC-META/AAC-ONLY-META-UpdatedforMETAL1.tbl",
        f"{WORK_DIR}/data/AFR-META/AFR-ONLY-META-UpdatedforMETAL1.tbl",
        f"{WORK_DIR}/data/AFR-AAC-META/AFR-AAC-META-UpdatedforMETAL1.tbl",
    )
)

In [5]:
## Process
## For each METAL table: split MarkerName on ":" into CHR/BP/REF/ALT columns
## (IDs look like "chr1:155235878:G:T"), reduce CHR to the text after the first
## "r", and write the table back out tab-delimited without the index.

for meta_df, out_path in (
    ## AAC ONLY
    (aac_meta, f"{WORK_DIR}/data/AAC-META/CHR-BP-AAC-ONLY-UpdatedforMETAL1.tbl"),
    ## AFR ONLY
    (afr_meta, f"{WORK_DIR}/data/AFR-META/CHR-BP-AFR-ONLY-UpdatedforMETAL1.tbl"),
    ## AAC+AFR
    (combined_meta, f"{WORK_DIR}/data/AFR-AAC-META/CHR-BP-AFR-AAC-UpdatedforMETAL1.tbl"),
):
    # Column assignment mutates the DataFrame in place, so aac_meta, afr_meta
    # and combined_meta all carry the new columns after the loop.
    meta_df[['CHR', 'BP', 'REF', 'ALT']] = meta_df["MarkerName"].str.split(":", expand=True)
    # Split once at the first "r" and keep the remainder ("chr1" -> "1").
    # `n` is passed by keyword: pandas >= 2.0 makes it keyword-only, so the
    # positional form `.str.split('r', 1)` raises a TypeError there.
    meta_df['CHR'] = meta_df['CHR'].str.split('r', n=1).str[1]
    meta_df.to_csv(out_path, index=False, sep="\t")

# Compressing .tbl files with bgzip for GWASLab

In [7]:
%%bash

# Load the samtools module before calling bgzip
module load samtools

# Compress each processed table; the plotting cells read the resulting .tbl.gz files.
# Paths are quoted so a WORK_DIR containing spaces or glob characters is not word-split.
bgzip "${WORK_DIR}/data/AAC-META/CHR-BP-AAC-ONLY-UpdatedforMETAL1.tbl"
bgzip "${WORK_DIR}/data/AFR-META/CHR-BP-AFR-ONLY-UpdatedforMETAL1.tbl"
bgzip "${WORK_DIR}/data/AFR-AAC-META/CHR-BP-AFR-AAC-UpdatedforMETAL1.tbl"

[+] Loading samtools 1.17  ... 


# Visualization

## AAC ONLY Meta-GWAS

In [None]:
## Map the METAL table's column names onto the gl.Sumstats keyword arguments
aac_columns = {
    "snpid": "MarkerName",
    "chrom": "CHR",
    "pos": "BP",
    "ea": "Allele1",
    "nea": "Allele2",
    "beta": "Effect",
    "se": "StdErr",
    "p": "P-value",
    "direction": "Direction",
}

## Load the bgzipped AAC-only table
mysumstats = gl.Sumstats(
    f"{WORK_DIR}/data/AAC-META/CHR-BP-AAC-ONLY-UpdatedforMETAL1.tbl.gz",
    build="38",
    **aac_columns,
)

## Draw the plot_mqq figure and save it as a PNG
mysumstats.plot_mqq(save=f"{WORK_DIR}/data/AAC-META/AAC-ONLY-META-QQplot-FEB2023.png")

## AFR ONLY Meta-GWAS

In [None]:
## Map the METAL table's column names onto the gl.Sumstats keyword arguments
afr_columns = {
    "snpid": "MarkerName",
    "chrom": "CHR",
    "pos": "BP",
    "ea": "Allele1",
    "nea": "Allele2",
    "beta": "Effect",
    "se": "StdErr",
    "p": "P-value",
    "direction": "Direction",
}

## Load the bgzipped AFR-only table
mysumstats = gl.Sumstats(
    f"{WORK_DIR}/data/AFR-META/CHR-BP-AFR-ONLY-UpdatedforMETAL1.tbl.gz",
    build="38",
    **afr_columns,
)

## Draw the plot_mqq figure and save it as a PNG
mysumstats.plot_mqq(save=f"{WORK_DIR}/data/AFR-META/AFR-ONLY-META-QQplot-FEB2023.png")

## Joint AFR-AAC Meta-GWAS

In [None]:
## Map the METAL table's column names onto the gl.Sumstats keyword arguments
joint_columns = {
    "snpid": "MarkerName",
    "chrom": "CHR",
    "pos": "BP",
    "ea": "Allele1",
    "nea": "Allele2",
    "beta": "Effect",
    "se": "StdErr",
    "p": "P-value",
    "direction": "Direction",
}

## Load the bgzipped joint AFR-AAC table
mysumstats = gl.Sumstats(
    f"{WORK_DIR}/data/AFR-AAC-META/CHR-BP-AFR-AAC-UpdatedforMETAL1.tbl.gz",
    build="38",
    **joint_columns,
)

## Draw the plot_mqq figure and save it as a PNG
mysumstats.plot_mqq(save=f"{WORK_DIR}/data/AFR-AAC-META/AFR-AAC-META-QQplot-FEB2023.png")

In [None]:
## Reload the joint AFR-AAC table into `mysumstats`
## (same file and column mapping as the joint meta-GWAS cell)
joint_columns = {
    "snpid": "MarkerName",
    "chrom": "CHR",
    "pos": "BP",
    "ea": "Allele1",
    "nea": "Allele2",
    "beta": "Effect",
    "se": "StdErr",
    "p": "P-value",
    "direction": "Direction",
}

mysumstats = gl.Sumstats(
    f"{WORK_DIR}/data/AFR-AAC-META/CHR-BP-AFR-AAC-UpdatedforMETAL1.tbl.gz",
    build="38",
    **joint_columns,
)


# Adding Highlights

In [None]:
## Variant to single out on the plot, and the alias passed for it
lead_variant = "chr1:155235878:G:T"
lead_alias = "rs3115534"

## The same ID is passed to the annotation, highlight and pinpoint options
mysumstats.plot_mqq(
    anno_set=[lead_variant],
    highlight=[lead_variant],
    pinpoint=[lead_variant],
    anno_alias={lead_variant: lead_alias},
    save=f"{WORK_DIR}/data/AFR-AAC-META/AFR-AAC-META-QQplot-FEB2023-wANNO.png",
)