In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import sys
import os
import matplotlib.pyplot as plt

<h1>get_GO_annotation

In [2]:
def get_GO_annotation(df):
    
    path_domain = os.path.dirname(os.path.abspath('__file__'))
    domain_df = pd.read_csv(path_domain + '\\20210308_10090_GO.tab', sep='\t')
    
    GO_BP_new = []
    GO_CC_new = []
    GO_MF_new = []
    Subloc_new = []
    for x in range(len(df)):
        GO_BP = []
        GO_CC = []
        GO_MF = []
        Subloc = []
                

        Pathways = []
        for i in df['Majority protein IDs'][x]:
            if i != '' and i in list(domain_df['Entry']):
                GO_BP.extend(str(domain_df[domain_df['Entry']==i]['Gene ontology (biological process)'].values.tolist()[0]).split(';'))
                GO_CC.extend(str(domain_df[domain_df['Entry']==i]['Gene ontology (cellular component)'].values.tolist()[0]).split(';'))
                GO_MF.extend(str(domain_df[domain_df['Entry']==i]['Gene ontology (molecular function)'].values.tolist()[0]).split(';'))
                Subloc.extend(str(domain_df[domain_df['Entry']==i]['Subcellular location [CC]'].values.tolist()[0]).split(';'))
        GO_BP_new.append(list(set(GO_BP)))
        GO_CC_new.append(list(set(GO_CC)))
        GO_MF_new.append(list(set(GO_MF)))
        Subloc_new.append(list(set(Subloc)))

            
    df['GO_BP'] = GO_BP_new
    df['GO_CC'] = GO_CC_new
    df['GO_MF'] = GO_MF_new
    df['Subloc'] = Subloc_new
    
    return(df)

<h1> map_UniProt

In [3]:
import re
import time
import json
import zlib
from xml.etree import ElementTree
from urllib.parse import urlparse, parse_qs, urlencode
import requests
from requests.adapters import HTTPAdapter, Retry

POLLING_INTERVAL = 3

API_URL = "https://rest.uniprot.org"


retries = Retry(total=5, backoff_factor=0.25, status_forcelist=[500, 502, 503, 504])
session = requests.Session()
session.mount("https://", HTTPAdapter(max_retries=retries))


def submit_id_mapping(from_db, to_db, ids):
    request = requests.post(
        f"{API_URL}/idmapping/run",
        data={"from": from_db, "to": to_db, "ids": ",".join(ids)},
    )
    request.raise_for_status()
    return request.json()["jobId"]

def get_next_link(headers):
    re_next_link = re.compile(r'<(.+)>; rel="next"')
    if "Link" in headers:
        match = re_next_link.match(headers["Link"])
        if match:
            return match.group(1)


def check_id_mapping_results_ready(job_id):
    while True:
        request = session.get(f"{API_URL}/idmapping/status/{job_id}")
        request.raise_for_status()
        j = request.json()
        if "jobStatus" in j:
            if j["jobStatus"] == "RUNNING":
                print(f"Retrying in {POLLING_INTERVAL}s")
                time.sleep(POLLING_INTERVAL)
            else:
                raise Exception(request["jobStatus"])
        else:
            return bool(j["results"] or j["failedIds"])


def get_batch(batch_response, file_format, compressed):
    batch_url = get_next_link(batch_response.headers)
    while batch_url:
        batch_response = session.get(batch_url)
        batch_response.raise_for_status()
        yield decode_results(batch_response, file_format, compressed)
        batch_url = get_next_link(batch_response.headers)


def combine_batches(all_results, batch_results, file_format):
    if file_format == "json":
        for key in ("results", "failedIds"):
            if key in batch_results and batch_results[key]:
                all_results[key] += batch_results[key]
    elif file_format == "tsv":
        return all_results + batch_results[1:]
    else:
        return all_results + batch_results
    return all_results


def get_id_mapping_results_link(job_id):
    url = f"{API_URL}/idmapping/details/{job_id}"
    request = session.get(url)
    request.raise_for_status()
    return request.json()["redirectURL"]


def decode_results(response, file_format, compressed):
    if compressed:
        decompressed = zlib.decompress(response.content, 16 + zlib.MAX_WBITS)
        if file_format == "json":
            j = json.loads(decompressed.decode("utf-8"))
            return j
        elif file_format == "tsv":
            return [line for line in decompressed.decode("utf-8").split("\n") if line]
        elif file_format == "xlsx":
            return [decompressed]
        elif file_format == "xml":
            return [decompressed.decode("utf-8")]
        else:
            return decompressed.decode("utf-8")
    elif file_format == "json":
        return response.json()
    elif file_format == "tsv":
        return [line for line in response.text.split("\n") if line]
    elif file_format == "xlsx":
        return [response.content]
    elif file_format == "xml":
        return [response.text]
    return response.text


def get_xml_namespace(element):
    m = re.match(r"\{(.*)\}", element.tag)
    return m.groups()[0] if m else ""


def merge_xml_results(xml_results):
    merged_root = ElementTree.fromstring(xml_results[0])
    for result in xml_results[1:]:
        root = ElementTree.fromstring(result)
        for child in root.findall("{http://uniprot.org/uniprot}entry"):
            merged_root.insert(-1, child)
    ElementTree.register_namespace("", get_xml_namespace(merged_root[0]))
    return ElementTree.tostring(merged_root, encoding="utf-8", xml_declaration=True)


def print_progress_batches(batch_index, size, total):
    n_fetched = min((batch_index + 1) * size, total)
    #print(f"Fetched: {n_fetched} / {total}")


def get_id_mapping_results_search(url):
    parsed = urlparse(url)
    query = parse_qs(parsed.query)
    file_format = query["format"][0] if "format" in query else "json"
    if "size" in query:
        size = int(query["size"][0])
    else:
        size = 500
        query["size"] = size
    compressed = (
        query["compressed"][0].lower() == "true" if "compressed" in query else False
    )
    parsed = parsed._replace(query=urlencode(query, doseq=True))
    url = parsed.geturl()
    request = session.get(url)
    request.raise_for_status()
    results = decode_results(request, file_format, compressed)
    total = int(request.headers["x-total-results"])
    print_progress_batches(0, size, total)
    for i, batch in enumerate(get_batch(request, file_format, compressed), 1):
        results = combine_batches(results, batch, file_format)
        print_progress_batches(i, size, total)
    if file_format == "xml":
        return merge_xml_results(results)
    return results


def get_id_mapping_results_stream(url):
    if "/stream/" not in url:
        url = url.replace("/results/", "/stream/")
    request = session.get(url)
    request.raise_for_status()
    parsed = urlparse(url)
    query = parse_qs(parsed.query)
    file_format = query["format"][0] if "format" in query else "json"
    compressed = (
        query["compressed"][0].lower() == "true" if "compressed" in query else False
    )
    return decode_results(request, file_format, compressed)


def map_UniProtID(UniProtIDs, identifier_from, identifier_to):
    try:
        job_id = submit_id_mapping(
            from_db=identifier_from, to_db=identifier_to, ids=UniProtIDs 
        )
        if check_id_mapping_results_ready(job_id):
            link = get_id_mapping_results_link(job_id)
            results = get_id_mapping_results_search(link)
            # Equivalently using the stream endpoint which is more demanding
            # on the API and so is less stable:
            # results = get_id_mapping_results_stream(link)
        
    except:
        ensemble_flat = []

        
    id_dict = {results['results'][i]['from']:results['results'][i]['to'] for i in range(len(results['results']))}
    return id_dict


<h1>get_IMPI_annotation</h1> --> file import

In [4]:
def get_IMPI_annotation(df):
    
    path_IMPI = os.path.dirname(os.path.abspath('__file__'))
   
    path_IMPI_file = path_IMPI +'\\IMPI_2021_Q4pre_Mus_Musculus.csv'
    df_IMPI = pd.read_csv(path_IMPI_file)
    
    
    IMPI_new = []
    for x in range(len(df)):
        IMPI = []
        for i in df['ESNG'][x]:
            if i in list(df_IMPI['Ensembl Gene ID Mus Musculus']):
                IMPI.append(list(df_IMPI[df_IMPI['Ensembl Gene ID Mus Musculus']==i]['IMPI Class'])[0])
            else:
                IMPI.append('NA')
        IMPI_new.append(IMPI)
        
    df['IMPI_new']=IMPI_new
    return(df)

<h1>get_MitoCarta_annotation</h1> --> file import

In [5]:
def get_MitoCharta_annotation(df):
    
    path_MC3 = os.path.dirname(os.path.abspath('__file__'))
    df_MC3 = pd.read_excel(path_MC3+'\\Mouse_MitoCarta3_0.xls', sheet_name = [0,1,2])
    
    SubMitoLocalization_new = []
    Pathways_new = []
    for x in range(len(df)):
        SubMitoLocalization = []
        Pathways = []
        for i in df['Entrez_GeneID'][x]:
            if i != '' and int(i) in list(df_MC3[1]['MouseGeneID']):
                SubMitoLocalization.append(list(df_MC3[1][df_MC3[1]['MouseGeneID']==int(i)]['MitoCarta3.0_SubMitoLocalization'])[0])
        SubMitoLocalization_new.append(list(set(SubMitoLocalization)))
        
        for i in df['Entrez_GeneID'][x]:
            if i != '' and len(list(df_MC3[1][df_MC3[1]['MouseGeneID']==int(i)]['MitoCarta3.0_MitoPathways']))>0 and list(df_MC3[1][df_MC3[1]['MouseGeneID']==int(i)]['MitoCarta3.0_MitoPathways'])!=[0]:
                Pathways.extend(list(df_MC3[1][df_MC3[1]['MouseGeneID']==int(i)]['MitoCarta3.0_MitoPathways'])[0].split(' | '))
        Pathways_new.append(list(set(Pathways)))
            
    df['SubMitoLocalization'] = SubMitoLocalization_new
    df['Pathways'] = Pathways_new
    return(df)

<h1>Data_filter

In [6]:
def filter_(df,samples=['B_','BAT_','H_','K_','L_','S_','SKM_'],cutoff = 0.75):
    
    valid= []
    #get groups 
    for s in range(len(samples)):
        col = []
        per_val_val = []
        values_total = []
        [col.append(i) for i in range(len(list(df.columns))) if df.columns[i].find('Intensity '+samples[s])==0]
        df.iloc[:,col] = np.log2((df.iloc[:,col]).replace(0,np.nan))
        for z in range(len(df)):
            valid_values = 0
            valid_values = [valid_values+1 for i in range(len(col)) if ((df.iloc[z,col[i]]>0) & (df.iloc[z,col[i]]!=np.nan))]
            per_val_val.append(sum(valid_values)/len(col))
            values_total.append(len(col))
        df['Valid values_'+samples[s]] = per_val_val
        df['Values_Tissue_'+samples[s]] = values_total
    
    df['TissueID'] = df[['Valid values_'+samples[0],'Valid values_'+samples[1],'Valid values_'+samples[2],'Valid values_'+samples[3],'Valid values_'+samples[4],'Valid values_'+samples[5],'Valid values_'+samples[6]]].replace(0,np.nan).count(axis=1)
        
    
    df = df.reset_index(drop=True)
    
    row_filter = [i for i in range(len(df)) if (('Verified mitochondrial' in df['IMPI_new'][i])==True or 
                                                         #('Predicted mitochondrial' in df['IMPI_new'][i])==True or 
                                                         df['SubMitoLocalization'][i]!=[])]
    df_filtered = df.iloc[row_filter,:].reset_index(drop=True)

    #Median normalization -> Substract Median log2 value of column from each member of column
    cols = []
    for i in range(len(list(df.columns))):
        if df.columns[i].find('Intensity')==0:
            cols.append(i)
    
    df = df.join(pd.DataFrame((df.iloc[:,cols]-df_filtered.iloc[:,cols].median()).to_numpy(), columns = list('Norm_'+df.columns[cols])))
    
    #z-Score across all expanded samples
    cols = []
    xx = pd.DataFrame()
    
    for i in range(len(list(df.columns))):
        if df.columns[i].find('Norm_')==0:
            cols.append(i)
            
    for i in range(len(df)):
        #xx= xx.append(pd.Series(stats.zscore(df.iloc[i,cols].replace('NaN',np.nan), nan_policy = 'omit')),ignore_index=True)
        xx= xx.append(pd.Series(stats.zscore(list(df.iloc[i,cols]), nan_policy = 'omit')),ignore_index=True)
    xx.columns = list('Zscore_'+df.columns[cols])
    
    df = df.join(xx)
    #get median of each group  
    group = {}
    for s in range(len(samples)):
        col = []
        for i in range(len(list(df.columns))):
            if ((df.columns[i].find('Zscore_Norm_Intensity '+samples[s])==0)):
                col.append(i)
            group.update({samples[s]:col})
    for i in range(len(group)):
        df['Median_Z-score_'+list(group.keys())[i]] = df.iloc[:,(list(group.values())[i])].median(1)
        
        
    #calculate valid values    
    col_val = []
    for i in range(len(list(df.columns))):
        if ((df.columns[i].find('Valid values_')==0)):
            col_val.append(i)
    #get max valid value value across groups -> at least XX valid values in at least 1 group    
    for i in range(len(df)):
        if df.iloc[i,col_val].max()>=cutoff:
            valid.append(True)
        else:
            valid.append(False)
    df['valid'] = valid
    df = df[df['valid']==True]
    df = df.reset_index(drop=True)
    return (df)

<h1>gaussian

In [7]:
def gaussian(df, width=0.3, downshift=-1.8, prefix=None):
    """
    Impute missing values by drawing from a normal distribution

    :param df:
    :param width: Scale factor for the imputed distribution relative to the standard deviation of measured values. Can be a single number or list of one per column.
    :param downshift: Shift the imputed values down, in units of std. dev. Can be a single number or list of one per column
    :param prefix: The column prefix for imputed columns
    :return:
    """

    df = df.copy()

    imputed = df.isnull()  # Keep track of what's real

    if prefix:
        mask = np.array([l.startswith(prefix) for l in df.columns.values])
        mycols = np.arange(0, df.shape[1])[mask]
    else:
        mycols = np.arange(0, df.shape[1])
    
    if type(width) is not list:
        width = [width] * len(mycols)

    elif len(mycols) != len(width):
        raise ValueError("Length of iterable 'width' does not match # of columns")

    if type(downshift) is not list:
        downshift = [downshift] * len(mycols)

    elif len(mycols) != len(downshift):
        raise ValueError("Length of iterable 'downshift' does not match # of columns")

    for i in mycols:
        data = df.iloc[:, i]
        mask = data.isnull().values
        mean = data.mean(axis=0)
        stddev = data.std(axis=0)

        m = mean + downshift[i]*stddev
        s = stddev*width[i]

        # Generate a list of random numbers for filling in
        values = np.random.normal(loc=m, scale=s, size=df.shape[0])
    
        # Now fill them in
        df.iloc[mask, i] = values[mask]

    return df, imputed


<h1>get_kinase_annotation </h1> --> file import

In [8]:
def get_kinase_annotation(df):
    
    path_kinase = os.path.dirname(os.path.abspath('__file__'))    
    path_kinase_file = path_kinase +'\\pkinfam_annotated.csv'
    df_kinase = pd.read_csv(path_kinase_file)
    
    kinase_new = []
    for x in range(len(df)):
        kinase = []
        for i in df['Majority protein IDs'][x]:
            if i != '' and i in list(df_kinase['UniProtID_mouse']):
                kinase.append(list(df_kinase[df_kinase['UniProtID_mouse']==i]['Kinase_fam'])[0])
        kinase_new.append(list(set(kinase)))    
   
    df['Kinase'] = kinase_new

    return(df)

In [9]:
def annotate_ESNG(flat_list, identifier_from, identifier_to,df):

    id_dict = map_UniProtID(flat_list,identifier_from, identifier_to)
    
    no_match_dict = {no_match:'' for no_match in list(set(flat_list).difference(set(list(id_dict.keys()))))}
    id_dict.update(no_match_dict)
    
    xx = [[re.findall(r"ENSMUSG\d+",id_dict[item]) for item in df['Majority protein IDs'][i]] for i in range(len(df))]
    return_list = [[item[0] if item != [] else '' for item in row] for row in xx]

    return return_list

In [10]:
def annotate_Entrez_String(flat_list, identifier_from, identifier_to,df):

    id_dict = map_UniProtID(flat_list,identifier_from, identifier_to)
    
    no_match_dict = {no_match:'' for no_match in list(set(flat_list).difference(set(list(id_dict.keys()))))}
    id_dict.update(no_match_dict)
    
    return_list = [[id_dict[item] for item in df['Majority protein IDs'][i]] for i in range(len(df))]
    #return_list = [[item[0] if item != [] else '' for item in row] for row in xx]

    return return_list

<h1> filter_mito

In [11]:
def filter_Mito(df):
    #df['IMPI_new']= [df['IMPI_new'][i].split(',') for i in range(len(df))]
    row_filter = [i for i in range(len(df)) if (('Verified mitochondrial' in df['IMPI_new'][i])==True or 
                                                         #('Predicted mitochondrial' in df['IMPI_new'][i])==True or 
                                                         df['SubMitoLocalization'][i]!=[])]
    df_filtered = df.iloc[row_filter,:].reset_index(drop=True)
    return(df_filtered)

<h1>Impute

In [12]:
def impute(df, prefix = 'Norm'):
    import numpy as np
    from scipy import stats
    
    cols = []
    for i in range(len(list(df.columns))):
        if df.columns[i].find(prefix)==0:
            cols.append(i)

    df.iloc[:,cols], t = gaussian(pd.DataFrame(df.iloc[:,cols]))
    
    return(df)

<h1>Formating

In [13]:
def formating(df):
    [df['GO_BP'][i].remove('nan') for i in range(len(df)) if ('nan' in df['GO_BP'][i])]
    [df['GO_CC'][i].remove('nan') for i in range(len(df)) if ('nan' in df['GO_CC'][i])]
    [df['GO_MF'][i].remove('nan') for i in range(len(df)) if ('nan' in df['GO_MF'][i])]
    [df['Subloc'][i].remove('nan') for i in range(len(df)) if ('nan' in df['Subloc'][i])]
    df['GO_BP'] = [';'.join(df['GO_BP'][i]) for i in range(len(df))]
    df['GO_CC'] = [';'.join(df['GO_CC'][i]) for i in range(len(df))]
    df['GO_MF'] = [';'.join(df['GO_MF'][i]) for i in range(len(df))]
    df['Subloc'] = [';'.join(df['Subloc'][i]) for i in range(len(df))]
    df['Pathways']= [';'.join(df['Pathways'][i]) for i in range(len(df))]
    df['Majority protein IDs'] = [';'.join(df['Majority protein IDs'][i]) for i in range(len(df))]
    df['Entrez_GeneID']= [';'.join(df['Entrez_GeneID'][i]) for i in range(len(df))]
    df['SubMitoLocalization']= [';'.join(df['SubMitoLocalization'][i]) for i in range(len(df))]
    df['ESNG']= [';'.join(df['ESNG'][i]) for i in range(len(df))]
    df['IMPI_new']= [';'.join(df['IMPI_new'][i]) for i in range(len(df))]
    df['Kinase'] = [';'.join(df['Kinase'][i]) for i in range(len(df))]
    df['StringID']= [';'.join(df['StringID'][i]) for i in range(len(df))]
    df["Protein_Selection_ID"] = df["Gene names"].astype(str)+"_"+df["Protein IDs"]
    return df

In [14]:
path = os.path.dirname(os.path.abspath('__file__'))+'\\MQ_output\\' ## might be adjusted



columns_to_import = ['Protein IDs','Majority protein IDs','Peptide counts (all)','Peptide counts (razor+unique)','Peptide counts (unique)','Protein names','Gene names','Fasta headers','Number of proteins','Peptides','Razor + unique peptides','Unique peptides','Intensity B_1','Intensity B_2','Intensity B_3','Intensity B_4','Intensity B_5','Intensity B_6','Intensity BAT_1','Intensity BAT_2','Intensity BAT_3','Intensity BAT_4','Intensity BAT_5','Intensity BAT_6','Intensity H_1','Intensity H_2','Intensity H_3','Intensity H_4','Intensity H_5','Intensity H_6','Intensity K_1','Intensity K_2','Intensity K_3','Intensity K_4','Intensity K_5','Intensity K_6','Intensity L_1','Intensity L_2','Intensity L_3','Intensity L_4','Intensity L_5','Intensity L_6','Intensity S_1','Intensity S_2','Intensity S_3','Intensity S_4','Intensity S_5','Intensity S_6','Intensity SKM_1','Intensity SKM_2','Intensity SKM_3','Intensity SKM_4','Intensity SKM_5','Intensity SKM_6','Only identified by site','Reverse','Potential contaminant','id','Peptide IDs','Peptide is razor','Mod. peptide IDs','Evidence IDs','MS/MS IDs','Best MS/MS','Oxidation (M) site IDs','Phospho (STY) site IDs','Oxidation (M) site positions','Phospho (STY) site positions','Taxonomy IDs']
df = pd.read_table((path+ 'proteinGroups.txt'), sep = '\t', usecols= columns_to_import, low_memory=False).sort_values(by=['Gene names']).reset_index(drop=True)
df = df[(df['Reverse']!='+') & (df['Potential contaminant']!='+')].reset_index(drop=True)

df['Majority protein IDs'] = [df['Majority protein IDs'][i].split(';') for i in range(len(df))]

df = get_GO_annotation(df)

In [15]:
flat_list = list(set([x for xs in df['Majority protein IDs'] for x in xs]))

#ensemble_dict = map_UniProtID(flat_list,'UniProtKB_AC-ID','Ensembl')
#no_match_dict = {no_match:'' for no_match in list(set(flat_list).difference(set(list(ensemble_dict.keys()))))}
#ensemble_dict.update(no_match_dict)
#df['ESNG'] = [list(set([x for xs in [re.findall(r"ENSMUSG\d+",ensemble_dict[item]) for item in df['Majority protein IDs'][i] if item in ensemble_dict.keys()] for x in xs])) for i in range(len(df))]
#
#entrez_dict = map_UniProtID(flat_list,'UniProtKB_AC-ID','GeneID')
#no_match_dict = {no_match:'' for no_match in list(set(flat_list).difference(set(list(entrez_dict.keys()))))}
#entrez_dict.update(no_match_dict)
#df['Entrez_GeneID'] = [list(set([entrez_dict[item] for item in df['Majority protein IDs'][i] if item in entrez_dict.keys()])) for i in range(len(df))]

df['ESNG'] = annotate_ESNG(flat_list,'UniProtKB_AC-ID','Ensembl',df)
df['Entrez_GeneID'] = annotate_Entrez_String(flat_list,'UniProtKB_AC-ID','GeneID',df)
#df['StringID'] = annotate(flat_list,'UniProtKB_AC-ID','STRING',df)

df = get_IMPI_annotation(df)
df = get_MitoCharta_annotation(df)
df = get_kinase_annotation(df)

df['StringID'] = annotate_Entrez_String(flat_list,'UniProtKB_AC-ID','STRING',df)
#string_dict = map_UniProtID(flat_list,'UniProtKB_AC-ID','STRING')
#no_match_dict = {no_match:'' for no_match in list(set(flat_list).difference(set(list(string_dict.keys()))))}
#entrez_dict.update(string_dict)
#df['StringID'] = [list(set([string_dict[item] for item in df['Majority protein IDs'][i] if item in string_dict.keys()])) for i in range(len(df))]


Retrying in 3s
Retrying in 3s
Retrying in 3s
Retrying in 3s
Retrying in 3s
Retrying in 3s
Retrying in 3s
Retrying in 3s
Retrying in 3s
Retrying in 3s


In [16]:


df_cut0_notimuted = df.copy(deep= True)
df_cut0_notimuted = filter_(df_cut0_notimuted, cutoff = 0)
#df_cut0_notimuted = formating(df_cut0_notimuted)

df_cut05_notimuted = df.copy(deep= True)
df_cut05_notimuted = filter_(df_cut05_notimuted, cutoff = 0.5)
df_cut05_notimuted = formating(df_cut05_notimuted)

df_cut0_notimuted_mito = df_cut0_notimuted.copy(deep= True)
df_cut0_notimuted_mito = filter_Mito(df_cut0_notimuted_mito)
df_cut0_notimuted_mito = formating(df_cut0_notimuted_mito)

df_cut0_notimuted = formating(df_cut0_notimuted)

df = filter_(df,cutoff=0.5)

df_mito = df.copy(deep= True)
df_mito = filter_Mito(df_mito)
df_mito = formating(df_mito)
#
#
df_imputed = df.copy(deep= True)
df_imputed = impute(df_imputed)
df_imputed = formating(df_imputed)

#path_saving = os.path.dirname(os.path.abspath('__file__'))+'\\Prepared_tables\\'
#df_mito.to_csv((path_saving+ 'Prep_Protein Groups_mito.csv'), index= False)
#df_imputed.to_csv((path_saving+ 'Prep_Protein Groups_imputed.csv'), index= False)
#df_cut0_notimuted.to_csv((path_saving+ 'Prep_Protein Groups_cut0_notimuted.csv'), index= False)
#df_cut05_notimuted.to_csv((path_saving+ 'Prep_Protein Groups_cut05_notimuted.csv'), index= False)
#df_cut0_notimuted_mito.to_csv((path_saving+ 'Prep_Protein Groups_cut0_notimuted_mito.csv'), index= False)

  xx= xx.append(pd.Series(stats.zscore(list(df.iloc[i,cols]), nan_policy = 'omit')),ignore_index=True)
  xx= xx.append(pd.Series(stats.zscore(list(df.iloc[i,cols]), nan_policy = 'omit')),ignore_index=True)
  xx= xx.append(pd.Series(stats.zscore(list(df.iloc[i,cols]), nan_policy = 'omit')),ignore_index=True)
  xx= xx.append(pd.Series(stats.zscore(list(df.iloc[i,cols]), nan_policy = 'omit')),ignore_index=True)
  xx= xx.append(pd.Series(stats.zscore(list(df.iloc[i,cols]), nan_policy = 'omit')),ignore_index=True)
  xx= xx.append(pd.Series(stats.zscore(list(df.iloc[i,cols]), nan_policy = 'omit')),ignore_index=True)


In [19]:
path_saving = os.path.dirname(os.path.abspath('__file__'))+'\\Prepared_tables\\'
df_mito.to_csv((path_saving+ 'Prep_Protein Groups_mito.csv'), index= False)
df_imputed.to_csv((path_saving+ 'Prep_Protein Groups_imputed.csv'), index= False)
df_cut0_notimuted.to_csv((path_saving+ 'Prep_Protein Groups_cut0_notimuted.csv'), index= False)
df_cut05_notimuted.to_csv((path_saving+ 'Prep_Protein Groups_cut05_notimuted.csv'), index= False)
df_cut0_notimuted_mito.to_csv((path_saving+ 'Prep_Protein Groups_cut0_notimuted_mito.csv'), index= False)