In [25]:
%matplotlib inline

import csv
import numpy as np
import matplotlib.pyplot as plt
import math
import operator

In [217]:
def parse_data(data_file):
    """Read a tab-separated text file into a list of rows.

    Parameters
    ----------
    data_file : str or path-like
        Path of the tab-delimited file to read.

    Returns
    -------
    list of list of str
        One inner list per record, in file order. Every field is returned as
        a string; the header row (if any) is not treated specially.
    """
    # newline="" hands line-ending handling to the csv module, as its
    # documentation requires; without it, line breaks embedded in quoted
    # fields are rewritten by universal-newline translation.
    with open(data_file, newline="") as f:
        return list(csv.reader(f, delimiter="\t"))

    
def plot_data_together(listX, Y_master_list, title):
    """Draw every series of ``Y_master_list`` on one shared plot and show it.

    Parameters
    ----------
    listX : sequence
        x values, reused for every series.
    Y_master_list : sequence of sequences
        One sequence of y values per line to draw.
    title : str
        Title placed above the plot.

    The x axis is always labelled "Days". Nothing is returned.
    """
    for series in Y_master_list:
        plt.plot(listX, series)
    plt.xlabel("Days")
    plt.title(title)
    plt.show()
    
    
def plot_data_separate(listX, Y_master_list):
    """Plot each data set of ``Y_master_list`` in its own subplot and show them.

    Parameters
    ----------
    listX : sequence
        x values, reused for every subplot.
    Y_master_list : sequence of sequences
        One sequence of y values per subplot.

    The subplots are laid out in two columns on figure number 1, which is
    created with a size of 20 x 75. Nothing is returned.
    """
    plt.figure(1, figsize=(20, 75))
    n_rows = math.ceil(len(Y_master_list) / 2)
    # Subplot positions are 1-based while the data sets are 0-based. Pairing
    # them with enumerate(start=1) plots every data set, including the first
    # one, which indexing the data with the subplot position used to skip.
    for position, series in enumerate(Y_master_list, start=1):
        plt.subplot(n_rows, 2, position)
        plt.plot(listX, series)
    plt.show()
    
    
def split_lists_by_tissue(master_list):
    """Partition rows by the tissue label held in their second field.

    Parameters
    ----------
    master_list : list of list
        Rows whose field at index 1 names the tissue.

    Returns
    -------
    (leaf_list, apex_list) : tuple of lists
        ``leaf_list`` holds the rows whose tissue field equals "leaf";
        ``apex_list`` holds every other row, whatever its label. The row
        objects are shared with ``master_list``, not copied.
    """
    leaf_list, apex_list = [], []
    for row in master_list:
        destination = leaf_list if row[1] == "leaf" else apex_list
        destination.append(row)
    return leaf_list, apex_list
    

def split_lists_by_species(input_list):
    """Partition rows by the label held in their third field.

    Parameters
    ----------
    input_list : list of list
        Rows whose field at index 2 is compared against "tapidor".

    Returns
    -------
    (tapidor_list, westar_list) : tuple of lists
        ``tapidor_list`` holds the rows labelled "tapidor"; ``westar_list``
        holds all remaining rows, whatever their label. Row objects are
        shared with ``input_list``, not copied.
    """
    tapidor_list = [row for row in input_list if row[2] == "tapidor"]
    westar_list = [row for row in input_list if row[2] != "tapidor"]
    return tapidor_list, westar_list
    
    
def clean_data(input_list):
    """Drop the columns that hold no measurement for this data set.

    The first row decides which columns are removed from every row:

    * species field (index 2) is "tapidor": the field at index 8 is deleted;
    * species field is "westar": the last field is deleted and, when the
      tissue field (index 1) is "leaf", the field at index 8 as well;
    * anything else: rows are left untouched.

    Presumably the deleted fields are the "NA" placeholders the original
    comment refers to -- confirm against the input file.

    Parameters
    ----------
    input_list : list of list
        Rows of a single species/tissue combination. The rows are modified
        in place.

    Returns
    -------
    list of list
        The same ``input_list`` object. An empty list is returned unchanged
        rather than raising IndexError when looking up its first row.
    """
    if not input_list:
        return input_list

    first_row = input_list[0]
    species = first_row[2]

    if species == "tapidor":
        for row in input_list:
            del row[8]
    elif species == "westar":
        # Read the tissue before touching any row; it applies to all of them.
        is_leaf = first_row[1] == "leaf"
        for row in input_list:
            del row[-1]
            if is_leaf:
                del row[8]

    return input_list
        
    
def remove_zero_data(input_list):
    """Remove, in place, every row that contains the string "0".

    Only a field equal to the exact string "0" counts; other spellings of
    zero such as "0.0", or a numeric 0, do not cause a row to be dropped.

    Parameters
    ----------
    input_list : list of list
        Rows to filter. The list itself is modified.

    Returns
    -------
    list of list
        The same ``input_list`` object with the matching rows removed.
    """
    # Slice assignment keeps the caller's list object while replacing its
    # contents with the surviving rows.
    input_list[:] = [row for row in input_list if "0" not in row]
    return input_list
       

def max_and_min(input_list):
    """Find the largest and smallest value of every row.

    The first three fields of a row are skipped; the remaining fields are
    compared numerically via ``float``.

    Parameters
    ----------
    input_list : list of list
        Rows whose fields from index 3 onwards can be converted with
        ``float``.

    Returns
    -------
    (maximums, minimums) : tuple of lists
        One entry per row. Each entry is the original field (not converted),
        and on ties the first occurrence is returned. A row with no value
        fields yields the integer 0 for both.

    Raises
    ------
    ValueError
        If a value field cannot be converted to float.
    """
    maximums = []
    minimums = []
    for row in input_list:
        values = row[3:]
        # key=float compares numerically but returns the field as stored.
        # Taking the extremes from the values themselves (instead of starting
        # the maximum at 0) keeps rows with only negative values correct.
        maximums.append(max(values, key=float, default=0))
        minimums.append(min(values, key=float, default=0))
    return maximums, minimums


def normalise_list(input_list, maxes, mins):
    """Min-max scale the value fields of every row, keeping its three labels.

    Parameters
    ----------
    input_list : list of list
        Rows made of three label fields followed by values convertible with
        ``float``.
    maxes, mins : sequences
        Per-row maximum and minimum, indexed like ``input_list``.

    Returns
    -------
    list of list
        New rows: the three labels unchanged, followed by
        ``(value - min) / (max - min)`` as floats. ``input_list`` is not
        modified.

    Raises
    ------
    ZeroDivisionError
        If a row's maximum equals its minimum.
    """
    def _scale(raw, upper, lower):
        return (float(raw) - float(lower)) / (float(upper) - float(lower))

    normalised = []
    for idx in range(len(input_list)):
        row = input_list[idx]
        new_row = [row[col] for col in range(3)]
        for col in range(3, len(row)):
            new_row.append(_scale(row[col], maxes[idx], mins[idx]))
        normalised.append(new_row)
    return normalised


def _column_for_day(first_row, day):
    """Return the column index that holds ``day`` for this data set, or None.

    Days 22, 43, 64, 65 and 67 are at the same column for every data set.
    The later days depend on the labels found in ``first_row``. When day 69
    or 83 is requested for a data set that does not have it, a message is
    printed and None is returned. Any other day returns None silently.
    """
    fixed_columns = {22: 3, 43: 4, 64: 5, 65: 6, 67: 7}
    if day in fixed_columns:
        return fixed_columns[day]

    is_westar_apex = "westar" in first_row and "apex" in first_row
    if day == 72:
        return 9 if is_westar_apex else 8
    if day == 69 and is_westar_apex:
        return 8
    if day == 83 and "tapidor" in first_row:
        return 9
    if day in (69, 83):
        print("The selected day is not available for that data")
    return None


def _select_rows_with_value(input_list, day, target):
    """Return copies of the rows whose value on ``day`` equals ``target``."""
    # An empty data set has no first row to read the labels from.
    if not input_list:
        return [], 0
    column = _column_for_day(input_list[0], day)
    if column is None:
        return [], 0
    selected = [list(row) for row in input_list if row[column] == target]
    return selected, len(selected)


def select_max_at_n(input_list, max_day):
    """Select the rows of a normalised data set whose maximum is on ``max_day``.

    A row is selected when its value in the column for ``max_day`` equals
    1.0. The column is chosen from ``max_day`` and from the labels in the
    first row of ``input_list``.

    Parameters
    ----------
    input_list : list of list
        Normalised rows (three labels followed by floats) that all belong to
        the same species/tissue combination.
    max_day : int
        One of 22, 43, 64, 65, 67, 69, 72 or 83.

    Returns
    -------
    (rows, count) : tuple
        Shallow copies of the selected rows, labels included, and how many
        there are. ``([], 0)`` is returned when ``input_list`` is empty or
        the day has no column for this data set; for days 69 and 83 a
        message is printed in the latter case.
    """
    return _select_rows_with_value(input_list, max_day, 1.0)


def select_min_at_n(input_list, min_day):
    """Select the rows of a normalised data set whose minimum is on ``min_day``.

    A row is selected when its value in the column for ``min_day`` equals 0.
    The column is chosen from ``min_day`` and from the labels in the first
    row of ``input_list``.

    Parameters
    ----------
    input_list : list of list
        Normalised rows (three labels followed by floats) that all belong to
        the same species/tissue combination.
    min_day : int
        One of 22, 43, 64, 65, 67, 69, 72 or 83.

    Returns
    -------
    (rows, count) : tuple
        Shallow copies of the selected rows, labels included, and how many
        there are. ``([], 0)`` is returned when ``input_list`` is empty or
        the day has no column for this data set; for days 69 and 83 a
        message is printed in the latter case.
    """
    return _select_rows_with_value(input_list, min_day, 0)



def remove_names(input_list):
    """Return a copy of every row without its first three fields.

    Parameters
    ----------
    input_list : list of list
        Rows whose first three fields are labels.

    Returns
    -------
    list of list
        New lists holding the fields from index 3 onwards; rows with three
        or fewer fields become empty lists. ``input_list`` is not modified.
    """
    return [list(row[3:]) for row in input_list]

def plot_normalised_data():
    """Plot the four normalised data sets, one figure per data set.

    Reads the notebook-level day lists (``tapidor_days``,
    ``westar_leaf_days``, ``westar_apex_days``) and normalised tables
    (``normalised_t_l``, ``normalised_t_a``, ``normalised_w_l``,
    ``normalised_w_a``), strips the label columns and draws all rows of a
    table on a single plot. Nothing is returned.
    """
    panels = (
        (tapidor_days, normalised_t_l, "Normalised data from Tapidor leaf"),
        (tapidor_days, normalised_t_a, "Normalised data from Tapidor apex"),
        (westar_leaf_days, normalised_w_l, "Normalised data from Westar leaf"),
        # This title used to read "Normalised from Westar apex"; it now
        # follows the wording of the other three.
        (westar_apex_days, normalised_w_a, "Normalised data from Westar apex"),
    )
    for days, rows, title in panels:
        plot_data_together(days, remove_names(rows), title)
    
def plot_normalised_data_max(max_day):
    """Plot, for each data set, the rows whose maximum falls on ``max_day``.

    Uses the notebook-level day lists and normalised tables. Each of the
    four data sets is filtered with ``select_max_at_n`` and drawn on its own
    plot, titled with the day and the number of rows selected.

    Parameters
    ----------
    max_day : int
        Day passed on to ``select_max_at_n``.
    """
    panels = (
        (tapidor_days, normalised_t_l,
         "Normalised data from Tapidor leaf with max at %s days\n (%s sets of data)"),
        (tapidor_days, normalised_t_a,
         "Normalised data from Tapidor apex with max at %s days\n (%s sets of data)"),
        (westar_leaf_days, normalised_w_l,
         "Normalised data from Westar leaf with max at %s days\n (%s sets of data)"),
        (westar_apex_days, normalised_w_a,
         "Normalised data from Westar apex with max at %s days\n (%s sets of data)"),
    )
    # All selections run before anything is drawn, so whatever the selection
    # step prints appears ahead of the figures.
    selections = [select_max_at_n(rows, max_day) for _, rows, _ in panels]
    for (days, _, template), (subset, count) in zip(panels, selections):
        plot_data_together(days, remove_names(subset), template % (max_day, count))

def plot_normalised_data_min(min_day):
    """Plot, for each data set, the rows whose minimum falls on ``min_day``.

    Uses the notebook-level day lists and normalised tables. Each of the
    four data sets is filtered with ``select_min_at_n`` and drawn on its own
    plot, titled with the day and the number of rows selected.

    Parameters
    ----------
    min_day : int
        Day passed on to ``select_min_at_n``.
    """
    panels = (
        (tapidor_days, normalised_t_l,
         "Normalised data from Tapidor leaf with min at %s days\n (%s sets of data)"),
        (tapidor_days, normalised_t_a,
         "Normalised data from Tapidor apex with min at %s days\n (%s sets of data)"),
        (westar_leaf_days, normalised_w_l,
         "Normalised data from Westar leaf with min at %s days\n (%s sets of data)"),
        (westar_apex_days, normalised_w_a,
         "Normalised data from Westar apex with min at %s days\n (%s sets of data)"),
    )
    # All selections run before anything is drawn, so whatever the selection
    # step prints appears ahead of the figures.
    selections = [select_min_at_n(rows, min_day) for _, rows, _ in panels]
    for (days, _, template), (subset, count) in zip(panels, selections):
        plot_data_together(days, remove_names(subset), template % (min_day, count))
       
def plot_normalised_data_max_min(max_day, min_day):
    """Plot the rows with their maximum on ``max_day`` and minimum on ``min_day``.

    Uses the notebook-level day lists and normalised tables. Each of the
    four data sets is first filtered with ``select_max_at_n``; the surviving
    rows are then filtered with ``select_min_at_n`` and drawn on one plot
    per data set, titled with both days and the number of rows left.

    Parameters
    ----------
    max_day : int
        Day passed on to ``select_max_at_n``.
    min_day : int
        Day passed on to ``select_min_at_n``.
    """
    panels = (
        (tapidor_days, normalised_t_l,
         "Normalised data from Tapidor leaf with max at %s days and min at %s days\n (%s sets of data)"),
        (tapidor_days, normalised_t_a,
         "Normalised data from Tapidor apex with max at %s days and min at %s days\n (%s sets of data)"),
        (westar_leaf_days, normalised_w_l,
         "Normalised data from Westar leaf with max at %s days and min at %s days\n (%s sets of data)"),
        (westar_apex_days, normalised_w_a,
         "Normalised data from Westar apex with max at %s days and min at %s days\n (%s sets of data)"),
    )

    # Filter every data set (max first, then min) before drawing anything.
    selections = []
    for _, rows, _ in panels:
        with_max, _ = select_max_at_n(rows, max_day)
        selections.append(select_min_at_n(with_max, min_day))

    for (days, _, template), (subset, count) in zip(panels, selections):
        plot_data_together(
            days, remove_names(subset), template % (max_day, min_day, count)
        )
    
def plot_gene_data(listX, listY, gene):
    """Plot the rows of ``listY`` together, or report that there are none.

    Parameters
    ----------
    listX : sequence
        x values for the plot.
    listY : list of list
        Rows (three labels followed by values) to draw. When empty, a
        message is printed and nothing is plotted.
    gene : object
        Name shown in the message and in the plot title.

    The title lists the number of rows, ``gene`` and the first field of
    every row. Nothing is returned.
    """
    if not len(listY):
        print("No data for index %s" % gene)
        return
    gene_names = [row[0] for row in listY]
    values = remove_names(listY)
    title = """%s piece(s) of data for gene %s\n%s""" % (len(listY), gene, gene_names)
    plot_data_together(listX, values, title)
    
    
#returns a list with similar data to that inputted
#def compare_data(input_list, num, thresh): #where num is index in input_list to compare all other data to
#    compare_list = input_list[num]
#    new_list = [[abs(float(compare_list[j]) - float(input_list[i][j])) for j in range(3, len(input_list[i]))] for i in range(len(input_list))]
#    #print(new_list)
#    similar_data = [input_list[i] 
#                    for i in range(len(input_list))
#                    if np.mean(new_list[i]) < thresh and i != num]
#    return(similar_data)


def compare_data_by_gene(input_list, gene, thresh):
    """Return the rows whose values are close to those of a reference gene.

    The reference is the first row that contains ``gene`` in any field. A
    row is considered similar when the mean absolute difference between its
    values (fields from index 3 onwards) and the reference values is below
    ``thresh``.

    Parameters
    ----------
    input_list : list of list
        Rows made of three labels followed by values convertible with
        ``float``. Rows longer than the reference row raise IndexError.
    gene : object
        Value identifying the reference row.
    thresh : float
        Upper bound (exclusive) on the mean absolute difference.

    Returns
    -------
    list of list
        The matching row objects from ``input_list``, reference excluded.
        An empty list when no row contains ``gene``.
    """
    reference_index = next(
        (idx for idx, row in enumerate(input_list) if gene in row), None
    )
    # Without a reference row there is nothing to compare against.
    if reference_index is None:
        return []
    reference = input_list[reference_index]

    similar_data = []
    for idx, row in enumerate(input_list):
        if idx == reference_index:
            continue
        differences = [
            abs(float(reference[col]) - float(row[col]))
            for col in range(3, len(row))
        ]
        if np.mean(differences) < thresh:
            similar_data.append(row)
    return similar_data
    
  

# Day values used as the x axis for each data set; the shorter lists leave
# out the days whose columns clean_data() removes for that data set.
master_days = [22, 43, 64, 65, 67, 69, 72, 83]
tapidor_days = [22, 43, 64, 65, 67, 72, 83]
westar_apex_days = [22, 43, 64, 65, 67, 69, 72]
westar_leaf_days = [22, 43, 64, 65, 67, 72]

# NOTE(review): absolute, machine-specific path -- the notebook only runs
# where this file exists.
gene_data = parse_data("C:\\Users\\browndo\\Documents\\brassica_flowering_expression.tsv")

# Drop the first row, which holds the column titles.
del gene_data[0]

# Split by tissue first, then by species within each tissue.
master_leaf_list, master_apex_list = split_lists_by_tissue(gene_data)
tapidor_leaf, westar_leaf = split_lists_by_species(master_leaf_list)
tapidor_apex, westar_apex = split_lists_by_species(master_apex_list)

# Remove the unused columns, then the rows containing "0", for each subset.
tapidor_leaf, tapidor_apex, westar_leaf, westar_apex = (
    remove_zero_data(clean_data(subset))
    for subset in (tapidor_leaf, tapidor_apex, westar_leaf, westar_apex)
)

# Per-row extremes of every subset.
(
    (maxes_t_l, mins_t_l),
    (maxes_t_a, mins_t_a),
    (maxes_w_l, mins_w_l),
    (maxes_w_a, mins_w_a),
) = (
    max_and_min(subset)
    for subset in (tapidor_leaf, tapidor_apex, westar_leaf, westar_apex)
)

# Scale every row using its own extremes.
normalised_t_l, normalised_t_a, normalised_w_l, normalised_w_a = (
    normalise_list(subset, maxes, mins)
    for subset, maxes, mins in (
        (tapidor_leaf, maxes_t_l, mins_t_l),
        (tapidor_apex, maxes_t_a, mins_t_a),
        (westar_leaf, maxes_w_l, mins_w_l),
        (westar_apex, maxes_w_a, mins_w_a),
    )
)


In [220]:
#plot_normalised_data()
#for i in master_days:
#    plot_normalised_data_max(i)
#plot_normalised_data_max(72)
#plot_normalised_data_min(69)

#for i in master_days:
#    for j in master_days:
#        if i != j:
#            plot_normalised_data_max_min(i, j)
#            print("-" * 20)
#    print("-" * 100)

#plot_normalised_data_max_min(43, 67)

#plt.plot(tapidor_days, remove_names(normalised_t_l[i]))

# First field of every normalised Tapidor leaf row (used as the gene name
# elsewhere in this notebook).
gene_names = [row[0] for row in normalised_t_l]

# Up to the first 200 names. Slicing returns fewer entries when the table is
# shorter, where indexing over range(200) raised IndexError.
some_genes = gene_names[:200]
#print(some_genes)

#for gene in some_genes:                                                #threshold value  
#    plot_gene_data(tapidor_days, compare_data_by_gene(normalised_t_l, gene, 0.05), gene)

#plot_gene_data(tapidor_days, compare_data_by_gene(normalised_t_l, gene, 0.1), gene)


In [207]:

# Day values for each data set.
master_days = [22, 43, 64, 65, 67, 69, 72, 83]
tapidor_days = [22, 43, 64, 65, 67, 72, 83]
westar_apex_days = [22, 43, 64, 65, 67, 69, 72]
westar_leaf_days = [22, 43, 64, 65, 67, 72]

# The same day lists keyed by name. Each value is an independent copy, so
# changing a list above does not change the dictionary entry (or vice versa).
days_dict = {
    "master_days": list(master_days),
    "tapidor_days": list(tapidor_days),
    "westar_apex_days": list(westar_apex_days),
    "westar_leaf_days": list(westar_leaf_days),
}

print(days_dict["master_days"])

[22, 43, 64, 65, 67, 69, 72, 83]


In [88]:
# Toy records for trying out the comparison: a label followed by three numbers.
a = [
    ["spoon", 1, 2, 3],
    ["apple", 3, 8, 1],
    ["spoon", 8, 9, 12],
    ["cake", 18, 2, 4],
    ["cake", 14, 13, 12],
    ["apple", 10, 9, 12],
]
b = [[4, 1, 7], [6, 3, 2]]

#def find_difference(input_list_1):
#    difference_list = []
#    for i in range(len(a)):
#        for j in range

def compare_data(num):
    """Return the records of ``a`` that are close to the record at ``num``.

    For every record the absolute differences to the reference record are
    computed field by field (label excluded) and the full table of
    differences is printed. A record is returned when the mean of its
    differences is below 4 and it is not the reference record itself.

    Parameters
    ----------
    num : int
        Index into ``a`` of the reference record.

    Returns
    -------
    list of list
        The matching record objects from ``a``.
    """
    reference = a[num]
    new_list = []
    for record in a:
        new_list.append(
            [abs(reference[col] - record[col]) for col in range(1, len(record))]
        )
    print(new_list)

    similar_data = []
    for idx, record in enumerate(a):
        if np.mean(new_list[idx]) < 4 and idx != num:
            similar_data.append(record)
    return similar_data

compare_data(0)


[[0, 0, 0], [2, 6, 2], [7, 7, 9], [17, 0, 1], [13, 11, 9], [9, 7, 9]]


[['apple', 3, 8, 1]]