# The Daily PCA Graph
This file is intended to create and analyze graphs that only display points in time, with the three principal components as axes. Overall, there will be 23 graphs (the last day does not have a full 48 points of data), each displaying the 16 points. This method is in response to the cycles.ipynb graph, where it is very hard to notice differences with 16 lines crowding up the graph. By looking at the individual points that made up the cycles graph, we are able to analyze the data at a more manageable pace and in more detail. 

In effect, the graph will just be a reinterpretation of the cycles graph: instead of analyzing a line, we analyze the points that created that line, which is basically the same data interpretation with a different representation.

In [35]:
import pandas as pd
from scipy import stats
import numpy as np

# Number of animals; the input files are numbered PCFAnimal1 .. PCFAnimal16.
NUM_MICE = 16

# Pull the PCA data of every mouse and drop the 'Unnamed: 0' column in the same
# pass, so each frame keeps only DateTime plus the principal-component columns.
mouse_df_list = [
    pd.read_excel("data/newmicedata/PCFAnimal{}.xlsx".format(mouse_id)).drop(
        columns=['Unnamed: 0']
    )
    for mouse_id in range(1, NUM_MICE + 1)
]


# Notebook display: show the first mouse as a sanity check.
mouse_df_list[0]

Unnamed: 0,DateTime,principal component 1,principal component 2,principal component 3
0,2022/11/04 14:01:00,17.132797,1.502005,-6.294937
1,2022/11/04 14:31:00,16.811791,2.291058,-7.172389
2,2022/11/04 15:01:00,15.028200,2.660764,-6.699671
3,2022/11/04 15:31:00,14.183293,2.758718,-5.704350
4,2022/11/04 16:01:00,11.949887,3.195511,-4.635048
...,...,...,...,...
1136,2022/11/28 06:01:00,-1.396782,-0.328081,-1.957727
1137,2022/11/28 06:31:00,-4.110406,0.404820,-0.490767
1138,2022/11/28 07:01:00,-0.687362,-0.372090,-2.771991
1139,2022/11/28 07:31:00,1.670424,-0.888519,-3.420816


In [36]:
# 23 different points (24th is not a complete 24 hours)
DATAPOINTS = 23
NUMCOMPONENTS = 3
# Rows that make up one day: the DateTime column advances in 30-minute steps,
# so 48 consecutive rows cover 24 hours.
SAMPLES_PER_DAY = 48


def _daily_moments(mouse):
    """Compute per-day moments of every principal component for one mouse.

    Args:
        mouse: DataFrame of one mouse with the columns
            'principal component 1' .. 'principal component NUMCOMPONENTS'.

    Returns:
        A tuple ``(means, variances, skews, kurtoses)``. Each element is a
        list with DATAPOINTS entries (one per day); every entry is a list of
        NUMCOMPONENTS values, i.e. the x, y, z coordinates of that day's point.

    Raises:
        ValueError: raised by ``stats.describe`` if a day slice is empty
            (the frame has fewer than DATAPOINTS * SAMPLES_PER_DAY rows).
    """
    curve_mean = []
    curve_cov = []
    curve_skew = []
    curve_kurt = []
    for day in range(DATAPOINTS):
        first_row = day * SAMPLES_PER_DAY
        day_rows = mouse.iloc[first_row:first_row + SAMPLES_PER_DAY]
        # Selecting a single column by name yields a 1-D array directly, so no
        # manual flattening of a (48, 1) array is needed before describe().
        moments = [
            stats.describe(day_rows['principal component {}'.format(j)].to_numpy())
            for j in range(1, NUMCOMPONENTS + 1)
        ]
        curve_mean.append([m.mean for m in moments])
        curve_cov.append([m.variance for m in moments])
        curve_skew.append([m.skewness for m in moments])
        curve_kurt.append([m.kurtosis for m in moments])
    return curve_mean, curve_cov, curve_skew, curve_kurt


# these arrays contain arrays that represent curves, which contain 23 "points" each (arrays) which contain the 3 x y z coordinates
means = []
# NOTE: despite the name, `covs` holds the per-component variance reported by
# stats.describe, not a covariance matrix.
covs = []
skews = []
kurts = []
for mouse in mouse_df_list:
    # every mouse_curve_* has 23 points in it
    (mouse_curve_mean,
     mouse_curve_cov,
     mouse_curve_skew,
     mouse_curve_kurt) = _daily_moments(mouse)
    means.append(mouse_curve_mean)
    covs.append(mouse_curve_cov)
    skews.append(mouse_curve_skew)
    kurts.append(mouse_curve_kurt)

print(len(means))
# Notebook display: kurtosis curve of the last mouse processed.
mouse_curve_kurt

16


[[-1.4831479071135023, -1.2680000317616198, -1.2811495275313278],
 [-1.7515928371958633, -1.6230267785377341, -1.62308114831041],
 [-1.7053042614712548, -1.613919463054078, -0.8629469554655085],
 [-1.3161894073791698, -1.3426586995339087, -0.4674460884274838],
 [-1.2492060891595984, -1.2741757053464264, 0.22125598266938473],
 [-0.9566888635611055, -1.3339393461814277, 1.3406822805352272],
 [-0.9939097755079511, -1.0958328488629059, 1.117647593544313],
 [-1.445026518479188, -1.4258838449646383, -0.10023506889335776],
 [-1.4728408244328797, -1.383080255574382, -0.7276528439907834],
 [-0.7583497408452153, -0.9019986609114423, -0.9499963946923469],
 [-1.0293920529290244, -0.5053182004350356, -0.36553008995066616],
 [-0.9642282236470456, -0.8278896040130328, -0.6666359627907537],
 [-1.1970491560204526, -0.6583690483017959, -0.24269583980682174],
 [-0.9867013934972633, -0.8834623150905192, -0.5292321730397056],
 [-1.1453952126519975, -0.8004204103658759, -0.661806405213869],
 [-1.12944206644

In [37]:
# create the data for the graphs
# `means` is indexed [mouse][day] -> [PC1, PC2, PC3]. Transposing it with zip
# yields one list per day holding that day's point for every mouse, in mouse order.
days = [list(points) for points in zip(*means)]

# Keep the individual dayN names because the cells below refer to them.
# The unpacking also fails loudly if there are not exactly 23 days.
(day1, day2, day3, day4, day5, day6, day7, day8,
 day9, day10, day11, day12, day13, day14, day15, day16,
 day17, day18, day19, day20, day21, day22, day23) = days

day1

[[5.369327990404503, 4.270162312121401, -3.2001202522064798],
 [2.8788720810874504, 3.126263607833739, -3.420674477780518],
 [4.268652903891773, 3.872324340958434, -3.2841775950792544],
 [4.0107348147303865, 2.905172665417936, -1.9557319522594059],
 [5.685850143365776, 3.47440921187444, -2.7124676242730907],
 [4.2274404932518115, 5.017107096027965, -3.230635815819283],
 [4.167154033829479, 0.6689761730394457, -0.04649134098508587],
 [4.498936821585577, 2.742783313212989, -1.3229662110440625],
 [4.216524108693162, 6.573534778515132, -4.901152700846101],
 [5.475562465911374, 4.1617765712273345, -4.4362042097806125],
 [6.173555458214925, 0.9726738432474362, -0.9356297612124317],
 [4.467652910644868, 3.894808455557129, -2.4601809163254527],
 [5.693666468194124, 2.4876950272943943, -1.624657138587297],
 [5.942221775126263, 0.9225067139672251, -1.67683528454567],
 [5.068986259304769, 2.437245883935492, -0.786318815002529],
 [8.088045526845493, 0.04516665997753905, -1.404611732152981]]

In [38]:
# convert to dataframes then add the tumorous vs nontumorous labels for the graph

# Marker column for healthy vs sick, one label per mouse in file order.
# Named MOUSE_TYPES rather than `type` so the builtin type() is not shadowed
# for the rest of the notebook session.
MOUSE_TYPES = ['Tumor', 'Tumor', 'Tumor', 'Tumor', 'Tumor','Tumor', 'Control', 'Control', 'Tumor', 'Tumor','Tumor', 'Tumor', 'Tumor', 'Tumor', 'Control', 'Control']
PC_COLUMNS = ['PC_1', 'PC_2', 'PC_3']


def _labelled_frame(points):
    """Build one day's DataFrame: a row per mouse with PC_1..PC_3 and its Type."""
    frame = pd.DataFrame(points, columns=PC_COLUMNS)
    frame['Type'] = MOUSE_TYPES
    return frame


# Keep the individual dayN_df names because the plotting cells refer to them.
(day1_df, day2_df, day3_df, day4_df, day5_df, day6_df, day7_df, day8_df,
 day9_df, day10_df, day11_df, day12_df, day13_df, day14_df, day15_df, day16_df,
 day17_df, day18_df, day19_df, day20_df, day21_df, day22_df, day23_df) = map(
    _labelled_frame,
    (day1, day2, day3, day4, day5, day6, day7, day8,
     day9, day10, day11, day12, day13, day14, day15, day16,
     day17, day18, day19, day20, day21, day22, day23),
)

day1_df

Unnamed: 0,PC_1,PC_2,PC_3,Type
0,5.369328,4.270162,-3.20012,Tumor
1,2.878872,3.126264,-3.420674,Tumor
2,4.268653,3.872324,-3.284178,Tumor
3,4.010735,2.905173,-1.955732,Tumor
4,5.68585,3.474409,-2.712468,Tumor
5,4.22744,5.017107,-3.230636,Tumor
6,4.167154,0.668976,-0.046491,Control
7,4.498937,2.742783,-1.322966,Control
8,4.216524,6.573535,-4.901153,Tumor
9,5.475562,4.161777,-4.436204,Tumor


In [39]:
import os

import plotly.express as px

# One 3-D scatter per day: every point is one mouse's mean position in
# principal-component space, coloured by its Type label. The figures differ
# only in the day number, so they are produced in a single loop.
daily_frames = [
    day1_df, day2_df, day3_df, day4_df, day5_df, day6_df, day7_df, day8_df,
    day9_df, day10_df, day11_df, day12_df, day13_df, day14_df, day15_df, day16_df,
    day17_df, day18_df, day19_df, day20_df, day21_df, day22_df, day23_df,
]

# Make sure the output folder exists so a missing directory does not abort the export.
os.makedirs("downloads", exist_ok=True)

for day_number, day_df in enumerate(daily_frames, start=1):
    fig = px.scatter_3d(
        day_df,
        x='PC_1',
        y='PC_2',
        z='PC_3',
        title='Day {} Mean Plot'.format(day_number),
        color='Type',
    )
    fig.write_html("downloads/day{}_mean.html".format(day_number))
    # A bare `fig` expression only renders when it is the last statement of a
    # cell, so each figure is shown explicitly inside the loop.
    fig.show()