# Single PC Analysis

This file explores the principal components (PCs) one at a time in order to spot differences between mice at a simpler level than the combined analysis. Each PC is shown as a 2D graph with time on the horizontal axis and the value of that single PC on the vertical axis. There are three graphs, one per PC, and each graph contains 16 lines, one per mouse.

In [26]:
import pandas as pd
from scipy import stats
import numpy as np

# Load the PCA spreadsheet of each of the 16 mice (files are numbered 1-16)
# and strip the 'Unnamed: 0' column from every frame as it is read.
# NOTE(review): 'Unnamed: 0' is presumably the index saved with the sheet - confirm.
mouse_df_list = [
    pd.read_excel("data/newmicedata/PCFAnimal{}.xlsx".format(animal_id)).drop(columns=['Unnamed: 0'])
    for animal_id in range(1, 17)
]

# Show the first mouse's frame as the cell output.
mouse_df_list[0]

Unnamed: 0,DateTime,principal component 1,principal component 2,principal component 3
0,2022/11/04 14:01:00,17.132797,1.502005,-6.294937
1,2022/11/04 14:31:00,16.811791,2.291058,-7.172389
2,2022/11/04 15:01:00,15.028200,2.660764,-6.699671
3,2022/11/04 15:31:00,14.183293,2.758718,-5.704350
4,2022/11/04 16:01:00,11.949887,3.195511,-4.635048
...,...,...,...,...
1136,2022/11/28 06:01:00,-1.396782,-0.328081,-1.957727
1137,2022/11/28 06:31:00,-4.110406,0.404820,-0.490767
1138,2022/11/28 07:01:00,-0.687362,-0.372090,-2.771991
1139,2022/11/28 07:31:00,1.670424,-0.888519,-3.420816


In [27]:
# One list per principal component. Each entry is a mouse's frame with the
# other two component columns removed, so only the remaining columns
# (DateTime and the component of interest in the data shown) are kept.
pc1_list = [
    mouse_df.drop(columns=['principal component 2', 'principal component 3'])
    for mouse_df in mouse_df_list
]
pc2_list = [
    mouse_df.drop(columns=['principal component 1', 'principal component 3'])
    for mouse_df in mouse_df_list
]
pc3_list = [
    mouse_df.drop(columns=['principal component 2', 'principal component 1'])
    for mouse_df in mouse_df_list
]

# Show the first mouse's PC1 frame as the cell output.
pc1_list[0]

Unnamed: 0,DateTime,principal component 1
0,2022/11/04 14:01:00,17.132797
1,2022/11/04 14:31:00,16.811791
2,2022/11/04 15:01:00,15.028200
3,2022/11/04 15:31:00,14.183293
4,2022/11/04 16:01:00,11.949887
...,...,...
1136,2022/11/28 06:01:00,-1.396782
1137,2022/11/28 06:31:00,-4.110406
1138,2022/11/28 07:01:00,-0.687362
1139,2022/11/28 07:31:00,1.670424


In [28]:
# Number of complete 24-hour windows per mouse (the 24th window is not a
# complete 24 hours, so it is left out).
DATAPOINTS = 23
NUMCOMPONENTS = 3
# The DateTime column advances in 30-minute steps, so one 24-hour window
# covers 48 consecutive rows.
ROWS_PER_DAY = 48

# PCn_means holds one curve per mouse; each curve is a list of DATAPOINTS
# daily means of principal component n.
PC1_means = []
PC2_means = []
PC3_means = []
# covs / skews / kurts hold one curve per (component, mouse) pair, appended
# component by component: every mouse for PC1, then PC2, then PC3.
# NOTE: despite its name, `covs` stores the per-window variance reported by
# stats.describe, not a covariance.
covs = []
skews = []
kurts = []

# Destination list for the mean curves of each component number.
means_by_component = {1: PC1_means, 2: PC2_means, 3: PC3_means}

for i in range(1, NUMCOMPONENTS + 1):
    pc_column = 'principal component {}'.format(i)
    # `mouse` is one animal's DataFrame. The loop stays at module level on
    # purpose: the next cell reads `mouse` after this loop has finished.
    for mouse in mouse_df_list:
        # Each of these curves ends up with DATAPOINTS values.
        mouse_curve_mean = []
        mouse_curve_cov = []
        mouse_curve_skew = []
        mouse_curve_kurt = []
        for j in range(DATAPOINTS):
            start = j * ROWS_PER_DAY
            # Selecting the column as a Series yields a flat 1-D array of the
            # window's ROWS_PER_DAY values, so no manual flattening is needed.
            window = mouse[pc_column].iloc[start:start + ROWS_PER_DAY].to_numpy()
            # Mean, variance, skewness and kurtosis of the window (scipy defaults).
            col_moments = stats.describe(window)
            mouse_curve_mean.append(col_moments.mean)
            mouse_curve_cov.append(col_moments.variance)
            mouse_curve_skew.append(col_moments.skewness)
            mouse_curve_kurt.append(col_moments.kurtosis)
        means_by_component[i].append(mouse_curve_mean)
        covs.append(mouse_curve_cov)
        skews.append(mouse_curve_skew)
        kurts.append(mouse_curve_kurt)

# Sanity check: expect one curve per mouse and DATAPOINTS values per curve.
print(len(PC2_means))
print(len(PC2_means[0]))
PC1_means[0]


16
23


[5.369327990404503,
 3.0283141308015025,
 3.50084617488193,
 3.7538178729490146,
 3.58762028569582,
 2.4804528821395104,
 2.28865395933424,
 2.3232666168825804,
 2.071414271193642,
 2.3789974298385093,
 1.2974605898012204,
 0.895272325807808,
 1.2076869587328323,
 1.1611115084679882,
 1.0316024800230403,
 0.6938321446111431,
 0.7324975809652141,
 0.05002739559207884,
 -0.5933250604389609,
 -1.3178172399672192,
 -0.024388076868910685,
 -0.7738364774374077,
 -0.6804187623768114]

In [29]:
# Timestamp that opens each daily window: every 48th row (one day of
# half-hourly samples). The last mouse's frame is indexed explicitly instead of
# relying on a loop variable left over from an earlier cell.
# NOTE(review): assumes all mice share one sampling schedule - confirm.
reference_times = mouse_df_list[-1]['DateTime']
times = [reference_times.iloc[day * 48] for day in range(DATAPOINTS)]


def _attach_times(mean_curves, value_column, timestamps):
    """Return one DataFrame per curve: `value_column` first, then 'DateTime'."""
    frames = []
    for curve in mean_curves:
        curve_df = pd.DataFrame(curve, columns=[value_column])
        curve_df['DateTime'] = timestamps
        frames.append(curve_df)
    return frames


# Add the DateTime column back to every mouse's mean curve, per component.
curves_1_df = _attach_times(PC1_means, 'PC_1', times)
curves_2_df = _attach_times(PC2_means, 'PC_2', times)
curves_3_df = _attach_times(PC3_means, 'PC_3', times)

print(len(curves_1_df))
curves_1_df[0]

16


Unnamed: 0,PC_1,DateTime
0,5.369328,2022/11/04 14:01:00
1,3.028314,2022/11/05 14:01:00
2,3.500846,2022/11/06 14:01:00
3,3.753818,2022/11/07 14:01:00
4,3.58762,2022/11/08 14:01:00
5,2.480453,2022/11/09 14:01:00
6,2.288654,2022/11/10 14:01:00
7,2.323267,2022/11/11 14:01:00
8,2.071414,2022/11/12 14:01:00
9,2.378997,2022/11/13 14:01:00


In [30]:
import plotly.express as px
import plotly.graph_objects as go

# Visualizing the mean data points


# # Define the coordinates and labels for the points you want to label
# point_x = []
# point_y = []
# point_z = []
# for curve in curves_df:
#     point_x.append(curve.at[0, 'PC_1'])
#     point_x.append(curve.at[DATAPOINTS - 1, 'PC_1'])
#     point_y.append(curve.at[0, 'PC_2'])
#     point_y.append(curve.at[DATAPOINTS - 1, 'PC_2'])
#     point_z.append(curve.at[0, 'PC_3'])
#     point_z.append(curve.at[DATAPOINTS - 1, 'PC_3'])

# point_labels = ['1S', '1E', '2S', '2E', '3S', '3E', '4S', '4E', '5S', '5E', '6S', '6E', '7S', '7E', '8S', '8E', '9S', '9E', '10S', '10E', '11S', '11E', '12S', '12E', '13S', '13E', '14S', '14E', '15S', '15E', '16S', '16E', ]

# # Create a scatter trace with text annotations for the labeled points
# scatter_trace = go.Scatter3d(
#     x=point_x,
#     y=point_y,
#     z=point_z,
#     mode='markers+text',
#     marker=dict(
#         size=5,
#         color='red',
#     ),
#     text=point_labels,
#     textposition='bottom center',
#     name='Labeled Points'
# )

# # Add the scatter trace to the figure
# fig.add_trace(scatter_trace)

# PC1: one line per mouse, daily mean plotted against each day's start time.
fig = go.Figure()

# Lines 7, 8, 15 and 16 are drawn in blue; every other line is red.
# NOTE(review): presumably these four mice form a distinct group - confirm.
blue_lines = [7, 8, 15, 16]

for line_num, curve in enumerate(curves_1_df, start=1):
    colour = 'blue' if line_num in blue_lines else 'red'
    fig.add_trace(
        go.Scatter(
            x=curve['DateTime'].to_numpy(),
            y=curve['PC_1'].to_numpy(),
            mode='lines',
            name='Line {}'.format(line_num),
            line_color=colour,
        )
    )

fig.update_layout(
    title='PC1 Mean over Time',
    xaxis_title='DateTime',
    yaxis_title='PC_1',
)

# fig.write_html("downloads/PC1Mean_over_Time.html")
fig

In [31]:
# PC2: one line per mouse, daily mean plotted against each day's start time.
fig = go.Figure()

# Lines 7, 8, 15 and 16 are drawn in blue; every other line is red.
# NOTE(review): presumably these four mice form a distinct group - confirm.
blue_lines = [7, 8, 15, 16]

for line_num, curve in enumerate(curves_2_df, start=1):
    colour = 'blue' if line_num in blue_lines else 'red'
    fig.add_trace(
        go.Scatter(
            x=curve['DateTime'].to_numpy(),
            y=curve['PC_2'].to_numpy(),
            mode='lines',
            name='Line {}'.format(line_num),
            line_color=colour,
        )
    )

fig.update_layout(
    title='PC2 Mean over Time',
    xaxis_title='DateTime',
    yaxis_title='PC_2',
)

# fig.write_html("downloads/PC2Mean_over_Time.html")
fig

In [32]:
# PC3: one line per mouse, daily mean plotted against each day's start time.
fig = go.Figure()

# Lines 7, 8, 15 and 16 are drawn in blue; every other line is red.
# NOTE(review): presumably these four mice form a distinct group - confirm.
blue_lines = [7, 8, 15, 16]

for line_num, curve in enumerate(curves_3_df, start=1):
    colour = 'blue' if line_num in blue_lines else 'red'
    fig.add_trace(
        go.Scatter(
            x=curve['DateTime'].to_numpy(),
            y=curve['PC_3'].to_numpy(),
            mode='lines',
            name='Line {}'.format(line_num),
            line_color=colour,
        )
    )

fig.update_layout(
    title='PC3 Mean over Time',
    xaxis_title='DateTime',
    yaxis_title='PC_3',
)

# fig.write_html("downloads/PC3Mean_over_Time.html")
fig