In [2]:
import numpy as np
import scipy.io
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import datetime
import random
import pandas as pd
import altair as alt
import warnings

%matplotlib notebook


## Functions

In [3]:
def get_xy(index):
    '''Gets the time and values of a given index 
    and stores them in an array of appropriate size.
    The time is the first column and the data are the
    subsequent columns'''
    
    t = dh[index].x[0]
    y = np.transpose(dh[index].y[0])
    if len(np.shape(y)) == 1:
        data = np.zeros((np.shape(t)[0],2))
        data[:,1] = y
    else:
        data = np.zeros((np.shape(t)[0],np.shape(y)[1]+1))
        data[:,1:np.shape(y)[1]+1] = y
    data[:,0] = t
    return data

In [4]:
def unix_to_utc(unix_time_array):
    '''Takes array of tick labels in unix time
    and converts them into readable utc'''
    result = [None]*(len(unix_time_array))
    for i in range(len(unix_time_array)):
        result[i] = datetime.datetime.utcfromtimestamp(unix_time_array[i]
        ).strftime('%H:%M:%S')
    print("result type:",type(result))
    return result

In [5]:
def multi_plot(data, minX_arr, maxX_arr):
    '''Creates len(minX) number of plots with 6 subplots each
    with nicely formatted datetime xlabels'''
    
    for i in range(len(minX_arr)):
        
        fig = plt.figure(figsize=(8,6.5));

        ax1 = fig.add_subplot(6,1,1);
        ax2 = fig.add_subplot(6,1,2,sharex = ax1);
        ax3 = fig.add_subplot(6,1,3,sharex = ax1);
        ax4 = fig.add_subplot(6,1,4,sharex = ax1);
        ax5 = fig.add_subplot(6,1,5,sharex = ax1);
        ax6 = fig.add_subplot(6,1,6,sharex = ax1);
  
        ax2.set_prop_cycle('color', ['r', 'g', 'b'])   

        ax1.plot(data[0][:,0], data1[:,1:]);
        ax2.plot(data[1][:,0], data2[:,1:]);
        ax3.plot(data[2][:,0], data3[:,1:]);
        ax4.plot(data[3][:,0], data4[:,1:]);
        ax5.plot(data[4][:,0], data5[:,1:]);
        ax6.plot(data[5][:,0], data6[:,1:]);

        ax1.set_ylabel('$|\mathbf{B}_{o}| (nT)$')
        ax2.set_ylabel('$\mathbf{B}_{o} (nT)$')
        ax3.set_ylabel('$n_{e}\ (cm^{-3})$')
        ax4.set_ylabel('$T_{e}$ (eV)')
        ax5.set_ylabel('$n_{i}\ (cm^{-3})$')
        ax6.set_ylabel('$T_{i}$ (eV)')

        ax6.set_xlabel('Time (July 14, 2008)')

        for j in range(len(fig.axes)-1):
            plt.setp(fig.axes[j].get_xticklabels(), visible=False)

        for j in range(len(fig.axes)):
            fig.axes[j].set_xlim(minX[i], maxX[i])

        ax6.set_xticklabels(unix_to_utc(ax6.get_xticks()));

        plt.tight_layout()
        
    return
        

In [6]:
def get_index(string_handle):
	for i in range(len(dq)):
		string = dq[i][0].decode()
		if string[4:] == string_handle:
			index = i
	return index

In [7]:
def k_means(data, k):
    '''Implementation of simple k-means clustering algorithm
    for n dimensional data. Each row of data should be a 
    different dimension from which to figure out the euclidian distance.
    
    Returns closest_cent for which the ith entry is the number
    of the closest centroid to the ith data point and 
    centroids which contains the coordinates for each centroid'''
    
    #Initilaize clocest centroid matrix and distance counters
    closest_cent = np.empty(len(data))
    dist_prev = np.empty(len(data))
    dist_current = np.empty(len(data))
    
    #Initialize position of centroids
    indices = random.sample(range(len(data)),k)
    centroids = data[indices,:]
    
    #Stopping condition initialization
    total_dist_prev = 2.0
    total_dist_current = 1.0
    
    
    while abs((total_dist_current - total_dist_prev)/total_dist_current) > 0.1:
        total_dist = 0
        #Find closest centroid to each point
        for i in range(len(data)): #Data points
            dist = np.empty(k)
            dist = dist.tolist()
            dist_prev = np.empty(len(data))
            dist_current = np.empty(len(data))
            for j in range(k): #Centroids
                dist_sum = 0
                for l in range(np.shape(data)[1]): #Dimensions of data set
                    dist_sum = dist_sum + ((data[i,l] - centroids[j,l]) ** 2)
                dist[j] = dist_sum
                #Value i of closest_cent corresponds to the closest centroid of the ith data point
            print(dist)
            closest_cent[i] = dist.index(np.nanmin(dist))
                #closest_cent[i] = [dis for ind, dis in dist if dis == np.nanmin(dist)]
                #Will this get rid of the runtime error???
                
            dist_prev[i] = dist_current[i]
            dist_current[i] = np.nanmin(dist)
            
            #Find the new location of each centroid from the mean
            #of each data point grouped with that centroid
            for cluster_num in range(k): #Clusters
                indices = [ind for ind, x in enumerate(closest_cent) if x == cluster_num] 
                for dim in range(np.shape(data)[1]): #Dimensions of data set
                    centroids[j,dim] = np.mean([data[ind,dim] for ind in indices])
                    
        total_dist_prev = np.sum(dist_prev)
        total_dist_current = np.sum(dist_current)  
                
    return closest_cent, centroids
                

# Code

In [8]:
#file = 'TPLOT_save_file_THC_FGM-2008-07-14.tplot'
#file = 'TPLOT_save_file_THC_FGM-ALL_EESA-2008-08-19.tplot'
file = 'TPLOT_save_file_THB_FGM-ALL_EESA_2008-07-14.tplot'
f = scipy.io.readsav(file,python_dict=True)

In [9]:
dq = f['dq']
dh = dq['dh']

In [10]:
#To find names and indicies
#Remember that it's offset 1 from IDL !!

for i in range(len(dq)):
    print(i, dq[i][0])

0 b'thb_state_pos'
1 b'thb_state_vel'
2 b'thb_state_man'
3 b'thb_state_roi'
4 b'thb_state_spinras'
5 b'thb_state_spindec'
6 b'thb_state_spinalpha'
7 b'thb_state_spinbeta'
8 b'thb_state_spinper'
9 b'thb_state_spinphase'
10 b'thb_state_spinras_correction'
11 b'thb_state_spindec_correction'
12 b'thb_state_pos_gse'
13 b'thb_state_pos_gsm'
14 b'thb_state_vel_gse'
15 b'thb_state_vel_gsm'
16 b'thb_state_pos_sel'
17 b'thb_state_pos_sse'
18 b'thb_state_vel_sel'
19 b'thb_state_vel_sse'
20 b'thb_state_spinras_corrected'
21 b'thb_state_spindec_corrected'
22 b'thb_fgs_gse'
23 b'thb_fgs_gsm'
24 b'thb_fgs_dsl'
25 b'thb_fgl_gse'
26 b'thb_fgl_gsm'
27 b'thb_fgl_dsl'
28 b'thb_fgl_ssl'
29 b'thb_fgh_gse'
30 b'thb_fgh_gsm'
31 b'thb_fgh_dsl'
32 b'thb_fgh_ssl'
33 b'thb_fgs_mag'
34 b'thb_fgs_fci_flh_fce'
35 b'thb_fgl_mag'
36 b'thb_fgl_fci_flh_fce'
37 b'thb_fgh_mag'
38 b'thb_fgh_fci_flh_fce'
39 b'thb_state_pos__sm'
40 b'thb__Rad'
41 b'thb__MLT'
42 b'thb_MLAT'
43 b'thb__LSH'
44 b'thb_ILAT'
45 b'thb_state_pos_gse

In [11]:
input_string1 = 'fgh_mag'
input_string2 = 'fgh_gse'
input_string3 = 'peeb_density'
input_string4 = 'peeb_avgtemp'
input_string5 = 'peib_density'
input_string6 = 'peib_avgtemp'

index1 = get_index(input_string1)
index2 = get_index(input_string2)
index3 = get_index(input_string3)
index4 = get_index(input_string4)
index5 = get_index(input_string5)
index6 = get_index(input_string6)

data1 = get_xy(index1)
data2 = get_xy(index2)
data3 = get_xy(index3)
data4 = get_xy(index4)
data5 = get_xy(index5)
data6 = get_xy(index6)

In [40]:
minX = [1216038203.4158614, 1216043733.1532767, 1216053273.8616252, 1216057036.8199000, 1216060132.5344240] 

maxX = [1216038704.7146266, 1216044348.3962560, 1216055150.4002540, 1216057647.8225791, 1216060749.1626596] 

data = [data1, data2, data3, data4, data5, data6]

multi_plot(data, minX, maxX)

<IPython.core.display.Javascript object>

result type: <class 'list'>


<IPython.core.display.Javascript object>

result type: <class 'list'>


<IPython.core.display.Javascript object>

result type: <class 'list'>


<IPython.core.display.Javascript object>

result type: <class 'list'>


<IPython.core.display.Javascript object>

result type: <class 'list'>


In [33]:
#TEST N DIMENSIONAL K MEANS ALGORITHM

input_string1 = 'peeb_powerlaws_good'
index1 = get_index(input_string1)
data1 = get_xy(index1)

colorstr = 'rgbkc'

data = np.empty((len(data1),3))

data[:,0] = data1[:,1]
data[:,1] = data1[:,2]
data[:,2] = data1[:,3]

#Get rid of NaN values
indices = [ind for ind, val in enumerate(data[:,0]) if ~np.isnan(val)]
print('data.shape[1]', data.shape[1])
for i in range(data.shape[1]):
    indices[i] = [ind for ind, val in enumerate(data[:,i]) if ~np.isnan(val)]
    
#THIS SHOULD WORK
###set.intersection(*indices)


indices_clean = indices[0]
print('shape(indices)', np.shape(indices))
for i in range(len(indices)-1):
    indices_clean = np.intersect1d(indices_clean, indices[i+1])
print('Indices Clean', indices_clean)
    
    
indices = [ind for ind, val in enumerate(data[:,0]) if ~np.isnan(val)]
data_clean = np.empty((len(indices),3))
data_clean[:,0] = data[indices,0] 
data_clean[:,1] = data[indices,1] 
data_clean[:,2] = data[indices,2] 


k = 3
closest_cent, centroids = k_means(data_clean,k)

fig = plt.figure(figsize=(7,7))
ax = fig.gca(projection='3d')

for i in range(k):
    indices = [ind for ind, val in enumerate(closest_cent) if int(val) == i]
    #Plot points color-coded to their respective cluster
    ax.scatter(data[indices,0], data[indices,1], data[indices,2],colorstr[i], marker='o',linestyle='None')
    #Plot Centroids
    ax.scatter(centroids[i,0], centroids[i,1],centroids[i,2], colorstr[i], marker='*', s=100)

data.shape[1] 3
shape(indices) (858,)
Indices Clean []
[nan, nan, nan]




ValueError: nan is not in list