# Phys 581 Winter 2019
# Assignment #5: Multiprocessing
## Alexander Hickey, 10169582

Note that this notebook makes use of the Keras deep learning library for python, which is compatible only with Python 2.7-3.6.

In [1]:
#Must be running Python 3.6 or lower!
import sys
sys.version

'3.6.8 |Anaconda, Inc.| (default, Feb 21 2019, 18:30:04) [MSC v.1916 64 bit (AMD64)]'

In [2]:
#Import useful libraries
import time
import numpy as np
import scipy.integrate
import matplotlib.pyplot as plt
import sys, os
import signal, subprocess
import psutil
from multiprocessing import Pool
from keras.models import Model, Sequential
from keras.layers import Input, Dense
%matplotlib inline

Using TensorFlow backend.


### Introduction

Parallel computing allows one to harness the processing power of a multi-core computer, and can significantly decrease the time required for repetitive computations. The idea behind parallel computing is to take a large problem and divide it into smaller ones which can be solved simultaneously, and allocate these different tasks to different parts of the CPU network. Often times, it is very powerful to implement parallel computing in the framework of scientific computations, to allow researchers to execute code and analyze output within a reasonable timeframe.

In [3]:
print( psutil.cpu_stats(), '\n' )
print( psutil.cpu_times(), '\n' )
print( psutil.cpu_times_percent(interval=1, percpu=False), '\n' )
print( 'number of physical(?) CPUs: ', psutil.cpu_count(), '\n' )
print( psutil.cpu_times_percent(interval=1, percpu=True), '\n' )
print(sys.argv)

scpustats(ctx_switches=1116429151, interrupts=1506348531, soft_interrupts=0, syscalls=1876097662) 

scputimes(user=2164734.40625, system=33283.40625, idle=460384.40625, interrupt=975.65625, dpc=898.9375) 

scputimes(user=89.1, system=2.7, idle=7.8, interrupt=0.4, dpc=0.0) 

number of physical(?) CPUs:  8 

[scputimes(user=75.0, system=4.7, idle=20.3, interrupt=0.0, dpc=0.0), scputimes(user=92.2, system=1.6, idle=6.2, interrupt=0.0, dpc=0.0), scputimes(user=98.4, system=1.6, idle=0.0, interrupt=0.0, dpc=0.0), scputimes(user=100.0, system=0.0, idle=0.0, interrupt=0.0, dpc=0.0), scputimes(user=84.4, system=3.1, idle=12.5, interrupt=0.0, dpc=0.0), scputimes(user=85.9, system=3.1, idle=10.9, interrupt=0.0, dpc=0.0), scputimes(user=96.9, system=1.6, idle=1.6, interrupt=0.0, dpc=0.0), scputimes(user=100.0, system=0.0, idle=0.0, interrupt=0.0, dpc=0.0)] 

['C:\\Users\\Admin\\Miniconda3\\envs\\Python36\\lib\\site-packages\\ipykernel_launcher.py', '-f', 'C:\\Users\\Admin\\AppData\\Roaming\\jupyt

### Task: determine scaling efficiency
Gather estimates of computation as a function of number of processes, plot, and analyze.

Ideally we would have a perfectly linear relationship up to the number of CPUs and constant thereafter.  In practice the slope will be less than one and may saturate earlier.

In [21]:
cmdlist = [cmnd for j in range(5)]
for n, cmd in enumerate(cmdlist):
    print(n,cmd)

0 python -c "[i**2 for i in range(9876543)]; print(True)"
1 python -c "[i**2 for i in range(9876543)]; print(True)"
2 python -c "[i**2 for i in range(9876543)]; print(True)"
3 python -c "[i**2 for i in range(9876543)]; print(True)"
4 python -c "[i**2 for i in range(9876543)]; print(True)"


In [23]:
cmnd = 'python -c "[i**2 for i in range(9876543)]; print(True)"'

def time_par(cmdlist, time_res = 1e-5):
    '''
    This function will compute both the ...    
    Args:
        cmdlist: List of commands to track
        time_res: Time step between tracking
        
    Return:
        tlist: Array of both wall and cpu times for each process
               to terminate. Axis 0 corresponds to wall time,
               axis 1 corresponds to cpu time.
    
    '''
    plist = [subprocess.Popen(cmnd, shell=True) for j in cmdlist]
    ch_list = [0.0] 
    t0 = time.time()
    t_dict = {p.pid:0.0 for p in plist}
    
    
    while ch_list != []:
        
        ch_list = []
        time.sleep(time_res)
        
        for p in plist:    
            
            parent = psutil.Process(pid=p.pid)
            child = parent.children()
            
            try:
                proc = psutil.Process(pid=child[0].pid)
                cputime = proc.cpu_times().user
                t_dict[p.pid] = [time.time()-t0,cputime]
                ch_list.append(child)
                
            except:
                None
    
    t = np.array([t_dict[key] for key in t_dict ])
    print(t)
    return t

def ratio(t):
    
    
    return np.mean(t[:,1])/np.max(t[:,0])


    
t=time_par([cmnd for j in range(2)])
t

NoSuchProcess: psutil.NoSuchProcess no process found with pid 10932

In [None]:
2.315/2.47

In [None]:
N = 7
t_rat = []

for k in range(2,N+1):
    
    t = time_par([cmnd for j in range(k)])
    
    t_rat.append(ratio(t))
 

In [None]:
plt.figure(figsize=(8,6))
plt.plot(np.arange(2,N+1),t_rat,marker= 'o')
plt.xlabel('N')
plt.ylabel('wall/cpu')

plt.show()

### Task: Use the multiprocessing package to speed up hyperparameter optimization.    

In [None]:
def create_model(nodes=[8,11,6]):
    '''
    This function creates a dense Keras model with connectivity:
    3-nodes-3 where nodes represents the "deep" layers.
    
    Args:
        nodes: List describing number of deep layers and number of nodes at
               each layer. 
        
    Return:
        model: Dense Keras model with desired connectivity.
    
    '''
    #Create model
    model = Sequential()
    
    #Define input layer
    model.add(Dense(nodes[0], input_dim=3, activation='relu'))
    
    #Define "deep" layers
    for n in nodes[1:]:
        
        model.add(Dense(n, activation='relu'))
    
    #Define output layer
    model.add(Dense(3, activation='linear')) #sigmoid'))

    #Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    
    return model

def train_model(model,xyz,epochs = 999,L=900,ndt=99):
    '''
    This function trains a given Keras model to shift a time series
    forward by some timestep ndt.
    
    Args:
        model: Keras model
        ndt: Timestep to shift time series
        xyz: 3D time series data
        L: Length of training interval
        epochs: Number of epochs to perform training
        
    Return:
        model: Trained model
    
    '''
    
    history = model.fit(xyz[:L],xyz[ndt:L+ndt], epochs=epochs, batch_size=100, 
                        verbose=0)
    
    return model

def mean_square(args):
    '''
    args = [nodes,epochs,xyz]
    
    '''
    
    vs, ndt, L = 5000, 99, 900
    nodes,epochs,xyz = args
    mod = create_model(nodes = nodes)
    mod = train_model(mod,xyz,epochs = epochs)
    
    return np.sqrt( np.mean( (xyz[vs+ndt:vs+L+ndt,:]-mod.predict(xyz[vs:vs+L]) )**2))

NCPU = psutil.cpu_count()
print('Number of CPUs: '+str(NCPU))

In [None]:
def dfunc(state, t0, sigma=10.0, beta=8/3.0, rho=58.0):
    '''
    This returns the time derivative of the coordinates,
    defined by the 3D Lorenz system:
    dx/dt = sigma*(y-x)
    dy/dt = x*(rho-z)-y
    dz/dt = x*y-beta*z
    
    Args:
        state: array of length 3, coordinates at time t0
        t0: Time
        sigma, beta, rho: Lorentz system parameters
        
    Return:
        d/dt state: array, Time derivative of coordinates
    
    '''
    
    #Unpack state vector
    x, y, z = state
    
    return np.array([ sigma*(y-x), x*(rho-z)-y, x*y-beta*z])

#Define time interval of interest
t0, tf, tstep = 0.0, 20.0, 9999
tvals = np.linspace(t0, tf, tstep)

#Set initial state set to [1,1,1]
xyz_0 = np.ones(3)

#Integrate Lorenz system over time interval
xyz = scipy.integrate.odeint( dfunc, xyz_0, tvals )

In [None]:
num_layers = [[5,8,5],[5,8,8,5],[5,8,8,8,5],[5,8,8,8,8,5],[5,8,8,8,8,8,5]]
epochs = 999
n_lay = [[num_layers[j],epochs,xyz] for j in range(len(num_layers))]

if __name__ == '__main__':
    
    with Pool(NCPU) as p:
        m_square = p.map(mean_square, n_lay)
        
print(m_square)

### Conclusion