<img src="https://news.illinois.edu/files/6367/543635/116641.jpg" alt="University of Illinois" width="250"/>

# PyTorch CNN
By Richard Sowers
* <r-sowers@illinois.edu>
* <https://publish.illinois.edu/r-sowers/>

Copyright 2020 University of Illinois Board of Trustees. All Rights Reserved.

# Imports and Configurations

In [None]:
import os
import numpy
import pandas
idx = pandas.IndexSlice
import time
import random
import matplotlib
#%matplotlib notebook
import matplotlib.pyplot as plt
import scipy.stats
#from pandas.plotting import autocorrelation_plot
import matplotlib.offsetbox as offsetbox
from matplotlib.ticker import StrMethodFormatter
from matplotlib.backends.backend_agg import FigureCanvasAgg
import graphviz

import imageio
import PIL

def saver(fname):
    """Save the current matplotlib figure as ``<fname>.png``.

    fname: output path without extension; ".png" is appended.
    The figure is written with a tight bounding box.
    """
    target = fname + ".png"
    plt.savefig(target, bbox_inches="tight")

def legend(pos="bottom", ncol=3):
    """Attach a light-gray legend outside the current axes.

    pos:  "bottom" places the legend below the axes using ``ncol`` columns;
          "side" places it to the right of the axes in a single column.
          Any other value leaves the figure untouched.
    ncol: number of legend columns (only used when pos is "bottom").
    """
    if pos == "bottom":
        plt.legend(
            bbox_to_anchor=(0.5, -0.2),
            loc='upper center',
            facecolor="lightgray",
            ncol=ncol,
        )
        return
    if pos == "side":
        plt.legend(
            bbox_to_anchor=(1.1, 0.5),
            loc='center left',
            facecolor="lightgray",
            ncol=1,
        )

def textbox(txt, fname=None):
    """Show a small axis-free figure containing only text.

    txt:   iterable of strings; they are joined with newlines.
    fname: if not None, the figure is saved through saver(fname)
           before it is shown.
    The figure is closed after being displayed.
    """
    plt.figure(figsize=(1, 1))
    axes = plt.gca()
    message = "\n".join(txt)
    axes.add_artist(
        offsetbox.AnchoredText(message, loc="center", prop={"size": 30})
    )
    plt.axis('off')
    if fname is not None:
        saver(fname)
    plt.show()
    plt.close()

In [None]:
import torch
import scipy

In [None]:
# Global matplotlib defaults: larger font and thicker lines for every plot.
# (Original author's note: for some reason, this needs to be in a separate cell.)
params = {"font.size": 15, "lines.linewidth": 5}
for rc_key, rc_value in params.items():
    plt.rcParams[rc_key] = rc_value

In [None]:
def getfile(location_pair, **kwargs):
    """Read a CSV into a DataFrame, preferring a local copy over Google Drive.

    location_pair: (local_path, gdrive_url). The local path is tried first.
                   If it does not exist, the file is downloaded from Google
                   Drive; the file id is taken to be the second-to-last
                   "/"-separated component of gdrive_url.
    **kwargs:      forwarded unchanged to pandas.read_csv.

    Returns the DataFrame produced by pandas.read_csv.
    """
    local_path, share_url = location_pair
    try:
        return pandas.read_csv(local_path, **kwargs)
    except FileNotFoundError:
        print("local file not found; accessing Google Drive")
        file_id = share_url.split('/')[-2]
        download_url = 'https://drive.google.com/uc?export=download&id=' + file_id
        return pandas.read_csv(download_url, **kwargs)

# Settings

In [None]:
# Seed passed to numpy.random.seed / torch.manual_seed in later cells.
SEED = 0

# Reference pattern that is embedded into positively-labeled signals;
# its index (named "n") is the sample offset within the pattern.
R = pandas.Series([3, 4, 4.5, 5, 5.5, 6, 6, 6]).rename_axis('n')

signal_length = 25            # number of samples in each synthetic signal
N_trainingdata = 1000         # number of synthetic observations
N_trainingdata_visible = 10   # number of observations shown when displaying the data
plot_color, ref_color = "blue", "red"   # presumably plotting colors for data / reference

# Data #

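Let's synthesize some data. Each observation is a signal of `signal_length` samples of uniform noise on [0, 10]. Roughly half of the observations are labeled `True`; in those, a noisy copy of the reference pattern `R` is written into the signal at a random start position.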

In [None]:
# Synthesize the training set.
# Rows are sample positions n = 0..signal_length-1; columns are observations,
# keyed by a (observation, label, start) MultiIndex.
# NOTE: the order of the random draws below determines the generated data,
# so it must not be changed if results are to stay reproducible.
numpy.random.seed(SEED)  # scipy.stats draws from numpy's global random state by default
errorsize=0.3  # standard deviation of the Gaussian noise added to the embedded pattern

# background: uniform noise on [0, 10]
trainingdata=pandas.DataFrame(scipy.stats.uniform.rvs(loc=0,scale=10,size=(signal_length,N_trainingdata)))
trainingdata.index=pandas.RangeIndex(start=0,stop=signal_length,name="n")

p=0.5  # probability that an observation contains the pattern
observations=range(1,N_trainingdata+1)

# builtin bool replaces the numpy.bool alias (deprecated in NumPy 1.20, removed in 1.24)
labels=scipy.stats.bernoulli.rvs(p=p,size=N_trainingdata).astype(bool)
labels[0]=True #force the first label for purposes of example

# randint's upper bound is exclusive, so the pattern always fits inside the signal
positions=numpy.random.randint(low=0,high=signal_length-len(R),size=N_trainingdata)
# observations without the pattern get no start position
start=[(position if flag else None)  for (position,flag) in zip(positions,labels)]

trainingdata.columns=pandas.MultiIndex.from_tuples(zip(observations,labels,start), names=('observation','label','start'))

# Overwrite a window of each positively-labeled signal with R plus Gaussian noise.
# Iterating over the column keys replaces DataFrame.iteritems() (removed in
# pandas 2.0); the loop variable is named start_position so that it does not
# clobber the `start` list above.
for (observation,label,start_position) in trainingdata.columns:
    if label:
        trainingdata.loc[start_position+R.index,(observation,label,start_position)]=R.values+scipy.stats.norm.rvs(scale=errorsize,size=len(R))

# notebook display of the first few observations
trainingdata.iloc[:,:N_trainingdata_visible]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  labels=scipy.stats.bernoulli.rvs(p=p,size=N_trainingdata).astype(numpy.bool)
  for ((observation,label,start),data) in trainingdata.iteritems():


observation,1,2,3,4,5,6,7,8,9,10
label,True,True,True,True,True,True,False,True,False,True
start,3,7,14,16,2,12,NaN,14,NaN,11
n,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3
0,5.488135,7.151894,6.027634,5.448832,4.236548,6.458941,4.375872,8.91773,9.636628,3.834415
1,5.928803,0.100637,4.758262,7.087704,0.439754,8.795215,5.200814,0.30661,2.244136,9.536757
2,8.115185,4.76084,5.23156,2.505206,3.273137,3.029048,5.77284,1.696781,1.594691,4.170297
3,3.200046,6.296183,7.785843,8.515578,4.199965,1.660771,8.283896,0.586291,2.001707,6.229267
4,3.425321,5.665183,1.374144,3.497122,4.344949,3.790681,1.141513,5.618129,6.415937,9.870915
5,4.405654,2.976242,0.685996,3.525275,4.887139,7.629273,1.116283,1.43681,2.785103,2.880268
6,4.626292,4.28686,6.830566,6.009478,5.226124,1.574779,0.705557,0.345901,7.760049,8.509589
7,5.116113,3.283017,7.821373,8.870782,5.916336,9.73627,3.441207,3.991273,3.676832,8.443634
8,5.337272,4.636938,7.023351,2.073241,5.861973,3.660266,3.406353,1.205241,8.545623,6.681478
9,6.221898,4.174067,7.739527,1.288654,5.735266,9.420893,1.616041,6.291562,2.357657,5.155481


In [None]:
# Build the model inputs as a float tensor laid out as
# (observation, channel, sample): the first dimension is the observation
# number, and each observation has one channel of len(trainingdata) samples.
samples_per_observation = len(trainingdata)
feature_array = trainingdata.values.transpose().reshape(-1, 1, samples_per_observation)
torch_features = torch.from_numpy(feature_array).type(torch.float)
print("first observation in torch:", torch_features[0, :])
print("first observation in pandas: ", trainingdata.iloc[:, 0].values)

# Build the targets: the 'label' level of the column MultiIndex, as a float
# column vector with one row per observation.
_observations, labels, _starts = zip(*trainingdata.columns)
label_column = numpy.array(labels).reshape(-1, 1)
torch_labels = torch.from_numpy(label_column).type(torch.float)

first observation in torch: tensor([[5.4881, 5.9288, 8.1152, 3.2000, 3.4253, 4.4057, 4.6263, 5.1161, 5.3373,
         6.2219, 6.0089, 1.9296, 5.6469, 1.4601, 3.2060, 3.8614, 3.9554, 7.3737,
         8.1403, 4.5841, 3.9217, 6.6734, 9.8094, 3.1141, 1.5320]])
first observation in pandas:  [5.48813504 5.92880271 8.11518471 3.20004643 3.42532118 4.40565362
 4.62629233 5.11611256 5.33727162 6.22189817 6.0088977  1.92964246
 5.64690403 1.46007399 3.20595056 3.86137859 3.95537377 7.37372011
 8.14027103 4.58413253 3.92172961 6.67342349 9.80937541 3.11406359
 1.53196185]


# 1d Convolution

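Let's make sure that we understand PyTorch 1d convolution. `torch.nn.Conv1d` actually computes a cross-correlation of the input with its kernel, so if we set the kernel to `R` the output should match `numpy.correlate` — up to the layer's (randomly initialized) bias, which shows up below as a small constant offset between the two results.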

In [None]:
# Sanity-check layer: a single-channel Conv1d whose kernel is overwritten
# with the reference pattern R (the bias keeps its seeded random initial value).
torch.manual_seed(SEED)
in_channels = out_channels = batchsize = 1
kernel_size = len(R)
CNN_test = torch.nn.Conv1d(in_channels, out_channels, kernel_size)
kernel_values = R.values.reshape(CNN_test.weight.data.shape)
CNN_test.weight.data = torch.tensor(kernel_values).type(torch.float)
print("weight: ", CNN_test.weight.data)

weight:  tensor([[[3.0000, 4.0000, 4.5000, 5.0000, 5.5000, 6.0000, 6.0000, 6.0000]]])


In [None]:
# Run the layer on the first observation (sliced as 0:1 so the batch
# dimension is kept) and compare with numpy's cross-correlation of the same
# signal against R.
first_observation_batch = torch_features[0:1]
print("output from CNN: ", CNN_test(first_observation_batch))
first_observation_signal = trainingdata.iloc[:, 0]
print("output from numpy correlate: ", numpy.correlate(first_observation_signal, R.values))

output from CNN:  tensor([[[196.3944, 196.4517, 200.0529, 199.7740, 188.5837, 197.1327, 176.7078,
          167.1997, 159.7787, 153.7544, 167.2009, 188.9990, 198.8266, 194.7580,
          215.5470, 248.8972, 240.0322, 218.3244]]],
       grad_fn=<ConvolutionBackward0>)
output from numpy correlate:  [196.42579686 196.4831158  200.0842552  199.80535101 188.61508025
 197.16409995 176.73920065 167.23105947 159.8100505  153.785821
 167.23226396 189.03040098 198.85797631 194.78935639 215.57835307
 248.92862656 240.0635719  218.35572324]


# CNN test class #

In [None]:
class CNN(torch.nn.Module):
    """Minimal 1-D convolutional binary classifier.

    Pipeline: Conv1d (1 -> 1 channel, 'same' padding) -> ReLU ->
    MaxPool1d over a window of observation_length -> flatten ->
    Linear(1, 1) -> Sigmoid.

    Parameters
    ----------
    kernel_size : int
        Length of the convolution kernel.
    observation_length : int
        Window of the max-pool layer. With inputs of this length the pool
        reduces each signal to a single value.
    SEED : int or None
        If not None, torch.manual_seed(SEED) is called before the layers are
        created so that the initial weights are reproducible.

    If CUDA is available the module moves its parameters to the GPU on
    construction.
    """

    def __init__(self, kernel_size, observation_length, SEED=0):
        super().__init__()
        if SEED is not None:
            torch.manual_seed(SEED)
        in_channels = 1
        out_channels = 1
        # padding='same' means the convolution output has the same length as the input
        self.conv1d = torch.nn.Conv1d(in_channels, out_channels, kernel_size, padding='same')
        self.maxpool = torch.nn.MaxPool1d(observation_length)
        self.linear = torch.nn.Linear(out_channels, 1)
        self.sigmoid = torch.nn.Sigmoid()
        self.ReLU = torch.nn.ReLU()
        if torch.cuda.is_available():
            # The original had a bare string here, which is a no-op; print it.
            print("converting to cuda")
            # Module.cuda() moves the parameters in place; rebinding self is unnecessary.
            self.cuda()

    def forward(self, input):
        """Return the predicted probability for each signal in the batch.

        input: float tensor laid out as (batch, 1, length).
        Returns a (batch, 1) tensor of values in (0, 1) when length equals
        the observation_length given at construction.
        """
        # Inputs built on the CPU would otherwise fail against a model that
        # moved itself to CUDA in __init__; this is a no-op when the devices
        # already match.
        input = input.to(self.conv1d.weight.device)
        out = self.conv1d(input)
        out = self.ReLU(out)
        out = self.maxpool(out)
        out = torch.flatten(out, 1)
        out = self.linear(out)
        out = self.sigmoid(out)
        return out


# Binary cross-entropy between predicted probabilities and 0/1 labels.
Loss = torch.nn.BCELoss()

# Classifier whose kernel is as long as the reference pattern R and whose
# pooling window spans a whole signal.
myCNN = CNN(kernel_size=len(R), observation_length=len(trainingdata), SEED=1)

In [None]:
# Train myCNN with Adam, taking one full-batch gradient step per iteration.
optimizer = torch.optim.Adam(myCNN.parameters())
losses = []
MAX_iter = 10000
# report roughly 10 intermediate steps; max(1, ...) avoids a modulo by zero
# when MAX_iter < 10
report_every = max(1, MAX_iter // 10)

# CNN moves itself to CUDA when available, so put the data on the same
# device as the model (no-op when everything is already on the CPU).
device = next(myCNN.parameters()).device
train_features = torch_features.to(device)
train_labels = torch_labels.to(device)

for ctr in range(MAX_iter):

    # Clear gradient buffers: gradients from the previous iteration must not
    # accumulate into this one.
    optimizer.zero_grad()

    # get output from the model, given the inputs
    outputs = myCNN(train_features)

    # get loss for the predicted output
    lossvalue = Loss(outputs, train_labels)
    # store a detached copy so the history does not keep autograd graph
    # references alive (and can be converted/plotted without requires_grad errors)
    losses.append(lossvalue.detach())

    # get gradients w.r.t. the parameters
    lossvalue.backward()

    # update parameters
    optimizer.step()
    if ctr % report_every == 0:
        print("iteration {}: loss={:.5f}".format(ctr, lossvalue.item()))

if losses:  # lossvalue is undefined if the loop never ran
    print("final loss={:.5f}".format(lossvalue.item()))

  return F.conv1d(input, weight, bias, self.stride,


iteration 0: loss=0.68175
iteration 1000: loss=0.64478
iteration 2000: loss=0.62681
iteration 3000: loss=0.62648
iteration 4000: loss=0.62643
iteration 5000: loss=0.62639
iteration 6000: loss=0.62639
iteration 7000: loss=0.62639
iteration 8000: loss=0.62639
iteration 9000: loss=0.62639
final loss=0.62639
