In [1]:
#ML Article used as ML reference
#https://towardsdatascience.com/preprocessing-with-sklearn-a-complete-and-comprehensive-guide-670cb98fcfb9

In [2]:
import numpy as np
import scipy as sp
import pandas as pd

In [3]:
raw_data = pd.read_csv('sample2.csv')

In [4]:
raw_data.shape

(249959, 36)

In [5]:
raw_data.head(1)

Unnamed: 0,_text,azimuthAngle,created,downSignal,elevationAngle,friendlyName,isArray,isDDOR,isMSPA,name_x,...,power_y,signalType_y,signalTypeDebug_y,spacecraft_y,spacecraftId_y,downlegRange,id,name_y,rtlt,uplegRange
0,STA,180.0,2019-01-13T23:40:09.011Z,"[{'signalType': 'none', 'signalTypeDebug': 'ID...",88.5,Goldstone,False,False,False,DSS14,...,1.3e-05,none,OFF 0,STA,234.0,222528900.0,234.0,STA,1484.459294,222499600.0


In [6]:
raw_data.columns

Index(['_text', 'azimuthAngle', 'created', 'downSignal', 'elevationAngle',
       'friendlyName', 'isArray', 'isDDOR', 'isMSPA', 'name_x', 'station',
       'target', 'timeUTC', 'timeZoneOffset', 'upSignal', 'updated',
       'windSpeed', 'dataRate_x', 'frequency_x', 'power_x', 'signalType_x',
       'signalTypeDebug_x', 'spacecraft_x', 'spacecraftId_x', 'dataRate_y',
       'frequency_y', 'power_y', 'signalType_y', 'signalTypeDebug_y',
       'spacecraft_y', 'spacecraftId_y', 'downlegRange', 'id', 'name_y',
       'rtlt', 'uplegRange'],
      dtype='object')

In [7]:
# Renaming column extensions (from webscrape) that have "_x" to "downsignal", and "_y" to "upsignal".

data_new_names = raw_data.rename(columns={"name_x":"DSS","friendlyName":"location","dataRate_x": "dataRate_downsignal", 
    "frequency_x": "frequency_downsignal", "power_x":"power_downsignal","signalType_x":"signalType_downsignal",
    "signalTypeDebug_x":"signalTypeDebug_downsignal", "spacecraft_x":"spacecraft_downsignal",
    "spacecraftId_x":"spacecraftId_downsignal", "dataRate_y":"dataRate_upsignal", "frequency_y":"frequency_upsignal", 
    "power_y":"power_upsignal","signalType_y":"signalType_upsignal","signalTypeDebug_y":"signalTypeDebug_upsignal",
    "spacecraft_y":"spacecraft_upsignal","spacecraftId_y":"spacecraftId_upsignal",  "name_y":"name_upsignal"})

In [8]:
data_new_names.columns

Index(['_text', 'azimuthAngle', 'created', 'downSignal', 'elevationAngle',
       'location', 'isArray', 'isDDOR', 'isMSPA', 'DSS', 'station', 'target',
       'timeUTC', 'timeZoneOffset', 'upSignal', 'updated', 'windSpeed',
       'dataRate_downsignal', 'frequency_downsignal', 'power_downsignal',
       'signalType_downsignal', 'signalTypeDebug_downsignal',
       'spacecraft_downsignal', 'spacecraftId_downsignal', 'dataRate_upsignal',
       'frequency_upsignal', 'power_upsignal', 'signalType_upsignal',
       'signalTypeDebug_upsignal', 'spacecraft_upsignal',
       'spacecraftId_upsignal', 'downlegRange', 'id', 'name_upsignal', 'rtlt',
       'uplegRange'],
      dtype='object')

In [9]:
df = data_new_names[['timeUTC','DSS', 'location','created','timeZoneOffset','azimuthAngle','elevationAngle', 'isArray', 
           'isDDOR', 'isMSPA', 'windSpeed', 'dataRate_downsignal', 'frequency_downsignal','power_downsignal',
           'signalType_downsignal', 'spacecraft_downsignal', 'spacecraftId_downsignal', 
          'dataRate_upsignal', 'frequency_upsignal','power_upsignal', 'signalType_upsignal','spacecraft_upsignal', 'spacecraftId_upsignal',
           'downlegRange', 'id','name_upsignal', 'rtlt', 'uplegRange','signalTypeDebug_downsignal', 'signalTypeDebug_upsignal']]

In [10]:
df.head(1)

Unnamed: 0,timeUTC,DSS,location,created,timeZoneOffset,azimuthAngle,elevationAngle,isArray,isDDOR,isMSPA,...,signalType_upsignal,spacecraft_upsignal,spacecraftId_upsignal,downlegRange,id,name_upsignal,rtlt,uplegRange,signalTypeDebug_downsignal,signalTypeDebug_upsignal
0,1547424005485,DSS14,Goldstone,2019-01-13T23:40:09.011Z,-28800000,180.0,88.5,False,False,False,...,none,STA,234.0,222528900.0,234.0,STA,1484.459294,222499600.0,IDLE OFF 0 TURBO,OFF 0


# DATA SUBSETTING

In [12]:
# Goldstone complex subset, then broken out by DSS (Deep Space Station)
df_Goldstone = df.loc[df['location'] == "Goldstone"]
df_Goldstone_DSS14 = df.loc[df['DSS'] == "DSS14"]  #Each DSS (Deep Space Station) will be model individually.
df_Goldstone_DSS24 = df.loc[df['DSS'] == "DSS24"]  #Each DSS (Deep Space Station) will be model individually.
df_Goldstone_DSS25 = df.loc[df['DSS'] == "DSS25"]  #Each DSS (Deep Space Station) will be model individually.
df_Goldstone_DSS26 = df.loc[df['DSS'] == "DSS26"]  #Each DSS (Deep Space Station) will be model individually.

In [13]:
# Madrid subset, then broken out by DSS (Deep Space Station)
df_Madrid = df.loc[df['location'] == "Madrid"]
#Each DSS (Deep Space Station) will be model individually.
#Each DSS (Deep Space Station) will be model individually.
#Each DSS (Deep Space Station) will be model individually.
#Each DSS (Deep Space Station) will be model individually.

In [14]:
# Canberra subset, then broken out by DSS (Deep Space Station)
df_Canberra = df.loc[df['location'] == "Canberra"]
#Each DSS (Deep Space Station) will be model individually.
#Each DSS (Deep Space Station) will be model individually.
#Each DSS (Deep Space Station) will be model individually.
#Each DSS (Deep Space Station) will be model individually.

In [15]:
print(df_Goldstone.shape)
print(df_Madrid.shape)
print(df_Canberra.shape)

(85227, 30)
(76736, 30)
(87996, 30)


# Data Processing 

Explore each columns for NA's for outlier values

In [16]:
df.isnull().values.any()

True

In [17]:
df.isna().values.any()

True

In [18]:
 df.isnull().sum()

timeUTC                            0
DSS                                0
location                           0
created                            0
timeZoneOffset                     0
azimuthAngle                   40827
elevationAngle                 40827
isArray                            0
isDDOR                             0
isMSPA                             0
windSpeed                      40827
dataRate_downsignal            86966
frequency_downsignal           86968
power_downsignal               86966
signalType_downsignal          35663
spacecraft_downsignal          35663
spacecraftId_downsignal        35663
dataRate_upsignal             168942
frequency_upsignal             48215
power_upsignal                 50718
signalType_upsignal                0
spacecraft_upsignal            35653
spacecraftId_upsignal          35653
downlegRange                   35653
id                             35653
name_upsignal                  35653
rtlt                           35653
u

In [19]:
 df.isnull().sum().sum()

1043479

In [20]:
 df.isna().sum().sum()

1043479

In [21]:
df.shape

(249959, 30)

In [24]:
# 86% of data is not missing, 14% is.  This could just mean no activity is present, and its not actually null/na.
print(((30*249959)-1043479)/(30*249959))

0.8608466455165313


Do I have missing values? How are they expressed in the data? Should I withhold samples with missing values?
Or should I replace them? If so, which values should they be replaced with?

In [27]:
df_preprocess = df[['timeUTC','DSS', 'location','created','timeZoneOffset','azimuthAngle','elevationAngle', 'isArray', 
           'isDDOR', 'isMSPA', 'windSpeed', 'dataRate_downsignal', 'frequency_downsignal','power_downsignal',
           'signalType_downsignal', 'spacecraftId_downsignal','dataRate_upsignal', 'frequency_upsignal',
            'power_upsignal', 'signalType_upsignal', 'spacecraftId_upsignal','downlegRange', 
            'id', 'rtlt', 'uplegRange','signalTypeDebug_downsignal', 'signalTypeDebug_upsignal']]

In [28]:
df_preprocess.head(1)

Unnamed: 0,timeUTC,DSS,location,created,timeZoneOffset,azimuthAngle,elevationAngle,isArray,isDDOR,isMSPA,...,frequency_upsignal,power_upsignal,signalType_upsignal,spacecraftId_upsignal,downlegRange,id,rtlt,uplegRange,signalTypeDebug_downsignal,signalTypeDebug_upsignal
0,1547424005485,DSS14,Goldstone,2019-01-13T23:40:09.011Z,-28800000,180.0,88.5,False,False,False,...,7186,1.3e-05,none,234.0,222528900.0,234.0,1484.459294,222499600.0,IDLE OFF 0 TURBO,OFF 0


In [29]:
df_preprocess.signalTypeDebug_downsignal.unique()

array(['IDLE OFF 0 TURBO', 'IDLE OFF 0 MCD2', 'IN LOCK OFF 1 MCD2',
       'IN LOCK IN LOCK 1 TURBO', 'IDLE OFF 1 TURBO', '  -1 ', nan,
       'IDLE OFF 1 MCD2', 'OUT OF LOCK OUT OF LOCK 0 TURBO',
       'OUT OF LOCK WAIT FOR LOCK 1 TURBO', 'OUT OF LOCK OFF 0 MCD2',
       'OUT OF LOCK OUT OF LOCK 1 TURBO', 'OUT OF LOCK OFF 1 MCD2',
       'IDLE OFF 0 MCD3', 'IDLE OFF 0 UNC', 'IDLE OFF 1 MCD3',
       'OUT OF LOCK OFF 1 MCD3', 'OUT OF LOCK OFF 0 MCD3',
       'IN LOCK OFF 1 MCD3', 'IN LOCK OFF 0 MCD3',
       'IN LOCK IN LOCK 0 TURBO', 'IN LOCK OFF 0 MCD2', 'IDLE OFF 1 UNC',
       'OUT OF LOCK OFF 1 UNC', 'IN LOCK OFF 1 UNC', 'IN LOCK OFF 0 UNC',
       'OUT OF LOCK WAIT FOR LOCK 0 TURBO', 'OUT OF LOCK IN LOCK 1 TURBO'],
      dtype=object)

In [51]:
#One Hot Encoder funtion in scipy will not run with NAN values.  Therefore, converting NAN's below.
print(df_preprocess.signalTypeDebug_downsignal.isna().count())
print(df_preprocess.signalTypeDebug_upsignal.isna().count())

249959
249959


In [40]:
df_preprocess.signalTypeDebug_upsignal.unique()

array(['OFF 0 ', 'ON 1 ', '  ', 'OFF 0 IDLE', 'ON 1 TRK', ' 0 IDLE',
       ' 0 TRK', 'none', 'ON 1 IDLE', '  TRK', '  IDLE', 'ON 0 ',
       'OFF 0 TRK', 'OFF 1 ', 'ON 0 CAL', 'ON 1 CAL', 'ON 0 IDLE',
       'OFF 1 IDLE', 'ON 0 TRK', ' 0 ', 'OFF 1 TRK'], dtype=object)

In [36]:
#Suppose you have a Pandas dataframe, df, and in one of your columns, Are you a cat?, you have a slew of 
#NaN values that you'd like to replace with the string No. Here's how to deal with that:
#df['Are you a Cat?'].fillna('No', inplace=True)

df_preprocess = df_preprocess.fillna("Missing_data")

print(df_preprocess.head(1))
#df_preprocess['signalTypeDebug_downsignal'].fillna("Missing_signalTypeDebug_downsignal", inplace=True)
#df_preprocess['signalTypeDebug_upsignal'].fillna("Missing_signalTypeDebug_upsignal", inplace=True)


         timeUTC    DSS   location                   created  timeZoneOffset  \
0  1547424005485  DSS14  Goldstone  2019-01-13T23:40:09.011Z       -28800000   

  azimuthAngle elevationAngle  isArray  isDDOR  isMSPA  ...  \
0          180           88.5    False   False   False  ...   

  frequency_upsignal power_upsignal signalType_upsignal spacecraftId_upsignal  \
0               7186        1.3e-05                none                   234   

  downlegRange   id     rtlt uplegRange signalTypeDebug_downsignal  \
0  2.22529e+08  234  1484.46  2.225e+08           IDLE OFF 0 TURBO   

  signalTypeDebug_upsignal  
0                   OFF 0   

[1 rows x 27 columns]


In [37]:
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
#encode = LabelEncoder

In [29]:
# separate array into input and output components
#scaler = MinMaxScaler(feature_range=(0, 1))
#df_Gold_preprocess = preprocessing.StandardScaler().fit_transform(df_Goldstone)

In [None]:
#encode.fit_transform

# Categorical Features-One Hot Encoding

Once you know what type of categorical data you’re working on, you can pick a suiting transformation tool.
In sklearn that will be a OrdinalEncoder for ordinal data, and a OneHotEncoder for nominal data.
Remember that we can’t replace these features by a number since this would imply the features have an order.

In [38]:

from sklearn.preprocessing import OneHotEncoder

onehot = OneHotEncoder(dtype=np.int, sparse=True)
nominals = pd.DataFrame(
    onehot.fit_transform(df_preprocess[['location', 'DSS','signalTypeDebug_downsignal','signalTypeDebug_upsignal']])\
    .toarray(),
    columns=['Goldstone', 'Madrid', 'Canberra', 'DSS63','DSS65', 'DSS54','DSS55','DSS14', 'DSS24','DSS25', 'DSS26',
            'DSS43', 'DSS34','DSS35', 'DSS36','DSS60','IDLE OFF 0 TURBO', 'IDLE OFF 0 MCD2', 'IN LOCK OFF 1 MCD2',
            'IN LOCK IN LOCK 1 TURBO', 'IDLE OFF 1 TURBO', '  -1', 'Missing_data', 'IDLE OFF 1 MCD2', 
            'OUT OF LOCK OUT OF LOCK 0 TURBO','OUT OF LOCK WAIT FOR LOCK 1 TURBO', 'OUT OF LOCK OFF 0 MCD2',
            'OUT OF LOCK OUT OF LOCK 1 TURBO', 'OUT OF LOCK OFF 1 MCD2','IDLE OFF 0 MCD3', 'IDLE OFF 0 UNC', 
            'IDLE OFF 1 MCD3','OUT OF LOCK OFF 1 MCD3', 'OUT OF LOCK OFF 0 MCD3','IN LOCK OFF 1 MCD3', 
            'IN LOCK OFF 0 MCD3','IN LOCK IN LOCK 0 TURBO', 'IN LOCK OFF 0 MCD2', 'IDLE OFF 1 UNC',
            'OUT OF LOCK OFF 1 UNC', 'IN LOCK OFF 1 UNC', 'IN LOCK OFF 0 UNC','OUT OF LOCK WAIT FOR LOCK 0 TURBO', 
             'OUT OF LOCK IN LOCK 1 TURBO','OFF 0 ', 'ON 1 ', 'Missing_data', 'OFF 0 IDLE', 'ON 1 TRK', ' 0 IDLE',
             ' 0 TRK', 'none', 'ON 1 IDLE', '  TRK', '  IDLE', 'ON 0 ', 'OFF 0 TRK', 'OFF 1 ', 'ON 0 CAL', 
             'ON 1 CAL', 'ON 0 IDLE','OFF 1 IDLE', 'ON 0 TRK', ' 0 ', 'OFF 1 TRK'])


#It is important to have a word on how to handle missing values with the OneHotEncoder.
#A missing value can easily be handled as an extra feature. Note that to do this, you 
#need to replace the missing value by an arbitrary value first (e.g. ‘missing’)
#If you, on the other hand, want to ignore the missing value and create an instance with all
#zeros (False), you can just set the handle_unkown parameter of the OneHotEncoder to ignore.


In [39]:
print(nominals.head(1))

   Goldstone  Madrid  Canberra  DSS63  DSS65  DSS54  DSS55  DSS14  DSS24  \
0          0       1         0      1      0      0      0      0      0   

   DSS25  ...  ON 0   OFF 0 TRK  OFF 1   ON 0 CAL  ON 1 CAL  ON 0 IDLE  \
0      0  ...      0          0       0         0         0          0   

   OFF 1 IDLE  ON 0 TRK   0   OFF 1 TRK  
0           0         0    0          0  

[1 rows x 65 columns]


In [162]:
print(nominals.shape)

(249959, 65)


In [76]:
#Remove the target variable from the training set
#The target variable is 'signalTypeDebug_downsignal' which we remove and 
#assign as an array to its own variable. We will use it later when we do machine learning.

#y = df.pop('signalTypeDebug_downsignal').values

If we want to use the preprocessing in the ‘supervised learning’, then it is better to ‘split’ the dataset 
as ‘test and train’ first; and then apply the preprocessing to the ‘training data’ only.
This is the good practice as in real-life problems we will not have the future data for preprocessing.

Normalization
Normalization is the process of scaling individual samples to have unit norm. In basic terms you need to normalize 
data when the algorithm predicts based on the weighted relationships formed between data points. Scaling inputs to
unit norms is a common operation for text classification or clustering.

One of the key differences between scaling (e.g. standardizing) and normalizing, is that normalizing is a 
row-wise operation, while scaling is a column-wise operation.

Below, the formula’s for the available norms are discussed and implemented in Python code — where the result
is a list of denominators for each sample in data set X .

The max norm uses the absolute maximum and does for samples what the MaxAbsScaler does for features.
x_normalized = x / max(x)

In [41]:
#norm_max = list(max(list(abs(i) for i in df_preprocess.iloc[r])) for r in range(len(df_preprocess)))

#‘L1’ it is insensitive to outliers
#The L1 norm uses the sum of all the values as and thus gives equal penalty to all parameters, enforcing sparsity.
#x_normalized = x / sum(X)
#norm_l1 = list(sum(list(abs(i) for i in df_preprocess.iloc[r])) for r in range(len(df_preprocess)))

#preprocessing.normalize(nominals, norm='l2')  # outliers are taken into consideration
#preprocessing.normalize(df_preprocess, norm='l1')

#‘L2’ takes outliers into consideration during training :).  Use L2 when outliers are important!!!  
#The l2 norm uses the square root of the sum of all the squared values. This creates smoothness
#and rotational invariance. Some models, like PCA, assume rotational invariance, and so l2 will perform better.
#x_normalized = x / sqrt(sum((i**2) for i in X))
#norm_l2 = list(math.sqrt(sum(list((i**2) for i in df_preprocess.iloc[r]))) for r in range(len(df_preprocess)))

In [40]:
preprocessing.normalize(nominals, norm='l2')  # outliers are taken into consideration

array([[0. , 0.5, 0. , ..., 0. , 0. , 0. ],
       [0. , 0.5, 0. , ..., 0. , 0. , 0. ],
       [0. , 0.5, 0. , ..., 0. , 0. , 0. ],
       ...,
       [0.5, 0. , 0. , ..., 0. , 0. , 0.5],
       [0.5, 0. , 0. , ..., 0. , 0. , 0.5],
       [0.5, 0. , 0. , ..., 0. , 0. , 0. ]])

# Normalized Subsets

In [164]:
#Normalized subsets
# Goldstone subset  DO I have to subset??
df_Goldstone_nominal = nominals.loc[nominals['Goldstone']==1]
df_DSS14_nominal = nominals.loc[nominals['DSS14']==1]
df_DSS24_nominal = nominals.loc[nominals['DSS24']==1]
df_DSS25_nominal = nominals.loc[nominals['DSS25']==1]
df_DSS26_nominal = nominals.loc[nominals['DSS26']==1]


####################################################################
df_Madrid_nominal = nominals.loc[nominals['Madrid']==1]

#####################################################################
df_Canberra_nominal = nominals.loc[nominals['Canberra']==1]

In [165]:
df_DSS14_nominal.head()

Unnamed: 0,Goldstone,Madrid,Canberra,DSS63,DSS65,DSS54,DSS55,DSS14,DSS24,DSS25,...,ON 0,OFF 0 TRK,OFF 1,ON 0 CAL,ON 1 CAL,ON 0 IDLE,OFF 1 IDLE,ON 0 TRK,0,OFF 1 TRK
5,1,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0
13,1,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0
21,1,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0
29,1,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0
37,1,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0


# Train and Test Multi-Classification Split

In [166]:
df_Goldstone_nominal_train = df_Goldstone_nominal.loc[:61597,:]
df_Goldstone_nominal_test = df_Goldstone_nominal.loc[61597:,:]

While the ground station is searching for a signal, it may ‘lock on’ to a signal from a different spacecraft
and wrongly identify it as the spacecraft being searched for. This is particularly common with spacecraft at
Mars as multiple spacecraft are within the field of view of a single DSN antenna. For example, attempts to
recover the Opportunity Rover (MERB) may appear successful when the antenna has actually locked on to a 
signal from one of the orbiters around Mars such as MAVEN or MRO. When this occurs, 
engineers ask the antenna to ‘drop lock’ and the hunt for the spacecraft continues.

In [167]:
print(nominals.shape)

(249959, 65)


In [168]:
from sklearn import linear_model
from scipy.special import expit
from sklearn.model_selection import train_test_split
#nominals_train = [:-65]
#nominals_train = [-65:]

log = linear_model.LogisticRegression(solver='lbfgs', C=1e5,
    multi_class='multinomial')
#log.fit(iris_X_train, iris_y_train)  
#LogisticRegression(C=100000.0, class_weight=None, dual=False,
#fit_intercept=True, intercept_scaling=1, l1_ratio=None, max_iter=100,
#multi_class='multinomial', n_jobs=None, penalty='l2', random_state=None,
#solver='lbfgs', tol=0.0001, verbose=0, warm_start=False)

In [None]:
#Multiclass classification
#If you have several classes to predict, an option often used is to fit one-versus-all classifiers and then
#use a voting heuristic for the final decision.

# Multi-Classification Model Development, Test, and Selection

In [95]:
#Model Selection
#We are now ready to experiment with different machine learning models, evaluate their accuracy and 
#find the source of any potential issues. We will benchmark the following four models:
#Logistic Regression
#(Multinomial) Naive Bayes
#Linear Support Vector Machine
#Random Forest

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.model_selection import cross_val_score
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.naive_bayes import MultinomialNB

In [156]:
#models = [
#    RandomForestClassifier(n_estimators=200, max_depth=3, random_state=0),
#    LinearSVC(),
#    MultinomialNB(),
#    LogisticRegression(random_state=0),
#]
#CV = 5
#cv_df = pd.DataFrame(index=range(CV * len(models)))
#entries = []
#for model in models:
#  model_name = model.__class__.__name__
#  accuracies = cross_val_score(model, features, labels, scoring='accuracy', cv=CV)
#  for fold_idx, accuracy in enumerate(accuracies):
#    entries.append((model_name, fold_idx, accuracy))
#cv_df = pd.DataFrame(entries, columns=['model_name', 'fold_idx', 'accuracy'])
#import seaborn as sns
#sns.boxplot(x='model_name', y='accuracy', data=cv_df)
#sns.stripplot(x='model_name', y='accuracy', data=cv_df, 
              #size=8, jitter=True, edgecolor="gray", linewidth=2)
#plt.show()

# Supervised ML Classification Labels: Maximum Convolutional Decoder (MCD)

DSN 208
Telemetry Data Decoding
Purpose
810-005 208, Rev. B
This module describes the capabilities and performance of the telemetry decoding and frame synchronization equipment used by the Deep Space Network (DSN) in order to assist the telecommunications engineer in designing compatible spacecraft equipment.

The DSN supports two convolutional codes, the Consultative Committee for Space Data Systems (CCSDS) 
standard Reed-Solomon code, and the CCSDS Turbo codes. Convolutional codes are used because they achieve 
significant coding gain with simple, highly reliable encoders and their decoders are of reasonable complexity. They also provide low latency and are useful when conditions may prevent a block of symbols from being received. The Reed- Solomon code provides excellent performance with minimum bandwidth expansion in a high signal-to-noise environment. It is most often used as an outer code in combination with a convolutional inner code but may be used by itself under appropriate signal conditions. Turbo codes provide near-Shannon-limit error-correction performance with reasonable encoding and decoding complexity. The DSN presently includes an additional convolutional decoder that is used for the Cassini spacecraft support but it will be removed from service at the end of that mission.


2.5.4.1 Reed-Solomon Encoder
The most common architecture for an RS encoder is named the Berlekamp Architecture, after its inventor. 
This architecture, in combination with appropriate selection of the RS code generator polynomial, enables 
parity symbols to be calculated using bit-serial multipliers constructed with a matrix of exclusive OR gates. 
Figure 7 shows the design of a Berlekamp encoder for producing the DSN/CCSDS standard RS code that includes support
for interleaving and virtual fill as discussed below.
2.5.4.2 Concatenated Convolutional and Reed–Solomon Code Errors in convolutionally coded channels tend to occur
in bursts that result when noise causes the decoder to momentarily follow the wrong path through the decoding 
trellis. The combination of an outer Reed–Solomon (RS) code with an inner convolutional code provides good 
burst-error correction with minimal bandwidth expansion.
2.5.4.3 Interleaving
The burst errors associated with Viterbi decoding can be as long as several constraint lengths and equivalent to 
several consecutive RS symbols. Thus, several closely spaced error bursts can exceed an RS decoder's error 
correction capability.

Turbo Codes
Turbo codes provide error correction performance within approximately 0.8 dB of the theoretical limit at a BER of 
10–6. This performance is achieved using encoders and decoders of reasonable complexity but at the expense of 
increased latency. A turbo code is a systematic block code where two sets of parity symbols from independent 
recursive convolutional encoders are provided. The encoders employ trellis termination so that the codeblock both 
begins and ends in a known state.
The use of recursive convolutional encoders is one feature of turbo codes. The second is the presence of an 
interleaver at the input of one of the convolutional encoders that changes the order of the information bits 
before they are encoded. It is the presence of the interleaver that establishes the minimum latency as equaling 
the block size as an entire block of data must be assembled before the parity generation process can begin. 
Although the information bits appear, unchanged, in the encoded output, they do not appear contiguously as is the
case with Reed Solomon codes.

Data Formatting
The result of the previously described processing is a series of fixed-length frames of telemetry data. The content of
these frames may represent a single stream of telemetry data or a portion of several streams of telemetry data 
referred to as virtual channels, Virtual channels allocate the physical channel on a frame by frame basis identified
by a virtual channel identifier. The DSN separates the frames based on the virtual channel identifier and creates 
independent streams of telemetry data. The use of virtual channels enables portions of the data stream to be delivered
to different locations or with different latencies. Two types of telemetry frames are supported. Version I Frames, 
originally specified in CCSDS Recommendation 102.0-B, have the capability to support up to eight virtual channels
numbered from 0 to 7. Version II Frames, originally specified in CCSDS Recommendation 701.0-B, have the capability 
to support up to sixty-four virtual channels. The DSN can combine from 1 to 16 of these channels into virtual data 
streams and the same virtual channel may appear in multiple virtual data streams. The number of virtual data streams
that can be created for any one project is limited to 16.

3.4.3 Telemetry Processing
Two BVRs are assigned to a project’s tracking pass. Each BVR has phase- locked loops for acquiring and tracking the 
carrier, telemetry subcarrier, and telemetry symbol stream. Voyager generates a 22.5-kHz subcarrier for use with bit
rates less than or equal to 7.2 kbps and a 360-kHz subcarrier for use with bit rates greater than 7.2 kbps. In the
residual carrier mode, the X-band carrier.

Voyager Telecommunication 61
modulation index settings vary from 51 deg for the lowest data rate (10 bps) to 80 deg for the highest (115.2 kbps).17
The BVR delivers telemetry symbols to the maximum likelihood convolutional decoder (MCD). Voyager can use either the 
Block 2 or Block 3 MCD (MCD2 or MCD3)18 to process the (7,1/2) convolutional code. The MCD outputs decoded telemetry 
bits to the frame synchronizer subsystem (FSS).
An MCD/FSS pair makes up a telemetry channel assembly (TCA). The telemetry group controller governs the operation of 
TCA1 (with MCD3) and TCA2 (with MCD2). After the MCD achieves lock, the FSS requires recognition of a minimum of two 
successive frame-sync words to output telemetry to the project. Validation requires recognition of a third sync word.
The number of allowable miscompares (between received and expected bit values) in each frame-sync word recognized by 
the synchronizer can be set in the software.
3.5 Sample Telecom System Performance
The Voyager spacecraft receives an S-band uplink from the Earth and transmits S-and X-band downlinks to the Earth,
compatible with DSN station configurations and performance defined in the DSN Network Operations Plan for VIM [9] 
and the DSN Telecommunications Link Design Handbook [7].