In [1]:
import numpy as np
import pandas as pd

def extractValuefromSpectrum(startIndx,endIndx,spectrum):
    return [spectrum[str(indx)] for indx in range(startIndx,endIndx+1)]

Low_cali_linear_loss = [-21.144444444444446, -21.200000000000003, -21.26666666666667, -21.155555555555555, -21.13333333333333, -12.5, -12.722222222222225, -12.666666666666666, -12.6, -12.733333333333336, -12.722222222222225, -12.688888888888892, -12.688888888888892, -12.7, -12.722222222222221, -12.7, -12.68888888888889, -12.622222222222224, -12.600000000000001, -12.68888888888889, -12.666666666666666, -12.766666666666667, -12.733333333333334, -12.68888888888889, -12.700000000000001, -12.7, -12.655555555555557, -12.711111111111114, -12.68888888888889, -12.677777777777782, -12.58888888888889, -12.566666666666666, -12.5, -12.566666666666666, -12.677777777777779, -12.76666666666667, -12.677777777777777, -12.68888888888889, -12.644444444444446, -12.67777777777778, -12.722222222222225, -12.633333333333335, -12.644444444444446, -12.700000000000001, -12.81111111111111, -12.799999999999999, -12.755555555555558, -12.711111111111114, -12.722222222222221, -12.688888888888888, -12.711111111111114, -12.700000000000001, -12.788888888888891, -12.76666666666667, -12.68888888888889, -12.622222222222224, -12.700000000000001, -12.777777777777777, -12.744444444444447, -12.633333333333333, -12.655555555555557, -12.58888888888889, -12.622222222222224, -12.633333333333335, -12.633333333333335, -12.655555555555557, -12.733333333333334, -12.755555555555553, -12.68888888888889, -12.71111111111111, -12.777777777777779, -12.855555555555554, -12.8, -12.722222222222221, -12.844444444444443, -12.75555555555556, -12.899999999999999, -12.855555555555554, -12.877777777777778, -12.811111111111112, -12.81111111111111, -12.800000000000004, -12.799999999999999, -12.788888888888888, -12.777777777777775, -12.799999999999999, -12.844444444444441, -12.855555555555554, -12.866666666666667, -12.788888888888886, -12.844444444444443, -12.866666666666667, -12.97777777777778, -12.955555555555556, -12.922222222222222]
#this function only report the input/output spectra per experiment data chunk
def before_gain_calculation(buffer):
    input_spectras = []
    output_spectras= []
    input_spectras = buffer["roadm_dut_line_out_spectra"]
    input_spectras = [input_spectras[str(i+1)] for i in range(len(input_spectras))]
    output_spectras = buffer["roadm_aux_line_in_spectra"]
    output_spectras = [output_spectras[str(i+1)] for i in range(len(output_spectras))]


    input_spectras = np.asarray(input_spectras)
    output_spectras = np.asarray(output_spectras)
    return input_spectras,output_spectras


def calculate_raman_tilt(open_channels, output_spectra, low_cali_linear_loss):
    output_interest = [output_spectra[i] for i in open_channels]
    linear_loss_interest = [low_cali_linear_loss[i] for i in open_channels]
    output_interest = np.array(output_interest) - np.array(linear_loss_interest)
    coefficients = np.polyfit(open_channels, output_interest, 2)
    polynomial = np.poly1d(coefficients)
    fit_gain = polynomial(open_channels)
    raman_tilt = fit_gain[0] - fit_gain[-1]
    raman_tilt_linear = 10**(raman_tilt / 10)
    return raman_tilt_linear

def featureExtraction(data):
    features_list = []
    for item in data:
        open_channels = [int(ch) for ch in item["open_channels"]]
        fiber_in_power = item["roadm_dut_line_port"]["output-power"]
        fiber_out_power = item["roadm_aux_line_port"]["input-power"]
        open_channels_mean = np.mean(open_channels)
        open_channels_variance = np.var(open_channels)
        input_spectrum_values = [float(item["roadm_dut_line_out_spectra"][str(ch)]) for ch in open_channels]
        input_spectrum_mean = np.mean(input_spectrum_values)
        input_spectrum_variance = np.var(input_spectrum_values)
        
        # Input spectra Extraction
        input_spectra, output_spectra = before_gain_calculation(item)
        
        # Raman tilt calculation
        open_channels_indexes = np.array(open_channels) - 1
        raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
        
        features_list.append({
            'num_opened_channels': len(open_channels), 
            'open_channels_mean': open_channels_mean, 
            'open_channels_variance': open_channels_variance, 
            'input_spectrum_mean': input_spectrum_mean, 
            'input_spectrum_variance': input_spectrum_variance, 
            'Total_fiber_in_power': fiber_in_power, 
            'Total_fiber_out_power': fiber_out_power,
            # 'Raman_tilt': raman_tilt
        })
        
    features_df = pd.DataFrame(features_list)
    return features_df



def featureExtraction_random(data, channel_number):
    features_list = []
    
    for item in data:
        if item["num_opened_channels"] != channel_number:
            continue
        
        open_channels = [int(ch) for ch in item["open_channels"]]
        fiber_in_power = item["roadm_dut_line_port"]["output-power"]
        fiber_out_power = item["roadm_aux_line_port"]["input-power"]
        open_channels_mean = np.mean(open_channels)
        open_channels_variance = np.var(open_channels)
        input_spectrum_values = [float(item["roadm_dut_line_out_spectra"][str(ch)]) for ch in open_channels]
        input_spectrum_mean = np.mean(input_spectrum_values)
        input_spectrum_variance = np.var(input_spectrum_values)

        # 获取输入输出光谱
        input_spectra, output_spectra = before_gain_calculation(item)
        
        # 计算Raman tilt
        open_channels_indexes = np.array(open_channels) - 1  # 通道索引调整为从0开始
        raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
        
        features_list.append({
            'num_opened_channels': len(open_channels), 
            'open_channels_mean': open_channels_mean, 
            'open_channels_variance': open_channels_variance, 
            'input_spectrum_mean': input_spectrum_mean, 
            'input_spectrum_variance': input_spectrum_variance, 
            'Total_fiber_in_power': fiber_in_power, 
            'Total_fiber_out_power': fiber_out_power,
            # 'Raman_tilt': raman_tilt  # 添加Raman tilt作为特征
        })
        
    features_df = pd.DataFrame(features_list)
    return features_df


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
def featureExtraction4(data):
    features_list = []
    for item in data:
        open_channels = [int(ch) for ch in item["open_channels"]]
        fiber_in_power = item["roadm_dut_line_port"]["output-power"]
        fiber_out_power = item["roadm_aux_line_port"]["input-power"]
        open_channels_variance = np.var(open_channels)
        input_spectrum_values = [float(item["roadm_dut_line_out_spectra"][str(ch)]) for ch in open_channels]
        input_spectrum_variance = np.var(input_spectrum_values)
        
        # Input spectra Extraction
        input_spectra, output_spectra = before_gain_calculation(item)
        
        # Raman tilt calculation
        open_channels_indexes = np.array(open_channels) - 1
        raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
        
        features_list.append({
            'num_opened_channels': len(open_channels), 
            'open_channels_variance': open_channels_variance, 
            'Total_fiber_in_power': fiber_in_power, 
            'Total_fiber_out_power': fiber_out_power,
            'Raman_tilt': raman_tilt
        })
        
    features_df = pd.DataFrame(features_list)
    return features_df

def featureExtraction_random4(data, channel_number):
    features_list = []
    
    for item in data:
        if item["num_opened_channels"] != channel_number:
            continue
        
        open_channels = [int(ch) for ch in item["open_channels"]]
        fiber_in_power = item["roadm_dut_line_port"]["output-power"]
        fiber_out_power = item["roadm_aux_line_port"]["input-power"]
        open_channels_variance = np.var(open_channels)
        input_spectrum_values = [float(item["roadm_dut_line_out_spectra"][str(ch)]) for ch in open_channels]

        # 获取输入输出光谱
        input_spectra, output_spectra = before_gain_calculation(item)
        
        # 计算Raman tilt
        open_channels_indexes = np.array(open_channels) - 1  # 通道索引调整为从0开始
        raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
        
        features_list.append({
            'num_opened_channels': len(open_channels), 
            'open_channels_variance': open_channels_variance, 
            'Total_fiber_in_power': fiber_in_power, 
            'Total_fiber_out_power': fiber_out_power,
            # 'Raman_tilt': raman_tilt  # 添加Raman tilt作为特征
        })
        
    features_df = pd.DataFrame(features_list)
    return features_df

3 features

In [3]:
def featureExtraction3(data):
    features_list = []
    for item in data:
        open_channels = [int(ch) for ch in item["open_channels"]]
        fiber_in_power = item["roadm_dut_line_port"]["output-power"]
        fiber_out_power = item["roadm_aux_line_port"]["input-power"]
        open_channels_variance = np.var(open_channels)
        input_spectrum_values = [float(item["roadm_dut_line_out_spectra"][str(ch)]) for ch in open_channels]
        input_spectrum_variance = np.var(input_spectrum_values)
        
        # Input spectra Extraction
        input_spectra, output_spectra = before_gain_calculation(item)
        
        # Raman tilt calculation
        open_channels_indexes = np.array(open_channels) - 1
        raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
        
        features_list.append({
            'open_channels_variance': open_channels_variance, 
            'Total_fiber_in_power': fiber_in_power, 
            'Total_fiber_out_power': fiber_out_power,
            'Raman_tilt': raman_tilt
        })
        
    features_df = pd.DataFrame(features_list)
    return features_df

def featureExtraction_random3(data, channel_number):
    features_list = []
    
    for item in data:
        if item["num_opened_channels"] != channel_number:
            continue
        
        open_channels = [int(ch) for ch in item["open_channels"]]
        fiber_in_power = item["roadm_dut_line_port"]["output-power"]
        fiber_out_power = item["roadm_aux_line_port"]["input-power"]
        open_channels_variance = np.var(open_channels)
        input_spectrum_values = [float(item["roadm_dut_line_out_spectra"][str(ch)]) for ch in open_channels]

        # 获取输入输出光谱
        input_spectra, output_spectra = before_gain_calculation(item)
        
        # 计算Raman tilt
        open_channels_indexes = np.array(open_channels) - 1  # 通道索引调整为从0开始
        raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
        
        features_list.append({
            'open_channels_variance': open_channels_variance, 
            'Total_fiber_in_power': fiber_in_power, 
            'Total_fiber_out_power': fiber_out_power,
            'Raman_tilt': raman_tilt  # 添加Raman tilt作为特征
        })
        
    features_df = pd.DataFrame(features_list)
    return features_df

Main Function

In [4]:
import json
import pandas as pd

train_ratio = 0.8
data_prepath = "../data/"
output_prepath = "../feature2/"

fileList = ['fibre_25km_step', 'fibre_25km_random', 'fibre_25km_edge_goalpost', 
            'fibre_50km_step', 'fibre_50km_random', 'fibre_50km_edge_goalpost']

# Initialize DataFrames for different combinations
# Use a dictionary to dynamically manage the dataframes based on the type and distance
extractedFeatures = {}

for fileName in fileList:
    print(fileName)
    filePath = data_prepath + fileName + ".json"

    with open(filePath, "r") as read_file:
        data = json.load(read_file)

        # Determine type and distance for filename
        if '25km' in fileName:
            distance = '25km'
        elif '50km' in fileName:
            distance = '50km'
        else:
            distance = 'unknown'

        # Apply feature extraction based on file type
        if 'step' in fileName:
            fileType = 'step'
            newFeature = featureExtraction4(data["measurement_data"])
        elif 'goalpost' in fileName:
            fileType = 'goalpost'
            newFeature = featureExtraction4(data["measurement_data"])
        elif 'random' in fileName:
            fileType = 'random'
            # newFeature = featureExtraction4(data["measurement_data"])
            newFeature = pd.DataFrame()  # Initialize empty DataFrame for concatenation
            for channel_number in [2, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90]:
                tempFeature = featureExtraction_random4(data["measurement_data"], channel_number)
                newFeature = pd.concat([newFeature, tempFeature], ignore_index=True)

        # Key to identify each unique combination of type and distance
        key = f"{distance}_{fileType}"

        if key not in extractedFeatures:
            extractedFeatures[key] = newFeature
        else:
            extractedFeatures[key] = pd.concat([extractedFeatures[key], newFeature], ignore_index=True)

# Split and output the feature dataframes to CSV files, if needed
for key, df in extractedFeatures.items():
    if 'random' in key:
        # Perform train-test split for random type
        training_data = df.sample(frac=train_ratio, random_state=50)
        testing_data = df.drop(training_data.index)
        training_data.to_csv(output_prepath + key + '4' + "_out" + "_train.csv", index=False)
        testing_data.to_csv(output_prepath + key + '4' + "_out" + "_test.csv", index=False)
    else:
        # Directly output the DataFrame for step and goalpost types
        df.to_csv(output_prepath + key + '4' + "_no_out" + ".csv", index=False)

# for key, df in extractedFeatures.items():
#     # Perform train-test split for random type
#     training_data = df.sample(frac=train_ratio, random_state=50)
#     testing_data = df.drop(training_data.index)
#     training_data.to_csv(output_prepath + key + '4' + "_train.csv", index=False)
#     testing_data.to_csv(output_prepath + key + '4' + "_test.csv", index=False)

  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman

fibre_25km_step
fibre_25km_random
fibre_25km_edge_goalpost
fibre_50km_step
fibre_50km_random


  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman

fibre_50km_edge_goalpost


  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
  raman_tilt = calculate_raman_tilt(open_channels_indexes, output_spectra, Low_cali_linear_loss)
