# Program to process and combine outputs of each run and save as .CSV file (for each missile x interceptor combination)

#### Import libraries 

In [1]:
import pandas as pd
from tkinter import Tk, filedialog
import os
from xml.dom import minidom

#### Get the path to directory of files

In [2]:
def getDirectory():
    root = Tk() # pointing root to Tk() to use it as Tk() in program.
    root.withdraw() # Hides small tkinter window.
    root.attributes('-topmost', True) # Opened windows will be active. above all windows despite of selection.
    dir_path = filedialog.askdirectory(title='Select folder containing the files and subfolders') # Returns opened path as str
    print("\nSelected directory is:\n", dir_path)
    return dir_path

#### Ask where to save final result in CSV format

In [3]:
def saveFile():
    root = Tk() # pointing root to Tk() to use it as Tk() in program.
    root.withdraw() # Hides small tkinter window.
    root.attributes('-topmost', True) # Opened windows will be active. above all windows despite of selection.
    types=[('CSV types(*.csv)','*.csv')]
    save_path = filedialog.asksaveasfile(title='Where to save(Specify file name)',
                                         filetypes = types, 
                                         defaultextension = types) # Returns opened path as str
    print("\nSelected folder to save is:\n", save_path.name)
    return save_path.name

#### Get the list of all files(with their path) in the direcotry

In [4]:
def filesList(directory): 
    
    # Get the list of all files in directory tree at given path
    listOfFiles = list()
    for (dirpath, dirnames, filenames) in os.walk(directory):
        listOfFiles += [os.path.join(dirpath, file) for file in filenames]
    print("\nThere are "+str(len(listOfFiles))+ " files in the directory")
    return listOfFiles

#### Extract the list of XML file names from listOfFiles

In [5]:
def extractXMLFiles(listOfFiles):
    xmlFilesNameList = list()
    xmlFilesList = list()
    for file in listOfFiles:
        if os.path.splitext(file)[1] == '.xml':
            xmlFilesNameList += [os.path.basename(os.path.splitext(file)[0])]
            xmlFilesList +=[file] 
    print('\nThere are ' + str(len(xmlFilesNameList)) + ' XML files\n')
    return xmlFilesNameList, xmlFilesList

#### Make the list of csv files located in the directory

In [6]:
def extractCSVFiles(listOfFiles):
    csvFilesNameList = list()
    csvFilesList = list()
    for file in listOfFiles:
        if os.path.splitext(file)[1] == '.csv':
            csvFilesNameList += [os.path.basename(os.path.splitext(file)[0])]
            csvFilesList +=[file] 
    print('\nThere are ' + str(len(csvFilesNameList)) + ' CSV files\n')
    return csvFilesNameList, csvFilesList 

#### Function to extract file name

In [7]:
def getFileName(file):
    return str(os.path.basename(os.path.splitext(file)[0]))

#### Function to process given CSV file as required

In [8]:
def processCSV(file):
    #open the file
    infile = open(file, "r")
    
    # read content
    content = infile.readlines()
    infile.close()
    
    new_content=[]
        
    # Getting the name of the file
    ##filename= ((content[3]).split('\\')[-1]).strip()
    
    # Getting only the required lines from the output file
    for line in content[5:16]:
        strippedLine=line.strip()
        new_content.append(strippedLine)
    
    # Making DataFrame from the cleaned output data
    data = pd.DataFrame([sub.split(",") for sub in new_content])    
    new_header = data.iloc[0] #grab the first row for the header
    data = data[1:] #take the data less the header row
    data.columns = new_header #set the header row as the df header
    
    # Adding filename(Scenario) name as a new feature 
    data['Scenario Name']=getFileName(file)
    
    return data

In [9]:
def getCasualty(mainFile, csvFilesList):
    fileName = getFileName(mainFile)
    
    missilesDestroyed = [0]*10
    
    #rawDataList=[]
    
    for casFile in csvFilesList:
        new_content=[]
        
        if fileName + '_' in getFileName(casFile):

            infile = open(casFile, "r")
            # read content
            content = infile.readlines()
            infile.close()

            index = int(getFileName(casFile).split('_')[-1])

            # Getting the name of the file
            #filename= ((content[3]).split('\\')[-1]).strip()

            # Getting only the required lines from the output file

            for line in content[5:len(content)-1]:
                strippedLine=line.strip()
                new_content.append(strippedLine)


            rawData = pd.DataFrame([sub.split(",") for sub in new_content])
            new_header = rawData.iloc[0] #grab the first row for the header
            rawData = rawData[1:] #take the data less the header row
            rawData.columns = new_header #set the header row as the df header

            #rawDataList[index-1]=rawData

            missilesDestroyed[index-1]=len((rawData.loc[(rawData['squad']=='Red') & 
                                           (rawData['x']<='390')]).index)

    return missilesDestroyed

#### Function to get required parameters from the given XML configuration file and them to the given DataFrame

In [10]:
def readXML(file, data):
    
    XMLObj = minidom.parse(file)
    
    # Missile Quantity
    missileQTY=0

    for squad in XMLObj.getElementsByTagName('Squad'):
        for index in squad.getElementsByTagName('index'):
            if int(index.firstChild.data) in [10,11,12]:
                for agents in squad.getElementsByTagName('NumAgents'):
                    missileQTY+=int(agents.firstChild.data)
    
    # Stealth number for each missile class(10,11,12)
    
    missileStealth=[]

    for squad in XMLObj.getElementsByTagName('Squad'):
        for index in squad.getElementsByTagName('index'):
            if int(index.firstChild.data) in [10,11,12]:
                for state in squad.getElementsByTagName('state'):
                    for stealth in state.getElementsByTagName('Stealth'):
                        missileStealth.append(int(stealth.firstChild.data))
    missileClass_10_Stealth = missileStealth[0]
    missileClass_11_Stealth = missileStealth[1]
    missileClass_12_Stealth = missileStealth[2]
    
    # Asset quantity
    
    assetQTY=0

    for squad in XMLObj.getElementsByTagName('Squad'):
        for index in squad.getElementsByTagName('index'):
            if int(index.firstChild.data) in [1,2,3]:
                for agents in squad.getElementsByTagName('NumAgents'):
                    assetQTY+=int(agents.firstChild.data)
    
    
    # Interceptor Quantity
    interceptorQTY=0

    for squad in XMLObj.getElementsByTagName('Squad'):
        for index in squad.getElementsByTagName('index'):
            if int(index.firstChild.data) in [7,8,9]:
                for agents in squad.getElementsByTagName('NumAgents'):
                    interceptorQTY+=int(agents.firstChild.data)
    
    #Interceptor Sensor Range
    interceptorSensorRange=0

    for squad in XMLObj.getElementsByTagName('Squad'):
        for index in squad.getElementsByTagName('index'):
            if int(index.firstChild.data)==7:
                for state in squad.getElementsByTagName('state'):
                    for statename in state.getElementsByTagName('StateName'):
                        if statename.firstChild.data == ' Default State ': 
                            for sensorstate in state.getElementsByTagName('SensorState'):
                                for sensorstate in state.getElementsByTagName('SensorState'):
                                    for sensstclass in sensorstate.getElementsByTagName('SensStClass'):
                                        interceptorSensorRange = int(sensstclass.firstChild.data)

    # Interceptor Movement Speed
    interceptorMovementSpeed=0

    for squad in XMLObj.getElementsByTagName('Squad'):
        for index in squad.getElementsByTagName('index'):
            if int(index.firstChild.data)==7:
                for state in squad.getElementsByTagName('state'):
                    for statename in state.getElementsByTagName('StateName'):
                        if statename.firstChild.data == ' Contact State ': 
                            for ranges in state.getElementsByTagName('range'):
                                for rangename in ranges.getElementsByTagName('RangeName'):
                                    if rangename.firstChild.data == ' Movement Speed   ':
                                        for rangevalue in ranges.getElementsByTagName('RangeVal'):
                                            interceptorMovementSpeed = int(rangevalue.firstChild.data)
      
    # Interceptor Hit probability
    interceptorHitProbability=0

    for squad in XMLObj.getElementsByTagName('Squad'):
        for index in squad.getElementsByTagName('index'):
            if int(index.firstChild.data)==7:
                for state in squad.getElementsByTagName('state'):
                    for statename in state.getElementsByTagName('StateName'):
                        if statename.firstChild.data == ' Contact State ': 
                            for weaponstate in state.getElementsByTagName('WeaponState'):
                                for sskptable in weaponstate.getElementsByTagName('sskpTable'):
                                    for sskptablepoint in sskptable.getElementsByTagName('sskpTablePoint'): 
                                        for sskptableprob in sskptablepoint.getElementsByTagName('sskpTableProb'): 
                                            interceptorHitProbability=float(sskptableprob.firstChild.data)/10000
 
    data['Asset Amount']= assetQTY
    data['Missile Quantity'] = missileQTY
    data['Missile Class_10 Stealth'] = missileClass_10_Stealth
    data['Missile Class_11 Stealth'] = missileClass_11_Stealth
    data['Missile Class_12 Stealth'] = missileClass_12_Stealth
    data['Interceptor Quantity'] = interceptorQTY
    data['Interceptor Sensor Range'] = interceptorSensorRange
    data['Interceptor Speed'] = interceptorMovementSpeed
    data['Interceptor Hit Probability'] = interceptorHitProbability                              
        
    return data   

### Main part of the Program which calls above functions and does the iteration

In [31]:
dataFrameList =list()

# Get the path to directory of files
dir_path = getDirectory()

# Ask where to save final result in CSV format
savePath = saveFile()

# Get the list of all files(with their path) in the direcotry
listOfFiles = filesList(dir_path)

# Extract the list of XML file names from listOfFiles
xmlFilesNameList, xmlFilesList = extractXMLFiles(listOfFiles)

# Make the list of csv files located in the directory
csvFilesNameList, csvFilesList = extractCSVFiles(listOfFiles)


#process counter
pCntr=0 
step = len(xmlFilesList)//5
# Check if csv file has matching XML file 
for file in csvFilesList:
    if (getFileName(file) in xmlFilesNameList and 
        len(getFileName(file))>9):
        pCntr+=1

        # Process the CSV file, convert to DataFrame and add filename as column
        data = processCSV(file)
        
        # Adding new column with amount of missiles sucessfully destroyed by interceptors
        
        data['SucessfullyDestroyedMissiles']=getCasualty(file, csvFilesList)
               
        
        # Read respective XML file and add required information to the dataframe
        index = xmlFilesNameList.index(getFileName(file))
        data = readXML(xmlFilesList[index],data)

        # Add dataframe to the list
        dataFrameList.append(data)
        if pCntr==step:
            print('Processed ' + str(pCntr)+ ' out of ' + str(len(xmlFilesList)))
            step+=step
#print(len(dataFrameList))

# Stacking (merging) all dataframes    
finalDataFrame = pd.concat(dataFrameList)

#list of columns to drop
columnsToDrop=[' Sqd1Inj',' Sqd2Inj',' Sqd3Inj',' Sqd4Inj',' Sqd5Inj',
               ' Sqd6Inj',' Sqd7Inj',' Sqd8Inj',' Sqd9Inj',' Sqd10Inj',
               ' Sqd11Inj',' Sqd12Inj']


#dropping not required columns
finalDataFrame = finalDataFrame.drop(columns=columnsToDrop)

# adding new column "AssetSurvivalRate"
sqd1Cas = pd.to_numeric(finalDataFrame[' Sqd1Cas'])
sqd2Cas = pd.to_numeric(finalDataFrame[' Sqd2Cas'])
sqd3Cas = pd.to_numeric(finalDataFrame[' Sqd3Cas'])
totalAssetCas = sqd1Cas +sqd2Cas +sqd3Cas                                                     

finalDataFrame['AssetSurvivalRate'] = round(1-totalAssetCas/pd.to_numeric(finalDataFrame['Asset Amount']),2)

# adding new column "DefenseSuccessRate"
finalDataFrame['DefenseSuccessRate'] = round(pd.to_numeric(finalDataFrame['SucessfullyDestroyedMissiles'])/
                                             pd.to_numeric(finalDataFrame['Missile Quantity']),3)



# Convert final dataframe to CSV and export


finalDataFrame.to_csv(savePath, index=False, line_terminator='\n')

print("Total of " + str(pCntr) + " csv files were processed")


Selected directory is:
 C:/Users/arsha/OneDrive - Carleton University/2022W/DATA5000W/Group Project/MANA/DataFarming/4x8_

Selected folder to save is:
 C:/Users/arsha/OneDrive - Carleton University/2022W/DATA5000W/Group Project/MANA/DataFarming/Casulty/4x8_final_with_casulty.csv

There are 4766 files in the directory

There are 401 XML files


There are 4356 CSV files

Total of 396 csv files were processed


#### Function to generate sensor range based on "scenario name"

In [93]:
def getSensorRange(scenarioName):
    
    scenarioName = scenarioName.split('_')
    if scenarioName[-5]=='Sensor Range':
        sensorRange = int(scenarioName[-4])
    elif scenarioName[-5] == 'Table Sskp1 x100 (wp1)':
        sensorRange = int(scenarioName[-1])
    else:
        sensorRange = 100
    return sensorRange

# Program to select and combine multiple .CSV result files into  a single final .CSV 

In [96]:
# Get the path to directory of files
dir_path = getDirectory()

# Ask where to save final result in CSV format
savePath = saveFile()

# Get the list of all files(with their path) in the direcotry
listOfFiles = filesList(dir_path)

# Make the list of csv files located in the directory
csvFilesNameList, csvFilesList = extractCSVFiles(listOfFiles)

#combining all of the files in the list
dataFrameList =[pd.read_csv(file) for file in csvFilesList]
combinedFinalData = pd.concat([file for file in dataFrameList])


#Updating "sensorrange" column
combinedFinalData=combinedFinalData.drop(columns=['Interceptor Sensor Range'])

newlist=[]
for index, row in combinedFinalData.iterrows():
    scenarioName = combinedFinalData.iloc[index]['Scenario Name']
    sensorRange = getSensorRange(scenarioName)
    newlist.append(sensorRange)
    
combinedFinalData['Sensor Range'] = newlist 


# exporting csv file
combinedFinalData.to_csv(savePath, index=False, line_terminator='\n')



Selected directory is:
 C:/Users/arsha/OneDrive - Carleton University/2022W/DATA5000W/Group Project/MANA/DataFarming/Casualty

Selected folder to save is:
 C:/Users/arsha/OneDrive - Carleton University/2022W/DATA5000W/Group Project/MANA/DataFarming/FinalDataset_withCasualtyinfo.csv

There are 4 files in the directory

There are 4 CSV files



In [107]:
testingdata = combinedFinalData.drop(columns=['Sensor Range','Scenario Name'])

In [108]:
testingdata

Unnamed: 0,Run,Seed,Alleg1Cas(Blue),Alleg2Cas(Red),Blue Reach Goal,Red Reach Goal,Steps,Sqd1Cas,Sqd2Cas,Sqd3Cas,...,Asset Amount,Missile Quantity,Missile Class_10 Stealth,Missile Class_11 Stealth,Missile Class_12 Stealth,Interceptor Quantity,Interceptor Speed,Interceptor Hit Probability,AssetSurvivalRate,DefenseSuccessRate
0,1,-1735733161,6,4,No,No,800,0,0,0,...,3,4,0,0,0,8,100,1.0,1.00,1.000
1,2,-1296512345,7,4,No,No,800,0,0,1,...,3,4,0,0,0,8,100,1.0,0.67,0.750
2,3,-199038563,4,4,No,No,800,1,0,0,...,3,4,0,0,0,8,100,1.0,0.67,0.750
3,4,-1937978133,5,4,No,No,800,0,0,0,...,3,4,0,0,0,8,100,1.0,1.00,1.000
4,5,1383552751,7,4,No,No,800,0,1,0,...,3,4,0,0,0,8,100,1.0,0.67,0.750
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3955,6,700172630,5,6,No,No,800,1,1,1,...,3,8,30,30,30,4,70,0.9,0.00,0.125
3956,7,1996864237,7,4,No,No,800,1,1,1,...,3,8,30,30,30,4,70,0.9,0.00,0.125
3957,8,-1813591623,5,6,No,No,800,1,1,1,...,3,8,30,30,30,4,70,0.9,0.00,0.125
3958,9,-1205776061,6,7,No,No,800,1,1,1,...,3,8,30,30,30,4,70,0.9,0.00,0.000


In [39]:
newlist=[]
for index in newfinaldata.index:
    value = newfinaldata.iloc[index]['Scenario Name']
    value = value.split('_')
    if value[8]=='Sensor Range':
        cellvalue = int(value[9])
    elif value[8]=='Movement Speed':
        cellvalue = 50
    elif value[8] == 'Table Sskp1 x100 (wp1)':
        cellvalue = int(value[-1])
    newlist.append(cellvalue)
    
newfinaldata['Sensor Range2'] = newlist    
    

In [74]:
name =combinedFinalData.iloc[772]['Scenario Name']

In [88]:
name.split('_')[-4]

'100'

In [89]:
name =combinedFinalData.iloc[48062]['Scenario Name']
name.split('_')[-4]

'100'

In [31]:
if value[8]=='Sensor Range':
    cellvalue = int(value[9])
elif value[8]=='Movement Speed':
    cellvalue = 50
elif value[8] == 'Table Sskp1 x100 (wp1)':
    cellvalue = int(value[-1])
print(cellvalue)

50


In [109]:
# Feature Extraction with PCA
import numpy
from pandas import read_csv
from sklearn.decomposition import PCA
# load data

array = testingdata.values
X = array[:,7:28]
Y = array[:,28]
# feature extraction
pca = PCA(n_components=3)
fit = pca.fit(X)
# summarize components
print("Explained Variance: %s" % fit.explained_variance_ratio_)
print(fit.components_)

Explained Variance: [0.62334367 0.36082125 0.00937662]
[[-3.27568957e-04 -3.47144274e-04 -4.77504534e-04 -4.26249220e-06
  -3.85185989e-34 -1.17549435e-38  2.95326264e-03  1.52251618e-03
   9.14385146e-04  4.89696223e-04  6.05542514e-04 -8.51184137e-06
   2.25624527e-03 -0.00000000e+00  1.60924325e-06 -5.77345036e-01
  -5.77345036e-01 -5.77345036e-01 -1.60924325e-06  3.12668174e-04
   2.81569267e-07]
 [-9.44040973e-04 -1.02793704e-02 -1.00785222e-02  2.21334118e-04
   2.46519033e-32  3.00926554e-36  1.41524173e-02  1.77356075e-02
   4.15154280e-03  8.04901194e-03  7.74059761e-03  2.03164913e-03
   6.24711810e-02  0.00000000e+00  7.01229585e-04  3.13370800e-04
   3.13370800e-04  3.13370800e-04 -7.01229585e-04  9.97610488e-01
   5.44115037e-04]
 [ 8.78449335e-02  1.63017466e-02  3.46612608e-02 -1.50496876e-04
  -8.47032947e-22 -6.61744490e-24 -6.54026384e-02  5.06135276e-03
  -4.58425046e-02  1.33770383e-01  1.79295528e-01  1.73293263e-01
   1.52194843e-01 -0.00000000e+00  6.63710814e-01

In [111]:

# Feature Importance with Extra Trees Classifier
from pandas import read_csv
from sklearn.ensemble import ExtraTreesClassifier
# load data
array = testingdata.values
X = array[:,7:28]
Y = array[:,28]
Y = Y.astype('float')
# feature extraction
model = ExtraTreesClassifier(n_estimators=10)
model.fit(X, Y)
print(model.feature_importances_)

ValueError: Unknown label type: 'continuous'