In [20]:
#Modules
import requests
import sys
import math

In [21]:
#Web Scraping
#Notebook-wide state used by the cells below
debug = False       #When True, the cells below print their intermediate values
bigDict = dict()    #Maps each country name to the list [power index, military spending]

#This function takes in an http address and collects the names of the countries at that address
#This is necessary because each webpage at globalfirepower.com has the countries listed in order based on the specific statistic the page is displaying
def findNames(https):
    """Download the page at https and return the long form country names found in it.

    Args:
        https: Address of the page to request.

    Returns:
        A list of country names in the order they appear on the page. The list is empty
        when the response status is not 200 or when the page contains no
        class="longFormName" element, so callers can treat an empty list as a failure.

    Exceptions raised by requests.get (for example on a timeout) are not caught here.
    """
    marker = 'class="longFormName"'
    #NOTE(review): the three offsets below are carried over unchanged from the original code; they depend
        #on the exact markup the site serves and must be re-checked if the page layout changes
    nameOffset = 93         #Characters between the start of the marker and the first character of the name
    windowEnd = 200         #The "<" delimiter is only searched for up to this many characters after the marker
    trailingChars = 20      #Characters between the end of the name and the "<" delimiter

    response = requests.get(https, timeout = 2)         #Limit the request to 2 seconds so the script cannot hang forever
    if debug:
        print(f"response: {response.text}")
        print(type(response.text))

    if response.status_code != 200:                     #The request failed, so there is nothing to search through
        return []

    text = response.text
    lfnIndeces = []                                     #Index of every occurrence of the marker in the html string
    index = text.find(marker)
    while index != -1:                                  #Only record an index once we know the marker was actually found
        lfnIndeces.append(index)
        index = text.find(marker, index+1)              #Only search the portion of the string not searched yet
    if debug:
        print(lfnIndeces)

    countryNames = []
    for index in lfnIndeces:
        start = index + nameOffset
        delimitor = text[start:index+windowEnd].find("<")               #Position of the delimiter relative to the start of the name
        countryNames.append(text[start:start+delimitor-trailingChars])  #Everything before the trailing characters is the long form name
    return countryNames

#Scraping settings
#NOTE(review): every character offset below is carried over unchanged from the original scraper; the offsets
    #depend on the exact markup globalfirepower.com serves and must be re-checked whenever the site changes
requestTimeout = 2                                  #Seconds to wait for each page (the same limit findNames uses)
powerIndexMarker = 'class="pwrIndxContainer"'
powerIndexStart = 84                                #Offset from the marker to the first character of the power index
powerIndexEnd = 90                                  #Offset from the marker to one past the last character of the power index
budgetMarker = 'class="valueContainer"'
budgetStart = 227                                   #Offset from the marker to the first character of the budget figure
budgetWindowEnd = 300                               #The space that ends the budget figure is only searched for up to this offset
outlierBudget = 200                                 #Budgets above this many billions of USD are treated as outliers

def findMarkers(text, marker):
    """Return the index of every occurrence of marker in text, in the order they appear."""
    positions = []
    index = text.find(marker)
    while index != -1:                              #Only record an index once we know the marker was found
        positions.append(index)
        index = text.find(marker, index+1)          #Continue after the occurrence that was just recorded
    return positions

#Power Index Scraping
powerIndexHTTPS = "https://www.globalfirepower.com/countries-listing.php"           #Webpage that we will scrape
powerIndexNames = findNames(powerIndexHTTPS)                                        #Collect the names once and reuse them below
if len(powerIndexNames) == 0:                                                       #Check if there are any country names in the page
    print("There was an error in processing your https request (Country Names)")
    sys.exit()

response = requests.get(powerIndexHTTPS, timeout = requestTimeout)                  #Make http request
if response.status_code != 200:
    print("There was an error processing your https request (Power Indeces)")
    sys.exit()                                                                      #Stop here: without this page pi_dict cannot be built

powerIndeces = []
for index in findMarkers(response.text, powerIndexMarker):
    powerIndeces.append(float(response.text[index+powerIndexStart:index+powerIndexEnd]))    #Convert string to numeric
if debug:
    print(powerIndeces)

pi_dict = dict(zip(powerIndexNames, powerIndeces))                                  #Zip names with power indices
if debug:
    print(pi_dict)

#Military Budget Scraping
militaryBudgetHTTPS = "https://www.globalfirepower.com/defense-spending-budget.php"
militaryBudgetNames = findNames(militaryBudgetHTTPS)                                #This page lists the countries in its own order
if len(militaryBudgetNames) == 0:
    print("There was an error in processing your https request (Names: Military Budget)")
    sys.exit()

response = requests.get(militaryBudgetHTTPS, timeout = requestTimeout)
if response.status_code != 200:
    print("There was an error processing your https request (Military Budget)")
    sys.exit()                                                                      #Stop here: without this page mb_dict cannot be built

militaryBudgets = []
for index in findMarkers(response.text, budgetMarker):
    start = index + budgetStart
    delimiter = response.text[start:index+budgetWindowEnd].find(" ")                #The figure ends at the first space
    if delimiter == -1:
        raise ValueError(f"No space found after the budget figure that starts at character {start}")
    number = response.text[start:start+delimiter].replace(",", "")                  #Remove commas from string so it can be converted to numeric
    militaryBudgets.append(float(number))
mb_dict = dict(zip(militaryBudgetNames, militaryBudgets))
if debug:
    print(mb_dict)

#Merge power index and military budget dictionaries by the unique key: country name
#Countries that are missing from the budget page are skipped instead of raising a KeyError
bigDict = {key: [pi_dict[key], mb_dict[key]] for key in pi_dict if key in mb_dict}
print(bigDict)


#Convert military rankings from negative exponential to positive linear
outliers = {}
for key in bigDict.keys():
    strength = math.exp((bigDict[key][0]-5)*-5)/100000000       #Linearize the power index according to its trend (exponential function)
    budget = bigDict[key][1]/1000000000                         #Divide by 1,000,000,000 so the budget is expressed in billions
    bigDict[key] = [strength, budget]
    if budget > outlierBudget:
        outliers[key] = bigDict[key]    #Add outliers to outlier dictionary to be removed later
print(bigDict)

#Remove Outliers
for key in outliers.keys():
    del bigDict[key]


{'United States': [0.0699, 831781000000.0], 'Russia': [0.0702, 109000000000.0], 'China': [0.0706, 227000000000.0], 'India': [0.1023, 74000000000.0], 'South Korea': [0.1416, 44700000000.0], 'United Kingdom': [0.1443, 62816382000.0], 'Japan': [0.1601, 53000000000.0], 'Turkiye': [0.1697, 40000000000.0], 'Pakistan': [0.1711, 6349876689.0], 'Italy': [0.1863, 31600000000.0], 'France': [0.1878, 49730000000.0], 'Brazil': [0.1944, 24752000000.0], 'Indonesia': [0.2251, 25000000000.0], 'Iran': [0.2269, 9954451000.0], 'Egypt': [0.2283, 9400000000.0], 'Australia': [0.2515, 52559000000.0], 'Israel': [0.2596, 24400000000.0], 'Ukraine': [0.2598, 42000000000.0], 'Germany': [0.2847, 55941410000.0], 'Spain': [0.2882, 22000000000.0], 'Poland': [0.2917, 38360000000.0], 'Vietnam': [0.3158, 7900000000.0], 'Saudi Arabia': [0.3235, 71720000000.0], 'Taiwan': [0.3302, 19100000000.0], 'Thailand': [0.3389, 7700000000.0], 'Algeria': [0.3589, 21600000000.0], 'Canada': [0.3813, 26500000000.0], 'Argentina': [0.3823, 2

In [None]:

#NOTE(review): this unexecuted cell repeats the scraping cell above, including its definition of findNames;
    #consider deleting one copy so the function is only defined once in the notebook
#This function takes in an http address and collects the names of the countries at that address
#This is necessary because each webpage at globalfirepower.com has the countries listed in order based on the specific statistic the page is displaying
def findNames(https):
    """Download the page at https and return the long form country names found in it.

    Args:
        https: Address of the page to request.

    Returns:
        A list of country names in the order they appear on the page. The list is empty
        when the response status is not 200 or when the page contains no
        class="longFormName" element, so callers can treat an empty list as a failure.

    Exceptions raised by requests.get (for example on a timeout) are not caught here.
    """
    marker = 'class="longFormName"'
    #NOTE(review): the three offsets below are carried over unchanged from the original code; they depend
        #on the exact markup the site serves and must be re-checked if the page layout changes
    nameOffset = 93         #Characters between the start of the marker and the first character of the name
    windowEnd = 200         #The "<" delimiter is only searched for up to this many characters after the marker
    trailingChars = 20      #Characters between the end of the name and the "<" delimiter

    response = requests.get(https, timeout = 2)         #Limit the request to 2 seconds so the script cannot hang forever
    if debug:
        print(f"response: {response.text}")
        print(type(response.text))

    if response.status_code != 200:                     #The request failed, so there is nothing to search through
        return []

    text = response.text
    lfnIndeces = []                                     #Index of every occurrence of the marker in the html string
    index = text.find(marker)
    while index != -1:                                  #Only record an index once we know the marker was actually found
        lfnIndeces.append(index)
        index = text.find(marker, index+1)              #Only search the portion of the string not searched yet
    if debug:
        print(lfnIndeces)

    countryNames = []
    for index in lfnIndeces:
        start = index + nameOffset
        delimitor = text[start:index+windowEnd].find("<")               #Position of the delimiter relative to the start of the name
        countryNames.append(text[start:start+delimitor-trailingChars])  #Everything before the trailing characters is the long form name
    return countryNames

#Scraping settings
#NOTE(review): every character offset below is carried over unchanged from the original scraper; the offsets
    #depend on the exact markup globalfirepower.com serves and must be re-checked whenever the site changes
requestTimeout = 2                                  #Seconds to wait for each page (the same limit findNames uses)
powerIndexMarker = 'class="pwrIndxContainer"'
powerIndexStart = 84                                #Offset from the marker to the first character of the power index
powerIndexEnd = 90                                  #Offset from the marker to one past the last character of the power index
budgetMarker = 'class="valueContainer"'
budgetStart = 227                                   #Offset from the marker to the first character of the budget figure
budgetWindowEnd = 300                               #The space that ends the budget figure is only searched for up to this offset
outlierBudget = 200                                 #Budgets above this many billions of USD are treated as outliers

def findMarkers(text, marker):
    """Return the index of every occurrence of marker in text, in the order they appear."""
    positions = []
    index = text.find(marker)
    while index != -1:                              #Only record an index once we know the marker was found
        positions.append(index)
        index = text.find(marker, index+1)          #Continue after the occurrence that was just recorded
    return positions

#Power Index Scraping
powerIndexHTTPS = "https://www.globalfirepower.com/countries-listing.php"           #Webpage that we will scrape
powerIndexNames = findNames(powerIndexHTTPS)                                        #Collect the names once and reuse them below
if len(powerIndexNames) == 0:                                                       #Check if there are any country names in the page
    print("There was an error in processing your https request (Country Names)")
    sys.exit()

response = requests.get(powerIndexHTTPS, timeout = requestTimeout)                  #Make http request
if response.status_code != 200:
    print("There was an error processing your https request (Power Indeces)")
    sys.exit()                                                                      #Stop here: without this page pi_dict cannot be built

powerIndeces = []
for index in findMarkers(response.text, powerIndexMarker):
    powerIndeces.append(float(response.text[index+powerIndexStart:index+powerIndexEnd]))    #Convert string to numeric
if debug:
    print(powerIndeces)

pi_dict = dict(zip(powerIndexNames, powerIndeces))                                  #Zip names with power indices
if debug:
    print(pi_dict)

#Military Budget Scraping
militaryBudgetHTTPS = "https://www.globalfirepower.com/defense-spending-budget.php"
militaryBudgetNames = findNames(militaryBudgetHTTPS)                                #This page lists the countries in its own order
if len(militaryBudgetNames) == 0:
    print("There was an error in processing your https request (Names: Military Budget)")
    sys.exit()

response = requests.get(militaryBudgetHTTPS, timeout = requestTimeout)
if response.status_code != 200:
    print("There was an error processing your https request (Military Budget)")
    sys.exit()                                                                      #Stop here: without this page mb_dict cannot be built

militaryBudgets = []
for index in findMarkers(response.text, budgetMarker):
    start = index + budgetStart
    delimiter = response.text[start:index+budgetWindowEnd].find(" ")                #The figure ends at the first space
    if delimiter == -1:
        raise ValueError(f"No space found after the budget figure that starts at character {start}")
    number = response.text[start:start+delimiter].replace(",", "")                  #Remove commas from string so it can be converted to numeric
    militaryBudgets.append(float(number))
mb_dict = dict(zip(militaryBudgetNames, militaryBudgets))
if debug:
    print(mb_dict)

#Merge power index and military budget dictionaries by the unique key: country name
#Countries that are missing from the budget page are skipped instead of raising a KeyError
bigDict = {key: [pi_dict[key], mb_dict[key]] for key in pi_dict if key in mb_dict}
print(bigDict)


#Convert military rankings from negative exponential to positive linear
outliers = {}
for key in bigDict.keys():
    strength = math.exp((bigDict[key][0]-5)*-5)/100000000       #Linearize the power index according to its trend (exponential function)
    budget = bigDict[key][1]/1000000000                         #Divide by 1,000,000,000 so the budget is expressed in billions
    bigDict[key] = [strength, budget]
    if budget > outlierBudget:
        outliers[key] = bigDict[key]    #Add outliers to outlier dictionary to be removed later
print(bigDict)

#Remove Outliers
for key in outliers.keys():
    del bigDict[key]


In [22]:
# Print every country as a (budget, strength) point so the data can be pasted into desmos to ensure accuracy
for strength, budget in bigDict.values():
    print(f"({budget}, {strength})")


(109.0, 506.90279331529)
(74.0, 431.73814388504184)
(44.7, 354.7166337868907)
(62.816382, 349.9601378176232)
(53.0, 323.37713930778966)
(40.0, 308.22167744272394)
(6.349876689, 306.07165954250775)
(31.6, 283.6721744325216)
(49.73, 281.55259149582594)
(24.752, 272.4129888244302)
(25.0, 233.64882869240031)
(9.954451, 231.55542368715774)
(9.4, 229.9401956151087)
(52.559, 204.75604703542854)
(24.4, 196.62910845725239)
(42.0, 196.4325776305858)
(55.94141, 173.4378385688673)
(22.0, 170.42907981861578)
(38.36, 167.47251630610427)
(7.9, 148.4605484654362)
(71.72, 142.8534466365051)
(19.1, 138.14712715616588)
(7.7, 132.26655679345726)
(21.6, 119.67972974149829)
(26.5, 106.99897511458146)
(2.544, 106.46531549983553)
(11.349506, 97.01050349042808)
(13.493, 93.2999205265655)
(14.824, 84.971784646386)
(5.974, 81.84434863517501)
(2.69534615, 71.04541042513789)
(4.1, 68.98018269645783)
(2.7, 52.13410056462833)
(3.5, 50.54273702169628)
(6.99504, 47.93372156752597)
(4.9089, 43.58962904184414)
(4.0, 43.

In [27]:
#Linear Regression Machine Learning Model
#Fits strength = m * budget (a line through the origin) by repeatedly halving an interval of candidate slopes

if len(bigDict) == 0:
    raise ValueError("bigDict is empty, so there are no data points to fit a slope to")

#Find the maximum and minimum possible slopes for the initial bounds to exclude excessively large and small guesses
    #Local slope is calculated by y/(x+1) where y is strength and x is budget
    #1 is added to x as a negligible value to prevent divide by 0 errors
localSlopes = [abs(bigDict[key][0] / (bigDict[key][1]+1)) for key in bigDict]
maxSlope = max(localSlopes)     #max and min are taken independently, so a point that raises the maximum
minSlope = min(localSlopes)         #is still considered for the minimum
if debug:
    print(f"min: {minSlope}, max: {maxSlope}")

def totalAbsResidual(slope):
    """Return the sum over every country of |strength - slope*budget|."""
    total = 0
    for key in bigDict:
        total += abs(bigDict[key][0] - slope*bigDict[key][1])      #abs is used because otherwise the residuals would cancel out
    return total

slopeTolerance = 0.0000000001                   #Stop once the two bounds are this close to each other
m = (maxSlope - minSlope)/2 + minSlope          #Defined up front so m exists even when the loop body never runs

#NOTE(review): only the residuals at the two bounds are compared, which does not guarantee that the half
    #being discarded excludes the best slope; a ternary search would. Left unchanged so the fitted m stays the same
while(maxSlope-minSlope > slopeTolerance):      #Continue until the difference between the max slope and min slope is negligible
    m = (maxSlope - minSlope)/2 + minSlope      #This will be our next slope
    maxResid = totalAbsResidual(maxSlope)       #Total residual using the upper bound slope
    minResid = totalAbsResidual(minSlope)       #Total residual using the lower bound slope
    if debug:
        print(f"Max: {maxSlope}")
        print(f"Min {minSlope}")
    if maxResid < minResid:                     #The upper bound fits better, so keep the half between the middle and the upper bound
        minSlope = m
    else:                                       #Otherwise keep the half between the lower bound and the middle
        maxSlope = m


print(m)

4.880050934315926


In [28]:
def predict(military_spending):
    """Print the predicted power index for a budget and the country whose power index is closest to it.

    The prediction is m*military_spending, where m is the slope fitted earlier in the notebook.
    The closest country is found by comparing against every entry of bigDict, so predictions
    above the strongest country or below the weakest one simply match that country, and the
    result does not depend on the order of bigDict.

    Args:
        military_spending: Military spending in billions USD.

    Raises:
        ValueError: If bigDict is empty, because there is no country to compare against.
    """
    powerIndex = m*military_spending        #Predict the power index by multiplying the military spending by the slope
    if len(bigDict) == 0:
        raise ValueError("bigDict is empty, so there is no country to compare against")

    def distance(name):
        #Rank by distance to the prediction; when two countries are equally close the one with
            #the lower power index is chosen
        return (abs(bigDict[name][0] - powerIndex), bigDict[name][0])

    closest = min(bigDict, key=distance)    #Find the country that has the most similar power index to yours
    print(f"Your country's power index is {powerIndex}. This is most similar to {closest}'s military")

In [30]:
#Predict the power score of a country for a few example budgets (military spending in billions USD)
for spending in (0.14, 10, 25):
    predict(spending)

Your country's power index is 0.6832071308042297. This is most similar to Sudan's military
Your country's power index is 48.800509343159256. This is most similar to Bangladesh's military
Your country's power index is 122.00127335789814. This is most similar to Algeria's military
