In [None]:
import pandas as pd
import numpy as np
from functools import reduce
import random

In [None]:
# Read the processed production and export tables, then stack them row-wise
# into a single frame (the original row labels of both files are kept).
production_df, export_df = (
  pd.read_csv(csv_path)
  for csv_path in (
    '/content/drive/MyDrive/data/Asia - Production_processed.csv',
    '/content/drive/MyDrive/data/Asia - Export_processed.csv',
  )
)
df = pd.concat((production_df, export_df))
# Country adjacency table; the columns 'country_name' and 'neighbor_name' are used below.
neighboring_countries_df = pd.read_csv('https://raw.githubusercontent.com/evpu/Bordering-Countries/master/neighbors.csv')

In [None]:
# Print every row and column of df; the display limits are lifted only inside
# this `with` block and revert afterwards.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(df)

In [None]:
# Print the complete neighbors table; the display limits are lifted only inside
# this `with` block and revert afterwards.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(neighboring_countries_df)

In [None]:
def set_year(df, last_year):
  """Return the rows of `df` whose 'Year' is not greater than `last_year`.

  Args:
    df: DataFrame with a 'Year' column.
    last_year: latest year to keep (inclusive).

  Returns:
    A filtered DataFrame; the input frame is not modified.
  """
  # A single boolean mask replaces the per-row re-filtering loop. Negating
  # `> last_year` (instead of using `<=`) keeps rows with a missing year,
  # exactly as the loop did.
  return df[~(df['Year'] > last_year)]

# Preview only: the result is not assigned, so `df` itself is left unfiltered here.
set_year(df, 2015)

Unnamed: 0.1,Unnamed: 0,Area Code,Area,Item Code,Item,Year,Value
0,0,2,Afghanistan,4022,Diammonium phosphate (DAP),2013,3346.00
1,1,2,Afghanistan,4022,Diammonium phosphate (DAP),2014,3346.00
2,2,2,Afghanistan,4022,Diammonium phosphate (DAP),2015,3346.00
4,4,2,Afghanistan,4001,Urea,2002,50900.00
5,5,2,Afghanistan,4001,Urea,2003,35230.00
...,...,...,...,...,...,...,...
6817,6817,249,Yemen,4024,Other NP compounds,2006,48.50
6818,6818,249,Yemen,4014,"Other phosphatic fertilizers, n.e.c.",2009,56.25
6819,6819,249,Yemen,4014,"Other phosphatic fertilizers, n.e.c.",2010,1.48
6820,6820,249,Yemen,4011,Phosphate rock,2009,8.00


In [None]:
# Distinct item names and country names, in order of first appearance in df.
items = df['Item'].drop_duplicates().tolist()
countries = df['Area'].drop_duplicates().tolist()
print(items)
print(countries)

In [None]:
# Sum 'Value' per item over all rows of df up to the cutoff year, then order
# the items from the largest total to the smallest.
total_values = dict()
year = 2015
# Filter with `year` itself (not a second literal) so the cutoff that is applied
# and the year stored in `year` cannot drift apart.
df = set_year(df, year)
for item in items:
  total_values[item] = df['Value'][df['Item'] == item].sum()
items_sorted_by_production_and_export = dict(
  sorted(total_values.items(), key=lambda pair: pair[1], reverse=True)
)
for item, total_value in items_sorted_by_production_and_export.items():
  print(item, total_value)

Urea 1013982686.07
Potassium chloride (muriate of potash) (MOP) 249266572.05
NPK fertilizers 177852456.77999997
Phosphate rock 171310052.56
Diammonium phosphate (DAP) 164307468.10999998
Ammonia, anhydrous 162747072.61
Ammonium nitrate (AN) 131158423.13
Ammonium sulphate 122536795.08999999
Other NP compounds 95302979.58000001
Superphosphates above 35% 59568649.8
Superphosphates, other 41218656.0
Monoammonium phosphate (MAP) 40593781.529999994
Urea and ammonium nitrate solutions (UAN) 26248648.7
Calcium ammonium nitrate (CAN) and other mixtures with calcium carbonate 24454647.060000002
Other nitrogenous fertilizers, n.e.c. 17388906.22
Fertilizers n.e.c. 9888023.2
Other potassic fertilizers, n.e.c. 7975366.630000001
Potassium nitrate 5733327.44
Potassium sulphate (sulphate of potash) (SOP) 5435555.68
Other phosphatic fertilizers, n.e.c. 4146970.09
PK compounds 805353.62
Sodium nitrate 498442.08
Other NK compounds 49802.0


In [None]:
# Map every country in neighboring_countries_df to the list of its neighbors.
# The names are grouped straight into lists: joining them with '-' and splitting
# again would cut hyphenated names (e.g. "Timor-Leste") into pieces.
# Non-string entries (missing neighbor names) are skipped.
neighborhood = {
  country_name: [name for name in neighbor_names if isinstance(name, str)]
  for country_name, neighbor_names in neighboring_countries_df.groupby('country_name')['neighbor_name']
}

In [None]:
#correct countries and neighbors names and make them like country names in df
def name_converter(country_name_to_convert):
  """Return the names in `countries` that contain the given name as a substring.

  Returns the list of matching names, or None when nothing matches.
  """
  found = []
  for candidate in countries:
    if country_name_to_convert in candidate:
      found.append(candidate)
  return found if found else None

# Names used in the neighbors table mapped to the spelling used in df.
# 'North Korea' previously pointed at 'Republic of Korea', which merged the two
# Koreas into one entry.
# NOTE(review): assumes df spells it "Democratic People's Republic of Korea" — confirm against `countries`.
misspells = {
  'Vietnam': 'Viet Nam',
  'South Korea': 'Republic of Korea',
  'North Korea': "Democratic People's Republic of Korea",
}

converted_neighborhood_names = []
for country, neighbors in neighborhood.items():
  # Slice assignment updates the list stored in `neighborhood` in place without
  # modifying it while it is being iterated.
  neighbors[:] = [misspells.get(neighbor, neighbor) for neighbor in neighbors]
  country = misspells.get(country, country)
  country_matches = name_converter(country)  # looked up once instead of twice
  if country_matches:
    if country in country_matches:
      # name_converter matches by substring, so a longer name may come first
      # (e.g. a name that merely contains this one). Put the exact match in
      # front because the first entry becomes the dictionary key below.
      country_matches = [country] + [match for match in country_matches if match != country]
    neighbor_matches = [name_converter(country_name) for country_name in neighbors]
    converted_neighborhood_names.append([country_matches, neighbor_matches])

country_neighbors = dict()

for country in converted_neighborhood_names:
  #remove None values
  country[1] = [i for i in country[1] if i]
  #flatten; the [] initial value avoids a TypeError when no neighbor matched
  country[1] = reduce(lambda z, y: z + y, country[1], [])

  country_neighbors.update({country[0][0]: country[1]})

In [None]:
def find_neighbors(country, neighbors_by_country=None):
  """Return the list of neighbors recorded for `country`.

  Args:
    country: country name to look up.
    neighbors_by_country: optional mapping of country -> list of neighbors;
      defaults to the module-level `country_neighbors`.

  Returns:
    The stored neighbor list, or an empty list when the country has no entry.
    (Previously the country string itself was returned in that case, which
    callers would then iterate character by character.)
  """
  if neighbors_by_country is None:
    neighbors_by_country = country_neighbors
  return neighbors_by_country.get(country, [])


#check for an item in a particular country
def contain_item(country, item, data=None):
  """Tell whether `data` has at least one row for `item` in `country`.

  Args:
    country: value compared against the 'Area' column.
    item: value compared against the 'Item' column.
    data: optional DataFrame to search; defaults to the module-level `df`.

  Returns:
    True or False (always a plain bool, never None).
  """
  if data is None:
    data = df
  matches = (data['Area'] == country) & (data['Item'] == item)
  return bool(matches.any())


def neighbor_containing_item(country, item):
  """Tell whether any neighbor of `country` has data for `item`.

  Returns:
    True if contain_item is truthy for at least one neighbor, otherwise False
    (always a plain bool, never None).
  """
  neighbors = find_neighbors(country)
  if isinstance(neighbors, str):
    # Guard against a bare string result: iterating it would yield single
    # characters, none of which is a country, so treat it as "no neighbors".
    neighbors = []
  return any(contain_item(neighbor, item) for neighbor in neighbors)


def best_items_for_country(country, neighbor_boost=1.5):
  """Rank all items for `country`, favouring items found in neighboring countries.

  Every item starts from its total in `items_sorted_by_production_and_export`;
  the total is multiplied by `neighbor_boost` when neighbor_containing_item
  reports the item for at least one neighbor of `country`.

  Args:
    country: country name to rank items for.
    neighbor_boost: multiplier applied to items present in a neighbor
      (default 1.5, the value that used to be hard-coded).

  Returns:
    dict of item -> score, ordered from the highest score to the lowest.
  """
  scores = {}
  for item, value in items_sorted_by_production_and_export.items():
    if neighbor_containing_item(country, item):
      value = value * neighbor_boost
    scores[item] = value
  return dict(sorted(scores.items(), key=lambda pair: pair[1], reverse=True))

print(best_items_for_country('Afghanistan'))

{'Urea': 1520974029.105, 'Potassium chloride (muriate of potash) (MOP)': 373899858.07500005, 'NPK fertilizers': 266778685.16999996, 'Phosphate rock': 256965078.84, 'Diammonium phosphate (DAP)': 246461202.16499996, 'Ammonia, anhydrous': 244120608.91500002, 'Ammonium nitrate (AN)': 196737634.695, 'Ammonium sulphate': 183805192.635, 'Other NP compounds': 142954469.37, 'Superphosphates above 35%': 89352974.69999999, 'Superphosphates, other': 61827984.0, 'Monoammonium phosphate (MAP)': 60890672.29499999, 'Urea and ammonium nitrate solutions (UAN)': 39372973.05, 'Calcium ammonium nitrate (CAN) and other mixtures with calcium carbonate': 36681970.59, 'Other nitrogenous fertilizers, n.e.c.': 26083359.33, 'Fertilizers n.e.c.': 14832034.799999999, 'Other potassic fertilizers, n.e.c.': 11963049.945, 'Potassium nitrate': 8599991.16, 'Potassium sulphate (sulphate of potash) (SOP)': 8153333.52, 'Other phosphatic fertilizers, n.e.c.': 6220455.135, 'PK compounds': 1208030.43, 'Sodium nitrate': 747663.

**GA Part**

**Creating initial population**

In [None]:
def make_first_population(population_size):
  """Build `population_size` chromosomes, each a random permutation of `items`."""
  return [list(np.random.permutation(items)) for _ in range(population_size)]

# Show a sample population, one numbered chromosome per line.
for i, j in enumerate(make_first_population(65)):
  print(i, j)

Fitness Function

In [None]:
def fitness(chromosome, country, target_order=None):
  """Count the positions where `chromosome` agrees with the target ranking.

  Args:
    chromosome: sequence of items.
    country: country whose ranking is the target.
    target_order: optional precomputed target (sequence of items). When omitted
      it is taken from the key order of best_items_for_country(country).

  Returns:
    Number of positions holding the same item in both sequences.
  """
  if target_order is None:
    target_order = list(best_items_for_country(country))
  return sum(1 for gene, wanted in zip(chromosome, target_order) if gene == wanted)

def has_goal(generation, country, target_order=None):
  """Return True if some chromosome in `generation` matches the target at every position."""
  if target_order is None:
    # Compute the target once for the whole generation, not once per chromosome.
    target_order = list(best_items_for_country(country))
  return any(
    fitness(chromosome, country, target_order) == len(chromosome)
    for chromosome in generation
  )

def sort_by_fitness(population, country, target_order=None):
  """Order the distinct chromosomes of `population` by descending fitness.

  Duplicate chromosomes collapse into a single entry because fitness values are
  keyed by the chromosome's tuple form.

  Returns:
    (chromosomes, fitnesses): two parallel lists, fittest first. The second
    element used to be a dict view; it is now a list, which supports the same
    iteration plus indexing.
  """
  if target_order is None:
    # Compute the target once for the whole population, not once per chromosome.
    target_order = list(best_items_for_country(country))
  fitnesses = dict()
  for chromosome in population:
    fitnesses[tuple(chromosome)] = fitness(chromosome, country, target_order)
  # One stable sort feeds both return values.
  ranked = sorted(fitnesses.items(), key=lambda item: item[1], reverse=True)
  sorted_chromosomes = [list(chromosome) for chromosome, _ in ranked]
  sorted_fitnesses = [score for _, score in ranked]
  return (sorted_chromosomes, sorted_fitnesses)


initial_population = make_first_population(65)
sort_by_fitness(initial_population, 'Turkey')

def best_parents_in_population(population, n, country):
  """Return the first `n` chromosomes of `population` as ordered by sort_by_fitness.

  Only the first element of sort_by_fitness's result (the ordered chromosomes)
  is used.
  """
  ranked_chromosomes = sort_by_fitness(population, country)[0]
  return ranked_chromosomes[:n]

print(best_parents_in_population(initial_population, 30, 'Turkey'))

Mutation

In [None]:
def swap_position(array):
  """Swap one randomly chosen pair of adjacent elements of `array` in place.

  A random index is drawn; the element there is swapped with its left
  neighbor, except index 0, which is swapped with index 1.

  Args:
    array: mutable sequence; modified in place.

  Returns:
    The same `array` object. Sequences with fewer than two elements are
    returned unchanged (they used to raise).
  """
  if len(array) < 2:
    return array
  random_index = random.randint(0, len(array) - 1)
  # Index 0 has no left neighbor, so it pairs with index 1 instead.
  partner = 1 if random_index == 0 else random_index - 1
  array[random_index], array[partner] = array[partner], array[random_index]
  return array

def mutate(chromosome):
  """Mutate `chromosome` by delegating to swap_position and return the result."""
  return swap_position(chromosome)

# Quick demonstration on a five-gene chromosome.
a = list('12345')
print(mutate(a))

['1', '2', '3', '5', '4']


Crossover

In [None]:
def place_a_gene(chromosome, child):
    """Fill the first 'empty' slot of `child` with the first gene of `chromosome` not yet in `child`.

    Args:
        chromosome: donor sequence of genes.
        child: list under construction; unfilled slots hold the string 'empty'.
            Modified in place.

    Returns:
        `child` after placing one gene, or None when `child` has no 'empty'
        slot or every gene of `chromosome` is already present.
    """
    if 'empty' not in child:
        return None
    slot = child.index('empty')
    for gene in chromosome:
        if gene not in child:
            child[slot] = gene
            return child
    return None

def crossover(parent1, parent2):
    """Build a child chromosome from two parents.

    The child starts with the first quarter of `parent1` (length // 4 genes).
    The remaining slots are filled one at a time, alternating between the
    parents (parent1 on even steps, parent2 on odd steps); each step appends
    the first gene of that parent that the child does not contain yet.

    For parents that are permutations of the same genes this yields the same
    child as the previous implementation, without iterating over a list that
    is being modified.

    Args:
        parent1: sequence of genes.
        parent2: sequence of genes.

    Returns:
        A new list with len(parent1) entries; the parents are not modified.
        If neither parent can supply an unused gene for a slot, that slot holds
        the placeholder string 'empty'.
    """
    size = len(parent1)
    point = size // 4
    child = list(parent1[:point])
    donors = (parent1, parent2)
    for step in range(size - point):
        gene_added = False
        # Preferred donor first; the other parent is only a fallback.
        for donor in (donors[step % 2], donors[(step + 1) % 2]):
            for gene in donor:
                if gene not in child:
                    child.append(gene)
                    gene_added = True
                    break
            if gene_added:
                break
        if not gene_added:
            child.append('empty')
    return child

def crossover_in_population(best_parents, number_of_childs):
  """Breed `number_of_childs` children from randomly paired parents.

  Both parents are drawn independently from `best_parents` (the same parent may
  be drawn twice). Each child comes from crossover(parent1, parent2) and is
  additionally passed through mutate with a one-in-five chance.

  Args:
    best_parents: non-empty list of chromosomes to breed from.
    number_of_childs: number of children to produce.

  Returns:
    List of the new child chromosomes.
  """
  new_generation = []
  for _ in range(number_of_childs):
    parent1 = best_parents[random.randint(0, len(best_parents) - 1)]
    parent2 = best_parents[random.randint(0, len(best_parents) - 1)]
    # The stray `mutate(chromosome)` call that stood here referenced a name
    # that is not defined in this function and has been removed.
    child = crossover(parent1, parent2)
    if random.randint(1, 5) == 1:
      child = mutate(child)
    new_generation.append(child)
  return new_generation

# Demonstration: cross two random permutations of the same 16 genes and print
# both parents next to the resulting child.
a = list('abcdefghijklmnop')
t, s = (list(np.random.permutation(a)) for _ in range(2))
print(f'dad:{t}')
print(f'mum:{s}')
print(f'child:{crossover(t, s)}')


dad:['p', 'k', 'b', 'c', 'e', 'l', 'g', 'o', 'j', 'f', 'm', 'd', 'a', 'n', 'i', 'h']
mum:['a', 'j', 'c', 'p', 'o', 'm', 'f', 'e', 'd', 'i', 'b', 'n', 'g', 'h', 'k', 'l']
child:['p', 'k', 'b', 'c', 'e', 'a', 'l', 'j', 'g', 'o', 'f', 'm', 'd', 'i', 'n', 'h']


In [None]:
def GA(country, crossover_probability=0.80, initial_population_size=100,
       selected_parents_for_crossover=30, child_in_gen=50, max_generations=None):
  """Run the genetic algorithm until a chromosome matches the target ranking for `country`.

  Each generation takes the fittest chromosomes as parents, breeds
  `child_in_gen` children from them and carries the top parents over
  unchanged. Progress and the final result are printed; nothing is returned.
  The final message also reads the module-level `year`.

  Args:
    country: country name passed on to the fitness functions.
    crossover_probability: share of the initial population size regarded as
      bred; the remainder is the number of parents carried over each generation.
    initial_population_size: number of random chromosomes to start from.
    selected_parents_for_crossover: number of fittest chromosomes used as parents.
    child_in_gen: number of children bred per generation.
    max_generations: optional cap on the number of generations; None (default)
      keeps iterating until the goal is reached, as before.
  """
  num_to_crossover = crossover_probability * initial_population_size
  num_to_next_gen = int(initial_population_size - num_to_crossover)
  population = make_first_population(initial_population_size)
  # Rank once per generation and reuse the result for the goal test, parent
  # selection and reporting, instead of re-evaluating fitness for each of them.
  ranked, scores = sort_by_fitness(population, country)
  scores = list(scores)
  generation_number = 1
  # Goal: some chromosome agrees with the target at every position.
  while not any(score == len(chromosome) for chromosome, score in zip(ranked, scores)):
    if max_generations is not None and generation_number > max_generations:
      print(f'stopped after {max_generations} generations without reaching the goal')
      break
    best_parents = ranked[:selected_parents_for_crossover]
    population = crossover_in_population(best_parents, child_in_gen)
    population = population + best_parents[0:num_to_next_gen]
    ranked, scores = sort_by_fitness(population, country)
    scores = list(scores)
    print(f'generation number = {generation_number}\nbest fitness in this generation {scores[0]}')
    print(population)
    generation_number += 1

  # Report only the fittest chromosome; the whole ranked population used to be
  # printed under the "best items" label.
  print(f'best items for {country} in {year}: {ranked[0]}')


GA('Afghanistan')