Done by:

Tymofii Kuzmenko (20221690)

Artem Khomitskyi (20221686)

David Azarov (20221688)

Step 1: Determine the best option from the provided profits

In [None]:
import numpy as np
%run '/fixed_data.py'

# Activity names and the profit column that belongs to each one (same order)
activities = ['foraging', 'farming', 'mining', 'animal_care', 'fishing']
profit_keys = ['profit_foraging', 'profit_farming', 'profit_mining', 'profit_animal_care', 'profit_fishing']

for case in data:
    # The activity with the highest profit becomes the label of this case
    profits = [case[profit_key] for profit_key in profit_keys]
    best_option = activities[np.argmax(profits)]
    case['best_option'] = best_option

    # The raw profits are not needed once the label is known
    for profit_key in profit_keys:
        del case[profit_key]

Step 2: Remove all empty data

In [None]:
# Keep only the cases in which every field has a value (no None anywhere)
data = [
    case
    for case in data
    if all(value is not None for value in case.values())
]

Step 3: Encode our data

In [None]:
# Integer codes for every categorical value
season_mapping = {'Spring': 0, 'Summer': 1, 'Fall': 2, 'Winter': 3}
spirits_mapping = {'very displeased': 0, 'displeased': 1, 'neutral': 2, 'pleased': 3, 'very pleased': 4}
fish_category_mapping = {'A': 0, 'A1': 1, 'B': 2, 'B1': 3, 'C': 4, 'C1': 5, 'D': 6, 'D1': 7}
forage_category_mapping = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
available_quest_mapping = {'profit_foraging': 0, 'profit_farming': 1, 'profit_mining': 2, 'profit_animal_care': 3, 'profit_fishing': 4}
best_option_mapping = {'foraging': 0, 'farming': 1, 'mining': 2, 'animal_care': 3, 'fishing': 4}

# Which mapping encodes which field of a case (applied in this order)
encoders = {
    'season': season_mapping,
    'spirits': spirits_mapping,
    'fish_category': fish_category_mapping,
    'forage_category': forage_category_mapping,
    'available_quest': available_quest_mapping,
    'best_option': best_option_mapping,
}

# Replace every categorical value with its numeric code, in place
for case in data:
    for field, mapping in encoders.items():
        case[field] = mapping[case[field]]

Step 4: Shuffling and splitting the data

In [None]:
def shuffle_and_split(data, helper_fraction=0.65):
    """Randomly split ``data`` into a helper part and an assessor part.

    The input sequence itself is left untouched: a shallow copy is shuffled
    with ``np.random.shuffle`` and then cut in two.

    Args:
        data: Sequence of cases to split.
        helper_fraction: Share of the cases that goes to the helper list.
            The default of 0.65 leaves the remaining 35% for the assessor.

    Returns:
        A ``(helper, assessor)`` tuple of lists. The lists are new, but the
        cases inside them are the same objects as in ``data``.

    Raises:
        ValueError: If ``helper_fraction`` is outside the range [0, 1].
    """
    if not 0 <= helper_fraction <= 1:
        raise ValueError('helper_fraction must be between 0 and 1')

    # Shuffle a copy so that the caller's list keeps its original order
    shuffled = list(data)
    np.random.shuffle(shuffled)

    split_index = int(helper_fraction * len(shuffled))
    helper = shuffled[:split_index]
    assessor = shuffled[split_index:]
    return helper, assessor

Step 5: Creating suggestions

In [None]:
def euclidean_distance(x1, x2):
    """Return the Euclidean distance between ``x1`` and ``x2``."""
    difference = x1 - x2
    squared_total = np.sum(difference ** 2)
    return np.sqrt(squared_total)

def make_suggestion(x, helper, k=6):
    """Suggest an option for ``x`` by majority vote of its nearest neighbours.

    The Euclidean distance from ``x`` to the feature vector of every helper
    entry is computed, the ``k`` closest entries are selected and the option
    that occurs most often among them is returned.

    Args:
        x: Feature vector of the case that needs a suggestion.
        helper: Sequence of ``(feature_vector, best_option)`` pairs, where
            ``best_option`` is a non-negative integer code.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The integer code of the most frequent option among the neighbours.
        When several options are equally frequent, the smallest code wins
        (this is how ``argmax`` over the ``np.bincount`` result resolves ties).

    Raises:
        ValueError: If ``helper`` is empty or ``k`` is smaller than 1.
    """
    if len(helper) == 0:
        raise ValueError('helper must contain at least one labelled case')
    if k < 1:
        raise ValueError('k must be at least 1')

    distances = [euclidean_distance(x, h[0]) for h in helper]

    # A stable sort keeps equally distant neighbours in helper order, so the
    # chosen neighbours do not depend on the sorting algorithm's internals
    sorted_indices = np.argsort(distances, kind='stable')
    k_nearest_indices = sorted_indices[:k]

    k_nearest_best_options = [helper[i][1] for i in k_nearest_indices]
    return np.bincount(k_nearest_best_options).argmax()

def _case_to_feature_vector(case):
    """Build the numeric feature vector of one encoded case."""
    return np.array([
        case['season'],
        case['day'],
        case['spirits'],
        int(case['plot_fertilized']),
        case['fish_category'],
        case['mining_level'],
        int(case['desert_unlocked']),
        int(case['special_crops']),
        case['forage_category'],
        int(case['is_raining']),
        int(case['tomorrow_rain']),
        case['current_money'],
        case['available_quest'],
    ])


def create_feature_vectors(helper, assessor):
    """Attach a suggested option to every assessor case.

    Every helper case is turned into a ``(feature_vector, best_option)``
    pair. Each assessor case is then converted to a feature vector (without
    its best option) and passed to ``make_suggestion`` together with those
    pairs.

    Args:
        helper: Cases with a known ``'best_option'`` used as reference data.
        assessor: Cases that should receive a suggestion.

    Returns:
        The same ``assessor`` object; each of its cases has been modified in
        place and now carries a ``'suggested_option'`` entry.

    Raises:
        KeyError: If a case lacks one of the fields used as a feature.
    """
    # Pair every helper vector with its known answer
    helper_vectors = [
        (_case_to_feature_vector(case), case['best_option'])
        for case in helper
    ]

    # The vector is built by the same helper for both lists, so the feature
    # order can never drift apart between helper and assessor
    for case in assessor:
        x = _case_to_feature_vector(case)
        case['suggested_option'] = make_suggestion(x, helper_vectors)
    return assessor

Step 6: Calculate success scores for each activity

In [None]:
def calculate_success_scores(assessor):
    """Compute a success score for every activity.

    For each activity three counters are kept:
    right answers (RA) - the activity was the best option and was suggested;
    wrong suggestions (WS) - it was suggested but was not the best option;
    missing suggestions (MS) - it was the best option but was not suggested.

    With ``S1 = RA / (RA + WS)`` and ``S2 = RA / (RA + MS)`` the score is
    ``2 * S1 * S2 / (S1 + S2)``; an activity without right answers scores 0.

    Args:
        assessor: Cases holding integer ``'best_option'`` and
            ``'suggested_option'`` codes that index the activity list.

    Returns:
        Dictionary mapping every activity name to its success score.
    """
    activities = ['foraging', 'farming', 'mining', 'animal_care', 'fishing']

    # One tally per counter instead of a nested dictionary
    right = dict.fromkeys(activities, 0)
    wrong = dict.fromkeys(activities, 0)
    missed = dict.fromkeys(activities, 0)

    for case in assessor:
        expected = activities[case['best_option']]
        suggested = activities[case['suggested_option']]
        if expected != suggested:
            # A miss counts against both the expected and the suggested activity
            missed[expected] += 1
            wrong[suggested] += 1
            continue
        right[expected] += 1

    success_scores = {}
    for activity in activities:
        hits = right[activity]
        if hits == 0:
            # Without a single right answer the ratios below are undefined
            success_scores[activity] = 0
            continue
        s1 = hits / (hits + wrong[activity])
        s2 = hits / (hits + missed[activity])
        success_scores[activity] = 2 * s1 * s2 / (s1 + s2)
    return success_scores

Step 7: Iteration phase (run the algorithm 10 times)

In [None]:
# algorithm_results collects, per activity, the success score of every run
algorithm_results = {'foraging': [], 'farming': [], 'mining': [], 'animal_care': [], 'fishing': []}

# Run the whole algorithm 10 times, each time on a fresh random split
for i in range(10):
    helper, assessor = shuffle_and_split(data)
    assessor_with_suggestions = create_feature_vectors(helper, assessor)
    scs = calculate_success_scores(assessor_with_suggestions)
    for key, collected_scores in algorithm_results.items():
        collected_scores.append(scs[key])

Step 8: Calculating the main statistical properties

In [None]:
# stat holds the summary statistics of the success scores of each activity
stat = {'foraging': {}, 'farming': {}, 'mining': {}, 'animal_care': {}, 'fishing': {}}

# Summarise the scores collected for every activity over all runs
for key, scores in algorithm_results.items():
    stat[key].update({
        '25th percentile': np.percentile(scores, 25),
        'Median': np.median(scores),
        '75th percentile': np.percentile(scores, 75),
        'Mean': np.mean(scores),
        'Stdev': np.std(scores),
    })

Step 9: Printing the results

In [None]:
def print_row_separator():
    """Print the horizontal line that separates two rows of the table.

    The width of every segment is taken from the global ``column_widths``.
    """
    segments = ['+' + '-' * (width + 2) for width in column_widths]
    print(''.join(segments) + '+')

def print_data_row(row):
    """Print one table row, one cell per element of ``row``.

    Floating-point values are rounded to 4 decimal places; every other value
    is printed unchanged. Each cell is padded to the width found at the same
    position in the global ``column_widths``.

    Args:
        row: Sequence of cell values (the header row or a row of statistics).

    Raises:
        IndexError: If ``row`` has more cells than ``column_widths`` has widths.
    """
    # Iterate over the row that was passed in rather than over the global
    # header list, so the function prints exactly the cells it was given
    for i, value in enumerate(row):
        # isinstance also matches plain Python floats and other NumPy float
        # types, not only an exact np.float64
        if isinstance(value, (float, np.floating)):
            element = round(value, 4)
        else:
            element = value
        print(f'| {element:{column_widths[i]}} ', end='')
    print('|')

# Header row: an empty corner cell followed by one column per activity
columns = [''] + list(stat)

# One row per statistic: its name followed by its value for every activity
rows = [
    [label] + [stat[activity][label] for activity in stat]
    for label in stat['foraging']
]

# Define the width of the columns
column_widths = [15] * 6

# Output the table
print_row_separator()
print_data_row(columns)
print_row_separator()
for row in rows:
    print_data_row(row)
    print_row_separator()

+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+
|                 | foraging        | farming         | mining          | animal_care     | fishing         |
+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+
| 25th percentile |          0.2349 |          0.0955 |            0.26 |          0.0307 |          0.3472 |
+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+
| Median          |          0.2446 |          0.1033 |          0.2773 |           0.049 |          0.3676 |
+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+
| 75th percentile |          0.2632 |          0.1119 |          0.2867 |          0.0962 |          0.3801 |
+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+
| Mean    