# Vote Summarization

## PreAnalysis

### Loading Data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

In [2]:
# Locations of the experiment results, relative to this notebook
_VOTES_FILENAME = 'all.csv'
RESULTS_PATH = os.path.join('..', 'results')
VOTES_PATH = os.path.join(RESULTS_PATH, _VOTES_FILENAME)

#### Votes

The file for best techniques lists all the votes cast by many people. There are approx. 3070 votes. We will translate the CSV data (and its 4 columns) into just two arrays: one that contains the image names, and another that holds the votes for each technique for that image.

In [3]:
# Read the raw votes file: ';'-separated, no header row, and every field
# loaded with dtype='object' so pandas does not infer numeric types.
votes_df = pd.read_csv(
    VOTES_PATH,
    header=None,
    sep=';',
    dtype='object',
)
votes_df.head(3)

Unnamed: 0,0,1,2,3
0,1222,pool_table,0.9999995,lime
1,1328,coil,0.99999607,ig
2,134,zebra,0.9999949,xrai


In [4]:
def gen_name_technique_tuples(x):
    """Build the ``[image_name, technique]`` pair for one row of the votes table.

    Parameters
    ----------
    x : sequence or pandas.Series
        One vote. Items 0, 1 and 2 are joined with ``'__'`` to form the image
        file name; item 3 is the voted technique.

    Returns
    -------
    list
        ``['<x[0]>__<x[1]>__<x[2]>.jpg', x[3]]``.
    """
    # Every name component goes through str() so that a missing (NaN) value in
    # any of the three fields becomes a 'nan' placeholder instead of making
    # str.join raise TypeError on a non-string item.
    image_name = '__'.join(str(part) for part in (x[0], x[1], x[2])) + '.jpg'
    return [image_name, x[3]]

In [5]:
# Turn every row of the votes table into an [image_name, technique] pair
vote_per_img = votes_df.apply(gen_name_technique_tuples, axis='columns')
pairs_preview = vote_per_img.values  # underlying array of the Series
pairs_preview[:3]

array([list(['1222__pool_table__0.9999995.jpg', 'lime']),
       list(['1328__coil__0.99999607.jpg', 'ig']),
       list(['134__zebra__0.9999949.jpg', 'xrai'])], dtype=object)

In [6]:
# Total number of rows (votes) read from the file
len(vote_per_img)

3070

Once we have the (image name, technique) pairs, we will calculate, for each image, the number of votes cast for each technique, and we will also determine which technique is the best.

In [20]:
# For each image, count the votes received by each technique.
#
# Results (same layout as before):
#   unique_names : image names, in order of first valid appearance
#   tech_counts  : one row per image -> [ig, lime, xrai, anchor, best]

# Column order of the count rows; also used to look up the winning technique.
TECHNIQUES = ['ig', 'lime', 'xrai', 'anchor']
tech_to_idx = {tech: idx for idx, tech in enumerate(TECHNIQUES)}

# Keyed by image name: dict lookups replace the repeated linear scans of a
# list, and dicts preserve insertion order, so first-seen order is kept.
counts_by_name = {}
for name, tech in vote_per_img.values:
    tech_idx = tech_to_idx.get(tech)
    if tech_idx is None:
        # The technique is validated BEFORE the image is registered, so a row
        # with an invalid technique (e.g. NaN) can never create an all-zero
        # placeholder image (like "nan__anchor__nan.jpg"). This replaces the
        # previous clean-up that blindly popped the last 4 entries, which only
        # worked if the fake images happened to be exactly the last four.
        # NOTE(review): assumes fake images are exactly those with no valid
        # vote - confirm against the data.
        print(f'Skipping invalid technique : {tech}')
        continue
    counts_by_name.setdefault(name, [0] * len(TECHNIQUES))[tech_idx] += 1

unique_names = list(counts_by_name)

# Compute the best technique for each image and append it to its count row.
# np.argmax returns the first maximum, so ties go to the earliest technique
# in TECHNIQUES.
tech_counts = [
    counts + [TECHNIQUES[int(np.argmax(counts))]]
    for counts in counts_by_name.values()
]

Skipping invalid technique : nan
Skipping invalid technique : nan
Skipping invalid technique : nan
Skipping invalid technique : nan
Skipping invalid technique : nan


In [23]:
# Both lists must have one entry per image
tuple(len(seq) for seq in (unique_names, tech_counts))

(198, 198)

In [8]:
# Sanity check: total number of votes received by each image.
# The last item of every tech_counts row is dropped by the [:, :-1] slice and
# the remaining count columns are cast to integers before summing.
# The cast uses int64 instead of int8: int8 cannot represent a count above
# 127, so a heavily voted image would overflow.
foo = np.array(tech_counts)
bar = foo[:, :-1].astype(np.int64)
np.sum(bar, axis=1)

array([30, 27, 26, 25, 25, 23, 23, 23, 23, 23, 23, 23, 22, 21, 21, 21, 21,
       21, 20, 19, 18, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 16, 16,
       16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
       14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
       14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 14, 14, 14,
       14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
       14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
       14, 14, 14, 14, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
       14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14])

Then, we can convert the data into a dataframe, so we can store it.

In [9]:
# Tabulate the per-image results: rows are indexed by image name, with one
# column per technique plus the winning technique.
summary_columns = ['ig', 'lime', 'xrai', 'anchor', 'best']
best = pd.DataFrame(data=tech_counts, columns=summary_columns, index=unique_names)
best.head()

Unnamed: 0,ig,lime,xrai,anchor,best
1222__pool_table__0.9999995.jpg,12,14,3,1,lime
1328__coil__0.99999607.jpg,18,4,3,2,ig
134__zebra__0.9999949.jpg,14,1,9,2,ig
2377471__pizza__0.9999988.jpg,22,0,1,2,ig
2377620__zebra__0.9999882.jpg,13,5,6,1,ig


In [10]:
# Write the per-image summary table as CSV inside the results directory
temp_path = os.path.join(RESULTS_PATH, 'votes_summary.csv')
best.to_csv(path_or_buf=temp_path)