In [None]:
import os, sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import zipfile
import shutil
from pathlib import Path

In [None]:
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides pandas chained-assignment and deprecation warnings
# raised by the cells below — consider a narrower filter.
warnings.filterwarnings('ignore')

### Loading packages

In [None]:
import sys
from pathlib import Path

# Make the repository root importable so the project-local `py` package resolves.
# The root is taken to be two directory levels above the current working directory.
here_path = Path().resolve()
repo_path = here_path.parents[1]
# Only append once: re-running this cell must not keep growing sys.path.
if str(repo_path) not in sys.path:
    sys.path.append(str(repo_path))

In [None]:
from py.utils import verifyDir, verifyFile, verifyType

In [None]:
from py.config import Config

# Project configuration object; this notebook reads RANDOM_STATE, DATA_PATH and MODEL_PATH from it.
cfg = Config()

# Seed NumPy's global random generator with the configured random state.
np.random.seed(cfg.RANDOM_STATE)
# Echo the configured data and model locations as the cell output.
cfg.DATA_PATH, cfg.MODEL_PATH

In [None]:
# Locations of the pp2 dataset folders, all built from cfg.DATA_PATH.
# NOTE(review): the f-strings assume cfg.DATA_PATH already ends with a path separator — confirm.
# Every constant below ends with "/", so file names can be appended directly.
RAW_PATH=f"{cfg.DATA_PATH}pp2/raw_data/"
QSCORE_PATH=f"{cfg.DATA_PATH}pp2/Qscores/"
IMAGES_PATH = f"{cfg.DATA_PATH}pp2/images/"
COMPARISONS_PATH=f"{cfg.DATA_PATH}pp2/comparisons/"

In [None]:
# Run the project's directory check on both output folders, in the same order as before.
# NOTE(review): verifyDir presumably creates the folder when it is missing — confirm in py.utils.
for output_dir in (QSCORE_PATH, COMPARISONS_PATH):
    verifyDir(output_dir)

### Loading data

In [None]:
from py.datasets import PlacePulse

# Dataset helper used by the cells below for category mapping, point localization
# and the comparison/voter statistics.
pp = PlacePulse()

In [None]:
# Read the raw places table and keep only the place identifier and its name,
# exposed as "place_id" and "City".
# (data_resolution, locationdistribution, owner and polygon are discarded.)
places_df = (
    pd.read_csv(f"{RAW_PATH}places.tsv", sep="\t")
    .rename(columns={"_id": "place_id"})
    .loc[:, ["place_id", "place_name"]]
    .rename(columns={"place_name": "City"})
)
# places_df.to_csv(f"{PROCESSED_PATH}places.csv", index=False)
places_df

### Studies/categories evaluated

In [None]:
# Read the raw studies table; "_id" becomes "study_id".
studies_df = pd.read_csv(f"{RAW_PATH}studies.tsv", sep="\t").rename(columns={"_id": "study_id"})
# Derive the category name from the study question text.
studies_df["category"] = studies_df["study_question"].apply(pp.map_category)
# Keep only id, category and vote count (owner, study_name, study_public and
# study_question are discarded), ordered from most to least voted.
studies_df = (
    studies_df[["study_id", "category", "num_votes"]]
    .sort_values(by="num_votes", ascending=False)
)
# studies_df.to_csv(f"{PROCESSED_PATH}studies.csv", index=False)
studies_df

### Locations

In [None]:
# Read the raw locations table, normalise the id/coordinate column names and
# attach the place information through a left join on "place_id".
locations_df = (
    pd.read_csv(f"{RAW_PATH}locations.tsv", sep="\t")
    .rename(columns={"_id": "location_id", "loc.0": "lat", "loc.1": "long"})
    .merge(places_df, how="left", on="place_id")
)
# Resolve every coordinate pair into (continent, country, city), one row at a time.
locations_df[['continent', 'country', 'city']] = locations_df.apply(
    lambda row: pp.localize_point(row['lat'], row['long']),
    axis=1,
    result_type='expand',
)
# Order by coordinates, then discard City, heading, owner, pitch, type, votes and place_id.
locations_df = (
    locations_df
    .sort_values(by=["lat", "long"])
    .drop(columns=["City", "heading", "owner", "pitch", "type", "votes", "place_id"])
)
# locations_df.to_csv(f"{PROCESSED_PATH}locations.csv", index=False)
locations_df

#### Verifying by lat and long

In [None]:
# Display one row per distinct (lat, long) pair. The result is not assigned,
# so locations_df itself is left unchanged.
locations_df.drop_duplicates(subset=["lat", "long"])

#### Verifying Images and locations

In [None]:
# Index every image found one folder below IMAGES_PATH by its file name without extension.
base_path = Path(IMAGES_PATH)
# as_posix() guarantees "/" separators on every platform, so the split("/") below
# always isolates the file name (str() would yield "\\" separators on Windows).
all_image_paths = np.sort([p.relative_to(base_path).as_posix() for p in base_path.glob('*/*.JPG')])
# key: file name with ".JPG" removed; value: path relative to IMAGES_PATH.
# When two folders hold the same file name, the path that sorts last wins.
image_path_dict = {path.split("/")[-1].replace(".JPG",""): path for path in all_image_paths}

In [None]:
# Look up each location id in the image index; ids without a matching file get NaN.
locations_df["image_path"] = locations_df["location_id"].map(image_path_dict)
locations_df

#### Identify missing images

In [None]:
# Bar chart: number of locations without a matching image file, grouped by city.
fig, ax = plt.subplots(figsize=(16,8), nrows=1, ncols=1, sharex=False, sharey=False)

# Locations whose id was not found in the image index.
missing_df = locations_df[locations_df["image_path"].isna()].copy()
estado_df = missing_df["city"].value_counts().reset_index()

sns_fig = sns.barplot(
            data=estado_df,
            x=estado_df.columns[0],
            y=estado_df.columns[1],
            ax=ax,
            color="steelblue",
            order=estado_df.sort_values(estado_df.columns[1], ascending=False)[estado_df.columns[0]]
           )

# The title describes what this chart actually shows (missing images per city).
sns_fig.set_title("Number of missing images per city", fontsize=30)
sns_fig.set_ylabel("Number of missing images", fontsize=20)
# fontsize=0 keeps the x-axis label invisible; the city tick labels identify the axis.
sns_fig.set_xlabel('cities', fontsize=0)

# rotate the x tick labels so the city names do not overlap
_ = sns_fig.tick_params(axis='x', rotation=90, labelsize=20)

# enlarge the y tick labels
_ = sns_fig.tick_params(axis='y', labelsize=20)

# annotate every bar with its count
_ = sns_fig.bar_label(sns_fig.containers[0], fmt='%0.0f', fontsize=15,rotation=0)

# add a space on y for the annotations
#sns_fig.margins(x=0.1)
ax.grid(True)

# fig.savefig(f'{PROCESSED_IMAGES_PATH}/missing_images_per_city.png')
plt.show()

#### Dropping locations whose image is missing

In [None]:
# Keep only the locations for which an image file was found.
has_image = locations_df["image_path"].notna()
locations_df = locations_df[has_image].copy()
locations_df

#### Mapping same locations

In [None]:
# Group the locations that share the exact same coordinates (and city/country/continent)
# and collect their ids and image paths into lists.
repetitions_df = locations_df.pivot_table(
    index=["lat", "long", "city", "country", "continent"],
    values=["location_id", "image_path"],
    aggfunc={"location_id": list, "image_path": list},
).reset_index()

# Number of ids in each group, and the first id of the group as its representative.
repetitions_df["num_locations"] = repetitions_df["location_id"].map(len)
repetitions_df["image_id"] = repetitions_df["location_id"].map(lambda ids: ids[0])

# Most repeated coordinates first, with "image_id" moved to the front.
repetitions_df = repetitions_df.sort_values(by=["num_locations"], ascending=False)
column_order = ["image_id", *repetitions_df.columns[:-1]]
repetitions_df = repetitions_df[column_order].copy()
# repetitions_df.to_csv(f"{PROCESSED_PATH}repeated_locations.csv", sep=";", index=False)
repetitions_df

#### Identify images with repeated locations

In [None]:
# Bar chart: how many coordinate groups have 1, 2, 3, ... different location ids.
fig, ax = plt.subplots(figsize=(16,8), nrows=1, ncols=1, sharex=False, sharey=False)

estado_df = repetitions_df["num_locations"].value_counts().reset_index()
label_col = estado_df.columns[0]
count_col = estado_df.columns[1]
# Bars are drawn from the most to the least frequent value.
bar_order = estado_df.sort_values(count_col, ascending=False)[label_col]

sns_fig = sns.barplot(
    data=estado_df,
    x=label_col,
    y=count_col,
    order=bar_order,
    color="steelblue",
    ax=ax,
)

sns_fig.set_title("Number of different Ids per image", fontsize=30)
sns_fig.set_ylabel("Number of images", fontsize=20)
sns_fig.set_xlabel('Number of different ids', fontsize=20)

# tick label sizes (x labels stay horizontal)
sns_fig.tick_params(axis='x', rotation=0, labelsize=20)
sns_fig.tick_params(axis='y', labelsize=20)

# annotate every bar with its count
_ = sns_fig.bar_label(sns_fig.containers[0], fmt='%0.0f', fontsize=15, rotation=0)

# add a space on y for the annotations
#sns_fig.margins(x=0.1)
ax.grid(True)

# fig.savefig(f'{PROCESSED_IMAGES_PATH}/repeated_images_by_locations.png')
plt.show()

#### Number of images with more than 1 different ID per city

In [None]:
# Bar chart: per city, the number of coordinate groups that hold more than one location id.
fig, ax = plt.subplots(figsize=(28,12), nrows=1, ncols=1, sharex=False, sharey=False)

is_repeated = repetitions_df["num_locations"] > 1
estado_df = repetitions_df[is_repeated]["city"].value_counts().reset_index()
label_col = estado_df.columns[0]
count_col = estado_df.columns[1]

sns_fig = sns.barplot(
    data=estado_df,
    x=label_col,
    y=count_col,
    color="steelblue",
    ax=ax,
    #order=estado_df.sort_values(estado_df.columns[1], ascending=False)[estado_df.columns[0]]
)

sns_fig.set_title("Number of locations with more than 1 image-IDs per city", fontsize=45)
sns_fig.set_ylabel("Number of images", fontsize=40)
sns_fig.set_xlabel('', fontsize=20)

# vertical city names on x, larger tick labels on both axes
sns_fig.tick_params(axis='x', rotation=90, labelsize=30)
sns_fig.tick_params(axis='y', labelsize=30)

# annotate every bar with its count
_ = sns_fig.bar_label(sns_fig.containers[0], fmt='%0.0f', fontsize=15, rotation=0)

# add a space on y for the annotations
#sns_fig.margins(x=0.1)
ax.grid(True)

# fig.savefig(f'{PROCESSED_IMAGES_PATH}/number_images_with_different_id_per_city.png')
plt.show()

### Votes

In [None]:
# Read the raw votes and normalise the column names.
votes_df = pd.read_csv(f"{RAW_PATH}votes.tsv", sep="\t").rename(
    columns={"choice": "winner", "_id": "vote_id", "left": "left_id", "right": "right_id"}
)
# Map the raw choice to a match result and keep only the three valid outcomes.
votes_df["winner"] = votes_df["winner"].apply(pp.map_match_result)
votes_df = votes_df[votes_df["winner"].isin(["right", "left", "equal"])]
# Attach the study category, then keep only the columns used downstream
# (study_id and num_votes are discarded here).
votes_df = votes_df.merge(studies_df, how="left", on="study_id")
votes_df = votes_df[["vote_id", "left_id", "right_id", "winner", "category", "voter_uniqueid", "timestamp"]].copy()
# Discard votes that reference one of the two known-bad ids on either side.
bad_ids = ["AxRsqyid", '${9999640+9999388}']
has_bad_id = votes_df["left_id"].isin(bad_ids) | votes_df["right_id"].isin(bad_ids)
votes_df = votes_df[~has_bad_id].sort_values(by=["left_id", "right_id"])
# votes_df.to_csv(f"{PROCESSED_PATH}votes.csv", index=False)
votes_df

#### Initial ids (including repetitions)

In [None]:
# Distinct ids that appear on either side of a vote, before merging repeated locations.
left_ids = list(votes_df["left_id"].unique())
right_ids = list(votes_df["right_id"].unique())
# Sorted union of both id lists.
total_ids = np.union1d(left_ids, right_ids)
len(total_ids)

#### Mapping and deleting repetitions

In [None]:
# One row per (location_id, representative image_id) pair.
r_df = (
    repetitions_df[["location_id", "image_id"]]
    .explode("location_id")
    .reset_index(drop=True)
)
# location_id -> image_id of the coordinate group it belongs to.
locations_dict = {
    location_id: image_id
    for location_id, image_id in zip(r_df["location_id"], r_df["image_id"])
}
# (number of entries, number of distinct keys, number of distinct representatives)
len(locations_dict), len(set(locations_dict)), len(set(locations_dict.values()))

In [None]:
# Replace each id by the representative id of its coordinate group;
# ids that are not in the mapping are left as they are.
for id_column in ("left_id", "right_id"):
    votes_df[id_column] = votes_df[id_column].map(lambda x: locations_dict.get(x, x))

In [None]:
# Distinct ids on either side of a vote after the repeated locations were merged.
left_id = list(votes_df["left_id"].unique())
right_id = list(votes_df["right_id"].unique())
# Sorted union of both id lists.
total_ids = np.union1d(left_id, right_id)
len(total_ids)

#### Removing comparisons of an image with itself

In [None]:
# Keep only the votes whose two sides refer to different images.
sides_differ = votes_df['left_id'].ne(votes_df['right_id'])
c_votes_df = votes_df.loc[sides_differ].copy()

#### Verifying same pair comparisons in different position (left<->right)

In [None]:
# Compare the number of distinct timestamps with the table shape (rows, columns):
# fewer distinct timestamps than rows means some votes share a timestamp.
len(c_votes_df["timestamp"].unique()), c_votes_df.shape

In [None]:
# Display every vote whose timestamp occurs more than once (all occurrences are kept),
# ordered by timestamp so that the repeated ones appear next to each other.
c_votes_df[c_votes_df.duplicated(subset=["timestamp"], keep=False)].sort_values(by=["timestamp"])

In [None]:
# left_df = c_votes_df.copy()
# left_df.rename(columns={"left_id": "evaluated_id", "right_id": "against_id"}, inplace=True)
# left_df["winner"] = left_df["winner"].apply(lambda x: "evaluated" if x == "left" else "against")

# right_df = c_votes_df.copy()
# right_df.rename(columns={"right_id": "evaluated_id", "left_id": "against_id"}, inplace=True)
# right_df["winner"] = right_df["winner"].apply(lambda x: "evaluated" if x == "right" else "against")

# pair_comparisons_df = pd.concat([left_df, right_df])
# pair_comparisons_df

In [None]:
# duplicated_comparisons = pair_comparisons_df[pair_comparisons_df.duplicated(subset=["evaluated_id", "against_id", "winner", "category", "voter_uniqueid", "timestamp"], keep=False)]
# duplicated_comparisons

#### Adding lat & long

In [None]:
# merging left city
c_votes_df = pd.merge(c_votes_df, locations_df.rename(columns={"location_id":"left_id"}), how="left", on="left_id")
c_votes_df = c_votes_df.rename(columns={"lat":"left_lat", "long":"left_long", "city":"left_city", "continent": "left_continent", "country": "left_country", "image_path": "left_image_path"})

#merging right city
c_votes_df = pd.merge(c_votes_df, locations_df.rename(columns={"location_id":"right_id"}), how="left", on="right_id")
c_votes_df = c_votes_df.rename(columns={"lat":"right_lat", "long":"right_long", "city":"right_city", "continent": "right_continent", "country": "right_country", "image_path": "right_image_path"})
c_votes_df.info()

#### Verifying duplicates by lat & long

In [None]:
# Display the comparisons whose two sides have identical latitude and longitude.
c_votes_df[ (c_votes_df['left_lat'] == c_votes_df['right_lat']) & (c_votes_df['left_long'] == c_votes_df['right_long'])]

In [None]:
# Work on an independent copy so the cleaning steps below leave c_votes_df untouched.
comparisons_df = c_votes_df.copy()

#### Deleting nan locations and paths

In [None]:
# Coordinates and image path must be present on both sides of a comparison.
cols_to_check = [
    "left_lat", "left_long", "left_image_path",
    "right_lat", "right_long", "right_image_path",
]

# Keep the rows where none of those columns is missing and renumber the index.
is_complete = comparisons_df[cols_to_check].notna().all(axis=1)
comparisons_df = comparisons_df[is_complete].reset_index(drop=True)
comparisons_df

##### Counting comparisons

In [None]:
# Bar chart of the counts returned by pp.get_comparisons_count (first column on x, second on y).
fig, ax = plt.subplots(figsize=(16,8), nrows=1, ncols=1, sharex=False, sharey=False)

estado_df = pp.get_comparisons_count(comparisons_df).reset_index()
label_col = estado_df.columns[0]
count_col = estado_df.columns[1]
# Bars are drawn from the largest to the smallest count.
bar_order = estado_df.sort_values(count_col, ascending=False)[label_col]

sns_fig = sns.barplot(
    data=estado_df,
    x=label_col,
    y=count_col,
    order=bar_order,
    color="steelblue",
    ax=ax,
)

sns_fig.set_title("Number of comparisons per category", fontsize=30)
# fontsize=0 keeps the y-axis label invisible
sns_fig.set_ylabel("Number of comparisons", fontsize=0)
sns_fig.set_xlabel('', fontsize=20)

# tick label sizes (x labels stay horizontal)
sns_fig.tick_params(axis='x', rotation=0, labelsize=20)
sns_fig.tick_params(axis='y', labelsize=20)

# annotate every bar with its count
_ = sns_fig.bar_label(sns_fig.containers[0], fmt='%0.0f', fontsize=15, rotation=0)

# add a space on y for the annotations
#sns_fig.margins(x=0.1)
ax.grid(True)

# fig.savefig(f'{PROCESSED_IMAGES_PATH}/number_comparisons_per_category.png')
plt.show()

##### Number of evaluated and not evaluated images

In [None]:
# Bar chart of the counts returned by pp.get_evaluated_images_per_category.
fig, ax = plt.subplots(figsize=(16,8), nrows=1, ncols=1, sharex=False, sharey=False)

zero_counts = pp.get_evaluated_images_per_category(comparisons_df).reset_index()
print("Total unique images:", len(pp.get_evaluated_images(comparisons_df)))

label_col = zero_counts.columns[0]
count_col = zero_counts.columns[1]
# Bars are drawn from the largest to the smallest count.
bar_order = zero_counts.sort_values(count_col, ascending=False)[label_col]

sns_fig = sns.barplot(
    data=zero_counts,
    x=label_col,
    y=count_col,
    order=bar_order,
    color="steelblue",
    ax=ax,
)

sns_fig.set_title("Number of images evaluated", fontsize=30)
# fontsize=0 keeps the y-axis label invisible
sns_fig.set_ylabel("Number of locations", fontsize=0)
sns_fig.set_xlabel('', fontsize=20)

# tick label sizes (x labels stay horizontal)
sns_fig.tick_params(axis='x', rotation=0, labelsize=20)
sns_fig.tick_params(axis='y', labelsize=20)

# annotate every bar with its count
_ = sns_fig.bar_label(sns_fig.containers[0], fmt='%0.0f', fontsize=15, rotation=0)

# add a space on y for the annotations
#sns_fig.margins(x=0.1)
ax.grid(True)

# fig.savefig(f'{PROCESSED_IMAGES_PATH}/number_of_images_evaluated_per_category.png')
plt.show()

In [None]:
# Bar chart of (total evaluated images - per-category evaluated images).
fig, ax = plt.subplots(figsize=(16,8), nrows=1, ncols=1, sharex=False, sharey=False)

evaluated_per_category = pp.get_evaluated_images_per_category(comparisons_df)
# Subtract the per-category counts from the overall number of evaluated images.
zero_counts = (len(pp.get_evaluated_images(comparisons_df)) - evaluated_per_category).reset_index()
print("Total unique images:", len(pp.get_evaluated_images(comparisons_df)))

label_col = zero_counts.columns[0]
count_col = zero_counts.columns[1]
# Bars are drawn from the largest to the smallest count.
bar_order = zero_counts.sort_values(count_col, ascending=False)[label_col]

sns_fig = sns.barplot(
    data=zero_counts,
    x=label_col,
    y=count_col,
    order=bar_order,
    color="steelblue",
    ax=ax,
)

sns_fig.set_title("Number of images not evaluated", fontsize=30)
# fontsize=0 keeps the y-axis label invisible
sns_fig.set_ylabel("Number of locations", fontsize=0)
sns_fig.set_xlabel('', fontsize=20)

# tick label sizes (x labels stay horizontal)
sns_fig.tick_params(axis='x', rotation=0, labelsize=20)
sns_fig.tick_params(axis='y', labelsize=20)

# annotate every bar with its count
_ = sns_fig.bar_label(sns_fig.containers[0], fmt='%0.0f', fontsize=15, rotation=0)

# add a space on y for the annotations
#sns_fig.margins(x=0.1)
ax.grid(True)

# fig.savefig(f'{PROCESSED_IMAGES_PATH}/number_of_images_not_evaluated_per_category.png')
plt.show()

##### Counting user votes

In [None]:
# Bar chart of the first 20 rows returned by pp.get_voter_count.
fig, ax = plt.subplots(figsize=(16,12), nrows=1, ncols=1, sharex=False, sharey=False)

estado_df = pp.get_voter_count(comparisons_df).reset_index().head(20)
label_col = estado_df.columns[0]
count_col = estado_df.columns[1]
# Bars are drawn from the largest to the smallest count.
bar_order = estado_df.sort_values(count_col, ascending=False)[label_col]

sns_fig = sns.barplot(
    data=estado_df,
    x=label_col,
    y=count_col,
    order=bar_order,
    color="steelblue",
    ax=ax,
)

sns_fig.set_title("Number of user evaluations", fontsize=30)
# fontsize=0 keeps the y-axis label invisible
sns_fig.set_ylabel("Number of comparisons", fontsize=0)
sns_fig.set_xlabel('', fontsize=20)

# vertical labels on x, same tick label size on both axes
sns_fig.tick_params(axis='x', rotation=90, labelsize=20)
sns_fig.tick_params(axis='y', labelsize=20)

# annotate every bar with its count
_ = sns_fig.bar_label(sns_fig.containers[0], fmt='%0.0f', fontsize=15, rotation=0)

# add a space on y for the annotations
#sns_fig.margins(x=0.1)
ax.grid(True)

# fig.savefig(f'{PROCESSED_IMAGES_PATH}/number_evaluations_per_users.png')
plt.show()

##### Counting user votes per category

In [None]:
# Collection of metric names; the loops below iterate over it and match each
# entry against the "category" column of comparisons_df.
metrics = pp.get_metrics(comparisons_df)
metrics

In [None]:
# For every metric, count the comparisons between each (left place, right place)
# combination and store the table under <COMPARISONS_PATH>/<metric>/.
for metric in metrics:
    # COMPARISONS_PATH already ends with "/", so no extra separator is inserted here
    # (keeps the paths consistent with the final comparisons.csv export).
    metric_dir = f"{COMPARISONS_PATH}{metric}/"
    verifyDir(metric_dir)
    city_comparisons_df = pd.pivot_table(comparisons_df[(comparisons_df["category"]==metric)],
                  index=["left_continent", "left_country", "left_city", "right_city", "right_country", "right_continent"],
                  values=["winner"],
                  aggfunc={
                      # len counts the rows of each group, i.e. the number of comparisons
                      "winner":len
                  }).rename(columns={"winner":"num_comparisons"}).reset_index()
    city_comparisons_df.to_csv(f"{metric_dir}{metric}_comparisons.csv", sep=";", index=False)

In [None]:
# One bar chart per metric with the first 20 rows returned by pp.get_voter_count.
for metric in metrics:
    # verifyDir(f"{PROCESSED_IMAGES_PATH}{metric}/")
    estado_df = pp.get_voter_count(comparisons_df, metric=metric).reset_index()[:20]
    
    fig, ax = plt.subplots(figsize=(16,12), nrows=1, ncols=1, sharex=False, sharey=False)

    sns_fig = sns.barplot(
                data=estado_df,
                x=estado_df.columns[0],
                y=estado_df.columns[1],
                ax=ax,
                color="steelblue",
                order=estado_df.sort_values(estado_df.columns[1], ascending=False)[estado_df.columns[0]]
               )

    sns_fig.set_title(f"Number of user evaluations for {metric}", fontsize=30)
    # fontsize=0 keeps the y-axis label invisible
    sns_fig.set_ylabel(f"Number of comparisons", fontsize=0)
    sns_fig.set_xlabel('', fontsize=20)

    # rotate the x tick labels
    _ = sns_fig.tick_params(axis='x', rotation=90, labelsize=20)

    # enlarge the y tick labels
    _ = sns_fig.tick_params(axis='y', labelsize=20)

    # annotate every bar with its count
    _ = sns_fig.bar_label(sns_fig.containers[0], fmt='%0.0f', fontsize=15,rotation=0)

    # add a space on y for the annotations
    #sns_fig.margins(x=0.1)
    ax.grid(True)

    # fig.savefig(f'{PROCESSED_IMAGES_PATH}/{metric}/number_evaluations_per_users.png')
    plt.show()
    # Release the figure so that the loop does not accumulate open figures in memory.
    plt.close(fig)

##### Number of image comparisons frequency per category

In [None]:
# for metric in metrics:
#     verifyDir(f"{PROCESSED_IMAGES_PATH}{metric}/")
#     verifyDir(f"{PROCESSED_IMAGES_PATH}{metric}/cities/")
    
#     metric_comparisons_df = pd.read_csv(f"{RESULTS_PATH}{metric}/matches.csv", sep=";", low_memory=False)
#     num_comp_df = metric_comparisons_df["total_games"].value_counts().reset_index()
    
#     avg_comparison =np.sum(num_comp_df["count"].values*num_comp_df["total_games"].values)/np.sum(num_comp_df["total_games"].values)
    
#     #fig = num_comp_df.plot(kind="bar",figsize=(16,8)).get_figure()

#     fig, ax = plt.subplots(figsize=(16,8), nrows=1, ncols=1, sharex=False, sharey=False)

#     sns_fig = sns.barplot(
#             data=num_comp_df,
#             x=num_comp_df.columns[0],
#             y=num_comp_df.columns[1],
#             ax=ax,
#             color="steelblue",
#             order=num_comp_df.sort_values(num_comp_df.columns[1], ascending=False)[num_comp_df.columns[0]]
#            )

#     sns_fig.set_title(f"Comparisons average in {metric}: {round(avg_comparison,4)}", fontsize=30)
#     sns_fig.set_ylabel(f"Number of images", fontsize=20)
#     sns_fig.set_xlabel('Number of comparisons', fontsize=20)
    
#     # rotate the axis ticklabels
#     _ = sns_fig.tick_params(axis='x', rotation=0, labelsize=10)
    
#     # rotate the axis ticklabels
#     _ = sns_fig.tick_params(axis='y', labelsize=20)
    
#     # add annotation
#     #_ = sns_fig.bar_label(sns_fig.containers[0], fmt='%0.0f', fontsize=15,rotation=0)
    
#     # add a space on y for the annotations
#     #sns_fig.margins(x=0.1)
#     ax.grid(True)
#     fig.savefig(f'{PROCESSED_IMAGES_PATH}/{metric}/number_comparisons_frequency.png')
#     plt.show()
#     plt.close(fig)
    
#     for current_city in metric_comparisons_df["city"].unique().tolist():
        
#         city_df = metric_comparisons_df[metric_comparisons_df["city"]==current_city].copy()
#         num_comp_df = city_df["total_games"].value_counts().reset_index()

#         avg_comparison =np.sum(num_comp_df["count"].values*num_comp_df["total_games"].values)/np.sum(num_comp_df["total_games"].values)


#         fig, ax = plt.subplots(figsize=(16,8), nrows=1, ncols=1, sharex=False, sharey=False)

#         sns_fig = sns.barplot(
#                 data=num_comp_df,
#                 x=num_comp_df.columns[0],
#                 y=num_comp_df.columns[1],
#                 ax=ax,
#                 color="steelblue",
#                 order=num_comp_df.sort_values(num_comp_df.columns[1], ascending=False)[num_comp_df.columns[0]]
#                )

#         sns_fig.set_title(f"Comparisons average in {current_city} - {metric}: {round(avg_comparison,4)}", fontsize=30)
#         sns_fig.set_ylabel(f"Number of images", fontsize=20)
#         sns_fig.set_xlabel('Number of comparisons', fontsize=20)

#         # rotate the axis ticklabels
#         _ = sns_fig.tick_params(axis='x', rotation=0, labelsize=10)

#         # rotate the axis ticklabels
#         _ = sns_fig.tick_params(axis='y', labelsize=20)

#         # add annotation
#         #_ = sns_fig.bar_label(sns_fig.containers[0], fmt='%0.0f', fontsize=15,rotation=0)

#         # add a space on y for the annotations
#         #sns_fig.margins(x=0.1)
#         ax.grid(True)
#         fig.savefig(f'{PROCESSED_IMAGES_PATH}/{metric}/cities/{current_city}_number_comparisons_frequency.png')
#         #plt.show()
#         plt.close(fig)

### Saving final file

In [None]:
%%time
# Persist the cleaned comparisons table as a ';'-separated CSV without the index column.
comparisons_df.to_csv(f"{COMPARISONS_PATH}comparisons.csv", sep=";", index=False)

In [None]:
%%time
zip_path = f"{COMPARISONS_PATH}/comparisons.zip"
csv_path = f"{COMPARISONS_PATH}/comparisons.csv"

with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_LZMA) as zipObj:
    zipObj.write(csv_path, arcname="comparisons.csv")
    zipObj.close()