# Assumptions and Adversarial Model

We assume that each IP address is unique, is associated with exactly one user, and does not change over time. This is not necessarily a realistic assumption, since an ISP may assign a random IP address, for example when the user is on 4G.
We assume that two successive requests sent from the same cell count as one visit to that cell.
We assume the connection is encrypted, so the adversary cannot eavesdrop on or read the packets. However, we assume that if the adversary has access to any of the data, then it has access to all of the data presented in section 2.1, and that the adversary can perform data analysis. More concretely, the service provider is the adversary and seeks to learn information about individual users.

These assumptions are useful for privacy analysis because they give the adversary a way to exploit the data while remaining realistic.

# Attack Strategy

## Frequency attack

Given an IP address, we can count the number of times the user with this IP visited a given cell or a given type of POI, to learn what the user's hobbies are and where they live. We can also use the timestamps to infer where the user works and what their working hours are. After this, we can compare different users to check whether they visit the same place at the same time, and thereby learn which users know each other.

# Demonstration

In [1]:
import os
from collections import Counter, defaultdict
from os import path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from query import get_nearby_pois


In [2]:
def parse_queries():
    """Load ``queries.csv`` from the current working directory, grouped by user.

    The file is read as space-delimited text and its first row (header) is
    skipped. Every remaining row must provide at least five columns, used
    positionally: user identifier, two float coordinates, timestamp, and the
    POI-type filter of the query.

    Returns:
        dict: maps each user identifier (str) to the list of that user's
        queries in file order. Each query is a dict with keys
        ``'location'`` (``np.ndarray`` of the two coordinates as floats),
        ``'timestamp'`` (int, everything after the first ``.`` is dropped)
        and ``'filter'`` (the raw fifth column).
    """
    cwd = path.abspath('')
    # ndmin=2 keeps a single-row file as a 1xN table. Without it loadtxt
    # squeezes the result to 1-D and the loop below would iterate over
    # individual fields instead of rows.
    rows = np.loadtxt(path.join(cwd, 'queries.csv'), delimiter=" ",
                      dtype=object, skiprows=1, ndmin=2)

    query_by_user = {}
    for row in rows:
        entry = {
            'location': np.array([float(row[1]), float(row[2])]),
            'timestamp': int(row[3].split(".")[0]),
            'filter': row[4],
        }
        query_by_user.setdefault(row[0], []).append(entry)
    return query_by_user
    
def parse_pois():
    """Load ``pois.csv`` (relative to the working directory), keyed by POI id.

    The first line is treated as a header and skipped. Every other non-blank
    line is split on whitespace and must provide at least five fields, used
    positionally: POI id, cell id, POI type, and two float coordinates.

    Returns:
        dict: maps each POI id (int) to a dict with keys ``"cell"`` (int),
        ``"type"`` (str) and ``"location"`` (tuple of two floats).
    """
    pois_by_id = {}
    with open('pois.csv') as file:
        # Skip the header; the default avoids StopIteration on an empty file.
        next(file, None)
        for line in file:
            fields = line.split()
            if not fields:
                # Blank (e.g. trailing) lines carry no POI.
                continue
            pois_by_id[int(fields[0])] = {
                "cell": int(fields[1]),
                "type": fields[2],
                "location": (float(fields[3]), float(fields[4])),
            }
    return pois_by_id

# Load both data sets once; the plotting cells below reuse these two dicts.
query_by_user = parse_queries()
pois_by_id = parse_pois()

In [3]:
## create plot for type of pois visited
def plot_type_searched(queries):
    """Save one bar chart per user showing how often each POI type was queried.

    Args:
        queries: dict mapping a user identifier to a list of query dicts;
            only the ``'filter'`` key of each query is read.

    Side effects:
        Writes ``figures/type_searched/type_searched_<user>.png`` for every
        user, creating the output directory if it does not exist.
    """
    # savefig does not create missing directories, so make sure it is there.
    os.makedirs("figures/type_searched", exist_ok=True)

    for user, user_queries in queries.items():
        # Counter keeps first-seen order, so the bars appear in the order the
        # POI types were first queried by this user.
        pois_type_visited = Counter(query['filter'] for query in user_queries)

        types = list(pois_type_visited.keys())
        nb_of_visit = list(pois_type_visited.values())

        plt.figure(figsize=(10, 5))
        plt.bar(types, nb_of_visit, width=0.4)

        plt.xlabel("poi type")
        plt.ylabel("Number of visit")
        plt.title("visited type by {user_id}".format(user_id=user))
        path_name = "figures/type_searched/type_searched_{user_id}.png".format(user_id=user)
        plt.savefig(path_name)
        # Close the figure so memory does not grow with the number of users.
        plt.close()

# Write one POI-type histogram per user under figures/type_searched/.
plot_type_searched(query_by_user)

In [4]:
## find most visited cells
def plot_visited_cells(queries, pois):
    """Save one bar chart per user showing how often each cell was visited.

    Consecutive queries that resolve to the same cell count as a single
    visit; a visit is counted each time the resolved cell changes.

    Args:
        queries: dict mapping a user identifier to a list of query dicts;
            the ``'location'`` and ``'filter'`` keys are passed to
            ``get_nearby_pois``.
        pois: dict indexed by the elements returned by ``get_nearby_pois``
            (presumably POI ids - confirm against the ``query`` module);
            each value must have an integer ``'cell'`` entry.

    Side effects:
        Writes ``figures/cell_visited/cell_visited_<user>.png`` for every
        user, creating the output directory if it does not exist.
    """
    # savefig does not create missing directories, so make sure it is there.
    os.makedirs("figures/cell_visited", exist_ok=True)

    for user, user_queries in queries.items():
        cell_visited = Counter()
        # None rather than 0: with 0 as the initial value a first visit to
        # cell 0 would be mistaken for "still in the same cell" and dropped.
        previous_cell = None

        for query in user_queries:
            nearby_pois = get_nearby_pois(query['location'], query['filter'])
            # Queries without any nearby POI cannot be mapped to a cell.
            if not nearby_pois:
                continue
            # All nearby POIs are assumed to lie in the same cell, so the
            # first one is enough to identify it.
            cell = pois[nearby_pois[0]]['cell']
            if cell != previous_cell:
                cell_visited[cell] += 1
                previous_cell = cell

        cells = list(cell_visited.keys())
        nb_of_visit = list(cell_visited.values())
        # Highest cell id seen, never below 0, to label every id on the axis.
        max_id = max([0, *cells])
        ticks = np.arange(0, max_id + 1, 1)

        plt.figure(figsize=(20, 5))
        plt.bar(cells, nb_of_visit, width=0.4)

        plt.xlabel("cell id")
        plt.ylabel("Number of visit")
        plt.title("cell type by {user_id}".format(user_id=user))
        plt.xticks(ticks, ticks)
        path_name = "figures/cell_visited/cell_visited_{user_id}.png".format(user_id=user)
        plt.savefig(path_name)
        # Close the figure so memory does not grow with the number of users.
        plt.close()
        
# Write one visited-cell histogram per user under figures/cell_visited/.
plot_visited_cells(query_by_user, pois_by_id)

In [16]:
def plot_number_of_meeting_between_user(queries):
    """Save one bar chart per user counting "meetings" with every other user.

    A meeting is counted for every pair (query of this user, query of another
    user) whose ``'timestamp'`` values are equal.

    NOTE(review): only timestamps are compared; the query locations are not
    checked, so two users querying at the same second from different places
    still count as a meeting - confirm this is intended.

    Args:
        queries: dict mapping a user identifier to a list of query dicts;
            only the ``'timestamp'`` key of each query is read.

    Side effects:
        Writes ``figures/meetings/meeting_of_<user>.png`` for every user,
        creating the output directory if it does not exist.
    """
    # savefig does not create missing directories, so make sure it is there.
    os.makedirs("figures/meetings", exist_ok=True)

    # Index built once: timestamp -> {user: number of queries at that time}.
    # This replaces rescanning every other user's queries for every query.
    users_by_timestamp = defaultdict(Counter)
    for user, user_queries in queries.items():
        for query in user_queries:
            users_by_timestamp[query['timestamp']][user] += 1

    for user1, queries1 in queries.items():
        meeting = Counter()
        for query1 in queries1:
            same_time = users_by_timestamp[query1['timestamp']]
            for user2, count in same_time.items():
                if user2 != user1:
                    meeting[user2] += count

        users = list(meeting.keys())
        nb_of_meeting = list(meeting.values())

        plt.figure(figsize=(30, 5))
        plt.bar(users, nb_of_meeting, width=0.4)

        plt.xlabel("user met")
        plt.ylabel("Number of meeting")
        plt.title("meeting of {user_id} with other users".format(user_id=user1))
        path_name = "figures/meetings/meeting_of_{user_id}.png".format(user_id=user1)
        plt.savefig(path_name)
        # Close the figure so memory does not grow with the number of users.
        plt.close()
# Write one meeting histogram per user under figures/meetings/.
plot_number_of_meeting_between_user(query_by_user)