# Building the virtual environment for BitTorrent Demo

In [None]:
'''We simulate the environment in which BitTorrent runs.
We create a virtual environment of --USERS-- with which the --CLIENT-- interacts.

Each USER is uniquely identified by an --IP-- and has a folder that contains their files.

In reality, there is a mechanism (trackers) that enables each user to hold an up-to-date version of the torrents.
In this simulation the torrent files are global and are updated whenever a transfer or seeding takes place.
'''

In [1]:
#first we import the relevant libraries, to be used throughout the initialization of the virtual network
import glob
import os
import numpy as np
import hashlib
import random
import shutil
import json


# some functions for computing hashes and working with files

In [38]:
def rand_IP():
    '''Build a random dotted IP address string, e.g. 255.0.4.26.

    The four entries are drawn one after the other with
    np.random.randint(0,256), so each entry is between 0 and 255.'''
    entries = []
    for _ in range(4):
        entries.append(str(np.random.randint(0, 256)))
    return '.'.join(entries)

def get_users_list(N):
    '''Produce a list of N random IP addresses, one rand_IP() call per entry.

    Nothing enforces uniqueness, so the same address may appear twice
    (think of a user who created two accounts).'''
    users = []
    for _ in range(N):
        users.append(rand_IP())
    return users

def root_hash(lst):
    '''lst is a non-empty list of hashes (bytes)
    the function builds a merkle tree and returns the root hash (bytes)

    a single hash is its own root
    two hashes are combined into sha256(first + second)
    a longer list is split into a left part with ceil(n/2) hashes and a right
    part with the rest; the roots of the two parts are combined the same way

    raises ValueError if lst is empty (the original recursed until RecursionError)
    '''
    n = len(lst)
    if n == 0:
        raise ValueError("root_hash() needs at least one hash")
    if n == 1:
        return lst[0]
    if n == 2:
        m = hashlib.sha256()
        m.update(lst[0])
        m.update(lst[1])
        return m.digest()
    k = (n + 1) // 2  # integer ceil(n/2), without a float round-trip
    return root_hash([root_hash(lst[:k]), root_hash(lst[k:])])

     
def hash_bytes(s):
    '''returns the sha256 of the bytes buffer s as bytes
    if s is a list, return a list of its hashes (nested lists are handled recursively)
    if s is neither a bytes buffer nor a list, convert str(s) to bytes and hash that
    '''
    # isinstance (rather than type(s)==...) so that subclasses of bytes / list
    # are hashed by content and not by their str() representation
    if isinstance(s, bytes):
        return hashlib.sha256(s).digest()
    if isinstance(s, list):
        return [hash_bytes(element) for element in s]
    return hash_bytes(str(s).encode())
        
def name(location):
    '''location is a string that contains the path of a file with an extension, e.g., *.mp4
    return the name of the file, without the directory part and without the extension

    Both the backslash and '/' are accepted as directory separators, a bare
    file name (no directory part) is accepted as well, and the extension may
    have any length. The original only handled paths that contain a backslash
    and always cut exactly four characters off the end.'''
    # normalise the separators, then keep what follows the last one
    base = location.replace('\\', '/').rsplit('/', 1)[-1]
    stem, _extension = os.path.splitext(base)
    return stem

def to_pieces(in_loc, in_name, piece_size, out_folder):
    '''reads a file from location in_loc
    creates pieces of size piece_size (in bytes) in out_folder; the last piece may be shorter
    uses in_name to name output files: <in_name>__piece_<i>, where i counts from 0
    returns a tuple: (list with the hash_bytes hash of every piece, in order; length of the file in bytes)
    raises ValueError if piece_size is not positive
    '''
    if piece_size <= 0:
        raise ValueError("piece_size must be a positive number of bytes")
    hashes = []
    total_length = 0
    # 'with' closes the files even when a read or write fails; reading one
    # piece at a time avoids holding the whole file in memory
    with open(in_loc, 'rb') as file:
        for i, piece in enumerate(iter(lambda: file.read(piece_size), b'')):
            with open(out_folder+os.sep+in_name+"__piece_"+str(i), "wb") as out_file:
                out_file.write(piece)
            hashes.append(hash_bytes(piece))
            total_length += len(piece)
    return hashes, total_length

def piece_name(s):
    '''s is a string that contains the path of a file with no extension
    return the name of the file, i.e. everything after the last directory separator

    Both the backslash and '/' count as separators; a string without any
    separator is returned unchanged (the original raised ValueError unless a
    backslash was present).'''
    return s.replace('\\', '/').rsplit('/', 1)[-1]
    

def parse_piece(x):
    '''Split a piece name such as "Name_of_Movie__piece_10" into its two parts.

    Everything after the last "_" is taken as the piece number; that number
    and the 8 characters before it (the length of "__piece_") are cut off to
    obtain the file name.
    returns (name of the file as a string, piece number as an integer)'''
    last_underscore = x.rindex('_')
    number_text = x[last_underscore + 1:]
    suffix_length = len(number_text) + 8
    file_name = x[:-suffix_length]
    return file_name, int(number_text)

def update_torrent(user_ip, movie_name, piece_number):
    '''Record in the global torrents dictionary that user_ip holds piece piece_number of movie_name.

    The ip is appended to torrents[movie_name]['nodes'][piece_number] unless it is already listed there.'''
    holders = torrents[movie_name]['nodes'][piece_number]
    if user_ip in holders:
        return
    holders.append(user_ip)

def build_file(directory):
    '''directory contains the pieces of one file, named <file name>__piece_<number>.
    The function builds <file name>.mp4 inside directory by joining the pieces
    in increasing piece-number order.

    Entries of directory that are not piece files (e.g. an .mp4 built by an
    earlier call, or a sub-folder) are ignored.
    raises ValueError if directory holds no pieces, or pieces of more than one file
    '''
    marker = "__piece_"
    numbered_pieces = []  # (piece number, path of the piece)
    file_names = set()
    for entry in os.listdir(directory):
        file_name, found, number = entry.rpartition(marker)
        entry_path = os.path.join(directory, entry)
        if not found or not number.isdecimal() or not os.path.isfile(entry_path):
            continue
        file_names.add(file_name)
        numbered_pieces.append((int(number), entry_path))
    if not numbered_pieces:
        raise ValueError("no pieces found in " + directory)
    if len(file_names) > 1:
        raise ValueError("pieces of more than one file found in " + directory)
    # sort numerically: in a plain listing piece 10 would come before piece 2
    numbered_pieces.sort()
    with open(os.path.join(directory, file_names.pop() + ".mp4"), "wb") as out_file:
        for _number, piece_path in numbered_pieces:
            with open(piece_path, "rb") as temp_piece_file:
                out_file.write(temp_piece_file.read())
    return

# Generate a list of virtual users by IP

In [3]:
# generate the users of the virtual network: a list of 200 random IP addresses
# (get_users_list does not guarantee that the addresses are distinct)
lst = get_users_list(200)

# if we already have the list saved in list_of_users.txt, we read it back instead:
# (line[:-1] drops the newline that ends every line)
# lst = []
# with open(r"Virtual_Network\list_of_users.txt","r") as file:
#     for line in file:
#         lst.append(line[:-1])

In [8]:
# number of distinct IPs in lst; it equals len(lst) only when no address was drawn twice
len(set(lst))

200

In [5]:
# keep this list as a file in the virtual network folder, one IP per line
# 'with' closes the file even if a write fails; the folder is spelled
# Virtual_Network, as in every other path of this notebook
with open(r'Virtual_Network\list_of_users.txt','w') as file:
    file.writelines(ip+'\n' for ip in lst)

In [6]:
# create a folder for each user, named after the user's IP
# os.makedirs(..., exist_ok=True) also creates a missing Users folder and
# tolerates re-running this cell or an IP that appears twice in lst
# (os.mkdir raises FileNotFoundError / FileExistsError in those cases)
for ip in lst:
    os.makedirs(r'Virtual_Network\Users'+os.sep+ip, exist_ok=True)

# Seed first few movies and create their torrent files

In [11]:
# take movies from folder, split them to pieces, and give all the pieces of each movie to a single seeder user
# the i'th movie is seeded by the i'th user in lst
# and also create the torrent of each movie, kept in the global torrents dictionary under the movie's name

MB = 2**20 #size of each piece of file, in bytes

source_dir = r'Virtual_Network\mp4_files' # local folder to initialize simulation
file_locations = glob.glob(source_dir+os.sep+"**")
num_of_files = len(file_locations)
# every movie needs its own seeder; check before any file is split
if num_of_files > len(lst):
    raise ValueError("there are more movies than users to seed them")
torrents={}
for i, file_loc in enumerate(file_locations):
    current_user = lst[i]
    movie_name = name(file_loc)
    # the pieces are written straight into the seeder's own folder
    file_hashes , length = to_pieces(file_loc, movie_name, MB, r'Virtual_Network\Users'+os.sep+current_user)
    file_root = root_hash(file_hashes).hex()
    number_of_pieces = len(file_hashes)
    torrents[movie_name]={
    "Announce" : r"Virtual_Network\mp4_files", # raw string: "\m" is not a valid escape sequence
    "file location" : file_loc,
    "file name" : movie_name,
    "root hash of file" : file_root,
    "ip of seeder" : current_user,
    "file length in bytes" : length,
    "piece length in bytes" : MB,
    "number of pieces" : number_of_pieces,
    "hashes" : [piece_hash.hex() for piece_hash in file_hashes],
    # one list of holders per piece; at first only the seeder holds each piece
    "nodes" : [[current_user] for _ in range(number_of_pieces)]
    }

# Distribute pieces of files among users randomly

In [13]:
#now, for each of the other non-seeding users, randomly pick 3-6 pieces and give to them

# path relative to the working directory, like r'Virtual_Network\Users' below
# (it used to be an absolute path into one particular user's home directory)
pieces_locations = glob.glob(r'Virtual_Network\pieces\**')

# the first num_of_files users are the seeders (the i'th movie went to the i'th user)
for ip in lst[num_of_files:]:
    # never ask random.sample for more pieces than exist (it raises ValueError)
    how_many = min(random.randint(3,6), len(pieces_locations))
    pieces_for_user = random.sample(pieces_locations, how_many)
    for piece_loc in pieces_for_user:
        shutil.copyfile(piece_loc, r'Virtual_Network\Users'+os.sep+ip+os.sep+piece_name(piece_loc))

# update torrent files accordingly

In [None]:
# we have a function that updates the torrent files from the network
# it goes over the network and updates the nodes section of each torrent, which says which user holds which pieces
# first we create a dictionary of torrents
# the keys are the names of the files, and the values are themselves dictionaries with the file details
# later we keep the torrents as json files in the Virtual_Network folder, one for each file


In [14]:
# write every torrent to disk as a json file, one file per movie
# this cell can be run again whenever the torrents dictionary has been updated
torrents_folder = r"Virtual_Network\torrents"
for key, torrent_details in torrents.items():
    torrent_path = torrents_folder+os.sep+key+'_torrent.txt'
    with open(torrent_path, 'w') as outfile:
        outfile.write(json.dumps(torrent_details))

In [17]:
# now we update the torrents: for every non-seeding user, record the pieces found in that user's folder
# the first num_of_files users are the seeders; they are already listed in the nodes of their movie
for cur_user in lst[num_of_files:]:
    # relative path (it was an absolute path into one user's home directory)
    # with a single separator before the wildcard
    user_pieces = glob.glob(r'Virtual_Network\Users'+os.sep+cur_user+os.sep+'**')
    for current_piece in user_pieces:
        current_mov_name, current_piece_number = parse_piece(piece_name(current_piece))
        update_torrent(cur_user, current_mov_name, current_piece_number)

In [18]:
# now we write the updated torrents to disk again, like we did before
# the folder is relative to the working directory, the same one used by the
# first save, so both writes go to the same files on any machine
torrents_folder = r"Virtual_Network\torrents"
for key in torrents:
    with open(torrents_folder+os.sep+key+'_torrent.txt', 'w') as outfile:
        json.dump(torrents[key], outfile)


In [None]:
# if necessary, we read the latest torrents back from disk using json.load()
# with open('data.txt') as json_file:
#     data = json.load(json_file)