# ***RHOBS Internship test V2***

<a id = "1"></a>
# <font style="color:blue;text-align:center;font-size:25px"><u>Importing/Loading the DataSet:</u></font>

In [None]:
# Import required libraries
import urllib.request
import zipfile
import os

# Destination folder for the downloaded archive (created if it is missing)
download_dir = "/content/"
os.makedirs(download_dir, exist_ok=True)

# Remote location of the zipped dataset, and where the archive is stored locally
data_url = "http://rhobs-public.s3-website.eu-west-3.amazonaws.com/data.zip"
zip_file_path = os.path.join(download_dir, "data.zip")

# Fetch the archive. urlretrieve returns a (local_path, headers) tuple; as the
# last expression of the cell, that tuple is what the notebook displays.
urllib.request.urlretrieve(url=data_url, filename=zip_file_path)


('/content/data.zip', <http.client.HTTPMessage at 0x7be61873ee30>)

In [None]:
# Extract the contents of the zip file into the "data" folder inside download_dir.
# The target path is built with os.path.join (like zip_file_path) so that it
# stays correct even if download_dir is changed to a value without a trailing slash.
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(os.path.join(download_dir, "data"))


In [None]:
# List the content of the "data" directory located inside download_dir.
# os.path.join keeps the path valid whether or not download_dir ends with "/".
# This is the last expression of the cell, so the notebook displays the result.
os.listdir(os.path.join(download_dir, "data"))


['-']

In [None]:
# List the files inside the data folder together with their sizes and permissions
# (IPython shell escape; in the recorded run the only entry is a file named "-")
!ls -al /content/data

total 18252440
-rw-r--r-- 1 root root 18690484646 Feb 21 11:31 -
drwxr-xr-x 2 root root        4096 Feb 21 11:27 .
drwxr-xr-x 1 root root        4096 Feb 21 11:27 ..


In [None]:
# Rename the extracted file from "-" to data.txt so it has a meaningful name
!mv /content/data/- /content/data/data.txt

In [None]:
# Check that the file is now renamed with a .txt extension
!ls -al /content/data

total 18252440
drwxr-xr-x 2 root root        4096 Feb 21 11:31 .
drwxr-xr-x 1 root root        4096 Feb 21 11:27 ..
-rw-r--r-- 1 root root 18690484646 Feb 21 11:31 data.txt


<a id = "2"></a>
# <font style="color:blue;text-align:center;font-size:25px"><u>Reading the data:</u></font>

In [1]:
import chardet

# Only the first 10000 bytes are sampled: enough for chardet to guess the
# encoding without reading the whole file.
with open("/content/data/data.txt", "rb") as data:
  sample = data.read(10000)

# Print information about the file (detected encoding, confidence, language)
print(chardet.detect(sample))


{'encoding': 'utf-8', 'confidence': 0.99, 'language': ''}


In [23]:
# Preview the first 20 lines of the file to understand its layout.
# The file was detected as UTF-8 and contains accented names, so the encoding
# is given explicitly instead of relying on the platform default.
with open("/content/data/data.txt", "r", encoding="utf-8") as data:
  print("##############################\nthe data in the file\n##############################")
  print()
  for line_number in range(1, 21):
    # readline() fetches a single line, so the file is never loaded entirely
    line = data.readline().strip()
    print(f"line_{line_number}: {line}")


##############################
the data in the file
##############################

line_1: Your mission shall you accept it is to
line_2: * Explain every column.
line_3: * Find the 3 richest person.
line_4: 
line_5: This is a simple test, it should not take you more 30minutes to code.
line_6: 
line_7: 
line_8: 
line_9: a,b,c,d,e,f,g
line_10: 0,longue description 0,Victor,Évrat,1779,0,1779
line_11: 1,longue description 1,Ugo,Ogier,3218,0,3218
line_12: 2,longue description 2,Gerard,Zola,3909,0,3909
line_13: 3,longue description 3,Ingrid,Quesada,2672,0,2672
line_14: 4,longue description 4,Xavier,Yvars,4224,0,4224
line_15: 5,longue description 5,Régis,Ferry,3479,0,3479
line_16: 6,longue description 6,Thierry,Zola,0,-402,-402
line_17: 7,longue description 7,Elodie,Hénaut,2623,0,2623
line_18: 8,longue description 0,Xavier,Zola,1249,0,1249
line_19: 9,longue description 1,Pascal,Cannet,1986,0,1986
line_20: 10,longue description 2,Victor,Martin,4417,0,4417


**=> As we can see, the actual data starts at line 10: the first 8 lines contain the task description (and blank lines) and line 9 contains the column header, so these 9 lines must be skipped when reading the data.**
- Note that total wealth in the last column is an integer value

<a id = "3"></a>
# <font style="color:blue;text-align:center;font-size:25px"><u>Explanation of every column:</u></font>

**Columns Explanation** :
* **a:** Index or identifier for each person in the dataset.

* **b:** Contains a "longue description" or a detailed description of each person.

*  **c:** The first name of each person.

* **d:** The last name of each person.

* **e:** Represents the total wealth or revenues of each person (numerical value).

* **f:** Represents an additional financial value or costs for each person (numerical value).

* **g:** The sum of columns e and f, representing the final net worth or total wealth after any adjustments (numerical value).

# Find the top 3 richest persons

In [28]:
# Dictionary mapping "firstname lastname" -> highest net wealth seen so far for
# that name. It never holds more than NUM_HIGHEST entries, so memory use stays
# constant however many rows the file contains.
# Rows sharing the same full name are treated as the same person, and a person
# is ranked by their single highest net wealth value (column g).
top_richest = {}
NUM_HIGHEST = 3
# The first 9 lines are the task description and the column header, not data
HEADER_LINES = 9
# Stays 0 if the file is empty; otherwise ends as the number of lines read
line_counter = 0
# The file was detected as UTF-8 and the names contain accented characters, so
# the encoding is given explicitly instead of relying on the platform default.
# The context manager closes the file automatically.
with open("/content/data/data.txt", "r", encoding="utf-8") as data:
  # Iterating over the file object yields one line at a time until the end of
  # the file, so the whole file is never loaded in memory.
  for line_counter, line in enumerate(data, start=1):
    # Remove leading and trailing whitespace (including the newline)
    line = line.strip()
    # Skip the header lines and any line that is empty after stripping
    if line_counter <= HEADER_LINES or not line:
      continue
    # Fields are comma separated: the 3rd and 4th are the first and last name,
    # the last one is the net wealth; the others are not needed here.
    _, _, fname, lname, *_, net_wealth = line.split(",")
    fullname = f"{fname} {lname}"
    # Convert the net wealth to an integer value
    net_wealth = int(net_wealth)
    if fullname in top_richest:
      # Person already tracked: keep only their highest wealth
      if net_wealth > top_richest[fullname]:
        top_richest[fullname] = net_wealth
    elif len(top_richest) < NUM_HIGHEST:
      # The dictionary is not full yet: simply add the current person
      top_richest[fullname] = net_wealth
    else:
      # The dictionary is full: the current person only enters if they are
      # richer than the poorest tracked person, who is then removed.
      min_wealth_person = min(top_richest, key=top_richest.get)
      if net_wealth > top_richest[min_wealth_person]:
        del top_richest[min_wealth_person]
        top_richest[fullname] = net_wealth
print(f"The number of lines in the file : {line_counter}")


The number of lines in the file : 300000009


In [29]:
# Sort the top richest persons by their net wealth, richest first.
# dict(...) accepts both the original dictionary and the list of
# (fullname, net_wealth) pairs this cell produces, so the cell can be re-run
# without failing on a missing .items() method.
top_richest = sorted(dict(top_richest).items(), key=lambda item: item[1], reverse=True)
# Ranks are displayed starting from 1
for rank, (fullname, net_wealth) in enumerate(top_richest, start=1):
  print(f"rank {rank} goes to the person: {fullname}, with total wealth = {net_wealth}.")


rank 1 goes to the person: Zoé Walliand, with total wealth = 893775958.
rank 2 goes to the person: Gerard Parmentier, with total wealth = 892220563.
rank 3 goes to the person: Océane Urbain, with total wealth = 891910460.


Done by:
- AIT YOUB Abdelmoughit

# ***END***