# Assignment 1
## Group Members:
* ### Nils Dunlop, e-mail: gusdunlni@student.gu.se
* ### Francisco Alejandro Erazo Piza, e-mail: guserafr@student.gu.se

## Problem 1: Plinth Volume Calculator

#### Define user input function

In [25]:
def ask_user_for_inputs():
    """Prompt for the plinth's top and ground widths until both are valid.

    Both values are requested again whenever an entry is not a whole number,
    either value is negative, or the width at the highest level exceeds the
    width at the ground level.

    Returns:
        tuple: ``(width_highest_level, width_ground_level)`` as integers.
    """
    while True:
        # Keep the try body limited to the conversions that can actually raise.
        try:
            width_highest_level = int(input("Enter the width of the plinth at the highest level in meters: "))
            width_ground_level = int(input("Enter the width of the plinth at the ground level in meters: "))
        except ValueError:
            # int() only reports "invalid literal for int() ..."; say what is expected instead.
            problem = "Widths must be whole numbers"
        else:
            if width_highest_level < 0 or width_ground_level < 0:
                problem = "Width values cannot be negative"
            elif width_highest_level > width_ground_level:
                # A plinth narrows towards the top, so the top cannot be the wider end.
                problem = "Width at highest level cannot be greater than width at ground level"
            else:
                return width_highest_level, width_ground_level

        # Zero is accepted by the checks above, hence "non-negative" rather than "positive".
        print(f"Invalid input: {problem}. Please enter valid non-negative integers.")

# Prompt the user and keep both validated widths; nothing is printed here,
# the values are reported by the volume cell further down
width_highest, width_ground = ask_user_for_inputs()

Enter the width of the plinth at the highest level in meters: 3
Enter the width of the plinth at the ground level in meters: 8


#### Define function to calculate plinth volume

In [26]:
def floor_volume(width_ground_level, width_highest_level, height=1, depth=2):
    """Return the total volume of a stepped plinth.

    The plinth has one level for every integer width from
    ``width_highest_level`` up to ``width_ground_level`` (inclusive); each
    level contributes ``width * height * depth``.

    Args:
        width_ground_level: Integer width of the bottom level.
        width_highest_level: Integer width of the top level.
        height: Height of every level (defaults to 1, the original constant).
        depth: Depth of every level (defaults to 2, the original constant).

    Returns:
        The summed volume of all levels, or 0 when the top width is greater
        than the ground width (there are no levels to add up).
    """
    # range() is empty when the top is wider than the ground, giving 0.
    combined_width = sum(range(width_highest_level, width_ground_level + 1))
    return combined_width * height * depth

#### Call function to calculate plinth volume

In [27]:
# Compute the plinth volume from the widths collected earlier
volume = floor_volume(width_ground_level=width_ground, width_highest_level=width_highest)

# Report the inputs first, then the computed volume
print(
    f"Width at highest level: {width_highest}",
    f"Width at ground level: {width_ground}",
    sep="\n",
)
print("Volume of the plinth: ", volume)

Width at highest level: 3
Width at ground level: 8
Volume of the plinth:  66


## Problem 2: Extracting and Processing EU Health Care Data

#### Import Necessary Libraries

In [28]:
import tarfile
import pandas as pd
import os

#### Define Helper Functions

In [29]:
def extract_tar_gz(file_name, extract_to):
    """Extract a gzip-compressed tar archive into a directory.

    Args:
        file_name: Path of the ``.tar.gz`` archive to read.
        extract_to: Directory that receives the archive's contents.

    Raises:
        tarfile.TarError: If the archive cannot be read or, when extraction
            filters are available, a member would be written outside
            ``extract_to``.
    """
    with tarfile.open(file_name, 'r:gz') as archive:
        if hasattr(tarfile, "data_filter"):
            # The "data" filter rejects absolute paths, ".." escapes and
            # links pointing outside the destination directory.
            archive.extractall(path=extract_to, filter="data")
        else:
            # Older interpreters without extraction filters: plain extraction.
            archive.extractall(path=extract_to)

def merge_files_to_csv(directory, output_file):
    """Merge every data file in ``directory`` into a single CSV file.

    Each file is read with ``pd.read_csv`` and gets a ``Country`` column
    holding the file name without its extension. Files are processed in
    sorted name order so the merged output is the same on every run.
    Sub-directories and hidden files (names starting with ``.``) are skipped.

    Args:
        directory: Directory containing one data file per country.
        output_file: Path of the combined CSV file to write (no index column).

    Raises:
        ValueError: If ``directory`` contains no data files to merge.
    """
    all_data = []

    # os.listdir() returns entries in arbitrary order; sort for reproducibility.
    for entry in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, entry)

        # Skip folders and hidden artefacts such as ".DS_Store" or "._name"
        # that archive tools may leave next to the real data files.
        if entry.startswith('.') or not os.path.isfile(file_path):
            continue

        country_name = os.path.splitext(entry)[0]  # File name without extension is the country
        data = pd.read_csv(file_path)
        data['Country'] = country_name  # Add a new 'Country' column
        all_data.append(data)

    if not all_data:
        # pd.concat would raise a less helpful ValueError on an empty list.
        raise ValueError(f"No data files found in {directory!r}")

    combined_data = pd.concat(all_data, ignore_index=True)
    combined_data.to_csv(output_file, index=False)

def services_per_country(csv_file):
    """Count the rows (health care services) recorded for each country.

    Reads ``csv_file`` and returns a DataFrame with one row per distinct
    ``Country`` value and the columns ``Country`` and ``Number of Services``.
    """
    services = pd.read_csv(csv_file)
    rows_per_country = services.groupby('Country').size()
    # Turn the grouped Series back into a two-column table
    return rows_per_country.reset_index(name='Number of Services')

#### Extract Data

In [30]:
# Resolve the archive and target paths relative to the current working directory
cwd = os.getcwd()
extract_path = os.path.join(cwd, "Extracted")
tar_path = os.path.join(cwd, "A1.tar.gz")

# Create the extraction directory only if it does not already exist;
# exist_ok avoids the gap between a separate existence check and the creation
os.makedirs(extract_path, exist_ok=True)

# Extract tar.gz
extract_tar_gz(tar_path, extract_path)

#### Merge Country Datasets and Present Output

In [31]:
# Name of the combined CSV and the folder holding the per-country files
output_csv = 'combined_data.csv'
data_dir = os.path.join(extract_path, 'data')

# Write all country files into the single combined CSV
merge_files_to_csv(directory=data_dir, output_file=output_csv)

# Show how many services are listed for each country
df = services_per_country(csv_file=output_csv)
print(df)

        Country  Number of Services
0       Austria                 266
1       Belgium                 272
2      Bulgaria                 673
3       Croatia                  61
4        Cyprus                  11
5       Czechia                 248
6       Denmark                  61
7       Finland                 372
8        France                5368
9       Germany                2809
10       Greece                 125
11      Hungary                 135
12      Ireland                 199
13        Italy                1283
14       Latvia                  44
15    Lithuania                 142
16   Luxembourg                  21
17        Malta                   9
18  Netherlands                 245
19       Norway                 640
20       Poland                 466
21     Portugal                  94
22      Romania                 179
23     Slovakia                 128
24     Slovenia                  31
25        Spain                 809
26       Sweden             