# Solutions to Exercises

This notebook is based on Anna-Lena Lamprecht's CoTaPP repository (https://github.com/annalenalamprecht/CoTaPP). Some modifications were made.

## Unit 5.1: Error Handling

### Challenge

In [None]:
import csv

def print_selected_municipalities():
    """Print the municipalities with an average household income above 40000.

    Reads the tab-separated file ``data/dutch_municipalities.csv`` and prints
    one ``municipality: province`` line for every row whose
    ``avg_household_income_2012`` value is greater than 40000. Rows whose
    income value is missing or is not an integer are reported with a
    ``No INCOME for --> ...`` line instead of stopping the run.
    """
    # newline="" leaves newline handling to the csv module, as its
    # documentation recommends for files handed to a reader
    with open("data/dutch_municipalities.csv", "r", newline="") as csvfile:
        csvreader = csv.DictReader(csvfile, delimiter='\t')
        for row in csvreader:
            label = f'{row["municipality"]}: {row["province"]}'

            # keep the try body down to the one conversion that can fail
            try:
                income = int(row["avg_household_income_2012"])
            except (ValueError, TypeError):
                # ValueError: the field is empty or not a number
                # TypeError: the row is shorter than the header, so
                # DictReader filled the missing field with None
                print(f'No INCOME for --> {label}')
                continue

            if income > 40000:
                print(label)

In [None]:
# run the challenge solution; the function prints its results itself
print_selected_municipalities()

### 1. Interview Anonymization

In [None]:
interview_file = "data/interview-with-a-syrian-refugee.txt"
new_file = "data/interview-with-a-syrian-refugee-anonymized.txt"

try:
    # load the complete interview into memory
    with open(interview_file, mode="r") as source:
        text = source.read()

    # swap the interviewee's name for a pseudonym ...
    anonymized = text.replace("Samira", "Amal")

    # ... and store the obfuscated interview in the new file
    with open(new_file, mode="w") as target:
        target.write(anonymized)

except FileNotFoundError:
    # tell the user that the interview file could not be found
    print(f"File {interview_file} not found.")

except Exception as err:
    # any other problem: show the message carried by the exception
    print("Something went wrong...")
    print(err)

### 2. Randomized Story-Telling

In [None]:
import pandas as pd
import sys
import random

# set path to input file
infile = "data/inputs.csv"

try:
    # read input file as dataframe
    df_in = pd.read_csv(infile, sep=",")

# for any error, display the exception message and stop, because nothing
# below can work without the input data
except Exception as err:
    print("Something went wrong...")
    print(err)
    sys.exit()


# ask user how many sentences should be created; keep asking until the
# answer can be converted to an integer
while True:
    try:
        number = int(input("How many sentences do you want to create? "))
        break
    except ValueError:
        print("That was no valid number. Try again.")

# read_csv numbered the rows 0, 1, 2, ... and every column has the same
# number of rows, so the highest row label is computed once up front
last_row = len(df_in) - 1

# create the desired number of sentences (none for zero or a negative number)
for _ in range(number):

    # select a random value for each of the four sentence elements
    who, does_what, how, where = [
        df_in.loc[random.randint(0, last_row), column]
        for column in ("who", "does what", "how", "where")
    ]

    print(f"{who} {does_what} {how} {where}.")

### 3. Population and Universities per Province

In [None]:
import pandas as pd
import sys

# set paths to input and output file
infile = "data/dutch_municipalities.csv"
outfile = "data/dutch_provinces.csv"


try:
    # read input file as dataframe
    df_in = pd.read_csv(infile, sep="\t")

# for any error, display the exception message and stop, because nothing
# below can work without the input data
except Exception as err:
    print("Something went wrong...")
    print(err)
    sys.exit()

# sum up population and universities per province
# (DataFrame.append, which used to add one row per province here, no longer
# exists in pandas 2.0 and later; groupby does the aggregation in one step
# and returns the provinces sorted by name)
df_out = (
    df_in.groupby("province", as_index=False)[["population", "university"]]
    .sum()
    .rename(columns={"university": "universities"})
)

try:
    # save new dataframe as csv file
    df_out.to_csv(outfile, index=False)

# for any error, display the exception message
except Exception as err:
    print("Something went wrong...")
    print(err)
    sys.exit()
    

# Another possible, but longer solution is with the csv package and 
# dictionaries, as shown below (without try/except error handling).
#
# import csv
#
## create two empty dictionaries to collect the aggregated data
#universities_per_province = {}
#population_per_province = {}
#
## read in the data and iterate over all rows, adding up
## population and university numbers per province
#with open("dutch_municipalities.csv", "r") as csvfile:
#    csvreader = csv.DictReader(csvfile, delimiter='\t')
#    for row in csvreader:
#        if row["province"] not in universities_per_province:
#            universities_per_province[row["province"]] = int(row["university"])