In [None]:
import pandas as pd
import numpy as np

In [None]:
from google.colab import drive

# Mount Google Drive under this directory; force_remount=True re-mounts
# even when a mount from an earlier run is still present.
prefix = '/content/drive'
drive.mount(prefix, force_remount=True)

Now, you should see a directory named "drive" when you click the Files icon on the left. Expand this directory and navigate to the location where you saved the life expectancy datasets. Control-click (right-click on a PC) each file and select "Copy path". Then, paste the paths into the `scotland_fp` and `england_ni_wales_fp` variables in the cell below and run the cell.

In [None]:
# Locations of the two input CSV files on the mounted Google Drive
england_ni_wales_fp = '/content/drive/MyDrive/CIS550/datasets/uk_life_expectancy_2020-22.csv'
scotland_fp = '/content/drive/MyDrive/CIS550/datasets/scotland_life_expectancy.csv'

Each dataset is stored as a CSV file, so we'll load them into `DataFrame`s using `pandas.read_csv` in the code cells below. `Pandas` also defines a number of other [IO functions](https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html) that load the contents of common file types directly into `DataFrames`.

In [None]:
# Read the Scotland CSV and preview its first five rows
data_scotland = pd.read_csv(filepath_or_buffer=scotland_fp)
data_scotland.head(5)

In [None]:
# Read the England / Northern Ireland / Wales CSV, decoding it as Latin-1
# (presumably the file is not valid UTF-8 -- confirm against the source file),
# then preview its first five rows.
data_england_ni_wales = pd.read_csv(
    england_ni_wales_fp,
    encoding='latin-1',
)
data_england_ni_wales.head(5)

In [None]:
# Rename columns in the Scotland data to match the other dataset
data_scotland = data_scotland.rename(columns={
    'Council area name': 'Area name',
    'Council area code': 'Area code',
    'Females': 'Female',
    'Males': 'Male'
})

# Add missing columns to the Scotland dataset
data_scotland['Country'] = 'Scotland'
data_scotland['Area type'] = 'Local Areas'

# Reshape from one column per sex to one row per (area, sex).
# value_vars is given explicitly so that only the Female/Male columns are
# unpivoted: without it, melt() unpivots every non-id column, so any extra
# column in the CSV would end up as a bogus 'Sex' value. It also makes an
# accidental re-run of this cell fail with a KeyError instead of silently
# re-melting the already reshaped frame.
# NOTE(review): assumes Female/Male are the only value columns wanted from the
# Scotland file -- confirm against the CSV.
data_scotland = data_scotland.melt(id_vars=['Country', 'Area type', 'Area code', 'Area name'],
                                   value_vars=['Female', 'Male'],
                                   var_name='Sex', value_name='Life expectancy (years)')

In [None]:
# Stack the England / Northern Ireland / Wales rows and the Scotland rows into
# one frame; ignore_index=True gives the result a fresh 0..n-1 index.
frames_to_stack = [data_england_ni_wales, data_scotland]
combined_data = pd.concat(frames_to_stack, ignore_index=True)

In [None]:
# Handle missing values: replace gaps with '' in non-numeric columns only.
# Filling the whole frame with '' would put strings into numeric columns and
# turn them into object dtype. NaN left in numeric columns is still written
# as an empty field by to_csv (its default na_rep is ''), so the saved CSV
# looks the same.
non_numeric_columns = combined_data.select_dtypes(exclude='number').columns
combined_data = combined_data.fillna({column: '' for column in non_numeric_columns})

In [None]:
# Preview the first five rows of the combined dataframe
combined_data.head(n=5)

In [None]:
# Save the combined dataframe to a CSV file next to the input files.
# The path uses the same '/content/drive/MyDrive' mount location as the input
# paths (the original used 'My Drive', which was inconsistent with them).
# index=False leaves the row index out of the file.
combined_data.to_csv('/content/drive/MyDrive/CIS550/datasets/combined_uk_life_expectancy.csv', index=False)