In [None]:
import pandas as pd
import io
import os
import requests
import calendar

## Download Data

In [None]:
def download_csv_files(year: int, month: int, file_name) -> None:
    """Download one monthly CSV file and store it as ``./data/{file_name}.csv``.

    The file is fetched from the download history of
    use-land-property-data.service.gov.uk; the URL is built from the English
    month name, the year and ``file_name``.

    Args:
        year: Calendar year of the release, e.g. ``2022``.
        month: Month number (1-12); converted to its name via
            ``calendar.month_name``.
        file_name: Name of the remote file without the ``.csv`` extension. The
            same name is used for the local copy.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or a timeout.
    """
    request_url = f"https://use-land-property-data.service.gov.uk/datasets/td/download/history/{calendar.month_name[month]}%20{year}/{file_name}.csv"
    # A timeout keeps a stalled connection from hanging the whole run.
    response = requests.get(request_url, timeout=60)
    # Fail loudly instead of saving an HTML error page as if it were CSV data.
    response.raise_for_status()

    # Make sure the target directory exists on a fresh checkout.
    os.makedirs("./data", exist_ok=True)
    # The context manager closes the file even if the write fails.
    with open(f'./data/{file_name}.csv', 'wb') as csv_file:
        csv_file.write(response.content)

## Load Data

In [None]:
def load_csv_data (file_name: str) -> pd.DataFrame:
    """Read ``./data/{file_name}.csv`` into a DataFrame.

    The column names are taken from the row at position 4 (0-based), so the
    lines before it are not part of the data. ``index_col=False`` tells pandas
    not to use the first column as the index.

    Args:
        file_name: Name of the file inside ``./data`` without the ``.csv``
            extension.

    Returns:
        The parsed table.
    """
    csv_path = f"./data/{file_name}.csv"
    read_options = {"sep": ',', "header": 4, "index_col": False}
    return pd.read_csv(csv_path, **read_options)

## Cleaning

In [None]:
def drop_total_rows(df: pd.DataFrame) -> pd.DataFrame:
    """Return only the rows whose "Account Customer" does not contain "Total".

    Args:
        df: Frame with an "Account Customer" column holding text.

    Returns:
        The subset of ``df`` for which the substring test is exactly ``False``.
    """
    has_total = df["Account Customer"].str.contains("Total")
    # Compare against False rather than negating: only rows whose test result
    # is exactly False are kept, and the comparison also works when the result
    # is not a plain boolean column.
    keep_row = has_total.eq(False)
    return df.loc[keep_row]

In [None]:
def drop_total_column(df: pd.DataFrame) -> pd.DataFrame:
    """Return a new frame that has every column of ``df`` except "Total".

    The input frame is not modified. ``DataFrame.drop`` raises ``KeyError``
    when ``df`` has no "Total" column.
    """
    without_total = df.drop(columns=["Total"])
    return without_total

## Transformation

In [None]:
def add_month_column_to_dataframe (df: pd.DataFrame, year, month) -> pd.DataFrame:
    """Add a ``date_added`` column holding the first day of the given month.

    The timestamp is built from its numeric components rather than parsed from
    a "01.MM.YYYY" string: without an explicit format pandas reads such a
    string month-first, which turns e.g. February 2022 into 2 January 2022.

    Args:
        df: Frame to tag. It is modified in place (the column is added to, or
            overwritten on, this very object).
        year: Calendar year, e.g. ``2022``.
        month: Month number (1-12).

    Returns:
        The same ``df`` object, now carrying the ``date_added`` column.

    Raises:
        ValueError: If ``year``/``month`` do not form a valid date.
    """
    first_of_month = pd.Timestamp(year=int(year), month=int(month), day=1)
    # Assigning the scalar gives every row the same datetime value.
    df["date_added"] = first_of_month
    return df

## Save Data

In [None]:
def save_cleand_transformed_data(df: pd.DataFrame, file_name) -> None:
    """Write ``df`` to ``./data/{file_name}.csv`` without the index column.

    Args:
        df: Frame to persist.
        file_name: Target file name inside ``./data`` without the ``.csv``
            extension. An existing file at that path is overwritten.
    """
    target_path = f"./data/{file_name}.csv"
    df.to_csv(target_path, index=False)

In [None]:
if __name__=="__main__":
    # Releases to process: every (year, month) combination below.
    years = [2022]
    months = list(range(1,3))  # months 1 and 2
    for year in years:
        for month in months:
            file_name = f"Number-and-types-of-applications-by-all-account-customers-{year}-{month:02}"
            download_csv_files(year, month, file_name)
            # Each stage gets its own name so intermediate frames stay inspectable.
            raw_df = load_csv_data(file_name)
            without_total_rows_df = drop_total_rows(raw_df)
            without_totals_df = drop_total_column(without_total_rows_df)
            dated_df = add_month_column_to_dataframe(without_totals_df, year, month)
            # NOTE: the cleaned frame is written to the same ./data/{file_name}.csv
            # path the raw download used, so it replaces the raw file.
            # The save function returns None, so its result is not bound to a name.
            save_cleand_transformed_data(dated_df, file_name)
