# Importing required libraries

In [1]:
import os
import pandas as pd

# Reading data from file (csv database)

### Function to read raw data from the provided file

In [2]:
def read_table_data(file_path):
    """
    This function reads the CSV file provided and returns its contents
    Input : File path (str or os.PathLike) of the CSV file
    Output : Pandas dataframe, as parsed by pd.read_csv with default options
    Raises : AssertionError if the path is empty or is not an existing regular file
    """
    # Raise explicitly instead of using `assert` statements: those are removed
    # when Python runs with -O, which would silently disable the validation.
    if len(os.fspath(file_path)) == 0:
        raise AssertionError("Empty file path provided.")

    # isfile (rather than exists) so that a directory path is rejected here with
    # the intended message instead of failing later inside pd.read_csv.
    if not os.path.isfile(file_path):
        raise AssertionError("File not found on the provided file path.")

    return pd.read_csv(file_path)

### Reading data from the file 'data.csv'

In [3]:
# Load the source CSV into `data`; later cells rename its columns and split it by country.
# The path is relative, so it is resolved against the kernel's working directory.
data = read_table_data("data.csv")

### Testing read_table_data function for wrong input

In [4]:
# An empty path must be rejected. The expected AssertionError is caught and
# printed so that "Restart & Run All" can continue past this demonstration cell.
try:
    test_data_1 = read_table_data("")
except AssertionError as error:
    print("AssertionError:", error)

AssertionError: Empty file path provided.

In [5]:
# A path that does not exist must be rejected. The expected AssertionError is caught
# and printed so that "Restart & Run All" can continue past this demonstration cell.
try:
    test_data_2 = read_table_data("data")
except AssertionError as error:
    print("AssertionError:", error)

AssertionError: File not found on the provided file path.

# Renaming columns

### Columns before renaming

In [6]:
# Bare last expression: the notebook displays the column Index of the frame as loaded.
data.columns

Index(['Customer_Name', 'Customer_Id', 'Open_Date', 'Last_Consulted_Date',
       'Vaccination_Id', 'Dr_Name', 'State', 'Country', 'DOB', 'Is_Active'],
      dtype='object')

### renaming

In [7]:
# Replace the source column names with the short names used by the following cells.
# "State" and "DOB" map to themselves, so those two columns keep their names.
# NOTE(review): "Cust_I" and "County" look like truncations of "Cust_Id" / "Country",
# but later cells select the "County" column - confirm against the target schema
# before changing either name.
data = data.rename(
    columns={
        "Customer_Name": "Name",
        "Customer_Id": "Cust_I",
        "Open_Date": "Open_Dt",
        "Last_Consulted_Date": "Consul_Dt",
        "Vaccination_Id": "VAC_ID",
        "Dr_Name": "DR_Name",
        "State": "State",
        "Country": "County",
        "DOB": "DOB",
        "Is_Active": "FLAG",
    }
)

### Columns after renaming

In [8]:
# Bare last expression: the notebook displays the column Index again, now with the renamed columns.
data.columns

Index(['Name', 'Cust_I', 'Open_Dt', 'Consul_Dt', 'VAC_ID', 'DR_Name', 'State',
       'County', 'DOB', 'FLAG'],
      dtype='object')

# Creating a separate dataframe for each country

### finding all the unique countries in the dataframe

In [9]:
# Distinct values of the "County" column, in order of first appearance,
# unpacked into a plain Python list.
list_of_countries = [*data["County"].unique()]

### Collecting the rows that belong to each country

In [10]:
# One DataFrame per entry of list_of_countries, appended in the same order so that
# position i in this list corresponds to list_of_countries[i].
list_of_dataframe_of_countries = []

for country in list_of_countries:
    # Boolean mask selecting the rows whose "County" value equals this country.
    country_mask = data["County"] == country
    # The "County" column is dropped because it is constant within each subset.
    filtered_dataframe = data.loc[country_mask].drop(columns=["County"])
    list_of_dataframe_of_countries.append(filtered_dataframe)

### Saving all dataframes to files 

In [11]:
# Create the output folder if it is missing; exist_ok makes re-running this cell
# safe without a separate check-then-create step.
os.makedirs("./countries", exist_ok=True)

# list_of_dataframe_of_countries was built in the same order as list_of_countries,
# so pairing the two lists gives every frame its country name without indexing.
for country_name, dataframe_of_country in zip(list_of_countries, list_of_dataframe_of_countries):
    # write to folder countries
    # file name is name of the country with csv extension; the row index is not written
    file_name = os.path.join("./countries", country_name + ".csv")
    dataframe_of_country.to_csv(file_name, index=False)

### Check if all the countries have been assigned a separate file

In [12]:
# Expected file names: one "<country>.csv" per country, sorted for comparison.
list_of_countries_to_write = sorted(country + ".csv" for country in list_of_countries)
# File names actually present in the output folder, sorted the same way.
list_of_countries_written = sorted(os.listdir("./countries/"))
# Compare the sorted lists themselves. list.sort() sorts in place and returns None,
# so comparing the results of two .sort() calls is always None == None and can never fail.
assert list_of_countries_to_write == list_of_countries_written, "Written files mismatch the countries to write"