###  MRET xlsx to Tidy Data

Take the ONS "Trade in goods MRETS (all BOP - EU2013): time series" spreadsheet (`mret.xlsx`) and convert it to Tidy Data in CSV format.

In [1]:
from databaker.framework import *
import pandas as pd

In [2]:
import requests
from pathlib import Path
from io import BytesIO
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import LastModified

# HTTP session backed by an on-disk cache in ./.cache, so re-running the
# notebook can reuse previously downloaded files. The same session is used
# for every download in this notebook.
session = CacheControl(requests.Session(),
                       cache=FileCache('.cache'),
                       heuristic=LastModified())

inputURL = 'https://www.ons.gov.uk/file?uri=/economy/nationalaccounts/balanceofpayments/datasets/tradeingoodsmretsallbopeu2013timeseriesspreadsheet/current/mret.xlsx'

# Fail here, with the HTTP status in the error, instead of passing the body of
# an error response on to pd.read_excel in the next cell.
mrets_response = session.get(inputURL)
mrets_response.raise_for_status()
mretsExcel = BytesIO(mrets_response.content)

In [3]:
# Read the sheet without a header row so the metadata rows (Title, CDID, ...)
# stay in the frame as ordinary rows.
raw_sheet = pd.read_excel(mretsExcel, header=None)

# Row 1 holds the CDID of every series: use it for the column labels. The
# first column is labelled 'CDID' by that row but actually holds the time
# periods, so call it 'Period'.
tab = raw_sheet.rename(columns=raw_sheet.iloc[1]).rename(columns={'CDID': 'Period'})
tab

Unnamed: 0,Period,SESM,HCPC,SESX,SESQ,SESZ,SEUJ,SEUC,LKVB,HCRB,...,QALL,QALN,QALY,QALX,QALZ,QALU,SGRX,QALW,QALV,SGTK
0,Title,EU:BOP:Balance:SA:Semi-manufactures: SITC 5+6,Balance of payments: Trade in Goods: Oil Expor...,EU:BOP:EX:price index:NSA:Semi-manufactures: S...,EU:BOP:EX:SA:Semi-manufactures: SITC 5+6,EU:BOP:EX:volume index:SA:Semi-manufactures: S...,non-EU:BOP:EX:price index:NSA:Semi-manufacture...,non-EU:BOP:EX:SA:Semi-manufactures: SITC 5+6,EU(2004):BOP:IM:price index:NSA:Total trade in...,Balance of payments:Trade in Goods: North Amer...,...,BoP-consistent: Egypt: Exports: SA,BoP-consistent: Egypt: Balance: SA,Balance of payments: Trade in Goods: Ships: SI...,Balance of payments: Trade in Goods: Ships: SI...,Balance of payments: Ships: SITC 793:Balance: ...,Balance of payments: Trade in Goods: Aircraft:...,non-EU:BOP:EX:SA:Unspecified goods: SITC 9,Balance of payments: Trade in Goods: Aircraft:...,Balance of payments: Trade in Goods: Aircraft:...,non-EU:BOP:IM:SA:Unspecified goods: SITC 9
1,CDID,SESM,HCPC,SESX,SESQ,SESZ,SEUJ,SEUC,LKVB,HCRB,...,QALL,QALN,QALY,QALX,QALZ,QALU,SGRX,QALW,QALV,SGTK
2,PreUnit,,,,,,,,,,...,,,,,,,,,,
3,Unit,,,,,,,,,,...,,,,,,,,,,
4,Release Date,11-04-2018,11-04-2018,11-04-2018,11-04-2018,11-04-2018,11-04-2018,11-04-2018,11-04-2018,11-04-2018,...,11-04-2018,11-04-2018,11-04-2018,11-04-2018,11-04-2018,11-04-2018,11-04-2018,11-04-2018,11-04-2018,11-04-2018
5,Next release,10 May 2018,10 May 2018,10 May 2018,10 May 2018,10 May 2018,10 May 2018,10 May 2018,10 May 2018,10 May 2018,...,10 May 2018,10 May 2018,10 May 2018,10 May 2018,10 May 2018,10 May 2018,10 May 2018,10 May 2018,10 May 2018,10 May 2018
6,Important Notes,,,,,,,,,,...,,,,,,,,,,
7,1945,,,,,,,,,,...,,,,,,,,,,
8,1946,,,,,,,,,,...,,,,,,,,,,
9,1947,,,,,,,,,,...,,,,,,,,,,


In [4]:
# Rows 0-6 are per-series metadata (Title, CDID, PreUnit, Unit, Release Date,
# Next release, Important Notes); the observations start on row 7.
FIRST_OBSERVATION_ROW = 7

observations = tab.iloc[FIRST_OBSERVATION_ROW:].rename(columns={'CDID': 'Period'})
observations.head()

Unnamed: 0,Period,SESM,HCPC,SESX,SESQ,SESZ,SEUJ,SEUC,LKVB,HCRB,...,QALL,QALN,QALY,QALX,QALZ,QALU,SGRX,QALW,QALV,SGTK
7,1945,,,,,,,,,,...,,,,,,,,,,
8,1946,,,,,,,,,,...,,,,,,,,,,
9,1947,,,,,,,,,,...,,,,,,,,,,
10,1948,,,,,,,,,,...,,,,,,,,,,
11,1949,,,,,,,,,,...,,,,,,,,,,


In [5]:
# Wide -> long: one row per (Period, CDID) pair, keeping only the cells that
# actually hold an observation.
new_table = (
    observations
    .melt(id_vars=['Period'], var_name='CDID', value_name='OBS')
    .dropna()
    .reset_index(drop=True)
)
print(len(new_table))
new_table.tail(5)

269112


Unnamed: 0,Period,CDID,OBS
269107,2017 OCT,SGTK,612
269108,2017 NOV,SGTK,1091
269109,2017 DEC,SGTK,872
269110,2018 JAN,SGTK,648
269111,2018 FEB,SGTK,439


In [6]:
# Row 0 of the sheet holds the descriptive title of each series, indexed by
# CDID; look it up for every observation row.
title_by_cdid = tab.iloc[0]
new_table['Title'] = title_by_cdid.loc[new_table['CDID']].to_numpy()
new_table.head()

Unnamed: 0,Period,CDID,OBS,Title
0,1998,SESM,-2766,EU:BOP:Balance:SA:Semi-manufactures: SITC 5+6
1,1999,SESM,-2568,EU:BOP:Balance:SA:Semi-manufactures: SITC 5+6
2,2000,SESM,-2441,EU:BOP:Balance:SA:Semi-manufactures: SITC 5+6
3,2001,SESM,-3157,EU:BOP:Balance:SA:Semi-manufactures: SITC 5+6
4,2002,SESM,-4703,EU:BOP:Balance:SA:Semi-manufactures: SITC 5+6


In [7]:
new_table['Period'].unique()

array(['1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005',
       '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013',
       '2014', '2015', '2016', '2017', '1998 Q1', '1998 Q2', '1998 Q3',
       '1998 Q4', '1999 Q1', '1999 Q2', '1999 Q3', '1999 Q4', '2000 Q1',
       '2000 Q2', '2000 Q3', '2000 Q4', '2001 Q1', '2001 Q2', '2001 Q3',
       '2001 Q4', '2002 Q1', '2002 Q2', '2002 Q3', '2002 Q4', '2003 Q1',
       '2003 Q2', '2003 Q3', '2003 Q4', '2004 Q1', '2004 Q2', '2004 Q3',
       '2004 Q4', '2005 Q1', '2005 Q2', '2005 Q3', '2005 Q4', '2006 Q1',
       '2006 Q2', '2006 Q3', '2006 Q4', '2007 Q1', '2007 Q2', '2007 Q3',
       '2007 Q4', '2008 Q1', '2008 Q2', '2008 Q3', '2008 Q4', '2009 Q1',
       '2009 Q2', '2009 Q3', '2009 Q4', '2010 Q1', '2010 Q2', '2010 Q3',
       '2010 Q4', '2011 Q1', '2011 Q2', '2011 Q3', '2011 Q4', '2012 Q1',
       '2012 Q2', '2012 Q3', '2012 Q4', '2013 Q1', '2013 Q2', '2013 Q3',
       '2013 Q4', '2014 Q1', '2014 Q2', '2014 Q3', '2014

In [8]:
import re
# The three period label shapes handled below: '1998', '2017 OCT', '1998 Q1'.
YEAR_RE = re.compile(r'[0-9]{4}')
YEAR_MONTH_RE = re.compile(r'([0-9]{4})\s+(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)')
YEAR_QUARTER_RE = re.compile(r'([0-9]{4})\s+(Q[1-4])')

# Two-digit month number for each month abbreviation accepted by YEAR_MONTH_RE.
# Built once here rather than on every call of time2period.
MONTH_NUM = {'JAN': '01', 'FEB': '02', 'MAR': '03', 'APR': '04', 'MAY': '05', 'JUN': '06',
             'JUL': '07', 'AUG': '08', 'SEP': '09', 'OCT': '10', 'NOV': '11', 'DEC': '12'}

# from https://stackoverflow.com/questions/597476/how-to-concisely-cascade-through-multiple-regex-statements-in-python
class Re(object):
    """Remember the result of the most recent ``fullmatch`` call.

    Lets a match be tested in an ``if`` condition and its groups read
    afterwards through ``last_match``. ``time2period`` no longer needs it;
    the class is retained so the name stays defined in the notebook.
    """

    def __init__(self):
        self.last_match = None

    def fullmatch(self, pattern, text):
        """Run ``re.fullmatch(pattern, text)``, store and return the result."""
        self.last_match = re.fullmatch(pattern, text)
        return self.last_match


def time2period(t):
    """Convert a period label from the spreadsheet into a period identifier.

    Args:
        t: Period label string; the whole string must match one of the
            patterns above.

    Returns:
        ``'year/YYYY'`` for a bare year, ``'month/YYYY-MM'`` for a year plus
        month abbreviation, ``'quarter/YYYY-Qn'`` for a year plus quarter.
        For any other label a "no match" message is printed and ``None`` is
        returned.
    """
    if YEAR_RE.fullmatch(t):
        return f"year/{t}"

    month_match = YEAR_MONTH_RE.fullmatch(t)
    if month_match:
        year, month = month_match.groups()
        return f"month/{year}-{MONTH_NUM[month]}"

    quarter_match = YEAR_QUARTER_RE.fullmatch(t)
    if quarter_match:
        year, quarter = quarter_match.groups()
        return f"quarter/{year}-{quarter}"

    # Best effort: report the label and leave the period empty rather than abort.
    print(f"no match for {t}")
    return None

# Rewrite every raw period label as a period identifier.
# NOTE: this overwrites 'Period', so the cell must not be run twice on the
# same frame (already-converted values no longer match any pattern).
converted_periods = new_table['Period'].map(time2period)
new_table['Period'] = converted_periods
new_table

Unnamed: 0,Period,CDID,OBS,Title
0,year/1998,SESM,-2766,EU:BOP:Balance:SA:Semi-manufactures: SITC 5+6
1,year/1999,SESM,-2568,EU:BOP:Balance:SA:Semi-manufactures: SITC 5+6
2,year/2000,SESM,-2441,EU:BOP:Balance:SA:Semi-manufactures: SITC 5+6
3,year/2001,SESM,-3157,EU:BOP:Balance:SA:Semi-manufactures: SITC 5+6
4,year/2002,SESM,-4703,EU:BOP:Balance:SA:Semi-manufactures: SITC 5+6
5,year/2003,SESM,-5435,EU:BOP:Balance:SA:Semi-manufactures: SITC 5+6
6,year/2004,SESM,-4851,EU:BOP:Balance:SA:Semi-manufactures: SITC 5+6
7,year/2005,SESM,-3673,EU:BOP:Balance:SA:Semi-manufactures: SITC 5+6
8,year/2006,SESM,-3746,EU:BOP:Balance:SA:Semi-manufactures: SITC 5+6
9,year/2007,SESM,-5259,EU:BOP:Balance:SA:Semi-manufactures: SITC 5+6


In [9]:
# Columns that carry the same value for every observation.
constant_columns = (
    ('Geography', 'K02000001'),
    ('Unit', '£ Millions'),
    ('Measure Type', 'GBP Total'),
)
for column_name, constant_value in constant_columns:
    new_table[column_name] = constant_value
new_table.tail()

Unnamed: 0,Period,CDID,OBS,Title,Geography,Unit,Measure Type
269107,month/2017-10,SGTK,612,non-EU:BOP:IM:SA:Unspecified goods: SITC 9,K02000001,£ Millions,GBP Total
269108,month/2017-11,SGTK,1091,non-EU:BOP:IM:SA:Unspecified goods: SITC 9,K02000001,£ Millions,GBP Total
269109,month/2017-12,SGTK,872,non-EU:BOP:IM:SA:Unspecified goods: SITC 9,K02000001,£ Millions,GBP Total
269110,month/2018-01,SGTK,648,non-EU:BOP:IM:SA:Unspecified goods: SITC 9,K02000001,£ Millions,GBP Total
269111,month/2018-02,SGTK,439,non-EU:BOP:IM:SA:Unspecified goods: SITC 9,K02000001,£ Millions,GBP Total


In [10]:
new_table.tail(5)

Unnamed: 0,Period,CDID,OBS,Title,Geography,Unit,Measure Type
269107,month/2017-10,SGTK,612,non-EU:BOP:IM:SA:Unspecified goods: SITC 9,K02000001,£ Millions,GBP Total
269108,month/2017-11,SGTK,1091,non-EU:BOP:IM:SA:Unspecified goods: SITC 9,K02000001,£ Millions,GBP Total
269109,month/2017-12,SGTK,872,non-EU:BOP:IM:SA:Unspecified goods: SITC 9,K02000001,£ Millions,GBP Total
269110,month/2018-01,SGTK,648,non-EU:BOP:IM:SA:Unspecified goods: SITC 9,K02000001,£ Millions,GBP Total
269111,month/2018-02,SGTK,439,non-EU:BOP:IM:SA:Unspecified goods: SITC 9,K02000001,£ Millions,GBP Total


In [11]:
cord_sitc_classification_table_url = 'https://drive.google.com/uc?export=download&id=1uJck_DtSgLs0XcEuKDB0swzj1UrWmauj'

# Stop on a failed download instead of handing an error body to pd.read_excel.
cord_sitc_response = session.get(cord_sitc_classification_table_url)
cord_sitc_response.raise_for_status()
cord_sitc_classification_table = BytesIO(cord_sitc_response.content)

# First worksheet of the workbook: CDID -> classification lookup.
temp_table = pd.read_excel(cord_sitc_classification_table, sheet_name = 0)
# reset_index turns the index levels into ordinary columns ('level_0',
# 'level_1'); those and 'Sequence' are then discarded.
# NOTE(review): presumably the sheet parses with a two-level index - confirm.
temp_table = temp_table.reset_index().drop(['level_0','level_1','Sequence'], axis =1)
temp_table.tail(5)


Unnamed: 0,cdid,COMMODITY,AREA,DIRECTION,BASIS,PRICE,SEASADJ,PERIOD
6196,BPFR,5min8minE,WW,IM,BOP,IDEF,NSA,M
6197,BPGR,5min8minE,WW,IM,BOP,IDEF,SA,M
6198,BPCR,5min8minE,WW,IM,BOP,VM,NSA,M
6199,ELAI,5min8minE,WW,IM,BOP,VM,SA,M
6200,,,,,,,,


In [12]:
codelist_url = 'https://drive.google.com/uc?export=download&id=161OtInylx2518gmhRu7UgUYnZZ_x9FQr'

# Extra code list in CSV form. Check the HTTP status before parsing so a
# failed download is reported as such rather than as a CSV parsing problem.
codelist_response = session.get(codelist_url)
codelist_response.raise_for_status()
classification4 = pd.read_csv(BytesIO(codelist_response.content))

In [13]:
CSDB_classification_tables_url = 'https://drive.google.com/uc?export=download&id=1miAzQ6s8om4Ark3BpRk3Y90OAWfWErTb'

# Stop on a failed download instead of handing an error body to pd.read_excel.
csdb_response = session.get(CSDB_classification_tables_url)
csdb_response.raise_for_status()

# sheet_name=None loads every worksheet into a dict keyed by sheet name.
sheets = pd.read_excel(BytesIO(csdb_response.content), sheet_name=None)
classification1 = sheets['cord_sitc']
classification2 = sheets['cord_cpa']
classification3 = sheets['cord_country']



In [14]:
classification1.head(5)

Unnamed: 0,cdid,COMMODITY,AREA,DIRECTION,BASIS,PRICE,SEASADJ,PERIOD
0,SDSX,2plus4,EU,BAL,BOP,CP,NSA,Q
1,SGLO,5minus8,EU,BAL,BOP,CP,NSA,Q
2,SESL,5plus6,EU,BAL,BOP,CP,NSA,Q
3,SFJC,7plus8,EU,BAL,BOP,CP,NSA,Q
4,LKTX,TminusO,EU,BAL,BOP,CP,NSA,Q


In [15]:
classification2.head(5)

Unnamed: 0,cdid,PRODUCT,AREA,DIRECTION,BASIS,PRICE,SEASADJ,PERIOD
0,P42L,24.2,EU,EX,BOP,CP,NSA,Q
1,P483,24.2,EU,EX,BOP,CP,SA,Q
2,P4DJ,24.2,EU,EX,BOP,CVM,NSA,Q
3,P4IZ,24.2,EU,EX,BOP,CVM,SA,Q
4,P3EP,24.2,EU,IM,BOP,CP,NSA,Q


In [16]:
classification3.head(5)

Unnamed: 0,cdid,COUNTRY,DIRECTION,BASIS,SEASADJ,PERIOD
0,KN2O,XS,BAL,BOP,NSA,Q
1,LGDS,V4,BAL,BOP,NSA,Q
2,L87P,V3,BAL,BOP,NSA,Q
3,L87J,V2,BAL,BOP,NSA,Q
4,MHN8,I7,BAL,BOP,NSA,Q


In [17]:
classification4.head(5)

Unnamed: 0,cdid,COMMODITY,AREA,DIRECTION,BASIS,PRICE,SEASADJ,PERIOD
0,AJFB,Canadian dollar,UK,,BE,CP,NSA,
1,AJFD,Swiss franc,UK,,BE,CP,NSA,
2,AJFI,Swedish kroner,UK,,BE,CP,NSA,
3,AJFJ,Norwegian kroner,UK,,BE,CP,NSA,
4,AJFK,Danish kroner,UK,,BE,CP,NSA,


In [18]:
# Align classification2 with the other lookup tables by calling its 'PRODUCT'
# column 'COMMODITY'. index=str additionally converts the row labels to
# strings. The rename is in place, so the frame held in `sheets` changes too.
classification2.rename(index = str, columns = {'PRODUCT':'COMMODITY'}, inplace = True)

In [19]:
# classification3 has no commodity or price information; add both columns as
# empty strings so it has the same columns as the other lookup tables.
for filler_column in ('COMMODITY', 'PRICE'):
    classification3[filler_column] = ''

In [20]:
# Treat the country code as the 'AREA' of the series, matching the column name
# used by the other lookup tables. index=str additionally converts the row
# labels to strings. The rename is in place, so the frame held in `sheets`
# changes too.
classification3.rename(index = str, columns = {'COUNTRY':'AREA'}, inplace = True)

In [21]:
# Stack all lookup tables into one CDID -> classification table. temp_table
# comes first, so its rows win when duplicates are later dropped with
# keep='first'.
lookup_frames = [temp_table, classification1, classification2, classification3, classification4]
temp_table = pd.concat(lookup_frames)

In [22]:
temp_table.head()

Unnamed: 0,AREA,BASIS,COMMODITY,DIRECTION,PERIOD,PRICE,SEASADJ,cdid
0,EU,BOP,2plus4,BAL,M,CP,NSA,SDSX
1,EU,BOP,5minus8,BAL,M,CP,NSA,SGLO
2,EU,BOP,7plus8,BAL,M,CP,NSA,SFJC
3,EU,BOP,TminusO,BAL,M,CP,NSA,LKTX
4,EU,BOP,0plus1,BAL,M,CP,NSA,SDMS


In [23]:
temp_table.shape

(18295, 8)

In [24]:
# Keep a single classification row per CDID: the first one encountered.
temp_table = temp_table.drop_duplicates(subset=['cdid'], keep='first')

In [25]:
temp_table.head()

Unnamed: 0,AREA,BASIS,COMMODITY,DIRECTION,PERIOD,PRICE,SEASADJ,cdid
0,EU,BOP,2plus4,BAL,M,CP,NSA,SDSX
1,EU,BOP,5minus8,BAL,M,CP,NSA,SGLO
2,EU,BOP,7plus8,BAL,M,CP,NSA,SFJC
3,EU,BOP,TminusO,BAL,M,CP,NSA,LKTX
4,EU,BOP,0plus1,BAL,M,CP,NSA,SDMS


In [26]:
new_table.head(5)

Unnamed: 0,Period,CDID,OBS,Title,Geography,Unit,Measure Type
0,year/1998,SESM,-2766,EU:BOP:Balance:SA:Semi-manufactures: SITC 5+6,K02000001,£ Millions,GBP Total
1,year/1999,SESM,-2568,EU:BOP:Balance:SA:Semi-manufactures: SITC 5+6,K02000001,£ Millions,GBP Total
2,year/2000,SESM,-2441,EU:BOP:Balance:SA:Semi-manufactures: SITC 5+6,K02000001,£ Millions,GBP Total
3,year/2001,SESM,-3157,EU:BOP:Balance:SA:Semi-manufactures: SITC 5+6,K02000001,£ Millions,GBP Total
4,year/2002,SESM,-4703,EU:BOP:Balance:SA:Semi-manufactures: SITC 5+6,K02000001,£ Millions,GBP Total


In [27]:
new_table.head(1)['CDID']

0    SESM
Name: CDID, dtype: object

In [28]:
temp_table.head(1)['cdid']

0    SDSX
Name: cdid, dtype: object

In [29]:
# Attach the classification of each series to its observations. A left join
# keeps every observation, including those whose CDID has no classification.
# validate='many_to_one' makes pandas raise MergeError if the lookup table
# holds the same cdid more than once, instead of silently duplicating
# observation rows.
new_table = pd.merge(new_table, temp_table, how = 'left',
                     left_on = 'CDID', right_on = 'cdid',
                     validate = 'many_to_one')

In [30]:
new_table.tail(5)

Unnamed: 0,Period,CDID,OBS,Title,Geography,Unit,Measure Type,AREA,BASIS,COMMODITY,DIRECTION,PERIOD,PRICE,SEASADJ,cdid
269107,month/2017-10,SGTK,612,non-EU:BOP:IM:SA:Unspecified goods: SITC 9,K02000001,£ Millions,GBP Total,RW,BOP,9,IM,M,CP,SA,SGTK
269108,month/2017-11,SGTK,1091,non-EU:BOP:IM:SA:Unspecified goods: SITC 9,K02000001,£ Millions,GBP Total,RW,BOP,9,IM,M,CP,SA,SGTK
269109,month/2017-12,SGTK,872,non-EU:BOP:IM:SA:Unspecified goods: SITC 9,K02000001,£ Millions,GBP Total,RW,BOP,9,IM,M,CP,SA,SGTK
269110,month/2018-01,SGTK,648,non-EU:BOP:IM:SA:Unspecified goods: SITC 9,K02000001,£ Millions,GBP Total,RW,BOP,9,IM,M,CP,SA,SGTK
269111,month/2018-02,SGTK,439,non-EU:BOP:IM:SA:Unspecified goods: SITC 9,K02000001,£ Millions,GBP Total,RW,BOP,9,IM,M,CP,SA,SGTK


In [31]:
# The lookup's key column duplicates 'CDID' after the merge; discard it.
new_table = new_table.drop(columns=['cdid'])

In [32]:
# Output column names for the tidy data. index=str is kept from the original
# calls: it converts the row labels to strings.
output_names = {
    'OBS': 'Value',
    'DIRECTION': 'Flow',
    'COMMODITY': 'Product',
    'SEASADJ': 'Seasonal Adjustment',
}
new_table = new_table.rename(index=str, columns=output_names)

In [33]:
new_table.tail(5)

Unnamed: 0,Period,CDID,Value,Title,Geography,Unit,Measure Type,AREA,BASIS,Product,Flow,PERIOD,PRICE,Seasonal Adjustment
269107,month/2017-10,SGTK,612,non-EU:BOP:IM:SA:Unspecified goods: SITC 9,K02000001,£ Millions,GBP Total,RW,BOP,9,IM,M,CP,SA
269108,month/2017-11,SGTK,1091,non-EU:BOP:IM:SA:Unspecified goods: SITC 9,K02000001,£ Millions,GBP Total,RW,BOP,9,IM,M,CP,SA
269109,month/2017-12,SGTK,872,non-EU:BOP:IM:SA:Unspecified goods: SITC 9,K02000001,£ Millions,GBP Total,RW,BOP,9,IM,M,CP,SA
269110,month/2018-01,SGTK,648,non-EU:BOP:IM:SA:Unspecified goods: SITC 9,K02000001,£ Millions,GBP Total,RW,BOP,9,IM,M,CP,SA
269111,month/2018-02,SGTK,439,non-EU:BOP:IM:SA:Unspecified goods: SITC 9,K02000001,£ Millions,GBP Total,RW,BOP,9,IM,M,CP,SA


In [34]:
# Columns of the tidy output, in output order; every other column is dropped.
output_columns = [
    'AREA', 'Period', 'CDID', 'BASIS', 'Product', 'Seasonal Adjustment',
    'Flow', 'PRICE', 'Measure Type', 'Value', 'Unit',
]
new_table = new_table[output_columns]

In [35]:
new_table.head(5)

Unnamed: 0,AREA,Period,CDID,BASIS,Product,Seasonal Adjustment,Flow,PRICE,Measure Type,Value,Unit
0,EU,year/1998,SESM,BOP,5plus6,SA,BAL,CP,GBP Total,-2766,£ Millions
1,EU,year/1999,SESM,BOP,5plus6,SA,BAL,CP,GBP Total,-2568,£ Millions
2,EU,year/2000,SESM,BOP,5plus6,SA,BAL,CP,GBP Total,-2441,£ Millions
3,EU,year/2001,SESM,BOP,5plus6,SA,BAL,CP,GBP Total,-3157,£ Millions
4,EU,year/2002,SESM,BOP,5plus6,SA,BAL,CP,GBP Total,-4703,£ Millions


In [36]:
new_table.tail(5)

Unnamed: 0,AREA,Period,CDID,BASIS,Product,Seasonal Adjustment,Flow,PRICE,Measure Type,Value,Unit
269107,RW,month/2017-10,SGTK,BOP,9,SA,IM,CP,GBP Total,612,£ Millions
269108,RW,month/2017-11,SGTK,BOP,9,SA,IM,CP,GBP Total,1091,£ Millions
269109,RW,month/2017-12,SGTK,BOP,9,SA,IM,CP,GBP Total,872,£ Millions
269110,RW,month/2018-01,SGTK,BOP,9,SA,IM,CP,GBP Total,648,£ Millions
269111,RW,month/2018-02,SGTK,BOP,9,SA,IM,CP,GBP Total,439,£ Millions


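Pull out the CDIDs that have no classification (rows where `AREA` is null after the merge). The diagnostic cell below is commented out.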

In [37]:
# temp_table1 = new_table[new_table['AREA'].isnull() == True] 
# Codelist = temp_table1['CDID'].unique()
# Codelist.shape
# codes = pd.Series(Codelist)
# codes.to_csv('Codelist.csv', index = False)

In [38]:
new_table.shape

(269112, 11)

In [39]:
# Drop observations whose value is exactly zero. The explicit copy makes
# new_table an independent frame, so the in-place fill and the column
# assignments in the following cells do not operate on a slice of the old
# frame (which is what triggers pandas' SettingWithCopyWarning).
new_table = new_table[new_table['Value'] != 0].copy()

In [40]:
# Mark every remaining missing classification value with the string 'NA'.
new_table = new_table.fillna('NA')

In [41]:
new_table.Flow.unique()

array(['BAL', 'IM', 'EX', 'NA'], dtype=object)

In [42]:
# Human-readable labels for the flow codes. The lookup matches whole cell
# values in a single pass (rather than three successive substring
# replacements), so any other value - such as the 'NA' filler - passes through
# unchanged.
flow_labels = {'EX': 'Exports', 'IM': 'Imports', 'BAL': 'Balance'}
new_table['Flow'] = new_table['Flow'].map(lambda cell: flow_labels.get(cell, cell))

In [43]:
new_table.head(5)

Unnamed: 0,AREA,Period,CDID,BASIS,Product,Seasonal Adjustment,Flow,PRICE,Measure Type,Value,Unit
0,EU,year/1998,SESM,BOP,5plus6,SA,Balance,CP,GBP Total,-2766.0,£ Millions
1,EU,year/1999,SESM,BOP,5plus6,SA,Balance,CP,GBP Total,-2568.0,£ Millions
2,EU,year/2000,SESM,BOP,5plus6,SA,Balance,CP,GBP Total,-2441.0,£ Millions
3,EU,year/2001,SESM,BOP,5plus6,SA,Balance,CP,GBP Total,-3157.0,£ Millions
4,EU,year/2002,SESM,BOP,5plus6,SA,Balance,CP,GBP Total,-4703.0,£ Millions


In [44]:
# Series excluded from the output.
# NOTE(review): the first five appear in classification4 as currency series;
# presumably the rest are similar non-trade series - confirm.
excluded_cdids = ['AJFB', 'AJFD', 'AJFI', 'AJFJ', 'AJFK', 'AJFO', 'AJFP',
                  'AJFU', 'AJFV', 'AJFW', 'AUSS', 'BK67', 'THAP']
new_table = new_table[~new_table['CDID'].isin(excluded_cdids)]


In [45]:
new_table.dtypes

AREA                    object
Period                  object
CDID                    object
BASIS                   object
Product                 object
Seasonal Adjustment     object
Flow                    object
PRICE                   object
Measure Type            object
Value                  float64
Unit                    object
dtype: object

In [47]:
# Write the tidy data to out/MRET_Tidydata.csv, creating the folder if needed.
destinationFolder = Path('out')
destinationFolder.mkdir(parents=True, exist_ok=True)

output_path = destinationFolder / 'MRET_Tidydata.csv'
new_table.to_csv(output_path, index = False)