In [548]:
import pandas as pd

import sql_functions as sf

In [549]:
schema = 'organic_africa' # UPDATE 'TABLE_SCHEMA' based on schema used in class 
engine = sf.get_engine()

In [550]:
pur_tables = {}
pur_tables['OGPur__OGMB'] = sf.get_dataframe(f'SELECT * FROM {schema}."OGPur__OGMB"')
pur_tables['OG_Pur__OGOrg_1'] = sf.get_dataframe(f'SELECT * FROM {schema}."OG_Pur__OGOrg_1"')
pur_tables['wc_pur__WCOrg_1'] = sf.get_dataframe(f'SELECT * FROM {schema}."wc_pur__WCOrg_1"')
pur_tables['wc_pur_deviations__WCOrg_1'] = sf.get_dataframe(f'SELECT * FROM {schema}."wc_pur_deviations__WCOrg_1"')
pur_tables['wc_purchases_bayoba_1'] = sf.get_dataframe(f'SELECT * FROM {schema}."wc_purchases_bayoba_1"')
pur_tables['wc_purchases_deviations_bayoba_1'] = sf.get_dataframe(f'SELECT * FROM {schema}."wc_purchases_deviations_bayoba_1"')

In [551]:
del pur_tables['OGPur__OGMB']['Input_recovery']
del pur_tables['OG_Pur__OGOrg_1']['Input_recovery']

In [552]:
# column names lower case

for df in pur_tables.values():
    #print(df)
    df.columns = map(str.lower, df.columns)

pur_tables['OGPur__OGMB'].columns

Index(['id', 'og_nr', 'name_verified', 'date_of_purchase', 'receipt_nr',
       'product', 'grade', 'amount_in_kg', 'price_per_kg', 'total_payment_usd',
       'buyer', 'agr_year', 'comment', 'entered_by', 'source'],
      dtype='object')

In [553]:
pur_tables['OGPur__OGMB'].loc[pur_tables['OGPur__OGMB']['date_of_purchase']=='037/2021']
# 4097

Unnamed: 0,id,og_nr,name_verified,date_of_purchase,receipt_nr,product,grade,amount_in_kg,price_per_kg,total_payment_usd,buyer,agr_year,comment,entered_by,source
4097,6657,18493.0,False,037/2021,6091.0,Rosella (subdariffa) petals (MB),A,11.0,0.85,9.0,Philip,2021.0,,Nyasha,OGOrg


In [554]:
pur_tables['OGPur__OGMB']['date_of_purchase'] = pur_tables['OGPur__OGMB'].iloc[4097]['date_of_purchase'].replace('037/2021', '03/07/2021')
pur_tables['OGPur__OGMB'].iloc[4097]['date_of_purchase']

'03/07/2021'

In [555]:
# Function to pad/fill to short dates like 5/3/18 to 05/03/2018

def str_date_pad(val):
    
    if type(val) is str and len(val) < 10: 

        date_splitted = val.split('/')
        print(date_splitted)
        if len(date_splitted[1]) < 2 : 
            date_splitted[1] = '0' + date_splitted[1]

        if (len(date_splitted[2]) < 4) and (int(date_splitted[2]) < 24) : 
            date_splitted[2] = '20' + date_splitted[2]
        if len(date_splitted[2]) < 4 and int(date_splitted[2]) > 23 : 
            date_splitted[2] = '19' + date_splitted[2]
    
        val = '/'.join(date_splitted)
        #print(val)
        return val
    
    else:
        return val

In [556]:
# Date_of_purchase to_datetime
pur_tables['OGPur__OGMB']['date_of_purchase'] = pur_tables['OGPur__OGMB']['date_of_purchase'].apply(str_date_pad)

In [557]:
pur_tables['OGPur__OGMB']['date_of_purchase'] = pd.to_datetime(pur_tables['OGPur__OGMB']['date_of_purchase'])

In [558]:
pur_tables['OG_Pur__OGOrg_1']['date_of_purchase'] = pd.to_datetime(pur_tables['OG_Pur__OGOrg_1']['date_of_purchase'])
pur_tables['wc_pur__WCOrg_1']['date_of_purchase'] = pd.to_datetime(pur_tables['wc_pur__WCOrg_1']['date_of_purchase'])
pur_tables['wc_pur_deviations__WCOrg_1']['date_of_purchase'] = pd.to_datetime(pur_tables['wc_pur_deviations__WCOrg_1']['date_of_purchase'])
pur_tables['wc_purchases_bayoba_1']['date_of_purchase'] = pd.to_datetime(pur_tables['wc_purchases_bayoba_1']['date_of_purchase'])
pur_tables['wc_purchases_deviations_bayoba_1']['date_of_purchase'] = pd.to_datetime(pur_tables['wc_purchases_deviations_bayoba_1']['date_of_purchase'])

  pur_tables['OG_Pur__OGOrg_1']['date_of_purchase'] = pd.to_datetime(pur_tables['OG_Pur__OGOrg_1']['date_of_purchase'])


#### Concat

In [559]:
for name, df in pur_tables.items():
    print(name)
    print(df.shape)

OGPur__OGMB
(6612, 15)
OG_Pur__OGOrg_1
(4144, 15)
wc_pur__WCOrg_1
(2731, 15)
wc_pur_deviations__WCOrg_1
(464, 15)
wc_purchases_bayoba_1
(10451, 15)
wc_purchases_deviations_bayoba_1
(650, 15)


In [560]:
concated_pur = pd.concat(pur_tables)
concated_pur.dtypes

id                            int64
og_nr                       float64
name_verified                  bool
date_of_purchase     datetime64[ns]
receipt_nr                   object
product                      object
grade                        object
amount_in_kg                float64
price_per_kg                float64
total_payment_usd           float64
buyer                        object
agr_year                     object
comment                      object
entered_by                   object
source                       object
wc_nr                       float64
dtype: object

#### Further Cleaning
- Receipt_Nr to_numeric

In [561]:
# concated_pur['receipt_nr']= pd.to_numeric(concated_pur['receipt_nr'])
# Unable to parse string "1753A" at position 8885

concated_pur['receipt_nr'] = concated_pur.iloc[8885]['receipt_nr'].replace('1753A', '1753')

concated_pur.iloc[8885]['receipt_nr']

'1753'

In [562]:
concated_pur['receipt_nr']= pd.to_numeric(concated_pur['receipt_nr'])

In [563]:
type(concated_pur['wc_nr'])

pandas.core.series.Series

In [564]:
wc = concated_pur.loc[pd.notna(concated_pur['wc_nr'])==True,'wc_nr']
for nr in wc: 
    if str(nr).endswith('.0')==False: print(nr)

0.268


In [565]:
0.268
concated_pur['wc_nr'] = concated_pur['wc_nr'].replace('0.268', '268.0')

In [566]:
# Correcting outliers

concated_pur['agr_year'] = concated_pur['agr_year'].replace('2013/14', '2013')
concated_pur['agr_year'] = concated_pur['agr_year'].replace('2014/15', '2014')
concated_pur['agr_year'] = concated_pur['agr_year'].replace('2021q', '2021')

In [567]:
concated_pur['agr_year'] = pd.to_datetime(concated_pur['agr_year'])
#concated_pur['agr_year'] = concated_pur['agr_year'].dt.year

In [568]:
concated_pur['agr_year'] = concated_pur['agr_year'].dt.strftime('%Y')
#concated_pur['agr_year'] = pd.to_datetime(concated_pur['agr_year']).dt.year

In [569]:
display(concated_pur.dtypes)
display(concated_pur.shape)
display(concated_pur.head())

id                            int64
og_nr                       float64
name_verified                  bool
date_of_purchase     datetime64[ns]
receipt_nr                    int64
product                      object
grade                        object
amount_in_kg                float64
price_per_kg                float64
total_payment_usd           float64
buyer                        object
agr_year                     object
comment                      object
entered_by                   object
source                       object
wc_nr                       float64
dtype: object

(25052, 16)

Unnamed: 0,Unnamed: 1,id,og_nr,name_verified,date_of_purchase,receipt_nr,product,grade,amount_in_kg,price_per_kg,total_payment_usd,buyer,agr_year,comment,entered_by,source,wc_nr
OGPur__OGMB,0,1576,13232.0,False,2021-03-07,1753,Rosella (subdariffa) petals (MB),A,15.26,0.85,13.0,Philip,1970,,Nyasha,OGOrg,
OGPur__OGMB,1,1577,13237.0,False,2021-03-07,1753,Rosella (subdariffa) petals (MB),A,74.555,0.85,63.0,Philip,1970,,Nyasha,OGOrg,
OGPur__OGMB,2,1578,13241.0,False,2021-03-07,1753,Rosella (subdariffa) petals (MB),A,61.855,0.85,53.0,Philip,1970,,Nyasha,OGOrg,
OGPur__OGMB,3,1579,13105.0,False,2021-03-07,1753,Rosella (subdariffa) petals (MB),A,6.955,0.85,6.0,Philip,1970,,Nyasha,OGOrg,
OGPur__OGMB,4,1580,13032.0,False,2021-03-07,1753,Rosella (subdariffa) petals (MB),A,7.885,0.85,7.0,Philip,1970,,Nyasha,OGOrg,


##### check duplicates and unique columns

In [570]:
# Looking for Duplicates
concated_pur.duplicated().value_counts()

# NO Duplicates

False    25052
dtype: int64

In [571]:
# sum of null values each column
concated_pur.isnull().sum()

# comment 24300 nulls

id                       0
og_nr                14321
name_verified            0
date_of_purchase       181
receipt_nr               0
product                 14
grade                13825
amount_in_kg             3
price_per_kg            25
total_payment_usd        3
buyer                   16
agr_year                16
comment              24300
entered_by             132
source                   2
wc_nr                11015
dtype: int64

In [572]:
concated_pur[['og_nr']].squeeze().is_unique

False

In [573]:
concated_pur['product'] = concated_pur['product'].replace({ 'Rosella (subdariffa) petals (MB)': 'Rosella (subdariffa) petals (org)',
                                                            'Rosella (subdariffa) seed (MB)': 'Rosella (subdariffa) seed (org)',
                                                            'Devil’s Claw plant part (org) wet': 'Devil’s Claw plant part (org)',
                                                            'kalahari melon seed (org)': 'Kalahari melon seed (org)'})

concated_pur['grade'] = concated_pur['grade'].replace(' ', None)

In [574]:
for col in ['name_verified','receipt_nr', 'entered_by', 'buyer', 'grade','product']:
    display(col)
    display(concated_pur[col].value_counts())

'name_verified'

False    17456
True      7596
Name: name_verified, dtype: int64

'receipt_nr'

1753    25052
Name: receipt_nr, dtype: int64

'entered_by'

Nyasha            11483
Ruvimbo            4381
Batsirai           2400
Faith              1739
Norman             1709
Norman Kativhu     1140
Tracy               828
Charlie             633
Melody              294
Florence            210
Katinka             103
Name: entered_by, dtype: int64

'buyer'

Daniel             5146
Douglas            4227
Islum              2944
Philip             2404
Charles            2044
Wiseman            1528
Memory             1381
Andrew              900
Misheck             855
Ronald              755
Kennedy             703
Doreen              643
Pedzisai            642
Fungai              374
Hamid               193
Hadley              141
Nyasha              116
Nyasha Poto-Joe      28
Mandeya               9
d                     1
Meki Saizi            1
Katinka               1
Name: buyer, dtype: int64

'grade'

A    11187
B       23
C       15
Name: grade, dtype: int64

'product'

Baobab fruit whole (org)               9231
Rosella (subdariffa) petals (org)      7938
Gotu kola leaves (org)                 2086
Bird's Eye Chilli fruit whole (org)    1173
Rosella (subdariffa) seed (org)        1080
Baobab whole fruit (con)                650
Devil’s Claw root (org)                 519
Paprika fruit whole (org)               384
Devil’s Claw root (con)                 314
Trichillia emetica                      246
Ximenia Americana seed (org)            240
Baobab pulp and seed (org)              216
Strophanthus (kombe) seed (con)         163
Kalahari melon seed (org)               145
Ximenia Caffra Seed (org)               126
Gotu kola leaves (con)                  116
Marula seed (org)                        95
Devil’s Claw plant part (org)            78
Trichillia emetica (org)                 68
Marula Kennels                           41
Strophanthus (kombe) seed (org)          40
Devil’s Claw plant part (con)            32
Rosella petals (con)            

In [575]:
concated_pur

Unnamed: 0,Unnamed: 1,id,og_nr,name_verified,date_of_purchase,receipt_nr,product,grade,amount_in_kg,price_per_kg,total_payment_usd,buyer,agr_year,comment,entered_by,source,wc_nr
OGPur__OGMB,0,1576,13232.0,False,2021-03-07,1753,Rosella (subdariffa) petals (org),A,15.260,0.85,13.0,Philip,1970,,Nyasha,OGOrg,
OGPur__OGMB,1,1577,13237.0,False,2021-03-07,1753,Rosella (subdariffa) petals (org),A,74.555,0.85,63.0,Philip,1970,,Nyasha,OGOrg,
OGPur__OGMB,2,1578,13241.0,False,2021-03-07,1753,Rosella (subdariffa) petals (org),A,61.855,0.85,53.0,Philip,1970,,Nyasha,OGOrg,
OGPur__OGMB,3,1579,13105.0,False,2021-03-07,1753,Rosella (subdariffa) petals (org),A,6.955,0.85,6.0,Philip,1970,,Nyasha,OGOrg,
OGPur__OGMB,4,1580,13032.0,False,2021-03-07,1753,Rosella (subdariffa) petals (org),A,7.885,0.85,7.0,Philip,1970,,Nyasha,OGOrg,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
wc_purchases_deviations_bayoba_1,645,646,,False,2015-08-07,1753,Baobab whole fruit (con),,64.000,0.10,6.0,Charles,1970,Not found in the Database,Batsirai,BAyoba,99901422.0
wc_purchases_deviations_bayoba_1,646,647,,False,2015-08-04,1753,Baobab whole fruit (con),,10.000,0.10,1.0,Charles,1970,Not found in the Database,Batsirai,BAyoba,99901431.0
wc_purchases_deviations_bayoba_1,647,648,,False,2015-08-31,1753,Baobab whole fruit (con),,32.000,0.10,3.0,Charles,1970,Not found in the Database,Batsirai,BAyoba,99900001.0
wc_purchases_deviations_bayoba_1,648,649,,False,2015-08-31,1753,Baobab whole fruit (con),,134.000,0.10,13.0,Charles,1970,Not found in the Database,Batsirai,BAyoba,99901614.0


#### This purchase table has transactions of 5989 WCs and 3367 OGs = ~9300 partner farmers

From **25052** transactions with 284 transactions without og or wc numbers

In [576]:
concated_pur[concated_pur['og_nr']==14710.0]

Unnamed: 0,Unnamed: 1,id,og_nr,name_verified,date_of_purchase,receipt_nr,product,grade,amount_in_kg,price_per_kg,total_payment_usd,buyer,agr_year,comment,entered_by,source,wc_nr
OGPur__OGMB,1631,3853,14710.0,False,2021-03-07,1753,Rosella (subdariffa) petals (org),A,58.5,8.7,415.0,Philip,1970,,Norman Kativhu,OGOrg,
OGPur__OGMB,1778,4000,14710.0,False,2021-03-07,1753,Rosella (subdariffa) seed (org),A,38.0,2.2,83.0,Philip,1970,,Norman Kativhu,OGOrg,
OGPur__OGMB,2401,4624,14710.0,True,2021-03-07,1753,Rosella (subdariffa) petals (org),A,5.1,8.8,0.0,Memory,1970,,Norman Kativhu,OGOrg,
OGPur__OGMB,2509,4732,14710.0,True,2021-03-07,1753,Rosella (subdariffa) seed (org),A,4.0,2.2,8.7,Memory,1970,,Norman Kativhu,OGOrg,


In [577]:
concated_pur['wc_nr'].duplicated().value_counts()

True     19063
False     5989
Name: wc_nr, dtype: int64

In [578]:
concated_pur['og_nr'].duplicated().value_counts()

True     21685
False     3367
Name: og_nr, dtype: int64

In [579]:
concated_pur[concated_pur['id']==7572]

Unnamed: 0,Unnamed: 1,id,og_nr,name_verified,date_of_purchase,receipt_nr,product,grade,amount_in_kg,price_per_kg,total_payment_usd,buyer,agr_year,comment,entered_by,source,wc_nr
OGPur__OGMB,1503,7572,,False,2021-03-07,1753,,,0.0,0.0,0.0,,,,,OGOrg,


In [580]:
nr=concated_pur[~concated_pur['wc_nr'].isna() | ~concated_pur['og_nr'].isna()].shape
nr[0]

24768

In [581]:
all=concated_pur.shape
all[0]

25052

In [582]:
all[0]-nr[0]

284

In [583]:
# 57 
concated_pur[concated_pur['total_payment_usd']<=0][['id','og_nr','wc_nr','date_of_purchase','total_payment_usd']]

Unnamed: 0,Unnamed: 1,id,og_nr,wc_nr,date_of_purchase,total_payment_usd
OGPur__OGMB,1503,7572,,,2021-03-07,0.0
OGPur__OGMB,2315,4538,19206.0,,2021-03-07,0.0
OGPur__OGMB,2336,4559,21027.0,,2021-03-07,0.0
OGPur__OGMB,2358,4581,19135.0,,2021-03-07,0.0
OGPur__OGMB,2383,4606,19608.0,,2021-03-07,0.0
OGPur__OGMB,2386,4609,19621.0,,2021-03-07,0.0
OGPur__OGMB,2401,4624,14710.0,,2021-03-07,0.0
OGPur__OGMB,2402,4625,21850.0,,2021-03-07,0.0
OGPur__OGMB,2407,4630,19663.0,,2021-03-07,0.0
OGPur__OGMB,2408,4631,19665.0,,2021-03-07,0.0


In [584]:
concated_pur[concated_pur['receipt_nr']==1753].shape[0]

25052

## Dropping unnecessary 13 rows with too many nans

In [585]:
jkl = concated_pur[concated_pur['wc_nr'].isna() & concated_pur['og_nr'].isna()]
jkl = jkl[jkl['total_payment_usd']<=0]
jkl

Unnamed: 0,Unnamed: 1,id,og_nr,name_verified,date_of_purchase,receipt_nr,product,grade,amount_in_kg,price_per_kg,total_payment_usd,buyer,agr_year,comment,entered_by,source,wc_nr
OGPur__OGMB,1503,7572,,False,2021-03-07,1753,,,0.0,0.0,0.0,,,,,OGOrg,
OGPur__OGMB,3251,7573,,False,2021-03-07,1753,,,0.0,0.0,0.0,,,,,OGOrg,
OGPur__OGMB,4001,6562,,False,2021-03-07,1753,,,0.0,0.0,0.0,,,,,OGOrg,
OGPur__OGMB,4017,7574,,False,2021-03-07,1753,,,0.0,0.0,0.0,,,,,OGOrg,
OGPur__OGMB,4539,7099,,False,2021-03-07,1753,,,0.0,0.0,0.0,,,,,OGOrg,
OGPur__OGMB,4540,7100,,False,2021-03-07,1753,,,0.0,0.0,0.0,,,,,OGOrg,
OGPur__OGMB,4541,7101,,False,2021-03-07,1753,,,0.0,0.0,0.0,,,,,OGOrg,
OGPur__OGMB,4542,7102,,False,2021-03-07,1753,,,0.0,0.0,0.0,,,,,,
OGPur__OGMB,4590,7150,,False,2021-03-07,1753,,,0.0,0.0,0.0,,,,,OGOrg,
OGPur__OGMB,4591,7151,,False,2021-03-07,1753,,,0.0,0.0,0.0,,,,,OGOrg,


In [586]:
multi_index_nan = jkl[jkl['total_payment_usd']<=0].index
multi_index_nan

# [1503, 3251, 4001, 4017, 4539, 4540, 4541, 4542, 4590, 4591, 4592, 4593, 4594]

MultiIndex([('OGPur__OGMB', 1503),
            ('OGPur__OGMB', 3251),
            ('OGPur__OGMB', 4001),
            ('OGPur__OGMB', 4017),
            ('OGPur__OGMB', 4539),
            ('OGPur__OGMB', 4540),
            ('OGPur__OGMB', 4541),
            ('OGPur__OGMB', 4542),
            ('OGPur__OGMB', 4590),
            ('OGPur__OGMB', 4591),
            ('OGPur__OGMB', 4592),
            ('OGPur__OGMB', 4593),
            ('OGPur__OGMB', 4594)],
           )

In [587]:
drop_list = [1503, 3251, 4001, 4017, 4539, 4540, 4541, 4542, 4590, 4591, 4592, 4593, 4594]

In [588]:
concated_pur.drop(concated_pur.index[drop_list], inplace=True)

In [589]:
concated_pur[concated_pur['wc_nr'].isna() & concated_pur['og_nr'].isna()]


Unnamed: 0,Unnamed: 1,id,og_nr,name_verified,date_of_purchase,receipt_nr,product,grade,amount_in_kg,price_per_kg,total_payment_usd,buyer,agr_year,comment,entered_by,source,wc_nr
OGPur__OGMB,4641,7201,,False,2021-03-07,1753,Rosella (subdariffa) petals (org),A,34.00,0.85,20.00,Ronald,1970,,Nyasha,OGOrg,
OGPur__OGMB,5829,8393,,False,2021-03-07,1753,Rosella (subdariffa) petals (org),A,10.50,0.85,4.00,Kennedy,1970,,Nyasha,OGOrg,
OGPur__OGMB,5830,8394,,False,2021-03-07,1753,Rosella (subdariffa) petals (org),A,26.95,0.85,18.00,Kennedy,1970,,Nyasha,OGOrg,
OGPur__OGMB,5831,8395,,False,2021-03-07,1753,Rosella (subdariffa) petals (org),A,8.45,0.85,5.00,Kennedy,1970,,Nyasha,OGOrg,
OGPur__OGMB,5838,8402,,False,2021-03-07,1753,Rosella (subdariffa) petals (org),A,6.40,0.85,5.00,Kennedy,1970,,Nyasha,OGOrg,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
wc_purchases_bayoba_1,10132,17344,,False,2022-06-16,1753,Baobab fruit whole (org),,112.00,0.14,15.68,Islum,2022,,Nyasha,BAyoba,
wc_purchases_bayoba_1,10236,17448,,False,2022-06-09,1753,Baobab fruit whole (org),,720.00,0.14,100.80,Islum,2022,,Nyasha,BAyoba,
wc_purchases_bayoba_1,10238,17450,,False,2022-06-09,1753,Baobab fruit whole (org),,324.00,0.14,45.36,Islum,2022,,Nyasha,BAyoba,
wc_purchases_bayoba_1,10406,17618,,False,2021-10-27,1753,Baobab fruit whole (org),,241.00,0.12,28.90,Douglas,2021,,Nyasha,BAyoba,


In [590]:
concated_pur[concated_pur['total_payment_usd']<=0]

Unnamed: 0,Unnamed: 1,id,og_nr,name_verified,date_of_purchase,receipt_nr,product,grade,amount_in_kg,price_per_kg,total_payment_usd,buyer,agr_year,comment,entered_by,source,wc_nr
OGPur__OGMB,2315,4538,19206.0,True,2021-03-07,1753,Rosella (subdariffa) petals (org),A,4.1,8.8,0.0,Memory,1970,,Norman Kativhu,OGOrg,
OGPur__OGMB,2336,4559,21027.0,True,2021-03-07,1753,Rosella (subdariffa) petals (org),A,2.1,8.8,0.0,Memory,1970,,Norman Kativhu,OGOrg,
OGPur__OGMB,2358,4581,19135.0,True,2021-03-07,1753,Rosella (subdariffa) petals (org),A,8.8,8.8,0.0,Memory,1970,,Norman Kativhu,OGOrg,
OGPur__OGMB,2383,4606,19608.0,True,2021-03-07,1753,Rosella (subdariffa) petals (org),A,3.5,8.8,0.0,Memory,1970,,Norman Kativhu,OGOrg,
OGPur__OGMB,2386,4609,19621.0,True,2021-03-07,1753,Rosella (subdariffa) petals (org),A,4.7,8.8,0.0,Memory,1970,,Norman Kativhu,OGOrg,
OGPur__OGMB,2401,4624,14710.0,True,2021-03-07,1753,Rosella (subdariffa) petals (org),A,5.1,8.8,0.0,Memory,1970,,Norman Kativhu,OGOrg,
OGPur__OGMB,2402,4625,21850.0,True,2021-03-07,1753,Rosella (subdariffa) petals (org),A,4.4,8.8,0.0,Memory,1970,,Norman Kativhu,OGOrg,
OGPur__OGMB,2407,4630,19663.0,True,2021-03-07,1753,Rosella (subdariffa) petals (org),A,2.5,8.8,0.0,Memory,1970,,Norman Kativhu,OGOrg,
OGPur__OGMB,2408,4631,19665.0,True,2021-03-07,1753,Rosella (subdariffa) petals (org),A,4.4,8.8,0.0,Memory,1970,,Norman Kativhu,OGOrg,
OGPur__OGMB,2427,4650,19285.0,False,2021-03-07,1753,Rosella (subdariffa) petals (org),A,4.2,8.8,0.0,Memory,1970,,Norman Kativhu,OGOrg,


In [591]:
concated_pur[['id']].squeeze().is_unique

False

In [592]:
table_name = 'all_purchase'

if engine!=None:
    try:
        concated_pur.to_sql(name=table_name, # Name of SQL table
                                con=engine, # Engine or connection
                                if_exists='replace', # Drop the table before inserting new values 
                                schema=schema, # Use schmea that was defined earlier
                                index=False, # Write DataFrame index as a column
                                chunksize=5000, # Specify the number of rows in each batch to be written at a time
                                method='multi') # Pass multiple values in a single INSERT clause
        print(f"The {table_name} table was imported successfully.")
    # Error handling
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        engine = None

The all_purchase table was imported successfully.
