In [2]:
from datetime import datetime

import numpy as np
import pandas as pd
from dotenv import dotenv_values
from sqlalchemy import create_engine, types
from sqlalchemy.dialects.postgresql import JSON as postgres_json
from sqlalchemy.engine import URL

In [3]:
# getting the DB credentials
# Each connection setting is looked up by key in the mapping returned by
# dotenv_values(), so every key name used below has to exist in your .env file.

config = dotenv_values()
 
pg_user = config['POSTGRES_USER'] # align the key labels with your .env file
pg_host = config['POSTGRES_HOST']  # database server host
pg_port = config['POSTGRES_PORT']  # database server port
pg_db = config['POSTGRES_DB']  # database name
pg_schema = config['POSTGRES_SCHEMA']  # schema passed to the read/write calls below
pg_pass = config['POSTGRES_PASS']  # password; keep it out of printed output

In [4]:
# Build the connection URL from its components instead of formatting a string.
# With string formatting, a user name or password containing characters such
# as '@', ':', '/' or '%' corrupts the URL; URL.create() handles the escaping.
url = URL.create(
    drivername='postgresql',
    username=pg_user,
    password=pg_pass,
    host=pg_host,
    port=int(pg_port) if pg_port else None,  # .env values are strings
    database=pg_db,
)

# creating the engine
engine = create_engine(url, echo=False)

In [5]:
# NOTE(review): the password is masked in the displayed value, but the recorded
# output still shows the database user and host — clear it before sharing.
engine.url # checking the url (password is hidden)

postgresql://mariabadanova:***@data-analytics-course-2.c8g8r1deus2v.eu-central-1.rds.amazonaws.com:5432/nf260325

## Read in the Businesses table from SQL

In [6]:
# Load the complete 'business_clean' table from the configured schema
business_clean_df = pd.read_sql_table(
    table_name='business_clean',
    con=engine,
    schema=pg_schema,
)

In [7]:
# Preview the first rows of the businesses table
business_clean_df.head()

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours,metro
0,Pns2l4eNsfO8kk83dixA6A,"Abby Rappoport, LAC, CMQ","1616 Chapala St, Ste 2",Santa Barbara,CA,93101,34.426679,-119.711197,5.0,7,0,{'ByAppointmentOnly': 'True'},"Doctors, Traditional Chinese Medicine, Naturop...",,Santa Barbara
1,mpf3x-BjTdTEA3yCZrAYPw,The UPS Store,87 Grasso Plaza Shopping Center,Affton,MO,63123,38.551126,-90.335695,3.0,15,1,{'BusinessAcceptsCreditCards': 'True'},"Shipping Centers, Local Services, Notaries, Ma...","{'Monday': '0:0-0:0', 'Tuesday': '8:0-18:30', ...",St. Louis
2,tUFrWirKiKi_TAnsVWINQQ,Target,5255 E Broadway Blvd,Tucson,AZ,85711,32.223236,-110.880452,3.5,22,0,"{'BikeParking': 'True', 'BusinessAcceptsCredit...","Department Stores, Shopping, Fashion, Home & G...","{'Monday': '8:0-22:0', 'Tuesday': '8:0-22:0', ...",Tucson
3,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,PA,19107,39.955505,-75.155564,4.0,80,1,"{'RestaurantsDelivery': 'False', 'OutdoorSeati...","Restaurants, Food, Bubble Tea, Coffee & Tea, B...","{'Monday': '7:0-20:0', 'Tuesday': '7:0-20:0', ...",Philadelphia
4,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,101 Walnut St,Green Lane,PA,18054,40.338183,-75.471659,4.5,13,1,"{'BusinessAcceptsCreditCards': 'True', 'Wheelc...","Brewpubs, Breweries, Food","{'Wednesday': '14:0-22:0', 'Thursday': '16:0-2...",Philadelphia


## Cities: check distribution

In [8]:
# Share of businesses per metro area, expressed in percent
metro_percentages = (
    business_clean_df['metro']
    .value_counts(normalize=True)
    .mul(100)
)
print(metro_percentages)

metro
Philadelphia     29.824538
Tampa            17.512937
St. Louis         8.685299
Nashville         8.018837
Indianapolis      7.480744
New Orleans       6.600774
Tucson            6.592793
Reno              5.131497
Edmonton          3.706783
Santa Barbara     3.460684
Boise             2.971147
Other             0.013968
Name: proportion, dtype: float64


## Key Categories: Assign

In [13]:
# Key category labels used to tag each business.
# The order is significant: assign_category() walks this list from the top and
# returns the first label it finds, so earlier entries take priority.
key_categories = [
    'Restaurants',
    'Coffee & Tea',
    'Shopping',
    'Automotive',
    'Beauty & Spas',
    'Health & Medical',
    'Hotels & Travel',
    'Active Life',
    'Arts & Entertainment',
    'Home Services',
    'Local Services',
    'Nightlife',
]
len(key_categories)

12

In [14]:
# define the function which will assign key category labels
def assign_category(text, categories=None):
    """Return the key category label for one value of the 'categories' column.

    The labels are tried in list order and the first one that occurs anywhere
    in ``text`` wins. Priority is therefore decided by the position of the
    label in the list, not by where it appears inside ``text``.

    Matching is a plain, case-sensitive substring test (no ``.lower()``), so a
    label also matches when it is part of a longer category name.

    Parameters
    ----------
    text : object
        The raw categories value; anything that is not a ``str`` (for example
        ``None`` or ``NaN``) is labelled ``'other'``.
    categories : iterable of str, optional
        Labels to look for, in priority order. Defaults to the notebook-level
        ``key_categories`` list.

    Returns
    -------
    str
        The first matching label, or ``'other'`` if none matches.
    """
    if not isinstance(text, str):
        return 'other'

    if categories is None:
        # Looked up at call time so the notebook-level list is always current.
        categories = key_categories

    return next((label for label in categories if label in text), 'other')


In [15]:
# Label every business with its key category and store the result in a new
# 'key_category' column of the DataFrame
category_labels = business_clean_df['categories'].apply(assign_category)
business_clean_df['key_category'] = category_labels

In [16]:
# Preview the frame to check the newly added 'key_category' column
business_clean_df.head()

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours,metro,key_category
0,Pns2l4eNsfO8kk83dixA6A,"Abby Rappoport, LAC, CMQ","1616 Chapala St, Ste 2",Santa Barbara,CA,93101,34.426679,-119.711197,5.0,7,0,{'ByAppointmentOnly': 'True'},"Doctors, Traditional Chinese Medicine, Naturop...",,Santa Barbara,Health & Medical
1,mpf3x-BjTdTEA3yCZrAYPw,The UPS Store,87 Grasso Plaza Shopping Center,Affton,MO,63123,38.551126,-90.335695,3.0,15,1,{'BusinessAcceptsCreditCards': 'True'},"Shipping Centers, Local Services, Notaries, Ma...","{'Monday': '0:0-0:0', 'Tuesday': '8:0-18:30', ...",St. Louis,Local Services
2,tUFrWirKiKi_TAnsVWINQQ,Target,5255 E Broadway Blvd,Tucson,AZ,85711,32.223236,-110.880452,3.5,22,0,"{'BikeParking': 'True', 'BusinessAcceptsCredit...","Department Stores, Shopping, Fashion, Home & G...","{'Monday': '8:0-22:0', 'Tuesday': '8:0-22:0', ...",Tucson,Shopping
3,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,PA,19107,39.955505,-75.155564,4.0,80,1,"{'RestaurantsDelivery': 'False', 'OutdoorSeati...","Restaurants, Food, Bubble Tea, Coffee & Tea, B...","{'Monday': '7:0-20:0', 'Tuesday': '7:0-20:0', ...",Philadelphia,Restaurants
4,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,101 Walnut St,Green Lane,PA,18054,40.338183,-75.471659,4.5,13,1,"{'BusinessAcceptsCreditCards': 'True', 'Wheelc...","Brewpubs, Breweries, Food","{'Wednesday': '14:0-22:0', 'Thursday': '16:0-2...",Philadelphia,other


## Key Categories: Check distribution

In [17]:
# Share of businesses per key category, expressed in percent
cat_percentages = (
    business_clean_df['key_category']
    .value_counts(normalize=True)
    .mul(100)
)
print(cat_percentages)

key_category
Restaurants             34.765142
Shopping                15.719075
other                    8.631424
Beauty & Spas            8.191771
Home Services            7.013156
Automotive               6.514307
Health & Medical         5.174730
Active Life              3.409469
Hotels & Travel          3.235204
Local Services           2.742341
Coffee & Tea             1.762601
Arts & Entertainment     1.442672
Nightlife                1.398108
Name: proportion, dtype: float64


## Upload the new table to SQL

In [20]:
# define the data types
# Explicit SQLAlchemy column types for the upload. The keys must match the
# DataFrame column names exactly: an entry whose key is not a column name is
# not applied to anything, so a misspelled key goes unnoticed.

dtype_business = {
    'business_id': types.String,
    'name': types.String,
    'address': types.String,
    'city': types.String,
    'state': types.String,
    'postal_code': types.String,
    'latitude': types.Float,
    'longitude': types.Float,
    'stars': types.Float,
    'review_count': types.Integer,
    'is_open': types.Integer,
    'attributes': types.JSON,
    'categories': types.String,
    'hours': types.JSON,
    'metro': types.String,
    'key_category': types.String  # column is 'key_category' (singular)
             }

In [21]:
# Write the labelled businesses back to the database.
# NOTE: the target is 'business_clean', the same table this notebook loaded the
# data from, and if_exists='replace' overwrites the existing table.
business_clean_df.to_sql(
    name='business_clean',
    con=engine,
    schema=pg_schema,  # create the table inside the configured schema
    if_exists='replace',
    dtype=dtype_business,
    index=False,
)

346

In [18]:
# Preview the labelled businesses frame once more
business_clean_df.head()

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours,metro,key_category
0,Pns2l4eNsfO8kk83dixA6A,"Abby Rappoport, LAC, CMQ","1616 Chapala St, Ste 2",Santa Barbara,CA,93101,34.426679,-119.711197,5.0,7,0,{'ByAppointmentOnly': 'True'},"Doctors, Traditional Chinese Medicine, Naturop...",,Santa Barbara,Health & Medical
1,mpf3x-BjTdTEA3yCZrAYPw,The UPS Store,87 Grasso Plaza Shopping Center,Affton,MO,63123,38.551126,-90.335695,3.0,15,1,{'BusinessAcceptsCreditCards': 'True'},"Shipping Centers, Local Services, Notaries, Ma...","{'Monday': '0:0-0:0', 'Tuesday': '8:0-18:30', ...",St. Louis,Local Services
2,tUFrWirKiKi_TAnsVWINQQ,Target,5255 E Broadway Blvd,Tucson,AZ,85711,32.223236,-110.880452,3.5,22,0,"{'BikeParking': 'True', 'BusinessAcceptsCredit...","Department Stores, Shopping, Fashion, Home & G...","{'Monday': '8:0-22:0', 'Tuesday': '8:0-22:0', ...",Tucson,Shopping
3,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,PA,19107,39.955505,-75.155564,4.0,80,1,"{'RestaurantsDelivery': 'False', 'OutdoorSeati...","Restaurants, Food, Bubble Tea, Coffee & Tea, B...","{'Monday': '7:0-20:0', 'Tuesday': '7:0-20:0', ...",Philadelphia,Restaurants
4,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,101 Walnut St,Green Lane,PA,18054,40.338183,-75.471659,4.5,13,1,"{'BusinessAcceptsCreditCards': 'True', 'Wheelc...","Brewpubs, Breweries, Food","{'Wednesday': '14:0-22:0', 'Thursday': '16:0-2...",Philadelphia,other


## Work on a stratified sample

In [22]:
# Load the complete 'review_2019' table from the configured schema
review_2019_df = pd.read_sql_table(
    table_name='review_2019',
    con=engine,
    schema=pg_schema,
)

In [23]:
# Attach the business attributes to every review. The left join keeps all
# reviews; same-named columns from the two frames (e.g. 'stars') come out
# with the _x (review) / _y (business) suffixes.
merged_df = pd.merge(
    review_2019_df,
    business_clean_df,
    how='left',
    on='business_id',
)

In [None]:
# Drop the reviews whose business belongs to the 'Other' or 'Edmonton' metro
excluded_metros = ['Other', 'Edmonton']
keep_mask = ~merged_df['metro'].isin(excluded_metros)
merged_df = merged_df[keep_mask]

In [50]:
# Share of reviews per key category, expressed in percent
cat_percentages = (
    merged_df['key_category']
    .value_counts(normalize=True)
    .mul(100)
)
print(cat_percentages)

key_category
Restaurants             67.018788
Shopping                 5.966705
Beauty & Spas            5.207460
other                    4.084424
Hotels & Travel          3.310017
Automotive               3.231194
Home Services            2.700169
Health & Medical         1.785953
Active Life              1.728202
Coffee & Tea             1.638452
Arts & Entertainment     1.242441
Nightlife                1.189595
Local Services           0.896600
Name: proportion, dtype: float64


In [51]:
# Share of reviews per metro area, expressed in percent
city_percentages = (
    merged_df['metro']
    .value_counts(normalize=True)
    .mul(100)
)
print(city_percentages)

metro
Philadelphia     24.967222
Tampa            18.630818
New Orleans      11.497262
Nashville        10.007537
Indianapolis      7.799707
St. Louis         7.620097
Reno              6.463949
Tucson            6.274305
Santa Barbara     4.240621
Boise             2.498484
Name: proportion, dtype: float64


In [70]:
# Build the training sample: every boosted (small) category receives a fixed
# share of the sample and the remaining categories fill the rest in proportion
# to their size. pandas is already imported at the top of the notebook.
target_sample_size = 200_000
BOOST_FRACTION = 0.04       # share of the total sample given to each boosted category
N_BOOSTED_CATEGORIES = 12   # number of smallest categories to boost; adjust as needed
SAMPLE_SEED = 42            # fixed seed so every draw is reproducible

# Count category sizes
counts = merged_df['key_category'].value_counts()

# Define smallest categories to boost
smallest_cats = counts.nsmallest(N_BOOSTED_CATEGORIES).index.tolist()
largest_cat = counts.idxmax()

# Number of rows drawn for each boosted category
boost_per_cat = int(target_sample_size * BOOST_FRACTION)

# Sample boosted small categories (with replacement if not enough data)
boosted_smallest = []
for cat in smallest_cats:
    cat_df = merged_df[merged_df['key_category'] == cat]
    n_samples = boost_per_cat
    # Replacement is only switched on when the category holds fewer rows than
    # requested; that is the only case in which duplicate rows can be drawn.
    replace = len(cat_df) < n_samples
    sampled_cat = cat_df.sample(n=n_samples, replace=replace, random_state=SAMPLE_SEED)
    print(f"Duplicates in sample (by index): {sampled_cat.index.duplicated().sum()}") #check if replacement occurred
    boosted_smallest.append(sampled_cat)

boosted_smallest = pd.concat(boosted_smallest)

# Remaining categories (excluding boosted small cats)
remaining_cats = counts.index.difference(smallest_cats)

# Remaining size to fill
remaining_size = target_sample_size - len(boosted_smallest)

# Calculate proportions for remaining cats
remaining_counts = counts.loc[remaining_cats]
remaining_frac = remaining_counts / remaining_counts.sum()

# Number of samples per remaining category.
# Because each size is rounded separately, the sizes are not guaranteed to add
# up to exactly remaining_size when more than one category remains.
remaining_sample_sizes = (remaining_frac * remaining_size).round().astype(int)

# Sample remaining categories without replacement
remaining_samples = []
for cat, n in remaining_sample_sizes.items():
    cat_df = merged_df[merged_df['key_category'] == cat]
    # Capped at the available rows, so the total can fall short of the target.
    n_samples = min(len(cat_df), n)
    sampled_cat = cat_df.sample(n=n_samples, replace=False, random_state=SAMPLE_SEED)
    remaining_samples.append(sampled_cat)
remaining_sampled = pd.concat(remaining_samples)

# Combine boosted small cats and remaining samples
final_sample = pd.concat([boosted_smallest, remaining_sampled]).reset_index(drop=True)

# Check final distribution
print(final_sample['key_category'].value_counts(normalize=True))
print(f"Total sample size: {len(final_sample)}")

Duplicates in sample (by index): 0
Duplicates in sample (by index): 0
Duplicates in sample (by index): 0
Duplicates in sample (by index): 0
Duplicates in sample (by index): 0
Duplicates in sample (by index): 0
Duplicates in sample (by index): 0
Duplicates in sample (by index): 0
Duplicates in sample (by index): 0
Duplicates in sample (by index): 0
Duplicates in sample (by index): 0
Duplicates in sample (by index): 0
key_category
Restaurants             0.52
Local Services          0.04
Nightlife               0.04
Arts & Entertainment    0.04
Coffee & Tea            0.04
Active Life             0.04
Health & Medical        0.04
Home Services           0.04
Automotive              0.04
Hotels & Travel         0.04
other                   0.04
Beauty & Spas           0.04
Shopping                0.04
Name: proportion, dtype: float64
Total sample size: 200000


In [None]:
# Absolute number of sampled reviews per key category.
# NOTE(review): the recorded output below is a single scalar, which does not
# look like the per-category counts this expression produces — it appears to be
# stale; re-run the cell.
final_sample['key_category'].value_counts()

np.int64(200000)

In [62]:
# Column names, non-null counts and dtypes of the sample
final_sample.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200000 entries, 0 to 199999
Data columns (total 25 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   review_id     200000 non-null  object        
 1   user_id       200000 non-null  object        
 2   business_id   200000 non-null  object        
 3   stars_x       200000 non-null  int64         
 4   useful        200000 non-null  int64         
 5   funny         200000 non-null  int64         
 6   cool          200000 non-null  int64         
 7   text          200000 non-null  object        
 8   date          200000 non-null  datetime64[ns]
 9   name          200000 non-null  object        
 10  address       200000 non-null  object        
 11  city          200000 non-null  object        
 12  state         200000 non-null  object        
 13  postal_code   200000 non-null  object        
 14  latitude      200000 non-null  float64       
 15  longitude     200

In [63]:
# Share of sampled reviews per metro area, expressed in percent
sample_city_percentages = (
    final_sample['metro']
    .value_counts(normalize=True)
    .mul(100)
)
print(sample_city_percentages)

metro
Philadelphia     24.7550
Tampa            18.5830
New Orleans      11.3065
Nashville         9.6675
Indianapolis      7.8465
St. Louis         7.3325
Reno              6.9445
Tucson            6.5950
Santa Barbara     4.4285
Boise             2.5410
Name: proportion, dtype: float64


In [64]:
# Share of sampled reviews per key category, expressed in percent
sample_cat_percentages = (
    final_sample['key_category']
    .value_counts(normalize=True)
    .mul(100)
)
print(sample_cat_percentages)

key_category
Restaurants             52.0
Local Services           4.0
Nightlife                4.0
Arts & Entertainment     4.0
Coffee & Tea             4.0
Active Life              4.0
Health & Medical         4.0
Home Services            4.0
Automotive               4.0
Hotels & Travel          4.0
other                    4.0
Beauty & Spas            4.0
Shopping                 4.0
Name: proportion, dtype: float64


In [71]:
# Preview the sample.
# NOTE(review): the recorded output contains a '_group' column that none of the
# cells shown in this notebook creates — presumably left over from a deleted
# cell; confirm the notebook still reproduces it after Restart & Run All.
final_sample.head()

Unnamed: 0,review_id,user_id,business_id,stars_x,useful,funny,cool,text,date,name,...,longitude,stars_y,review_count,is_open,attributes,categories,hours,metro,key_category,_group
0,28eJTfhgvTH8kp-uTxwiJQ,TdSL0iuzCTnk6w1ForNYaA,nrTM-aUY6pnJSN883JtTAA,1,1,1,0,I put my money in the machine and it took it. ...,2019-03-24 21:49:57,Carpinteria Coin-Op Laundry,...,-119.5118,1.5,11,0,,"Dry Cleaning & Laundry, Laundry Services, Loca...",,Santa Barbara,Local Services,Local Services_Santa Barbara
1,vApiFB2busO-8gmyTePMZg,2u9hFMDzUKjlohgu-arBfA,UpqeirS4fMMZmxYMfb9kbA,1,1,0,0,To top off that error...let's fast forward to ...,2019-02-02 13:46:29,Neptune Society,...,-82.737603,4.0,8,1,{'BusinessAcceptsCreditCards': 'True'},"Local Services, Funeral Services & Cemeteries","{'Monday': '8:30-17:0', 'Tuesday': '8:30-17:0'...",Tampa,Local Services,Local Services_Tampa
2,aT4BlRUQS72cj4I0JEzojg,ZLenvpRkaEZ-a9UJjdgqnQ,gHVHO7xQnj7ihlJJzpBe4g,5,1,0,0,"I must say, my kids loved going to A plus Lear...",2019-03-27 06:32:26,A Learning Center,...,-119.782133,4.5,6,1,{'BusinessAcceptsCreditCards': 'True'},"Child Care & Day Care, Education, Local Servic...","{'Monday': '6:30-18:0', 'Tuesday': '6:30-18:0'...",Reno,Local Services,Local Services_Reno
3,MzP1W-CAxUcwMHGOzwm_TQ,AW6o3QBHrs1na9r8Dcmodg,m_yauJ9qDf35tRxKB_L8OA,1,0,0,0,The worst excuse of a post office I have ever ...,2019-01-24 13:01:08,US Post Office,...,-75.225897,2.5,32,1,{'BusinessAcceptsCreditCards': 'True'},"Public Services & Government, Shipping Centers...","{'Monday': '8:0-16:30', 'Tuesday': '8:0-16:30'...",Philadelphia,Local Services,Local Services_Philadelphia
4,cYXvJuyJNRoxoHqYtx64Pw,lCKtrd5qMsbDWzH3MYTwHg,E81vHyNJtX0XJdTIinlajQ,1,4,0,1,I have been on hold with A&E in Reno NV for 44...,2019-12-04 00:15:03,A & E Factory Service,...,-119.813527,2.0,26,1,,"Local Services, Appliances & Repair",,Reno,Local Services,Local Services_Reno


In [85]:
# Reviews that were not drawn into the sample (matched on review_id)
sampled_review_ids = final_sample["review_id"]
already_sampled = merged_df["review_id"].isin(sampled_review_ids)
df_remaining = merged_df[~already_sampled]

In [86]:
# (rows, columns) of the reviews left outside the sample
df_remaining.shape

(696944, 25)

In [87]:
# Preview the leftover reviews; the index keeps the original merged_df labels
df_remaining.head()

Unnamed: 0,review_id,user_id,business_id,stars_x,useful,funny,cool,text,date,name,...,longitude,stars_y,review_count,is_open,attributes,categories,hours,metro,key_category,_group
0,ZE6tN0haDTapEKjd98SW6Q,Ek-7WpJ-zcxtklmQ3_6beQ,E1Ksu62cz9-EvvbgOFedMw,1,4,0,0,I have spoke with several different department...,2019-02-06 18:38:45,PODS Moving & Storage,...,-82.661509,1.5,47,1,"{'BikeParking': 'False', 'BusinessAcceptsCredi...","Self Storage, Home Services, Movers, Packing S...","{'Monday': '0:0-0:0', 'Tuesday': '8:0-17:0', '...",Tampa,Shopping,Shopping_Tampa
1,8uhhtC6-FKFqU6BLsKtSpg,nWRDqA-XXdju1jOMnN7QcA,IC1mLUQ_FmBEECFYjrDRfA,5,6,2,8,"Sleazed it up with their vegan brownie, which ...",2019-03-24 20:49:27,Bake'n Babes,...,-82.463661,4.0,142,1,"{'BusinessAcceptsCreditCards': 'True', 'BikePa...","Food, Bakeries, Desserts, Cupcakes","{'Monday': '11:0-22:0', 'Tuesday': '11:0-22:0'...",Tampa,other,other_Tampa
4,iy6XiIR-LdL3sGfO95d6uA,oNNL-ykTZx5S-xf66SOVjQ,IWqQjclp1fxK3vuqs5nNzA,4,2,0,0,"Fellow yelpers, I will begin my review with th...",2019-03-10 00:07:33,Bread Top House,...,-75.157311,4.5,152,1,"{'BikeParking': 'True', 'BusinessAcceptsCredit...","Food, Bakeries","{'Monday': '6:0-20:0', 'Tuesday': '6:0-20:0', ...",Philadelphia,other,other_Philadelphia
5,lk64Na1MvovzBGnAQqU3oA,Ga5GNyzqEnc4_U0LaZTjHg,IRMRvgD4uL4zTguZonV-uQ,1,0,0,0,Worst nail salon I've been to. My nails are al...,2019-08-13 15:59:21,Mi Nail Salon,...,-86.100298,4.5,259,1,"{'BusinessParking': '{'garage': False, 'street...","Hair Salons, Beauty & Spas, Nail Salons, Skin ...","{'Monday': '9:30-19:0', 'Tuesday': '9:30-17:0'...",Indianapolis,Beauty & Spas,Beauty & Spas_Indianapolis
6,3rJW3kwcb4C1v79-jr88Kw,mm6E4FbCMwJmb7kPDZ5v2Q,RCpFAdBZJM9ucpVMcYXiVA,5,1,0,0,"Over time, restaurants often loose quality in ...",2019-02-04 23:19:52,Gummlai Thai,...,-75.133142,4.5,168,1,"{'RestaurantsTakeOut': 'True', 'BusinessAccept...","Restaurants, Soup, Thai, Sushi Bars","{'Monday': '0:0-0:0', 'Tuesday': '16:0-20:0', ...",Philadelphia,Restaurants,Restaurants_Philadelphia


In [88]:
# Absolute number of leftover reviews per key category
df_remaining['key_category'].value_counts()

key_category
Restaurants             497121
Shopping                 45518
Beauty & Spas            38708
other                    28635
Hotels & Travel          21689
Automotive               20982
Home Services            16219
Health & Medical          8019
Active Life               7501
Coffee & Tea              6696
Arts & Entertainment      3144
Nightlife                 2670
Local Services              42
Name: count, dtype: int64

In [89]:
# Absolute number of sampled reviews per key category
final_sample['key_category'].value_counts()

key_category
Restaurants             104000
Local Services            8000
Nightlife                 8000
Arts & Entertainment      8000
Coffee & Tea              8000
Active Life               8000
Health & Medical          8000
Home Services             8000
Automotive                8000
Hotels & Travel           8000
other                     8000
Beauty & Spas             8000
Shopping                  8000
Name: count, dtype: int64

In [None]:
# TODO(review): unfinished cell. The statement originally written here was the
# incomplete assignment "training_sample =" with no right-hand side, which is a
# SyntaxError. Decide what `training_sample` should hold, or delete this cell.

In [98]:
# (rows, columns) of the sample
final_sample.shape

(200000, 8)

In [96]:
# Keep only the review-level columns for the training table. Selecting by name
# instead of by position does not depend on the column order produced by the
# merge, and raises a KeyError if an expected column is missing.
training_columns = [
    'review_id',
    'user_id',
    'business_id',
    'stars_x',
    'useful',
    'funny',
    'cool',
    'text',
]
final_sample = final_sample[training_columns]

In [97]:
# Preview the sample after it was reduced to the review-level columns
final_sample.head()

Unnamed: 0,review_id,user_id,business_id,stars_x,useful,funny,cool,text
0,28eJTfhgvTH8kp-uTxwiJQ,TdSL0iuzCTnk6w1ForNYaA,nrTM-aUY6pnJSN883JtTAA,1,1,1,0,I put my money in the machine and it took it. ...
1,vApiFB2busO-8gmyTePMZg,2u9hFMDzUKjlohgu-arBfA,UpqeirS4fMMZmxYMfb9kbA,1,1,0,0,To top off that error...let's fast forward to ...
2,aT4BlRUQS72cj4I0JEzojg,ZLenvpRkaEZ-a9UJjdgqnQ,gHVHO7xQnj7ihlJJzpBe4g,5,1,0,0,"I must say, my kids loved going to A plus Lear..."
3,MzP1W-CAxUcwMHGOzwm_TQ,AW6o3QBHrs1na9r8Dcmodg,m_yauJ9qDf35tRxKB_L8OA,1,0,0,0,The worst excuse of a post office I have ever ...
4,cYXvJuyJNRoxoHqYtx64Pw,lCKtrd5qMsbDWzH3MYTwHg,E81vHyNJtX0XJdTIinlajQ,1,4,0,1,I have been on hold with A&E in Reno NV for 44...


In [99]:
# Upload the sample as table 'training_sample'; an existing table with that
# name is replaced
final_sample.to_sql(
    name='training_sample',
    con=engine,
    schema=pg_schema,  # create the table inside the configured schema
    if_exists='replace',
    index=False,
)

1000